In [255]:
import pandas as pd
import os
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
# Current working directory at the moment this cell runs.
path = os.getcwd()
# Switch into the project's `src` directory before the project-local imports
# (`crawlers`, `filters`) that follow -- presumably so they resolve from the
# working directory; confirm against the project layout.
# NOTE(review): the target is a hard-coded absolute Windows path. On Windows
# os.path.join discards `path` when a later component is absolute, so the
# join has no effect there; on other systems it produces a path below the
# current directory. Consider a configurable project-root setting instead.
os.chdir(os.path.join(path, 'e:/school/dpav/vizdom/src'))
from crawlers.url_crawlers import get_our_world_in_data, get_our_world_in_data_attributes
from sklearn.linear_model import LassoCV, Lasso, Ridge, ElasticNet, LinearRegression
from sklearn.model_selection import RepeatedKFold
from filters.base_filters import country_based_interpolation, keep_columns_by_name, drop_rows_with_OWID, drop_rows_with_occurrence_number
import plotly.graph_objects as go
from numpy import arange
# Names of the columns to keep from the raw dataset: the keys of the crawler's
# attribute mapping (a live keys view if the mapping is a dict -- confirm).
# The name is rebound to a plain list in a later cell before being edited.
attributes = get_our_world_in_data_attributes.keys()

In [256]:
# Fetch the Our World in Data dataset through the project crawler.
raw_data = get_our_world_in_data()

# Apply the project filters in sequence (innermost call runs first):
#   1. drop_rows_with_OWID              -- presumably removes OWID aggregate rows; confirm
#   2. drop_rows_with_occurrence_number -- called with 1; semantics live in filters.base_filters
#   3. keep_columns_by_name             -- restrict to the columns named in `attributes`
filtered_data = keep_columns_by_name(
    drop_rows_with_occurrence_number(drop_rows_with_OWID(raw_data), 1),
    attributes,
)

In [257]:
# Run the project's per-country interpolation helper over the filtered data
# (presumably fills gaps within each country -- confirm in filters.base_filters).
interpolated_data = country_based_interpolation(filtered_data)

# Model features: every configured attribute except the identifier and time
# columns. list.remove raises ValueError if one of them is absent, which
# surfaces a changed attribute mapping immediately.
attributes = list(get_our_world_in_data_attributes.keys())
for non_feature in ('continent', 'iso_code', 'location', 'date'):
    attributes.remove(non_feature)

In [258]:
# Rows for Norway only, indexed by date. set_index('date') moves the column
# into the index, so it no longer appears among the regular columns.
is_norway = interpolated_data['location'] == 'Norway'
norway = interpolated_data[is_norway].set_index('date')

# Feature matrix (the columns listed in `attributes`) and target series.
x = norway[attributes]
y = norway['new_cases']

In [259]:

# Random 70/30 split of the Norway rows; the fixed seed keeps it reproducible.
# NOTE(review): rows are sampled at random, so the training rows are not
# consecutive days even after they are re-sorted by date further down.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=100,
)

# From here on `y` is a one-column DataFrame rather than a Series.
# NOTE(review): re-running this cell without first re-running the previous
# one calls .to_frame() on a DataFrame, which fails.
y = y.to_frame()

In [260]:
# Make sure every split is a DataFrame (y_train comes out of the split as a
# Series on a fresh run).
x_train, x_test, y_train = (
    pd.DataFrame(split) for split in (x_train, x_test, y_train)
)

# Put the rows back into chronological order; 'date' is the name of the index
# that was set on `norway`.
x_train_sort, x_test_sort, y_train_sort = (
    split.sort_values(by='date') for split in (x_train, x_test, y_train)
)

In [261]:
# Pair every training row with the new_cases value found 90 rows further down,
# so the target becomes a look-ahead value. The shift counts rows, not
# calendar days, and leaves NaN in the final 90 rows.
y_train_shift = y_train_sort.shift(periods=-90)


In [262]:
# Number of target cells that are NaN after the look-ahead shift.
number_of_nans = y_train_shift.isnull().values.ravel().sum()

# Keep exactly the rows whose shifted target is present and apply the SAME
# positional mask to the features. Trimming the features with a fixed
# `iloc[:-90]` while trimming the target with dropna() only lines up when the
# target had no NaN of its own; a shared mask keeps X and y aligned in every
# case. Both frames must have the same row order and length (they come from
# the same split, sorted by date); a length mismatch raises instead of
# silently misaligning.
has_target = y_train_shift.notna().all(axis=1).to_numpy()
x_train_sort_dropped = x_train_sort.loc[has_target]
y_train_shift_dropped = y_train_shift.loc[has_target]


In [263]:
# Two regularised polynomial regressions sharing the same layout:
# standardise the features, expand them into polynomial terms, then fit a
# penalised linear model. Step names are kept as-is because they are the keys
# used to address the steps on the fitted pipeline.
steps_ridge = [
    ('scalar', StandardScaler()),
    ('poly', PolynomialFeatures(degree=4)),
    ('model', Ridge(alpha=10, fit_intercept=True))
]
steps_lasso = [
    ('scalar', StandardScaler()),
    ('poly', PolynomialFeatures(degree=2)),
    ('model', Lasso(alpha=0.3, fit_intercept=True))
]
ridge_pipe = Pipeline(steps_ridge)
lasso_pipe = Pipeline(steps_lasso)

# Fit both models on the rows that have a look-ahead target.
# NOTE(review): the recorded run emitted "Objective did not converge" for this
# cell; consider a larger max_iter or stronger regularisation for the Lasso.
lasso_pipe.fit(x_train_sort_dropped, y_train_shift_dropped['new_cases'])
ridge_pipe.fit(x_train_sort_dropped, y_train_shift_dropped['new_cases'])

# Predict for every sorted training row, including the trailing rows that had
# no target during fitting. yhat0 is the ridge forecast and yhat1 the lasso
# forecast; previously both came from ridge_pipe, so the trace labelled
# "lasso" in the plot was a copy of the ridge curve.
yhat0 = ridge_pipe.predict(x_train_sort)
yhat1 = lasso_pipe.predict(x_train_sort)


Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. Duality gap: 8.604e+08, tolerance: 1.069e+06



In [264]:
# cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
# lasso_cv = LassoCV(alphas=arange(0, 1, 0.01), cv=cv, n_jobs=-1)
# lasso = Lasso(alpha=6.475e-7, max_iter=100000)
# linear = LinearRegression() 
# ridge = Ridge(alpha=6.475e-7, max_iter=100000, solver='sag')
# elastic = ElasticNet(alpha = 6.475e-7, max_iter=100000)
# lasso_cv.fit(x_train_sort_dropped, y_train_shift_dropped['total_cases'])
# lasso.fit(x_train_sort_dropped, y_train_shift_dropped['total_cases'])
# linear.fit(x_train_sort_dropped, y_train_shift_dropped['total_cases'])
# elastic.fit(x_train_sort_dropped, y_train_shift_dropped['total_cases'])
# ridge.fit(x_train_sort_dropped, y_train_shift_dropped['total_cases']) 
# yhat0 = lasso_cv.predict(x_train_sort)
# yhat1 = lasso.predict(x_train_sort)
# yhat2 = linear.predict(x_train_sort)
# yhat3 = ridge.predict(x_train_sort)
# yhat4 = elastic.predict(x_train_sort)

In [265]:

# Date axis for the forecasts: the look-ahead target with its index moved 90
# days forward. It is rebuilt from `y_train_sort` every time, so re-running
# this cell does not push the dates another 90 days on each execution (the
# previous self-referential `y_train_shift = y_train_shift.shift(...)` did).
y_train_shift = y_train_sort.shift(-90, axis=0).shift(90, freq='D')

# Smoothed versions of the two forecasts (31- and 60-row rolling means).
# They are computed for optional overlays and are not plotted below.
prediction_1 = pd.Series(yhat0)
rolling_mean_1 = prediction_1.rolling(31).mean()
prediction_2 = pd.Series(yhat1)
rolling_mean_2 = prediction_2.rolling(60).mean()

fig = go.Figure()

# One line per model forecast, drawn against the shifted date axis.
for trace_name, trace_colour, trace_values in (
    ('ridge', 'cyan', yhat0),
    ('lasso', 'green', yhat1),
):
    fig.add_trace(go.Scatter(
        x=y_train_shift.index, y=trace_values,
        line_color=trace_colour,
        name=trace_name,
    ))

# Observed series for comparison (`y` is a one-column DataFrame here).
fig.add_trace(go.Scatter(
    x=y.index, y=y['new_cases'],
    line_color='yellow',
    name='original',
))

# A figure should be readable on its own when the notebook is skimmed.
fig.update_layout(
    title='Norway new_cases: 90-step-ahead predictions vs. observed',
    xaxis_title='date',
    yaxis_title='new_cases',
)

# Display the figure. Rendering inside the notebook needs nbformat>=4.2.0 to
# be installed in the kernel environment (see the error recorded for this cell).
fig


ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed