In [None]:
#Importing Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from seaborn import set_style
from datetime import datetime
set_style("whitegrid")

In [None]:
#Using full_fred toolkit to access FRED API Server
from full_fred.fred import Fred

In [None]:
# Create the FRED client, pointing it at the text file that holds the API key
# (the key itself is kept out of the notebook).
fred = Fred('FRED_API_KEY.txt')
# Display the key-file setting held by the client.
# NOTE(review): presumably this echoes the file name passed above - confirm against full_fred.
fred.get_api_key_file()  

In [None]:
# Register the API-key file on the client.
# NOTE(review): the same file name was already passed when `fred` was constructed;
# this call looks redundant - confirm against full_fred and drop if so.
fred.set_api_key_file('FRED_API_KEY.txt')

In [None]:
# Download the six FRED series GS1 ... GS10, one DataFrame per series.
df_1, df_2, df_3, df_5, df_7, df_10 = (
    fred.get_series_df(series_id)
    for series_id in ('GS1', 'GS2', 'GS3', 'GS5', 'GS7', 'GS10')
)

In [None]:
df_1.head()

In [None]:
# Remove the two realtime_* bookkeeping columns; they are not used below.
df_1, df_2, df_3, df_5, df_7, df_10 = (
    frame.drop(columns=['realtime_start', 'realtime_end'])
    for frame in (df_1, df_2, df_3, df_5, df_7, df_10)
)

In [None]:
# Give each frame's 'value' column a series-specific name so the frames can
# be combined into a single DataFrame without column clashes.
df_1, df_2, df_3, df_5, df_7, df_10 = (
    frame.rename(columns={'value': 'value_' + suffix})
    for frame, suffix in zip(
        (df_1, df_2, df_3, df_5, df_7, df_10),
        ('1', '2', '3', '5', '7', '10'),
    )
)

In [None]:
# Inner-join all six frames on 'date', so only dates present in every
# series survive.
comb_df = df_1
for other in (df_2, df_3, df_5, df_7, df_10):
    comb_df = comb_df.merge(other, how='inner', on='date')

In [None]:
comb_df

In [None]:
#Sliding Window Function
def sliding_window(arr,N):
    """Return every length-N window of `arr`, stepping one element at a time.

    Parameters
    ----------
    arr : sequence or array-like
        Values convertible to float (numeric strings are accepted).
    N : int
        Window length; must be at least 1.

    Returns
    -------
    numpy.ndarray
        Float array whose row i is ``arr[i:i+N]``. There are
        ``len(arr) - N + 1`` rows; when `arr` is shorter than N the result is
        an empty array that still has N columns, so callers that assign the
        result to N named columns see a consistent shape.

    Raises
    ------
    ValueError
        If N is smaller than 1.
    """
    if N < 1:
        raise ValueError("window length N must be >= 1, got %r" % (N,))
    values = np.asarray(arr, dtype=float)
    n_windows = len(values) - N + 1
    if n_windows <= 0:
        # Keep the window axis even when no full window fits.
        return np.empty((0, N) + values.shape[1:], dtype=float)
    return np.stack([values[start:start + N] for start in range(n_windows)])

In [None]:
#Helper Function to get Column Names
def getColNamesforValue(col,N):
    """Return the N column names `col` + '0', `col` + '1', ..., `col` + str(N-1)."""
    names = []
    for position in range(N):
        names.append(col + str(position))
    return names

In [None]:
# 24 month Sliding Window Model
# Each row holds N = 25 consecutive observations per series: positions
# 0..23 are used as inputs further down, position 24 as the target.
N = 25
comb_df_sw = pd.concat(
    [
        pd.DataFrame(
            sliding_window(comb_df['value_' + suffix], N),
            columns=getColNamesforValue('value_' + suffix + '_', N),
        )
        for suffix in ('1', '2', '3', '5', '7', '10')
    ],
    axis=1,
)

In [None]:
comb_df_sw

In [None]:
#comb_df_sw['date'] = comb_df.iloc[0:len(comb_df.index)+1-N]['date']

In [None]:
comb_df_sw

In [None]:
comb_df_sw['value_1_24']

In [None]:
#Preparing X and Y
# The target of each window is its last position, i.e. the columns ending in
# '_<N-1>' ('_24' for N = 25). Matching the exact suffix instead of the
# substring '24' keeps the split right if N changes (with a longer window a
# name such as 'value_1_124' would otherwise be picked up as a target).
# NOTE: later cells still hard-code the '_23' / '_24' labels.
target_suffix = '_' + str(N - 1)
Y_cols = [a for a in comb_df_sw.columns if a.endswith(target_suffix)]
X_cols = [a for a in comb_df_sw.columns if not a.endswith(target_suffix)]

comb_df_sw_X = comb_df_sw[X_cols]
comb_df_sw_X

In [None]:
# Target frame: the columns collected in Y_cols.
comb_df_sw_Y = comb_df_sw.loc[:, Y_cols]
comb_df_sw_Y

In [None]:
# Chronological split by row position: first 70 % training, next 10 %
# validation, final 20 % test.
k_l = int(len(comb_df_sw_X.index)*0.7)
k_v = int(len(comb_df_sw_X.index)*0.8)

comb_df_sw_TrainingX = comb_df_sw_X.iloc[0:k_l,:].copy(deep=True)
comb_df_sw_TrainingY = comb_df_sw_Y.iloc[0:k_l,:].copy(deep=True)
comb_df_sw_ValX = comb_df_sw_X.iloc[k_l:k_v,:].copy(deep=True)
comb_df_sw_ValY = comb_df_sw_Y.iloc[k_l:k_v,:].copy(deep=True)
comb_df_sw_TestX = comb_df_sw_X.iloc[k_v:,:].copy(deep=True)
# Test targets must come from the Y frame (this was previously sliced from
# the X frame, which made TestY a copy of the test features).
comb_df_sw_TestY = comb_df_sw_Y.iloc[k_v:,:].copy(deep=True)

In [None]:

from sklearn.metrics import mean_squared_error

In [None]:
#Helper Function to calculate mean_sq
def mean_sq_calculate(df_1,df_2):
    """Return the mean squared error per series, in the order 1, 2, 3, 5, 7, 10.

    `df_1` supplies the columns 'value_<m>' and `df_2` the columns
    'value_<m>_24'; each pair is passed to mean_squared_error in that order.
    """
    return [
        mean_squared_error(df_1['value_' + suffix], df_2['value_' + suffix + '_24'])
        for suffix in ('1', '2', '3', '5', '7', '10')
    ]
    

In [None]:
# Baseline comparison on the validation (hold-out) rows:
#   'AVG'    - predict the target as the mean of that series' 24 window values
#   'LinReg' - one LinearRegression per series, fitted on every window column
# The six series used to be handled by six copy-pasted stanzas; they now go
# through one loop so the per-series code cannot drift apart.
from sklearn.linear_model import LinearRegression

i = 0  # stored in the 'N' column of the results table

mean_sq=[]  # rows of [i, model name, MSE per series]
score = []  # rows of LinearRegression.score() per series
X_train = comb_df_sw_TrainingX
Y_train = comb_df_sw_TrainingY
X_ho = comb_df_sw_ValX
Y_ho = comb_df_sw_ValY

maturities = ['1', '2', '3', '5', '7', '10']

#Average of Sliding Window for that variable
avg_pred = pd.DataFrame()
for m in maturities:
    avg_pred['value_' + m] = X_ho.loc[:, 'value_' + m + '_0':'value_' + m + '_23'].mean(axis=1)

avg_mean_sq = [i,'AVG']+mean_sq_calculate(avg_pred,Y_ho)
mean_sq.append(avg_mean_sq)

#Linear Regression
lin_reg = pd.DataFrame()
linreg_models = {}  # fitted model per series, kept for later inspection
s = []
for m in maturities:
    target = 'value_' + m + '_24'
    fitted = LinearRegression().fit(X_train, Y_train[target])
    linreg_models[m] = fitted
    lin_reg['value_' + m] = fitted.predict(X_ho)
    s.append(fitted.score(X_ho, Y_ho[target]))

linreg_mean_sq = [i,'LinReg']+mean_sq_calculate(lin_reg,Y_ho)
mean_sq.append(linreg_mean_sq)

score.append(s)

i+=1



In [None]:
# Validation MSE per series: window average vs. linear regression on the
# full sliding window.
result_columns = ['N','Model','Value_1','Value_2','Value_3','Value_5','Value_7','Value_10']
mean_sq = pd.DataFrame(data=mean_sq, columns=result_columns)
mean_sq

In [None]:
# LinearRegression.score() per series for the full-window model.
score_columns = ['Value_1','Value_2','Value_3','Value_5','Value_7','Value_10']
score = pd.DataFrame(data=score, columns=score_columns)
score

In [None]:
#Using Lasso on df_1
# Same row split as above: rows [0, k_l) for fitting, rows [k_l, k_v) for scoring.
Lasso_X_train = comb_df_sw_X.iloc[:k_l].copy(deep=True)
Lasso_Y_train = comb_df_sw_Y.iloc[:k_l].copy(deep=True)
Lasso_X_test = comb_df_sw_X.iloc[k_l:k_v].copy(deep=True)
Lasso_Y_test = comb_df_sw_Y.iloc[k_l:k_v].copy(deep=True)

In [None]:
from sklearn.linear_model import Lasso
# Default-parameter Lasso for the value_1 target, fitted on every window column.
Lasso_model_1 = Lasso().fit(Lasso_X_train, Lasso_Y_train['value_1_24'])
Lasso_model_1

In [None]:
# Score of the value_1 Lasso model on the hold-out rows.
Lasso_model_1.score(Lasso_X_test, Lasso_Y_test['value_1_24'].to_numpy())

In [None]:
# Row 0: feature names, row 1: fitted coefficients. Show the ten largest.
coeff_DF_1 = pd.DataFrame([Lasso_X_train.columns, Lasso_model_1.coef_])
coeff_DF_1.transpose().sort_values(by=1, ascending=False).head(10)

In [None]:
#Using Lasso on df_2
# Fit, print the hold-out score, then list the ten largest coefficients.
Lasso_model_2 = Lasso().fit(Lasso_X_train, Lasso_Y_train['value_2_24'])

print(Lasso_model_2.score(Lasso_X_test, Lasso_Y_test['value_2_24'].to_numpy()))

coeff_DF_2 = pd.DataFrame([Lasso_X_train.columns, Lasso_model_2.coef_])
coeff_DF_2.transpose().sort_values(by=1, ascending=False).head(10)

In [None]:
#Lasso on df_3
# Fit, print the hold-out score, then list the ten largest coefficients.
Lasso_model_3 = Lasso().fit(Lasso_X_train, Lasso_Y_train['value_3_24'])

print(Lasso_model_3.score(Lasso_X_test, Lasso_Y_test['value_3_24'].to_numpy()))

coeff_DF_3 = pd.DataFrame([Lasso_X_train.columns, Lasso_model_3.coef_])
coeff_DF_3.transpose().sort_values(by=1, ascending=False).head(10)

In [None]:
#Lasso on df_5
# Fit, print the hold-out score, then list the ten largest coefficients.
Lasso_model_5 = Lasso().fit(Lasso_X_train, Lasso_Y_train['value_5_24'])

print(Lasso_model_5.score(Lasso_X_test, Lasso_Y_test['value_5_24'].to_numpy()))

coeff_DF_5 = pd.DataFrame([Lasso_X_train.columns, Lasso_model_5.coef_])
coeff_DF_5.transpose().sort_values(by=1, ascending=False).head(10)

In [None]:
#Lasso on df_7
# Fit, print the hold-out score, then list the ten largest coefficients.
Lasso_model_7 = Lasso().fit(Lasso_X_train, Lasso_Y_train['value_7_24'])

print(Lasso_model_7.score(Lasso_X_test, Lasso_Y_test['value_7_24'].to_numpy()))

coeff_DF_7 = pd.DataFrame([Lasso_X_train.columns, Lasso_model_7.coef_])
coeff_DF_7.transpose().sort_values(by=1, ascending=False).head(10)

In [None]:
#Lasso on df_10
# Fit, print the hold-out score, then list the ten largest coefficients.
Lasso_model_10 = Lasso().fit(Lasso_X_train, Lasso_Y_train['value_10_24'])

print(Lasso_model_10.score(Lasso_X_test, Lasso_Y_test['value_10_24'].to_numpy()))

coeff_DF_10 = pd.DataFrame([Lasso_X_train.columns, Lasso_model_10.coef_])
coeff_DF_10.transpose().sort_values(by=1, ascending=False).head(10)

In [None]:
#Using Linear Regression on only the last month's entries since Lasso tells us that's only what matters.
# Same evaluation as the full-window comparison, but each LinearRegression
# sees only a few position-23 columns. The six copy-pasted stanzas are
# replaced by one loop over an explicit feature map, so the columns used for
# fit, predict and score can never disagree.
from sklearn.linear_model import LinearRegression

i = 0  # stored in the 'N' column of the results table

mean_sq=[]
score_1 = []

X_train = comb_df_sw_TrainingX
Y_train = comb_df_sw_TrainingY
X_ho = comb_df_sw_ValX
Y_ho = comb_df_sw_ValY

# Feature columns used by each series' model.
last_month_features = {
    '1': ['value_1_23', 'value_2_23'],
    '2': ['value_1_23', 'value_2_23'],
    '3': ['value_2_23', 'value_3_23'],
    '5': ['value_3_23', 'value_5_23'],
    '7': ['value_3_23', 'value_5_23', 'value_7_23'],
    '10': ['value_5_23', 'value_7_23', 'value_10_23'],
}

#Average of Sliding Window for that variable
avg_pred = pd.DataFrame()
for m in last_month_features:
    avg_pred['value_' + m] = X_ho.loc[:, 'value_' + m + '_0':'value_' + m + '_23'].mean(axis=1)

avg_mean_sq = [i,'AVG']+mean_sq_calculate(avg_pred,Y_ho)
mean_sq.append(avg_mean_sq)

#Linear Regression
lin_reg = pd.DataFrame()
linreg_models = {}  # fitted model per series, kept for later inspection
s = []
for m, features in last_month_features.items():
    target = 'value_' + m + '_24'
    fitted = LinearRegression().fit(X_train.loc[:, features], Y_train[target])
    linreg_models[m] = fitted
    lin_reg['value_' + m] = fitted.predict(X_ho.loc[:, features])
    s.append(fitted.score(X_ho.loc[:, features], Y_ho[target]))

linreg_mean_sq = [i,'LinReg']+mean_sq_calculate(lin_reg,Y_ho)
mean_sq.append(linreg_mean_sq)

score_1.append(s)

i+=1



In [None]:
# Validation MSE per series: window average vs. linear regression restricted
# to the last-month (position 23) columns.
result_columns = ['N','Model','Value_1','Value_2','Value_3','Value_5','Value_7','Value_10']
mean_sq = pd.DataFrame(data=mean_sq, columns=result_columns)
mean_sq

In [None]:
# LinearRegression.score() per series for the last-month model.
score_columns = ['Value_1','Value_2','Value_3','Value_5','Value_7','Value_10']
score_1 = pd.DataFrame(data=score_1, columns=score_columns)
score_1

In [None]:
# Side-by-side scores: Model1 = full-window regression (`score`),
# Model2 = last-month regression (`score_1`), plus their difference.
model_1_score = score.mean(axis=0)
model_2_score = score_1.mean(axis=0)
comp_model_scores = pd.concat([model_1_score, model_2_score], axis=1)
comp_model_scores['diff'] = comp_model_scores[0] - comp_model_scores[1]
comp_model_scores = comp_model_scores.rename(
    columns={0: 'Model1', 1: 'Model2', 'diff': 'Model1-Model2'}
)

In [None]:
#Arima Models

In [None]:
#Changing DataTypes
# Parse the date strings and cast every value column to float.
comb_df['date'] = pd.to_datetime(comb_df['date'], format='%Y-%m-%d')

for maturity in (1, 2, 3, 5, 7, 10):
    column = 'value_' + str(maturity)
    comb_df[column] = comb_df[column].astype(float)


In [None]:
from matplotlib import pyplot
# Index by date so the line plot uses the dates on the x-axis.
comb_df = comb_df.set_index('date')
comb_df.plot()
pyplot.show()

In [None]:
# autocorrelation_plot is documented for a single Series. Passing the whole
# DataFrame pools all columns into one curve, so draw one figure per column.
for column in comb_df.columns:
    pyplot.figure()
    ax = pd.plotting.autocorrelation_plot(comb_df[column])
    ax.set_title('Autocorrelation of ' + str(column))
    pyplot.show()

In [None]:
comb_df

In [None]:
from statsmodels.tsa.arima.model import ARIMA
from math import sqrt
# Convert dtypes BEFORE slicing: the train/test frames are copies, so
# converting df_1 afterwards left them holding the raw string values.
df_1['date'] = pd.to_datetime(df_1['date'],format='%Y-%m-%d')
df_1['value_1'] = df_1['value_1'].astype(float)
# NOTE(review): k_l / k_v were computed from the sliding-window frame, while
# the other series below use an 80 % / 20 % split of their own length -
# confirm that this difference is intended.
df_1_train = df_1.iloc[:k_l].dropna().copy()
df_1_test = df_1.iloc[k_l:k_v].copy()

In [None]:
# Walk-forward validation: refit ARIMA(2,1,1) on everything seen so far,
# forecast one step ahead, then append the true observation and continue.
history = [float(x) for x in df_1_train.value_1]
predictions = list()
for t in range(len(df_1_test.index)):
    model = ARIMA(history, order=(2,1,1))
    model_fit = model.fit()
    output = model_fit.forecast()
    yhat = output[0]
    predictions.append(yhat)
    obs = df_1_test.iat[t,1]  # column 1 is 'value_1'
    # Coerce to float like the sibling loops do; a raw string here would
    # turn the numeric history into a mixed-type list.
    history.append(float(obs))
    print(f'predicted={str(yhat)}, expected={str(obs)}')


In [None]:
from sklearn.metrics import r2_score

# Evaluate the value_1 ARIMA forecasts. Actuals are cast to float so the
# metrics and the plot treat them as numbers even if they are still strings.
actual_1 = df_1_test.value_1.astype(float)
# sqrt(MSE) instead of mean_squared_error(..., squared=False): the `squared`
# argument was deprecated in scikit-learn 1.4 and removed in 1.6.
rmse_1 = sqrt(mean_squared_error(actual_1, predictions))
Arima_scores = []  # collects one r2 per series, in execution order
r2_1 = r2_score(actual_1, predictions)
Arima_scores.append(r2_1)
print('Test RMSE: %.3f' % rmse_1)
print(r2_1)
# plot forecasts against actual outcomes
pyplot.plot(list(actual_1))
pyplot.plot(predictions, color='red')
pyplot.show()

In [None]:
# Display the model-comparison table. The result is not assigned, so this call
# changes nothing; columns 0, 1 and 'diff' were already renamed in place when
# comp_model_scores was built.
# NOTE(review): the label used here ('Model1 - Model2', with spaces) differs
# from the 'Model1-Model2' label applied by that earlier in-place rename.
comp_model_scores.rename(columns={0:'Model1',1:'Model2','diff':'Model1 - Model2'})

In [None]:
# Parse dtypes, then split by position: first 80 % to fit, last 20 % to test.
df_2['date'] = pd.to_datetime(df_2['date'], format='%Y-%m-%d')
df_2['value_2'] = df_2['value_2'].astype(float)
l2 = int(len(df_2) * 0.8)
df_2_train = df_2.iloc[:l2].dropna().copy()
df_2_test = df_2.iloc[l2:].copy()

In [None]:
# Walk-forward validation for value_2: refit ARIMA(2,1,1) on the growing
# history, forecast one step, then reveal the true observation.
history_2 = df_2_train.value_2.astype(float).tolist()
predictions_2 = []
for t in range(len(df_2_test)):
    model_2 = ARIMA(history_2, order=(2, 1, 1))
    model_2_fit = model_2.fit()
    yhat = model_2_fit.forecast()[0]
    predictions_2.append(yhat)
    obs = df_2_test.iat[t, 1]  # column 1 is 'value_2'
    history_2.append(float(obs))
    print(f'predicted={str(yhat)}, expected={str(obs)}')


In [None]:
# evaluate forecasts
actual_2 = df_2_test.value_2.astype(float)
# sqrt(MSE) instead of mean_squared_error(..., squared=False): the `squared`
# argument was deprecated in scikit-learn 1.4 and removed in 1.6.
rmse_2 = sqrt(mean_squared_error(actual_2, predictions_2))
r2_2 = r2_score(actual_2, predictions_2)
Arima_scores.append(r2_2)
print('Test RMSE: %.3f' % rmse_2)
print(r2_2)
# plot forecasts against actual outcomes
pyplot.plot(list(actual_2))
pyplot.plot(list(predictions_2), color='red')
pyplot.show()

In [None]:
# Parse dtypes, then split by position: first 80 % to fit, last 20 % to test.
l3 = int(len(df_3)*0.8)
df_3['date'] = pd.to_datetime(df_3['date'],format='%Y-%m-%d')
# Convert 'value_3' in place. The previous code wrote the float copy to a
# stray 'value_2' column and left 'value_3' as strings.
df_3['value_3'] = df_3['value_3'].astype(float)
df_3_train = df_3.iloc[:l3].copy()
df_3_test = df_3.iloc[l3:].copy()
df_3_train = df_3_train.dropna().copy()

In [None]:
# Walk-forward validation for value_3: refit ARIMA(2,1,1) on the growing
# history, forecast one step, then reveal the true observation.
history_3 = df_3_train.value_3.astype(float).tolist()
predictions_3 = []
for t in range(len(df_3_test)):
    model_3 = ARIMA(history_3, order=(2, 1, 1))
    model_3_fit = model_3.fit()
    yhat = model_3_fit.forecast()[0]
    predictions_3.append(yhat)
    obs = df_3_test.iat[t, 1]  # column 1 is 'value_3'
    history_3.append(float(obs))
    print(f'predicted={str(yhat)}, expected={str(obs)}')


In [None]:
# evaluate forecasts
actual_3 = df_3_test.value_3.astype(float)
# sqrt(MSE) instead of mean_squared_error(..., squared=False): the `squared`
# argument was deprecated in scikit-learn 1.4 and removed in 1.6.
rmse_3 = sqrt(mean_squared_error(actual_3, predictions_3))
r2_3 = r2_score(actual_3, predictions_3)
print('Test RMSE: %.3f' % rmse_3)
print(r2_3)
Arima_scores.append(r2_3)
# plot forecasts against actual outcomes
pyplot.plot(list(actual_3))
pyplot.plot(list(predictions_3), color='red')
pyplot.show()

In [None]:
# Parse dtypes, then split by position: first 80 % to fit, last 20 % to test.
l5 = int(len(df_5)*0.8)
df_5['date'] = pd.to_datetime(df_5['date'],format='%Y-%m-%d')
# Convert 'value_5' in place. The previous code wrote the float copy to a
# stray 'value_2' column and left 'value_5' as strings.
df_5['value_5'] = df_5['value_5'].astype(float)
df_5_train = df_5.iloc[:l5].copy()
df_5_test = df_5.iloc[l5:].copy()
df_5_train = df_5_train.dropna().copy()

In [None]:
# Walk-forward validation for value_5: refit ARIMA(2,1,1) on the growing
# history, forecast one step, then reveal the true observation.
history_5 = df_5_train.value_5.astype(float).tolist()
predictions_5 = []
for t in range(len(df_5_test)):
    model_5 = ARIMA(history_5, order=(2, 1, 1))
    model_5_fit = model_5.fit()
    yhat = model_5_fit.forecast()[0]
    predictions_5.append(yhat)
    obs = df_5_test.iat[t, 1]  # column 1 is 'value_5'
    history_5.append(float(obs))
    print(f'predicted={str(yhat)}, expected={str(obs)}')


In [None]:
# evaluate forecasts
actual_5 = df_5_test.value_5.astype(float)
# sqrt(MSE) instead of mean_squared_error(..., squared=False): the `squared`
# argument was deprecated in scikit-learn 1.4 and removed in 1.6.
rmse_5 = sqrt(mean_squared_error(actual_5, predictions_5))
r2_5 = r2_score(actual_5, predictions_5)
print('Test RMSE: %.3f' % rmse_5)
print(r2_5)
Arima_scores.append(r2_5)
# plot forecasts against actual outcomes
pyplot.plot(list(actual_5))
pyplot.plot(list(predictions_5), color='red')
pyplot.show()

In [None]:
# Parse dtypes, then split by position: first 80 % to fit, last 20 % to test.
l7 = int(len(df_7)*0.8)
df_7['date'] = pd.to_datetime(df_7['date'],format='%Y-%m-%d')
# Convert 'value_7' in place. The previous code wrote the float copy to a
# stray 'value_2' column and left 'value_7' as strings.
df_7['value_7'] = df_7['value_7'].astype(float)
df_7_train = df_7.iloc[:l7].copy()
df_7_test = df_7.iloc[l7:].copy()
df_7_train = df_7_train.dropna().copy()

In [None]:
# Walk-forward validation for value_7: refit ARIMA(2,1,1) on the growing
# history, forecast one step, then reveal the true observation.
history_7 = df_7_train.value_7.astype(float).tolist()
predictions_7 = []
for t in range(len(df_7_test)):
    model_7 = ARIMA(history_7, order=(2, 1, 1))
    model_7_fit = model_7.fit()
    yhat = model_7_fit.forecast()[0]
    predictions_7.append(yhat)
    obs = df_7_test.iat[t, 1]  # column 1 is 'value_7'
    history_7.append(float(obs))
    print(f'predicted={str(yhat)}, expected={str(obs)}')


In [None]:
# evaluate forecasts
actual_7 = df_7_test.value_7.astype(float)
# sqrt(MSE) instead of mean_squared_error(..., squared=False): the `squared`
# argument was deprecated in scikit-learn 1.4 and removed in 1.6.
rmse_7 = sqrt(mean_squared_error(actual_7, predictions_7))
r2_7 = r2_score(actual_7, predictions_7)
print('Test RMSE: %.3f' % rmse_7)
print(r2_7)
Arima_scores.append(r2_7)
# plot forecasts against actual outcomes
pyplot.plot(list(actual_7))
pyplot.plot(list(predictions_7), color='red')
pyplot.show()

In [None]:
# Parse dtypes, then split by position: first 80 % to fit, last 20 % to test.
df_10['date'] = pd.to_datetime(df_10['date'], format='%Y-%m-%d')
df_10['value_10'] = df_10['value_10'].astype(float)
l10 = int(len(df_10) * 0.8)
df_10_train = df_10.iloc[:l10].dropna().copy()
df_10_test = df_10.iloc[l10:].copy()

In [None]:
# Walk-forward validation for value_10: refit ARIMA(2,1,1) on the growing
# history, forecast one step, then reveal the true observation.
history_10 = [float(x) for x in df_10_train.value_10]
predictions_10 = list()
for t in range(len(df_10_test)):
    model_10 = ARIMA(history_10, order=(2,1,1))
    model_10_fit = model_10.fit()
    output = model_10_fit.forecast()
    yhat = output[0]
    predictions_10.append(yhat)
    obs = df_10_test.iat[t,1]  # column 1 is 'value_10'
    # Coerce to float, as the loops for the other series do, so the history
    # stays numeric regardless of the column's dtype.
    history_10.append(float(obs))
    print(f'predicted={str(yhat)}, expected={str(obs)}')


In [None]:
# evaluate forecasts
actual_10 = df_10_test.value_10.astype(float)
# sqrt(MSE) instead of mean_squared_error(..., squared=False): the `squared`
# argument was deprecated in scikit-learn 1.4 and removed in 1.6.
rmse_10 = sqrt(mean_squared_error(actual_10, predictions_10))
r2_10 = r2_score(actual_10, predictions_10)
# Report the value_10 metrics (this cell previously printed rmse_7 / r2_7).
print('Test RMSE: %.3f' % rmse_10)
print(r2_10)
Arima_scores.append(r2_10)
# plot forecasts against actual outcomes
pyplot.plot(list(actual_10))
pyplot.plot(list(predictions_10), color='red')
pyplot.show()

In [None]:
# Append the ARIMA r2 values (one per series, in the order they were
# collected) and their gap to Model2, then show the final table.
comp_model_scores['Arima'] = Arima_scores
comp_model_scores['Model2-Arima'] = comp_model_scores['Model2'].sub(comp_model_scores['Arima'])
comp_model_scores