In [48]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime
import pmdarima as pm
from pmdarima.utils import diff
from sklearn.metrics import mean_squared_error
import pickle
import sklearn
import statsmodels.tsa.statespace as sm
from sklearn.preprocessing import StandardScaler

In [49]:
# Load the unscaled panel and normalise its two key columns.
df = pd.read_csv('./../data/full_dataset_unscaled.csv')
# Zip codes are parsed as numbers, which strips leading zeros. zfill(5) restores
# all of them; the previous single-'0' pad left 1-3 digit codes too short.
df['zip'] = df['zip'].astype(str).str.zfill(5)
# Vectorised parse; the explicit format still rejects anything that is not YYYY-MM-DD.
df['datetime'] = pd.to_datetime(df['datetime'], format = '%Y-%m-%d')
df = df.set_index('datetime')

In [50]:
# Columns removed from df before modelling (order kept as originally listed).
dropcols = [
    'percent_other_race',
    'has_bike_sharing',
    'bs_total_systems',
    'percent_40_64',
    'percent_0_17',
    'percent_18_39',
    'percent_65+',
    'percent_rental_units_vacant',
    'percent_not_us_citizen',
    'percent_less_highschool',
    'percent_buildings_less_10_units',
    'percent_commute_30_to_59',
    'percent_commute_60_to_89',
    'percent_commute_90_more',
    'percent_commute_less_30',
    'percent_graduate_deg',
    'percent_female',
    'gini_index',
    'percent_hispanic',
    'percent_black',
    'percent_bachelors',
    'percent_asian',
    'percent_new_city',
    'percent_new_unit',
]

In [51]:
# Remove the columns listed in dropcols.
df = df.drop(columns = dropcols)

In [52]:
# Keep observations from 2015 onwards.
df = df.loc[df['year'] >= 2015]

In [53]:
# Column names belonging to the ACS feature group.
acs_cols = {
    'percent_white', 'percent_black', 'percent_asian', 'percent_hispanic',
    'percent_native_am', 'percent_other_race',
    'percent_0_17', 'percent_18_39', 'percent_40_64', 'percent_65+',
    'percent_rental_units_vacant', 'percent_rental_units_occupied',
    'percent_graduate_deg', 'percent_bachelors', 'percent_associates',
    'percent_highschool', 'percent_less_highschool',
    'percent_commute_public_transport', 'percent_commute_less_30',
    'percent_buildings_less_10_units', 'percent_buildings_10_19_units',
    'percent_buildings_20_49_units', 'percent_buildings_50+_units',
    'percent_commute_30_to_59', 'percent_commute_60_to_89', 'percent_commute_90_more',
    'percent_new_city', 'percent_new_unit',
    'percent_units_owner_occupied', 'median_building_age',
    'income_per_capita', 'poverty_rate', 'total_pop',
    'percent_workforce_unemployed', 'percent_work_from_home',
    'median_age', 'percent_female', 'gini_index', 'percent_not_us_citizen',
}
# Column names belonging to the bikeshare feature group.
bikeshare_cols = {'bs_total_stations', 'bs_total_systems', 'has_bike_sharing'}
# Column names belonging to the Google Trends feature group (one column per search term).
trends_cols = {
    'gun range', 'gun control', 'gun violence',
    'job opportunities', 'unemployment', 'retirement', 'layoff',
    'lgbt', 'same sex marriage', 'they', 'pronouns',
    'black lives matter', 'political correctness', 'make america great again',
    'euthanasia',
    'getaway', 'places to go', 'flight tickets',
    'twitter', 'hashtag', 'fake news',
    'hurricane', 'wildfire', 'flood', 'fire',
    "trader joe's", 'whole foods', 'lululemon', 'thrift',
    'condos for rent', 'duplex apartments for rent',
    'townhomes for rent', 'townhouses for rent',
    'home for rent', 'house for rent',
    'townhome for rent', 'townhouse for rent',
    'apartment for rent', 'studio for rent',
    '1 bedroom for rent', '3 bedroom for rent',
    'starbucks',
}
# Column names belonging to the BDS feature group.
bds_cols = {
    'total_firms',
    'job_creation_rate',
    'job_destruction_rate',
    'startup_firms',
}
# Column names belonging to the tax feature group.
# Parentheses around a single string do not make a tuple, so this used to be a
# bare str; it is now a one-element set like the other feature groups.
tax_cols = set(['state_local_perc'])

In [54]:
# Strip every dropped column out of each feature group so the groups only
# name columns that remain after the drop.
acs_cols = acs_cols.difference(dropcols)
bds_cols = bds_cols.difference(dropcols)
trends_cols = trends_cols.difference(dropcols)
bikeshare_cols = bikeshare_cols.difference(dropcols)

In [55]:
def plot_arima(zip_code, pred_dict, split_idx = 48):
    """Plot the observed ZRI series for one zip code against its predictions.

    Parameters
    ----------
    zip_code : str
        Key into ``pred_dict`` and the value matched against ``y['zip']``.
    pred_dict : dict
        Maps zip code -> predicted series; its ``.index`` is used as the x-axis.
    split_idx : int, default 48
        Row position in the zip's observed series where the dashed green
        marker is drawn (48 matches the ``iloc[0:48]`` training slice used by
        the fitting loops).

    Notes
    -----
    Reads the module-level frame ``y``, which must expose 'zip' and 'zri' columns.
    Displays the figure and returns nothing.
    """
    # Filter once instead of re-evaluating the mask for every trace.
    observed = y[y['zip'] == zip_code]
    predicted = pred_dict[zip_code]

    fig = go.Figure()
    # go.Scatter(mode='lines') replaces the deprecated graph_objects.Line wrapper.
    fig.add_trace(go.Scatter(x = observed.index, y = observed['zri'], mode = 'lines', name = 'actual'))
    fig.add_trace(go.Scatter(x = predicted.index, y = predicted, mode = 'lines', name = 'predicted'))
    fig.add_vline(x = observed.index[split_idx], line_width = 3, line_dash = "dash", line_color = "green")
    fig.show()

In [56]:
# Unique zip codes; the per-zip fitting loops iterate over this list.
zips = list(df['zip'].unique())

# Pivot target (y) and features (x) so that zip codes become column labels.
# NOTE(review): plot_arima and every fitting loop filter with y['zip'] == ... /
# x['zip'] == ..., which needs a 'zip' column. After .pivot(columns = 'zip') the
# zip codes are column labels instead, so those lookups cannot find 'zip'.
# Confirm which layout is intended before running the notebook top to bottom.
y = df.drop(columns = ['year', 'month', 'City', 'State', 'Metro', 'CountyName']).loc[:,['zip', 'zri']].pivot(columns = 'zip')
x = df.drop(columns = ['year', 'month', 'City', 'State', 'Metro', 'CountyName', 'zri']).pivot(columns = 'zip').sort_index(axis = 1, level = 1)

In [57]:
# Observed 2019 ZRI per zip code; used as ground truth by calc_resids.
actual_df = df.loc[df['year'] == 2019, ['zip', 'zri']]

In [58]:
def calc_resids(pred_dict, actual_df, test_start = 47):
    """Compare held-out predictions with observed ZRI values.

    Parameters
    ----------
    pred_dict : dict
        Maps zip code -> predicted series indexed by datetime.
    actual_df : pandas.DataFrame
        Observed values with a datetime index and 'zip' and 'zri' columns.
    test_start : int, default 47
        Row position in each predicted series where the evaluation window
        begins (47 is the first 2019 row when predictions start at step 1).

    Returns
    -------
    resid_df : pandas.DataFrame
        Indexed by 'datetime', sorted by zip then datetime, with columns
        'zip' and 'zri' (prediction minus observation).
    rmse : float
        Root mean squared residual over the rows that have an observation.
    r2 : float
        Squared Pearson correlation between predictions and observations.
    """
    preds = pd.DataFrame(pred_dict).iloc[test_start:, :]

    # Long format: one row per (datetime, zip) prediction.
    pred_long = (
        preds.rename_axis(index = 'datetime', columns = 'zip')
             .reset_index()
             .melt(id_vars = 'datetime', var_name = 'zip', value_name = 'zri')
    )
    actual_long = (
        actual_df.rename_axis(index = 'datetime')
                 .reset_index()
                 .loc[:, ['datetime', 'zip', 'zri']]
                 .rename(columns = {'zri': 'zri_actual'})
    )

    # Join on both keys. The datetime index alone repeats once per zip code, so
    # index-aligned arithmetic can pair a prediction with another zip's value.
    merged = (
        pred_long.merge(actual_long, on = ['zip', 'datetime'], how = 'left')
                 .sort_values(['zip', 'datetime'], ascending = True)
    )

    errors = merged['zri'] - merged['zri_actual']
    resid_df = merged[['datetime', 'zip']].assign(zri = errors).set_index('datetime')

    # Mean over the rows actually scored instead of a hard-coded 1301 * 12.
    rmse = np.sqrt((errors ** 2).mean())
    r2 = merged['zri'].corr(merged['zri_actual']) ** 2
    return resid_df, rmse, r2

# ARIMA with no exogenous features

In [None]:
# Per-zip predictions and fitted parameters for the no-exog models.
pred_dict_none, coef_dict_none = {}, {}

In [63]:
# Dimensions of y as (rows, columns).
y.shape

(60, 1301)

In [64]:
# Display y with rows and columns swapped for inspection.
y.T

Unnamed: 0_level_0,datetime,2015-01-01,2015-02-01,2015-03-01,2015-04-01,2015-05-01,2015-06-01,2015-07-01,2015-08-01,2015-09-01,2015-10-01,...,2019-03-01,2019-04-01,2019-05-01,2019-06-01,2019-07-01,2019-08-01,2019-09-01,2019-10-01,2019-11-01,2019-12-01
Unnamed: 0_level_1,zip,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
zri,01013,946.0,950.0,958.0,958.0,955.0,950.0,948.0,949.0,964.0,974.0,...,1106.0,1114.0,1116.0,1128.0,1138.0,1148.0,1155.0,1175.0,1161.0,1155.0
zri,01020,975.0,979.0,983.0,985.0,985.0,978.0,976.0,976.0,984.0,992.0,...,1131.0,1136.0,1137.0,1151.0,1167.0,1179.0,1188.0,1213.0,1209.0,1205.0
zri,01040,1005.0,1007.0,987.0,967.0,946.0,938.0,943.0,943.0,952.0,964.0,...,1129.0,1127.0,1123.0,1131.0,1137.0,1145.0,1149.0,1164.0,1136.0,1123.0
zri,01085,1017.0,1014.0,1003.0,987.0,970.0,961.0,968.0,990.0,1015.0,1032.0,...,1186.0,1185.0,1192.0,1199.0,1202.0,1201.0,1201.0,1192.0,1191.5,1191.0
zri,01104,958.0,968.0,960.0,949.0,940.0,941.0,955.0,969.0,985.0,1003.0,...,1125.0,1129.0,1130.0,1135.0,1141.0,1145.0,1154.0,1178.0,1188.0,1193.0
zri,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zri,99207,775.0,783.0,789.0,792.0,801.0,808.0,801.0,800.0,805.0,796.0,...,1058.0,1078.0,1091.0,1098.0,1102.0,1101.0,1096.0,1079.0,1100.0,1121.0
zri,99501,1364.0,1348.0,1327.0,1308.0,1289.0,1277.0,1271.0,1274.0,1282.0,1288.0,...,1225.0,1219.0,1222.0,1226.0,1230.0,1236.0,1246.0,1280.0,1285.0,1290.0
zri,99504,1569.0,1564.0,1553.0,1529.0,1502.0,1511.0,1514.0,1521.0,1524.0,1524.0,...,1416.0,1419.0,1423.0,1423.0,1421.0,1416.0,1417.0,1430.0,1446.0,1424.0
zri,99508,1386.0,1368.0,1350.0,1336.0,1321.0,1316.0,1304.0,1306.0,1314.0,1325.0,...,1255.0,1258.0,1265.0,1271.0,1274.0,1279.0,1282.0,1290.0,1289.0,1286.0


Unnamed: 0_level_0,zri,zri,zri,zri,zri,zri,zri,zri,zri,zri,zri,zri,zri,zri,zri,zri,zri,zri,zri,zri,zri
zip,01013,01020,01040,01085,01104,01108,01109,01420,01440,01453,...,98409,98444,98466,98498,98499,99207,99501,99504,99508,99654
datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2015-01-01,946.0,975.0,1005.0,1017.0,958.0,1026.0,997.0,1037.0,1058.0,1079.0,...,941.0,904.0,1001.0,1046.0,983.0,775.0,1364.0,1569.0,1386.0,1253.0
2015-02-01,950.0,979.0,1007.0,1014.0,968.0,1028.0,1006.0,1054.0,1062.0,1087.0,...,944.0,905.0,1006.0,1046.0,974.0,783.0,1348.0,1564.0,1368.0,1270.0
2015-03-01,958.0,983.0,987.0,1003.0,960.0,1012.0,990.0,1053.0,1061.0,1086.0,...,950.0,908.0,1009.0,1048.0,968.0,789.0,1327.0,1553.0,1350.0,1253.0
2015-04-01,958.0,985.0,967.0,987.0,949.0,992.0,970.0,1059.0,1064.0,1081.0,...,952.0,913.0,1010.0,1068.0,973.0,792.0,1308.0,1529.0,1336.0,1245.0
2015-05-01,955.0,985.0,946.0,970.0,940.0,974.0,954.0,1070.0,1076.0,1086.0,...,957.0,916.0,1020.0,1092.0,980.0,801.0,1289.0,1502.0,1321.0,1247.0
2015-06-01,950.0,978.0,938.0,961.0,941.0,969.0,952.0,1080.0,1094.0,1097.0,...,973.0,923.0,1048.0,1113.0,994.0,808.0,1277.0,1511.0,1316.0,1232.0
2015-07-01,948.0,976.0,943.0,968.0,955.0,983.0,966.0,1088.0,1104.0,1110.0,...,987.0,931.0,1074.0,1134.0,1003.0,801.0,1271.0,1514.0,1304.0,1221.0
2015-08-01,949.0,976.0,943.0,990.0,969.0,992.0,980.0,1091.0,1101.0,1121.0,...,1001.0,937.0,1091.0,1146.0,1015.0,800.0,1274.0,1521.0,1306.0,1199.0
2015-09-01,964.0,984.0,952.0,1015.0,985.0,1011.0,998.0,1088.0,1091.0,1122.0,...,999.0,951.0,1099.0,1130.0,1020.0,805.0,1282.0,1524.0,1314.0,1188.0
2015-10-01,974.0,992.0,964.0,1032.0,1003.0,1032.0,1018.0,1093.0,1080.0,1130.0,...,987.0,953.0,1098.0,1138.0,1024.0,796.0,1288.0,1524.0,1325.0,1194.0


In [72]:
# Experimental joint VARMAX fit with order (1, 0) on the differenced series.
# NOTE(review): y.shape is (60, 1301), so diff(y).T puts zip codes on the first
# axis and months on the second. That is presumably the reverse of the
# (time, series) layout VARMAX expects; confirm against the statsmodels docs.
# The recorded run ended in KeyboardInterrupt, so no fitted model came out of this cell.
curr_model = sm.varmax.VARMAX(diff(y).T, order = (1,0)).fit()

KeyboardInterrupt: 

In [None]:
# Display the most recently fitted model object.
curr_model

In [None]:
# Fit one ARIMA(1,1,0) per zip code without exogenous regressors:
# train on rows 0-47 of that zip's series, then predict rows 1-59.
start = datetime.now()
for item in zips:
    curr_y = y[y['zip']==item]
    curr_y_train = curr_y.iloc[0:48,:]

    # SARIMAX rather than VARMAX: the endog is a single series, and the
    # 3-tuple order / seasonal_order arguments are SARIMAX parameters.
    # This now matches the model used by every other section.
    curr_model = sm.sarimax.SARIMAX(curr_y_train['zri'], exog = None, order = (1,1,0), seasonal_order=(0, 0, 0, 0)).fit()

    pred_dict_none[item] = curr_model.predict(start = 1, end = 59, exog = None, dynamic = 47)
    coef_dict_none[item] = curr_model.params

elapsed = datetime.now()-start
print(elapsed)

In [None]:
# Plot observed vs. predicted ZRI for zip 10128 from the no-exog models.
plot_arima('10128', pred_dict_none)

In [None]:
# Residual table, RMSE and squared correlation for the no-exog models.
resid_none, rmse_none, r2_none = calc_resids(pred_dict_none, actual_df)

In [None]:
# Baseline scores; the combined-feature sections print their difference from these.
print(rmse_none, r2_none)

In [None]:
# Persist the no-exog residuals (the datetime index is written as the first column).
resid_none.to_csv('./arima_resids_no_exog.csv')

In [None]:
# One column per zip code, one row per fitted parameter.
coef_none = pd.DataFrame(coef_dict_none)

In [None]:
# Average each coefficient across the fitted zip codes. mean() divides by the
# number of values present instead of a hard-coded 1301.
coef_avg = pd.DataFrame(coef_none.mean(axis = 1))

In [None]:
# Population standard deviation (ddof=0, as np.std uses) of each coefficient across
# zip codes. Calling DataFrame.std directly keeps the reduction axis explicit.
coef_std = coef_none.std(axis = 1, ddof = 0)

In [None]:
# Side-by-side summary of the averaged coefficient and its spread.
# Note: this rebinds coef_none, so rebuild it from coef_dict_none before
# re-running the averaging cells.
coef_none = pd.concat([coef_avg, coef_std], axis = 1)

In [None]:
# Label the two summary columns.
coef_none.columns = ['coef', 'std']

In [None]:
# Display the coefficient summary for the no-exog models.
coef_none

In [None]:
# Persist the no-exog coefficient summary (index written as the first column).
coef_none.to_csv('./simple_arima_coefs.csv')

# Bikeshare data only

In [None]:
# Per-zip predictions and fitted parameters for the bikeshare models.
pred_dict_bike, coef_dict_bike = {}, {}

In [None]:
# Exogenous frame restricted to 'zip' plus the remaining bikeshare columns.
x_bike = x.loc[:, ['zip', *bikeshare_cols]]

In [None]:
# Per-zip SARIMAX(1,1,0) with the bikeshare columns as exogenous regressors:
# fit on rows 0-47, then predict rows 1-59 using the held-out exog rows.
start = datetime.now()
for item in zips:
    curr_y = y[y['zip'] == item]
    curr_x = x_bike[x_bike['zip'] == item].drop(columns = 'zip')
    curr_y_train = curr_y.iloc[:48]
    curr_x_train, curr_x_test = curr_x.iloc[:48], curr_x.iloc[48:]

    curr_model = sm.sarimax.SARIMAX(
        curr_y_train['zri'],
        exog = curr_x_train,
        order = (1, 1, 0),
        seasonal_order = (0, 0, 0, 0),
    ).fit()

    pred_dict_bike[item] = curr_model.predict(start = 1, end = 59, exog = curr_x_test, dynamic = 47)
    coef_dict_bike[item] = curr_model.params

elapsed = datetime.now() - start
print(elapsed)

In [None]:
# Plot observed vs. predicted ZRI for zip 10128 from the bikeshare models.
plot_arima('10128', pred_dict_bike)

In [None]:
# Residual table, RMSE and squared correlation for the bikeshare models.
resid_bike, rmse_bike, r2_bike = calc_resids(pred_dict_bike, actual_df)

In [None]:
# Report the bikeshare scores.
print(rmse_bike, r2_bike)

In [None]:
# Persist the bikeshare residuals (the datetime index is written as the first column).
resid_bike.to_csv('./sarimax_resids_bike.csv')

In [None]:
# One column per zip code, one row per fitted parameter.
coef_bike = pd.DataFrame(coef_dict_bike)

In [None]:
# Average each coefficient across the fitted zip codes. mean() divides by the
# number of values present instead of a hard-coded 1301.
coef_avg = pd.DataFrame(coef_bike.mean(axis = 1))

In [None]:
# Population standard deviation (ddof=0, as np.std uses) of each coefficient across
# zip codes. Calling DataFrame.std directly keeps the reduction axis explicit.
coef_std = coef_bike.std(axis = 1, ddof = 0)

In [None]:
# Side-by-side summary of the averaged coefficient and its spread.
# Note: this rebinds coef_bike, so rebuild it from coef_dict_bike before
# re-running the averaging cells.
coef_bike = pd.concat([coef_avg, coef_std], axis = 1)

In [None]:
# Label the two summary columns.
coef_bike.columns = ['coef', 'std']

In [None]:
# Display the coefficient summary for the bikeshare models.
coef_bike

In [None]:
# Persist the bikeshare coefficient summary (index written as the first column).
coef_bike.to_csv('./sarimax_coef_bike.csv')

# ACS only

In [None]:
# Per-zip predictions and fitted parameters for the ACS models.
pred_dict_acs, coef_dict_acs = {}, {}

In [None]:
# Exogenous frame restricted to 'zip' plus the remaining ACS columns.
x_acs = x.loc[:, ['zip', *acs_cols]]

In [None]:
# Per-zip SARIMAX(1,1,0) with the ACS columns as exogenous regressors:
# fit on rows 0-47, then predict rows 1-59 using the held-out exog rows.
start = datetime.now()
for item in zips:
    curr_y = y[y['zip'] == item]
    curr_x = x_acs[x_acs['zip'] == item].drop(columns = 'zip')
    curr_y_train = curr_y.iloc[:48]
    curr_x_train, curr_x_test = curr_x.iloc[:48], curr_x.iloc[48:]

    curr_model = sm.sarimax.SARIMAX(
        curr_y_train['zri'],
        exog = curr_x_train,
        order = (1, 1, 0),
        seasonal_order = (0, 0, 0, 0),
    ).fit()

    pred_dict_acs[item] = curr_model.predict(start = 1, end = 59, exog = curr_x_test, dynamic = 47)
    coef_dict_acs[item] = curr_model.params

elapsed = datetime.now() - start
print(elapsed)

In [None]:
# Plot observed vs. predicted ZRI for zip 01013 from the ACS models.
plot_arima('01013', pred_dict_acs)

In [None]:
# Score the ACS models and persist their residuals.
resid_acs, rmse_acs, r2_acs = calc_resids(pred_dict_acs, actual_df)
print(rmse_acs, r2_acs)
resid_acs.to_csv('./sarimax_resids_acs.csv', index = True)

# One column per zip code, one row per fitted parameter.
coef_acs = pd.DataFrame(coef_dict_acs)
# mean() divides by the number of values present instead of a hard-coded 1301.
coef_avg = pd.DataFrame(coef_acs.mean(axis = 1))
# Population std (ddof=0, as np.std uses) with the reduction axis made explicit.
coef_std = coef_acs.std(axis = 1, ddof = 0)
coef_acs = pd.concat([coef_avg, coef_std], axis = 1)
coef_acs.columns = ['coef', 'std']
coef_acs.sort_values('coef', ascending = False)

In [None]:
# Persist the ACS coefficient summary (index written as the first column).
coef_acs.to_csv('./sarimax_coef_acs.csv')

# Tax dataset

In [None]:
# Per-zip predictions and fitted parameters for the tax models.
pred_dict_tax, coef_dict_tax = {}, {}

In [None]:
# Exogenous frame restricted to 'zip' plus the tax column(s).
# tax_cols may be a bare column name or a collection of names; normalise it to a
# list so the concatenation with ['zip'] cannot raise TypeError.
x_tax = x.loc[:, ['zip'] + ([tax_cols] if isinstance(tax_cols, str) else list(tax_cols))]

In [None]:
# Per-zip SARIMAX(1,1,0) with the tax column(s) as exogenous regressors:
# fit on rows 0-47, then predict rows 1-59 using the held-out exog rows.
start = datetime.now()
for item in zips:
    curr_y = y[y['zip'] == item]
    curr_x = x_tax[x_tax['zip'] == item].drop(columns = 'zip')
    curr_y_train = curr_y.iloc[:48]
    curr_x_train, curr_x_test = curr_x.iloc[:48], curr_x.iloc[48:]

    curr_model = sm.sarimax.SARIMAX(
        curr_y_train['zri'],
        exog = curr_x_train,
        order = (1, 1, 0),
        seasonal_order = (0, 0, 0, 0),
    ).fit()

    pred_dict_tax[item] = curr_model.predict(start = 1, end = 59, exog = curr_x_test, dynamic = 47)
    coef_dict_tax[item] = curr_model.params

elapsed = datetime.now() - start
print(elapsed)

In [None]:
# Plot observed vs. predicted ZRI for zip 10128 from the tax models.
plot_arima('10128', pred_dict_tax)

In [None]:
# Score the tax models and persist their residuals.
resid_tax, rmse_tax, r2_tax = calc_resids(pred_dict_tax, actual_df)
print(rmse_tax, r2_tax)
resid_tax.to_csv('./sarimax_resids_tax.csv', index = True)

# One column per zip code, one row per fitted parameter.
coef_tax = pd.DataFrame(coef_dict_tax)
# mean() divides by the number of values present instead of a hard-coded 1301.
coef_avg = pd.DataFrame(coef_tax.mean(axis = 1))
# Population std (ddof=0, as np.std uses) with the reduction axis made explicit.
coef_std = coef_tax.std(axis = 1, ddof = 0)
coef_tax = pd.concat([coef_avg, coef_std], axis = 1)
coef_tax.columns = ['coef', 'std']
coef_tax.sort_values('coef', ascending = False)

In [None]:
# Persist the tax coefficient summary (index written as the first column).
coef_tax.to_csv('./sarimax_coef_tax.csv')

# BDS dataset

In [None]:
# Per-zip predictions and fitted parameters for the BDS models.
pred_dict_bds, coef_dict_bds = {}, {}

In [None]:
# Exogenous frame restricted to 'zip' plus the BDS columns.
# bds_cols is a set, and list + set raises TypeError, so convert it first
# (as the bikeshare and ACS selections already do).
x_bds = x.loc[:, ['zip'] + list(bds_cols)]

In [None]:
# Per-zip SARIMAX(1,1,0) with the BDS columns as exogenous regressors:
# fit on rows 0-47, then predict rows 1-59 using the held-out exog rows.
start = datetime.now()
for item in zips:
    curr_y = y[y['zip'] == item]
    curr_x = x_bds[x_bds['zip'] == item].drop(columns = 'zip')
    curr_y_train = curr_y.iloc[:48]
    curr_x_train, curr_x_test = curr_x.iloc[:48], curr_x.iloc[48:]

    curr_model = sm.sarimax.SARIMAX(
        curr_y_train['zri'],
        exog = curr_x_train,
        order = (1, 1, 0),
        seasonal_order = (0, 0, 0, 0),
    ).fit()

    pred_dict_bds[item] = curr_model.predict(start = 1, end = 59, exog = curr_x_test, dynamic = 47)
    coef_dict_bds[item] = curr_model.params

elapsed = datetime.now() - start
print(elapsed)

In [None]:
# Plot observed vs. predicted ZRI for zip 10128 from the BDS models.
plot_arima('10128', pred_dict_bds)

In [None]:
# Score the BDS models and persist their residuals.
resid_bds, rmse_bds, r2_bds = calc_resids(pred_dict_bds, actual_df)
print(rmse_bds, r2_bds)
resid_bds.to_csv('./sarimax_resids_bds.csv', index = True)

# One column per zip code, one row per fitted parameter.
coef_bds = pd.DataFrame(coef_dict_bds)
# mean() divides by the number of values present instead of a hard-coded 1301.
coef_avg = pd.DataFrame(coef_bds.mean(axis = 1))
# Population std (ddof=0, as np.std uses) with the reduction axis made explicit.
coef_std = coef_bds.std(axis = 1, ddof = 0)
coef_bds = pd.concat([coef_avg, coef_std], axis = 1)
coef_bds.columns = ['coef', 'std']
coef_bds.sort_values('coef', ascending = False)

In [None]:
# Persist the BDS coefficient summary (index written as the first column).
coef_bds.to_csv('./sarimax_coef_bds.csv')

# Google trends

In [None]:
# Per-zip predictions and fitted parameters for the Google Trends models.
pred_dict_google, coef_dict_google = {}, {}

In [None]:
# Exogenous frame restricted to 'zip' plus the Google Trends columns.
# trends_cols is a set, and list + set raises TypeError, so convert it first
# (as the bikeshare and ACS selections already do).
x_google = x.loc[:, ['zip'] + list(trends_cols)]

In [None]:
# Per-zip SARIMAX(1,1,0) with the Google Trends columns as exogenous regressors:
# fit on rows 0-47, then predict rows 1-59 using the held-out exog rows.
start = datetime.now()
for item in zips:
    curr_y = y[y['zip'] == item]
    curr_x = x_google[x_google['zip'] == item].drop(columns = 'zip')
    curr_y_train = curr_y.iloc[:48]
    curr_x_train, curr_x_test = curr_x.iloc[:48], curr_x.iloc[48:]

    curr_model = sm.sarimax.SARIMAX(
        curr_y_train['zri'],
        exog = curr_x_train,
        order = (1, 1, 0),
        seasonal_order = (0, 0, 0, 0),
    ).fit()

    pred_dict_google[item] = curr_model.predict(start = 1, end = 59, exog = curr_x_test, dynamic = 47)
    coef_dict_google[item] = curr_model.params

elapsed = datetime.now() - start
print(elapsed)

In [None]:
# Plot observed vs. predicted ZRI for zip 10128 from the Google Trends models.
plot_arima('10128', pred_dict_google)

In [None]:
# Score the Google Trends models and persist their residuals.
resid_google, rmse_google, r2_google = calc_resids(pred_dict_google, actual_df)
print(rmse_google, r2_google)
resid_google.to_csv('./sarimax_resids_google.csv', index = True)

# One column per zip code, one row per fitted parameter.
coef_google = pd.DataFrame(coef_dict_google)
# mean() divides by the number of values present instead of a hard-coded 1301.
coef_avg = pd.DataFrame(coef_google.mean(axis = 1))
# Population std (ddof=0, as np.std uses) with the reduction axis made explicit.
coef_std = coef_google.std(axis = 1, ddof = 0)
coef_google = pd.concat([coef_avg, coef_std], axis = 1)
coef_google.columns = ['coef', 'std']
coef_google.sort_values('coef', ascending = False)

In [None]:
# Persist the Google Trends coefficient summary (index written as the first column).
coef_google.to_csv('./sarimax_coef_google.csv')

# Google and ACS

In [None]:
# Per-zip predictions and fitted parameters for the Google + ACS models.
pred_dict_acsg, coef_dict_acsg = {}, {}

In [None]:
# Google + ACS exog: everything in x except the bikeshare, tax and BDS columns.
# Sets do not support '+', and tax_cols may be a bare column name, so build one
# flat list of labels to drop.
x_acsg = x.drop(columns = (
    list(bikeshare_cols)
    + ([tax_cols] if isinstance(tax_cols, str) else list(tax_cols))
    + list(bds_cols)
))

In [None]:
# Per-zip SARIMAX(1,1,0) with the Google Trends + ACS columns as exogenous regressors.
start = datetime.now()
for item in zips:
    curr_y = y[y['zip']==item]
    curr_x = x_acsg[x_acsg['zip']==item].drop(columns = 'zip')
    # Fit on the first 48 rows only, as the single-source sections do. The model
    # used to be estimated on the complete series, so the rows it was scored on
    # had also been used for fitting and the scores were not comparable.
    curr_y_train = curr_y.iloc[0:48,:]
    curr_x_train = curr_x.iloc[0:48,:]
    curr_x_test = curr_x.iloc[48:,:]

    curr_model = sm.sarimax.SARIMAX(curr_y_train['zri'], exog = curr_x_train, order = (1,1,0), seasonal_order=(0, 0, 0, 0)).fit()

    # end = 59 is now required: the model only knows 48 rows, so the rest are forecast from curr_x_test.
    pred_dict_acsg[item] = curr_model.predict(start = 1, end = 59, exog = curr_x_test, dynamic = 47)
    coef_dict_acsg[item] = curr_model.params

elapsed = datetime.now()-start
print(elapsed)

In [None]:
# Plot observed vs. predicted ZRI for zip 99654 from the Google + ACS models.
plot_arima('99654', pred_dict_acsg)

In [None]:
# Score the Google + ACS models and persist their residuals.
resid_acsg, rmse_acsg, r2_acsg = calc_resids(pred_dict_acsg, actual_df)
print(rmse_acsg, r2_acsg)
resid_acsg.to_csv('./sarimax_resids_acsg.csv', index = True)

# One column per zip code, one row per fitted parameter.
coef_acsg = pd.DataFrame(coef_dict_acsg)
# mean() divides by the number of values present instead of a hard-coded 1301.
coef_acsg = pd.DataFrame(coef_acsg.mean(axis = 1))
# Improvement over the no-exog baseline: positive means lower RMSE / higher r2.
print((rmse_none - rmse_acsg), (r2_acsg - r2_none))
coef_acsg.sort_values(0, ascending = False)

In [None]:
# Persist the Google + ACS averaged coefficients (index written as the first column).
coef_acsg.to_csv('./sarimax_coef_acsg.csv')

# Everything

In [None]:
# Per-zip predictions and fitted parameters for the all-features models.
pred_dict_all, coef_dict_all = {}, {}

In [None]:
# Per-zip SARIMAX(1,1,0) with every remaining feature column as an exogenous regressor.
start = datetime.now()
for item in zips:
    curr_y = y[y['zip']==item]
    curr_x = x[x['zip']==item].drop(columns = 'zip')
    # Fit on the first 48 rows only, as the single-source sections do. The model
    # used to be estimated on the complete series, so the rows it was scored on
    # had also been used for fitting and the scores were not comparable.
    curr_y_train = curr_y.iloc[0:48,:]
    curr_x_train = curr_x.iloc[0:48,:]
    curr_x_test = curr_x.iloc[48:,:]

    curr_model = sm.sarimax.SARIMAX(curr_y_train['zri'], exog = curr_x_train, order = (1,1,0), seasonal_order=(0, 0, 0, 0)).fit()

    # end = 59 is now required: the model only knows 48 rows, so the rest are forecast from curr_x_test.
    pred_dict_all[item] = curr_model.predict(start = 1, end = 59, exog = curr_x_test, dynamic = 47)
    coef_dict_all[item] = curr_model.params

elapsed = datetime.now()-start
print(elapsed)

In [None]:
# Plot observed vs. predicted ZRI for zip 99654 from the all-features models.
plot_arima('99654', pred_dict_all)

In [None]:
# Score the all-features models and persist their residuals.
resid_all, rmse_all, r2_all = calc_resids(pred_dict_all, actual_df)
print(rmse_all, r2_all)
resid_all.to_csv('./sarimax_resids_all.csv', index = True)

# One column per zip code, one row per fitted parameter.
coef_all = pd.DataFrame(coef_dict_all)
# mean() divides by the number of values present instead of a hard-coded 1301.
coef_all = pd.DataFrame(coef_all.mean(axis = 1))
# Improvement over the no-exog baseline: positive means lower RMSE / higher r2.
print((rmse_none - rmse_all), (r2_all - r2_none))
coef_all.sort_values(0, ascending = False)

In [None]:
# Persist the all-features averaged coefficients (index written as the first column).
coef_all.to_csv('./sarimax_coef_all.csv')