In [31]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Lasso
from math import sqrt
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score


In [32]:
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

In [33]:
df_orig = pd.read_csv('../../../full_dataset_unscaled.csv')
df_orig = df_orig.rename(columns={"datetime":"year-month"})

In [34]:

df_orig['net_job_rate']=df_orig['job_creation_rate']-df_orig['job_destruction_rate']
df_orig['apartment_for_rent_searches'] = df_orig[['apartment for rent','studio for rent','1 bedroom for rent','3 bedroom for rent']].sum(axis=1)
df_orig['multifamily_for_rent_searches'] = df_orig[['townhomes for rent','townhouse for rent','house for rent','duplex apartments for rent','condos for rent']].sum(axis=1)
df_orig['gun_searches'] = df_orig[['gun range', 'gun control', 'gun violence']].sum(axis=1)
df_orig['zri'] = np.log(df_orig['zri'])

In [35]:
gen_cols = ['zip',
 'City',
 'State',
 'Metro',
 'CountyName',
 'zri',
 'year',
 'month',
 'year-month']
acs_cols = ['percent_white',
 'percent_black',
 'percent_asian',
 'percent_hispanic',
 'percent_native_am',
 'percent_other_race',
 'percent_0_17',
 'percent_18_39',
 'percent_40_64',
 'percent_65+',
 'percent_rental_units_vacant',
 'percent_rental_units_occupied',
 'percent_graduate_deg',
 'percent_bachelors',
 'percent_associates',
 'percent_highschool',
 'percent_less_highschool',
 'percent_commute_public_transport',
 'percent_commute_less_30',
 'percent_buildings_less_10_units',
 'percent_buildings_10_19_units',
 'percent_buildings_20_49_units',
 'percent_buildings_50+_units',
 'percent_commute_30_to_59',
 'percent_commute_60_to_89',
 'percent_commute_90_more',
 'percent_new_city',
 'percent_new_unit',
 'percent_units_owner_occupied',
 'median_building_age',
 'income_per_capita',
 'poverty_rate',
 'total_pop',
 'percent_workforce_unemployed',
 'percent_work_from_home',
 'median_age',
 'percent_female',
 'gini_index',
 'percent_not_us_citizen']
acs_cols_remove=['percent_other_race','percent_40_64','percent_0_17','percent_18_39',
       'percent_65+','percent_rental_units_vacant','percent_not_us_citizen','percent_less_highschool', 'percent_buildings_less_10_units',
 'percent_commute_30_to_59',
 'percent_commute_60_to_89',
 'percent_commute_90_more', 'percent_commute_less_30','percent_graduate_deg',
                'percent_female','gini_index','percent_hispanic','percent_black','percent_bachelors',
                 'percent_asian','percent_new_city','percent_new_unit']
acs_cols_keep=list(set(acs_cols) - set(acs_cols_remove))
bikeshare_cols = ['bs_total_stations',
 'bs_total_systems',
 'has_bike_sharing']
bikeshare_cols_remove=['has_bike_sharing','bs_total_systems']
bikeshare_cols_keep=list(set(bikeshare_cols) - set(bikeshare_cols_remove))
trends_cols = ['gun range',
 'gun control',
 'gun violence',
 'job opportunities',
 'unemployment',
 'retirement',
 'layoff',
 'lgbt',
 'same sex marriage',
 'they',
 'pronouns',
 'black lives matter',
 'political correctness',
 'make america great again',
 'euthanasia',
 'getaway',
 'places to go',
 'flight tickets',
 'twitter',
 'hashtag',
 'fake news',
 'hurricane',
 'wildfire',
 'flood',
 'fire',
 "trader joe's",
 'whole foods',
 'lululemon',
 'thrift',
 'condos for rent',
 'duplex apartments for rent',
 'townhomes for rent',
 'townhouses for rent',
 'home for rent',
 'house for rent',
 'townhome for rent',
 'townhouse for rent',
 'apartment for rent',
 'studio for rent',
 '1 bedroom for rent',
 '3 bedroom for rent',
 'starbucks',
  'apartment_for_rent_searches',
  'multifamily_for_rent_searches',
  'gun_searches']
trends_cols_remove=['they','apartment for rent','studio for rent','1 bedroom for rent',
                    '3 bedroom for rent', 'townhome for rent','townhouse for rent','townhomes for rent',
                    'townhouses for rent','house for rent','home for rent','duplex apartments for rent','condos for rent',
                   'gun range', 'gun control', 'gun violence','homes for rent']
trends_cols_keep=list(set(trends_cols) - set(trends_cols_remove))
economic_cols = ['total_firms',
 'job_creation_rate',
 'job_destruction_rate',
 'startup_firms','state_local_perc', 'net_job_rate']
economic_cols_remove=['total_firms', 'job_creation_rate','job_destruction_rate',]
economic_cols_keep=list(set(economic_cols) - set(economic_cols_remove))

In [36]:
for zipcode in df_orig['zip'].unique():
    globals()[f"scaler_{zipcode}"]=StandardScaler(copy=False)
    df_filtered=df_orig[df_orig['zip']==zipcode]
    df_filtered_train = df_filtered[df_filtered['year']<2019]
    globals()[f"scaler_{zipcode}"].fit(df_filtered_train[['zri']])
    df_orig.loc[df_orig['zip']==zipcode,'zri']=globals()[f"scaler_{zipcode}"].transform(df_filtered[['zri']])

In [37]:
df_orig = df_orig[gen_cols + acs_cols_keep + bikeshare_cols_keep + economic_cols_keep + trends_cols_keep]

In [38]:
scale_columns = ['percent_buildings_50+_units', 'percent_associates',
       'percent_rental_units_occupied', 'percent_white', 'percent_highschool',
       'percent_work_from_home', 'percent_buildings_20_49_units',
       'median_building_age', 'median_age', 'percent_commute_public_transport',
       'percent_buildings_10_19_units', 'income_per_capita',
       'percent_native_am', 'percent_workforce_unemployed', 'poverty_rate',
       'percent_units_owner_occupied', 'total_pop', 'bs_total_stations',
       'startup_firms', 'state_local_perc', 'net_job_rate', 'gun_searches',
       'wildfire', 'fire', 'lgbt', 'political correctness', 'lululemon',
       'make america great again', 'same sex marriage', 'job opportunities',
       'retirement', 'black lives matter', 'flight tickets', 'pronouns',
       'trader joe\'s', 'fake news', 'hurricane', 'flood', 'whole foods',
       'twitter', 'thrift', 'hashtag', 'apartment_for_rent_searches', 'layoff',
       'starbucks', 'getaway', 'places to go', 'unemployment', 'euthanasia',
       'multifamily_for_rent_searches']

In [39]:
for zipcode in df_orig['zip'].unique():
    globals()[f"scaler_features_{zipcode}"]=StandardScaler(copy=False)
    df_filtered=df_orig[df_orig['zip']==zipcode]
    df_filtered_train = df_filtered[df_filtered['year']<2019]
    globals()[f"scaler_features_{zipcode}"].fit(df_filtered_train[scale_columns])
    df_orig.loc[df_orig['zip']==zipcode,scale_columns]=globals()[f"scaler_features_{zipcode}"].transform(df_filtered[scale_columns])

In [40]:
df_scaled_no_lags = df_orig.copy()

In [20]:
df_zri = df_scaled_no_lags[gen_cols]


In [21]:
df_all_ext = df_scaled_no_lags

In [22]:
def laggenerator(i,colname,df):
    timelist=list(df['year-month'].drop_duplicates().sort_values())[0:i]
    df.loc[:,f'{colname}_lag{i}']=df.loc[:,f'{colname}'].shift(i)
    df.loc[df['year-month'].isin(timelist),f'{colname}_lag{i}']=0
    return df

def laggenerator_diff(i,colname,df):
    timelist=list(df['year-month'].drop_duplicates().sort_values())[0:i+1]
    df.loc[:,f'{colname}_diff_lag{i}_lag{i+1}']=df.loc[:,f'{colname}'].shift(i)-df.loc[:,f'{colname}'].shift(i+1)
    df.loc[df['year-month'].isin(timelist),f'{colname}_diff_lag{i}_lag{i+1}']=0
    return df

def laggenerator_diff12(i,colname,df):
    timelist=list(df['year-month'].drop_duplicates().sort_values())[0:i+11]
    df.loc[:,f'{colname}_diff_lag{i}_lag{i+11}']=df.loc[:,f'{colname}'].shift(i)-df.loc[:,f'{colname}'].shift(i+11)
    df.loc[df['year-month'].isin(timelist),f'{colname}_diff_lag{i}_lag{i+11}']=0
    return df

#ZRI 
def lag_gen(df):
    for i in range(1,12):
        df=laggenerator(i, 'zri', df)
    df=laggenerator_diff12(1, 'zri', df)
    return df

In [23]:
df_zri = lag_gen(df_zri)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(ilocs[0], value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


# 1. AUTO (JUST ZRI) 

In [18]:
df = df_zri
train = df[(df['year'] < 2019) & (df['year']>=2015)]
test = df[df['year']==2019]
save_train = train.copy()
save_test = test.copy()

train = train.drop(['zip','City','State','Metro','CountyName','year','month','year-month'],axis=1)
train_y = train['zri']
train_X = train.drop(['zri'],axis=1)

test_y = test['zri']
test_X = test
#test_X = test.drop(['zri'],axis=1)

In [19]:
#Lasso Grid Search
lasso = Lasso()
grid = dict()
grid['alpha'] = [1e-15, 1e-10, 1e-8, 1e-5,1e-4, 1e-3,1e-2, 1, 5, 10]
lasso_grid = GridSearchCV(lasso, grid, cv=5, n_jobs=-1).fit(train_X,train_y)
lasso_best = lasso_grid.best_estimator_
print(f'lasso_best : {lasso_best}')

lasso_best.fit(train_X, train_y)


lasso_best : Lasso(alpha=1e-05)


Lasso(alpha=1e-05)

In [20]:
predictor_table=df.copy()
X_test = test_X.copy()

#loop through all month in 2019
for month in list(X_test['year-month'].drop_duplicates()):
    
    #run prediction for one month
    X_test=X_test[X_test['year-month']==month]
    X_test=X_test.drop(['zip','year','month','City','State','Metro','CountyName','year-month','zri'],axis=1)
    val=lasso_best.predict(X_test)

    #write current month prediction into predictor_table

    predictor_table.loc[predictor_table['year-month']==month,'zri']=val
        
    predictor_table=lag_gen(predictor_table)

    X_test=predictor_table[predictor_table['year']==2019]


In [22]:
scaled_predictions_y = X_test['zri']
temp = pd.concat([save_test['zip'],test_y],axis=1)
temp.reset_index(drop=True, inplace=True)
scaled_predictions_y.reset_index(drop=True, inplace=True)
rstable = pd.concat([temp,scaled_predictions_y],axis=1)
rstable.columns = ['zip','zri_test','zri_predicted']
rstable

Unnamed: 0,zip,zri_test,zri_predicted
0,1013,0.973309,1.062795
1,1013,0.960762,1.051512
2,1013,1.060825,1.056004
3,1013,1.160167,1.065848
4,1013,1.184891,1.073373
...,...,...,...
15607,99654,0.007888,1.066825
15608,99654,-0.102332,1.041338
15609,99654,0.407646,1.023838
15610,99654,-0.277916,1.014662


In [23]:
for zipcode in rstable['zip'].unique():
    rstable_filtered=rstable[rstable['zip']==zipcode]
    rstable.loc[rstable['zip']==zipcode,'zri_test']=globals()[f"scaler_{zipcode}"].inverse_transform(rstable_filtered[['zri_test']])
    rstable.loc[rstable['zip']==zipcode,'zri_predicted']=globals()[f"scaler_{zipcode}"].inverse_transform(rstable_filtered[['zri_predicted']])  
rstable.loc[:,'zri_test'] = np.exp(rstable.loc[:,'zri_test'])
rstable.loc[:,'zri_predicted'] = np.exp(rstable.loc[:,'zri_predicted'])
rstable.isna().sum()

zip              0
zri_test         0
zri_predicted    0
dtype: int64

In [24]:
#RESULTS 
r2 = r2_score(rstable['zri_test'],rstable['zri_predicted'])
rmse = sqrt(mean_squared_error(rstable['zri_test'],rstable['zri_predicted']))
print(f'R2: {r2*100}')
print(f'RMSE: {rmse}')

R2: 98.93213838217116
RMSE: 68.75143047884802


In [25]:
train_X.columns

Index(['zri_lag1', 'zri_lag2', 'zri_lag3', 'zri_lag4', 'zri_lag5', 'zri_lag6',
       'zri_lag7', 'zri_lag8', 'zri_lag9', 'zri_lag10', 'zri_lag11',
       'zri_diff_lag1_lag12'],
      dtype='object')

In [26]:
importance = np.abs(lasso_best.coef_)
coef = lasso_best.coef_
feature_names = train_X.columns
feature_importances= pd.DataFrame([feature_names,importance,coef]).T
feature_importances.columns = ['feature_names','coef_abs','coef']

feature_importances = feature_importances.sort_values(by='coef_abs',ascending=False)

In [27]:
feature_importances['Parent_feature']=feature_importances['feature_names'].apply(lambda x: x[0:x.find('_lag')] if x.find('_lag')!=-1 else x)
feature_importances['Parent_feature']=feature_importances['Parent_feature'].apply(lambda x: x[0:x.find('_diff')] if x.find('_diff')!=-1 else x)

In [28]:
feature_importances

Unnamed: 0,feature_names,coef_abs,coef,Parent_feature
0,zri_lag1,1.527279,1.527279,zri
1,zri_lag2,0.613722,-0.613722,zri
2,zri_lag3,0.059676,-0.059676,zri
11,zri_diff_lag1_lag12,0.038757,0.038757,zri
6,zri_lag7,0.032243,0.032243,zri
3,zri_lag4,0.031968,0.031968,zri
10,zri_lag11,0.026219,0.026219,zri
9,zri_lag10,0.014151,0.014151,zri
7,zri_lag8,0.012129,-0.012129,zri
8,zri_lag9,0.011088,0.011088,zri


In [None]:
# title = 'Final_Auto'
# fi = title+'_FI'
# rstable.to_csv(f'../../../zillow_orientation/Residuals/{title}.csv')
# feature_importances.to_csv(f'../../../zillow_orientation/Residuals/{fi}.csv')

In [None]:
# BEST AUTOREGRESSIVE 
# 12 Features total 
# ['zri_lag1', 'zri_lag2', 'zri_lag3', 'zri_lag4', 'zri_lag5', 'zri_lag6',
#        'zri_lag7', 'zri_lag8', 'zri_lag9', 'zri_lag10', 'zri_lag11',
#        'zri_diff_lag1_lag12']
# Lasso(alpha=1e-05)
# R2: 98.93213838217116
# RMSE: 68.75143047884802
# top coefs: zri_lag1,zri_lag2,zri_lag3,zri_diff_lag1_lag12



# OLD TRIALS TO IGNORE
# ['zri_lag1', 'zri_lag6', 'zri_diff_lag1_lag12']
# R2: 98.85027227385696
# RMSE: 71.3381352101476
    
# ['zri_lag1', 'zri_lag2', 'zri_lag3', 'zri_lag4', 'zri_lag5', 'zri_lag6',
#        'zri_lag7', 'zri_lag8', 'zri_lag9', 'zri_lag10', 'zri_lag11',
#        'zri_lag12', 'zri_diff_lag1_lag12']   
# R2: 98.93199923119373
# RMSE: 68.7559097657487
    
# ['zri_lag1', 'zri_diff_lag1_lag2', 'zri_lag2', 'zri_diff_lag2_lag3',
#        'zri_lag3', 'zri_diff_lag3_lag4', 'zri_lag4', 'zri_diff_lag4_lag5',
#        'zri_lag5', 'zri_diff_lag5_lag6', 'zri_lag6', 'zri_diff_lag6_lag7',
#        'zri_lag7', 'zri_diff_lag7_lag8', 'zri_lag8', 'zri_diff_lag8_lag9',
#        'zri_lag9', 'zri_diff_lag9_lag10', 'zri_lag10', 'zri_diff_lag10_lag11',
#        'zri_lag11', 'zri_diff_lag11_lag12', 'zri_diff_lag1_lag12']
# R2: 98.93217535166379
# RMSE: 68.75024037731302

# 2. ZRI + ACS 

In [76]:
df_zri_acs = df_scaled_no_lags[gen_cols+acs_cols_keep]

In [77]:
df_zri_acs = lag_gen(df_zri_acs)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(ilocs[0], value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [78]:
for col in acs_cols_keep:
    df_zri_acs = laggenerator_diff12(1,col,df_zri_acs)
    df_zri_acs = laggenerator(1,col,df_zri_acs)
    df_zri_acs = laggenerator(6,col,df_zri_acs)
    df_zri_acs = laggenerator(12,col,df_zri_acs)
    #for i in range(1,4):
       # df_zri_acs = laggenerator_diff(i,col,df)


In [79]:
df = df_zri_acs
train = df[(df['year'] < 2019) & (df['year']>=2015)]
test = df[df['year']==2019]
save_train = train.copy()
save_test = test.copy()

train = train.drop(['zip','City','State','Metro','CountyName','year','month','year-month'],axis=1)
train_y = train['zri']
train_X = train.drop(['zri'],axis=1)

test_y = test['zri']
test_X = test
#test_X = test.drop(['zri'],axis=1)

In [81]:
#Lasso Grid Search
lasso = Lasso()
grid = dict()
grid['alpha'] = [1e-15, 1e-10, 1e-8, 1e-5,1e-4, 1e-3,1e-2, 1, 5, 10]
lasso_grid = GridSearchCV(lasso, grid, cv=5, n_jobs=-1).fit(train_X,train_y)
lasso_best = lasso_grid.best_estimator_
print(f'lasso_best : {lasso_best}')

lasso_best.fit(train_X, train_y)


lasso_best : Lasso(alpha=0.0001)


Lasso(alpha=0.0001)

In [82]:
predictor_table=df.copy()
X_test = test_X.copy()

#loop through all month in 2019
for month in list(X_test['year-month'].drop_duplicates()):
    
    #run prediction for one month
    X_test=X_test[X_test['year-month']==month]
    X_test=X_test.drop(['zip','year','month','City','State','Metro','CountyName','year-month','zri'],axis=1)
    val=lasso_best.predict(X_test)

    #write current month prediction into predictor_table

    predictor_table.loc[predictor_table['year-month']==month,'zri']=val
        
    predictor_table=lag_gen(predictor_table)

    X_test=predictor_table[predictor_table['year']==2019]


In [83]:
scaled_predictions_y = X_test['zri']
temp = pd.concat([save_test['zip'],test_y],axis=1)
temp.reset_index(drop=True, inplace=True)
scaled_predictions_y.reset_index(drop=True, inplace=True)
rstable = pd.concat([temp,scaled_predictions_y],axis=1)
rstable.columns = ['zip','zri_test','zri_predicted']
rstable

Unnamed: 0,zip,zri_test,zri_predicted
0,1013,0.973309,1.072649
1,1013,0.960762,1.077547
2,1013,1.060825,1.099619
3,1013,1.160167,1.126671
4,1013,1.184891,1.149418
...,...,...,...
15607,99654,0.007888,1.202899
15608,99654,-0.102332,1.213981
15609,99654,0.407646,1.235056
15610,99654,-0.277916,1.266095


In [84]:
# scaled_predictions_y = pd.Series(lasso_best.predict(test_X))
# temp = pd.concat([save_test['zip'],test_y],axis=1)
# temp.reset_index(drop=True, inplace=True)
# scaled_predictions_y.reset_index(drop=True, inplace=True)
# rstable = pd.concat([temp,scaled_predictions_y],axis=1)
# rstable.columns = ['zip','zri_test','zri_predicted']
# rstable

In [85]:
for zipcode in rstable['zip'].unique():
    rstable_filtered=rstable[rstable['zip']==zipcode]
    rstable.loc[rstable['zip']==zipcode,'zri_test']=globals()[f"scaler_{zipcode}"].inverse_transform(rstable_filtered[['zri_test']])
    rstable.loc[rstable['zip']==zipcode,'zri_predicted']=globals()[f"scaler_{zipcode}"].inverse_transform(rstable_filtered[['zri_predicted']])  
rstable.loc[:,'zri_test'] = np.exp(rstable.loc[:,'zri_test'])
rstable.loc[:,'zri_predicted'] = np.exp(rstable.loc[:,'zri_predicted'])
rstable.isna().sum()

zip              0
zri_test         0
zri_predicted    0
dtype: int64

In [86]:
#RESULTS 
r2 = r2_score(rstable['zri_test'],rstable['zri_predicted'])
rmse = sqrt(mean_squared_error(rstable['zri_test'],rstable['zri_predicted']))
print(f'R2: {r2*100}')
print(f'RMSE: {rmse}')

R2: 98.71343352758063
RMSE: 75.46409446867541


In [87]:
#train_X.columns

Index(['income_per_capita', 'percent_buildings_10_19_units', 'total_pop',
       'median_age', 'percent_workforce_unemployed',
       'percent_commute_public_transport', 'percent_associates',
       'percent_highschool', 'percent_native_am', 'percent_white',
       'percent_rental_units_occupied', 'percent_buildings_20_49_units',
       'percent_units_owner_occupied', 'percent_buildings_50+_units',
       'percent_work_from_home', 'poverty_rate', 'median_building_age',
       'zri_lag1', 'zri_lag2', 'zri_lag3', 'zri_lag4', 'zri_lag5', 'zri_lag6',
       'zri_lag7', 'zri_lag8', 'zri_lag9', 'zri_lag10', 'zri_lag11',
       'zri_diff_lag1_lag12', 'income_per_capita_diff_lag1_lag12',
       'income_per_capita_lag1', 'income_per_capita_lag6',
       'income_per_capita_lag12',
       'percent_buildings_10_19_units_diff_lag1_lag12',
       'percent_buildings_10_19_units_lag1',
       'percent_buildings_10_19_units_lag6',
       'percent_buildings_10_19_units_lag12', 'total_pop_diff_lag1_lag12

In [88]:
importance = np.abs(lasso_best.coef_)
coef = lasso_best.coef_
feature_names = train_X.columns
feature_importances= pd.DataFrame([feature_names,importance,coef]).T
feature_importances.columns = ['feature_names','coef_abs','coef']

feature_importances = feature_importances.sort_values(by='coef_abs',ascending=False)
feature_importances['Parent_feature']=feature_importances['feature_names'].apply(lambda x: x[0:x.find('_lag')] if x.find('_lag')!=-1 else x)
feature_importances['Parent_feature']=feature_importances['Parent_feature'].apply(lambda x: x[0:x.find('_diff')] if x.find('_diff')!=-1 else x)

In [90]:
#feature_importances=feature_importances.groupby('Parent_feature').agg('mean').sort_values('Importance',ascending=False)
#feature_importances

Unnamed: 0,feature_names,coef_abs,coef,Parent_feature
17,zri_lag1,1.492442,1.492442,zri
18,zri_lag2,0.598477,-0.598477,zri
28,zri_diff_lag1_lag12,0.053407,0.053407,zri
19,zri_lag3,0.048796,-0.048796,zri
27,zri_lag11,0.03075,0.03075,zri
23,zri_lag7,0.020821,0.020821,zri
26,zri_lag10,0.017657,0.017657,zri
20,zri_lag4,0.013095,0.013095,zri
47,percent_workforce_unemployed_lag6,0.011875,0.011875,percent_workforce_unemployed
46,percent_workforce_unemployed_lag1,0.011182,-0.011182,percent_workforce_unemployed


In [91]:
feats_agg = feature_importances[['coef_abs','Parent_feature']]
feats_agg['coef_abs'] = feats_agg['coef_abs'].astype('float')
feats_agg_abs = feats_agg.groupby('Parent_feature').agg('mean').sort_values('coef_abs',ascending=False)
feats_agg_abs

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  feats_agg['coef_abs'] = feats_agg['coef_abs'].astype('float')


Unnamed: 0_level_0,coef_abs
Parent_feature,Unnamed: 1_level_1
zri,0.190395
percent_workforce_unemployed,0.004869
median_building_age,0.003706
income_per_capita,0.003297
total_pop,0.002309
percent_white,0.001529
poverty_rate,0.000909
percent_native_am,0.000872
percent_units_owner_occupied,0.000785
percent_highschool,0.000747


In [92]:
feats_agg = feature_importances[['coef','Parent_feature']]
feats_agg['coef'] = feats_agg['coef'].astype('float')
feats_agg_sign = feats_agg.groupby('Parent_feature').agg('mean').sort_values('coef',ascending=False)
feats_agg_sign

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  feats_agg['coef'] = feats_agg['coef'].astype('float')


Unnamed: 0_level_0,coef
Parent_feature,Unnamed: 1_level_1
zri,0.082516
median_building_age,0.002537
poverty_rate,0.000909
income_per_capita,0.00066
percent_work_from_home,0.000553
median_age,0.000518
percent_rental_units_occupied,0.000287
total_pop,0.000263
percent_associates,0.000243
percent_buildings_50+_units,0.000203


In [93]:
title = '02_predicted_Final_Auto_ACS'
fi = title+'_FI'
fi2 = title+'_FI_Agg'
rstable.to_csv(f'../../../zillow_orientation/Residuals/{title}.csv')
feature_importances.to_csv(f'../../../zillow_orientation/Residuals/{fi}.csv')
feats_agg_abs.to_csv(f'../../../zillow_orientation/Residuals/{fi2}.csv')

In [None]:
# BEST AUTO + ACS 
# Lasso(alpha=0.0001)
# R2: 98.71343352758063
# RMSE: 75.46409446867541


















# 63 features total 
# ['percent_buildings_50+_units', 'percent_associates',
#        'percent_rental_units_occupied', 'percent_white', 'percent_highschool',
#        'percent_work_from_home', 'percent_buildings_20_49_units',
#        'median_building_age', 'median_age', 'percent_commute_public_transport',
#        'percent_buildings_10_19_units', 'income_per_capita',
#        'percent_native_am', 'percent_workforce_unemployed', 'poverty_rate',
#        'percent_units_owner_occupied', 'total_pop', 'zri_lag1', 'zri_lag2',
#        'zri_lag3', 'zri_lag4', 'zri_lag5', 'zri_lag6', 'zri_lag7', 'zri_lag8',
#        'zri_lag9', 'zri_lag10', 'zri_lag11', 'zri_diff_lag1_lag12',
#        'percent_buildings_50+_units_diff_lag1_lag12',
#        'percent_buildings_50+_units_lag12',
#        'percent_associates_diff_lag1_lag12', 'percent_associates_lag12',
#        'percent_rental_units_occupied_diff_lag1_lag12',
#        'percent_rental_units_occupied_lag12', 'percent_white_diff_lag1_lag12',
#        'percent_white_lag12', 'percent_highschool_diff_lag1_lag12',
#        'percent_highschool_lag12', 'percent_work_from_home_diff_lag1_lag12',
#        'percent_work_from_home_lag12',
#        'percent_buildings_20_49_units_diff_lag1_lag12',
#        'percent_buildings_20_49_units_lag12',
#        'median_building_age_diff_lag1_lag12', 'median_building_age_lag12',
#        'median_age_diff_lag1_lag12', 'median_age_lag12',
#        'percent_commute_public_transport_diff_lag1_lag12',
#        'percent_commute_public_transport_lag12',
#        'percent_buildings_10_19_units_diff_lag1_lag12',
#        'percent_buildings_10_19_units_lag12',
#        'income_per_capita_diff_lag1_lag12', 'income_per_capita_lag12',
#        'percent_native_am_diff_lag1_lag12', 'percent_native_am_lag12',
#        'percent_workforce_unemployed_diff_lag1_lag12',
#        'percent_workforce_unemployed_lag12', 'poverty_rate_diff_lag1_lag12',
#        'poverty_rate_lag12', 'percent_units_owner_occupied_diff_lag1_lag12',
#        'percent_units_owner_occupied_lag12', 'total_pop_diff_lag1_lag12',
#        'total_pop_lag12']

# R2: 98.68438523437717
# RMSE: 76.31125919884467
# Lasso(alpha=1e-05)





# 3. ZRI + ALL 

In [102]:
df_zri_all = df_scaled_no_lags 

In [42]:
# def lag_gen(df):
#     for i in range(1,12):
#         df=laggenerator(i, 'zri', df)
#     df=laggenerator_diff12(1, 'zri', df)
#     return df

In [43]:
#ZRI
df_zri_all = lag_gen(df_zri_all)

In [44]:
#ZRI
df_zri_all = lag_gen(df_zri_all)

#ACS
for col in acs_cols_keep:
    df_zri_all = laggenerator_diff12(1,col,df_zri_all)
    df_zri_all = laggenerator(1,col,df_zri_all)
    df_zri_all = laggenerator(6,col,df_zri_all)
    df_zri_all = laggenerator(12,col,df_zri_all)
    
#BIKESHARE AND ECONOMIC 
for col in bikeshare_cols_keep + economic_cols_keep:
    df_zri_all = laggenerator(1, col, df_zri_all)
    df_zri_all = laggenerator_diff12(1, col, df_zri_all)  
    
#TRENDS 
for col in trends_cols_keep:
    for i in range(1,4):
        df_zri_all=laggenerator(i, col, df_zri_all)
    #df_zri_all = laggenerator_diff12(1,col,df_zri_all)    
    

In [150]:
# #BIKESHARE AND ECONOMIC 
# for col in bikeshare_cols_keep + economic_cols_keep:
#     df_zri_all = laggenerator(1, col, df_zri_all)
#     df_zri_all = laggenerator_diff12(1, col, df_zri_all)

In [151]:
# #TRENDS 
# for col in trends_cols_keep:
#     for i in range(1,3):
#         df_zri_all=laggenerator(i, col, df_zri_all)
#         #df_zri_all=laggenerator_diff(i, col, df_zri_all)
#     #df_zri_all = laggenerator_diff12(1,col,df_zri_all)    

In [45]:
df_zri_all

Unnamed: 0,zip,City,State,Metro,CountyName,zri,year,month,year-month,percent_associates,percent_highschool,percent_units_owner_occupied,percent_buildings_10_19_units,percent_native_am,income_per_capita,percent_rental_units_occupied,percent_buildings_20_49_units,median_age,percent_work_from_home,percent_workforce_unemployed,percent_buildings_50+_units,percent_commute_public_transport,median_building_age,total_pop,poverty_rate,percent_white,bs_total_stations,startup_firms,state_local_perc,net_job_rate,flight tickets,political correctness,hashtag,fire,same sex marriage,layoff,unemployment,thrift,retirement,make america great again,pronouns,lululemon,job opportunities,fake news,wildfire,flood,getaway,lgbt,hurricane,euthanasia,gun_searches,places to go,starbucks,apartment_for_rent_searches,trader joe's,twitter,whole foods,multifamily_for_rent_searches,black lives matter,zri_lag1,zri_lag2,zri_lag3,zri_lag4,zri_lag5,zri_lag6,zri_lag7,zri_lag8,zri_lag9,zri_lag10,zri_lag11,zri_diff_lag1_lag12,percent_associates_diff_lag1_lag12,percent_associates_lag1,percent_associates_lag6,percent_associates_lag12,percent_highschool_diff_lag1_lag12,percent_highschool_lag1,percent_highschool_lag6,percent_highschool_lag12,percent_units_owner_occupied_diff_lag1_lag12,percent_units_owner_occupied_lag1,percent_units_owner_occupied_lag6,percent_units_owner_occupied_lag12,percent_buildings_10_19_units_diff_lag1_lag12,percent_buildings_10_19_units_lag1,percent_buildings_10_19_units_lag6,percent_buildings_10_19_units_lag12,percent_native_am_diff_lag1_lag12,percent_native_am_lag1,percent_native_am_lag6,percent_native_am_lag12,income_per_capita_diff_lag1_lag12,income_per_capita_lag1,income_per_capita_lag6,income_per_capita_lag12,percent_rental_units_occupied_diff_lag1_lag12,percent_rental_units_occupied_lag1,percent_rental_units_occupied_lag6,percent_rental_units_occupied_lag12,percent_buildings_20_49_units_diff_lag1_lag12,percent_buildings_20_49_units_lag1,percent_buildings_20_49_units_lag6,percent_buildings_20_49_units_lag12,median_age_diff_lag1_lag12,median_age_lag1,median_age_lag6,median_age_lag12,percent_work_from_home_diff_lag1_lag12,percent_work_from_home_lag1,percent_work_from_home_lag6,percent_work_from_home_lag12,percent_workforce_unemployed_diff_lag1_lag12,percent_workforce_unemployed_lag1,percent_workforce_unemployed_lag6,percent_workforce_unemployed_lag12,percent_buildings_50+_units_diff_lag1_lag12,percent_buildings_50+_units_lag1,percent_buildings_50+_units_lag6,percent_buildings_50+_units_lag12,percent_commute_public_transport_diff_lag1_lag12,percent_commute_public_transport_lag1,percent_commute_public_transport_lag6,percent_commute_public_transport_lag12,median_building_age_diff_lag1_lag12,median_building_age_lag1,median_building_age_lag6,median_building_age_lag12,total_pop_diff_lag1_lag12,total_pop_lag1,total_pop_lag6,total_pop_lag12,poverty_rate_diff_lag1_lag12,poverty_rate_lag1,poverty_rate_lag6,poverty_rate_lag12,percent_white_diff_lag1_lag12,percent_white_lag1,percent_white_lag6,percent_white_lag12,bs_total_stations_lag1,bs_total_stations_diff_lag1_lag12,startup_firms_lag1,startup_firms_diff_lag1_lag12,state_local_perc_lag1,state_local_perc_diff_lag1_lag12,net_job_rate_lag1,net_job_rate_diff_lag1_lag12,flight tickets_lag1,flight tickets_lag2,flight tickets_lag3,political correctness_lag1,political correctness_lag2,political correctness_lag3,hashtag_lag1,hashtag_lag2,hashtag_lag3,fire_lag1,fire_lag2,fire_lag3,same sex marriage_lag1,same sex marriage_lag2,same sex marriage_lag3,layoff_lag1,layoff_lag2,layoff_lag3,unemployment_lag1,unemployment_lag2,unemployment_lag3,thrift_lag1,thrift_lag2,thrift_lag3,retirement_lag1,retirement_lag2,retirement_lag3,make america great again_lag1,make america great again_lag2,make america great again_lag3,pronouns_lag1,pronouns_lag2,pronouns_lag3,lululemon_lag1,lululemon_lag2,lululemon_lag3,job opportunities_lag1,job opportunities_lag2,job opportunities_lag3,fake news_lag1,fake news_lag2,fake news_lag3,wildfire_lag1,wildfire_lag2,wildfire_lag3,flood_lag1,flood_lag2,flood_lag3,getaway_lag1,getaway_lag2,getaway_lag3,lgbt_lag1,lgbt_lag2,lgbt_lag3,hurricane_lag1,hurricane_lag2,hurricane_lag3,euthanasia_lag1,euthanasia_lag2,euthanasia_lag3,gun_searches_lag1,gun_searches_lag2,gun_searches_lag3,places to go_lag1,places to go_lag2,places to go_lag3,starbucks_lag1,starbucks_lag2,starbucks_lag3,apartment_for_rent_searches_lag1,apartment_for_rent_searches_lag2,apartment_for_rent_searches_lag3,trader joe's_lag1,trader joe's_lag2,trader joe's_lag3,twitter_lag1,twitter_lag2,twitter_lag3,whole foods_lag1,whole foods_lag2,whole foods_lag3,multifamily_for_rent_searches_lag1,multifamily_for_rent_searches_lag2,multifamily_for_rent_searches_lag3,black lives matter_lag1,black lives matter_lag2,black lives matter_lag3
0,1013,Chicopee,MA,Springfield,Hampden County,-1.357844,2014,1,2014-01-01,0.528917,-0.002505,2.353991,-0.919095,-1.631316,-2.029337,-2.023827,0.991546,1.274909,-1.390903,-0.969092,-2.462342,-0.816386,-1.378641,0.294540,0.946046,1.597214,0.0,0.386305,1.779482,-0.387187,,,,,,,,,,,,,,,,,,,,,0.035080,,,1.108220,,,,0.545331,,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,1013,Chicopee,MA,Springfield,Hampden County,-1.313356,2014,2,2014-02-01,0.521445,-0.041369,2.227519,-0.922701,-1.631316,-1.903530,-1.913896,1.039871,1.304083,-1.292497,-0.965910,-2.211482,-0.862803,-1.378641,0.119927,1.011503,1.477034,0.0,0.353815,1.728516,-0.275534,,,,,,,,,,,,,,,,,,,,,0.035080,,,1.108220,,,,0.545331,,-1.357844,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.528917,0.000000,0.000000,0.000000,-0.002505,0.000000,0.000000,0.000000,2.353991,0.000000,0.000000,0.000000,-0.919095,0.000000,0.000000,0.000000,-1.631316,0.000000,0.000000,0.000000,-2.029337,0.000000,0.000000,0.000000,-2.023827,0.000000,0.000000,0.000000,0.991546,0.000000,0.000000,0.000000,1.274909,0.000000,0.000000,0.000000,-1.390903,0.000000,0.000000,0.000000,-0.969092,0.000000,0.000000,0.000000,-2.462342,0.000000,0.000000,0.000000,-0.816386,0.000000,0.000000,0.000000,-1.378641,0.000000,0.000000,0.000000,0.294540,0.000000,0.000000,0.000000,0.946046,0.000000,0.000000,0.000000,1.597214,0.000000,0.000000,0.0,0.0,0.386305,0.000000,1.779482,0.000000,-0.387187,0.000000,,0.000000,0.000000,,0.0,0.0,,0.000000,0.000000,,0.000000,0.000000,,0.000000,0.000000,,0.000000,0.000000,,0.000000,0.000000,,0.000000,0.000000,,0.000000,0.000000,,0.000000,0.000000,,0.000000,0.000000,,0.000000,0.000000,,0.0,0.0,,0.000000,0.000000,,0.000000,0.000000,,0.000000,0.000000,,0.000000,0.000000,,0.000000,0.000000,,0.000000,0.000000,,0.000000,0.000000,0.035080,0.000000,0.000000,,0.000000,0.000000,,0.000000,0.000000,1.108220,0.000000,0.000000,,0.000000,0.000000,,0.000000,0.000000,,0.000000,0.000000,0.545331,0.000000,0.000000,,0.000000,0.000000
2,1013,Chicopee,MA,Springfield,Hampden County,-1.269012,2014,3,2014-03-01,0.513972,-0.080233,2.101046,-0.926308,-1.631316,-1.777723,-1.803966,1.088197,1.333257,-1.194090,-0.962727,-1.960621,-0.909219,-1.378641,-0.054685,1.076959,1.356854,0.0,0.321325,1.677550,-0.163880,,,,,,,,,,,,,,,,,,,,,0.035080,,,1.108220,,,,0.545331,,-1.313356,-1.357844,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.521445,0.000000,0.000000,0.000000,-0.041369,0.000000,0.000000,0.000000,2.227519,0.000000,0.000000,0.000000,-0.922701,0.000000,0.000000,0.000000,-1.631316,0.000000,0.000000,0.000000,-1.903530,0.000000,0.000000,0.000000,-1.913896,0.000000,0.000000,0.000000,1.039871,0.000000,0.000000,0.000000,1.304083,0.000000,0.000000,0.000000,-1.292497,0.000000,0.000000,0.000000,-0.965910,0.000000,0.000000,0.000000,-2.211482,0.000000,0.000000,0.000000,-0.862803,0.000000,0.000000,0.000000,-1.378641,0.000000,0.000000,0.000000,0.119927,0.000000,0.000000,0.000000,1.011503,0.000000,0.000000,0.000000,1.477034,0.000000,0.000000,0.0,0.0,0.353815,0.000000,1.728516,0.000000,-0.275534,0.000000,,,0.000000,,,0.0,,,0.000000,,,0.000000,,,0.000000,,,0.000000,,,0.000000,,,0.000000,,,0.000000,,,0.000000,,,0.000000,,,0.000000,,,0.0,,,0.000000,,,0.000000,,,0.000000,,,0.000000,,,0.000000,,,0.000000,,,0.000000,0.035080,0.035080,0.000000,,,0.000000,,,0.000000,1.108220,1.108220,0.000000,,,0.000000,,,0.000000,,,0.000000,0.545331,0.545331,0.000000,,,0.000000
3,1013,Chicopee,MA,Springfield,Hampden County,-1.342998,2014,4,2014-04-01,0.506500,-0.119097,1.974574,-0.929914,-1.631316,-1.651916,-1.694035,1.136522,1.362431,-1.095684,-0.959545,-1.709761,-0.955636,-1.378641,-0.229298,1.142416,1.236675,0.0,0.288835,1.626584,-0.052227,,,,,,,,,,,,,,,,,,,,,0.035080,,,1.108220,,,,0.545331,,-1.269012,-1.313356,-1.357844,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.513972,0.000000,0.000000,0.000000,-0.080233,0.000000,0.000000,0.000000,2.101046,0.000000,0.000000,0.000000,-0.926308,0.000000,0.000000,0.000000,-1.631316,0.000000,0.000000,0.000000,-1.777723,0.000000,0.000000,0.000000,-1.803966,0.000000,0.000000,0.000000,1.088197,0.000000,0.000000,0.000000,1.333257,0.000000,0.000000,0.000000,-1.194090,0.000000,0.000000,0.000000,-0.962727,0.000000,0.000000,0.000000,-1.960621,0.000000,0.000000,0.000000,-0.909219,0.000000,0.000000,0.000000,-1.378641,0.000000,0.000000,0.000000,-0.054685,0.000000,0.000000,0.000000,1.076959,0.000000,0.000000,0.000000,1.356854,0.000000,0.000000,0.0,0.0,0.321325,0.000000,1.677550,0.000000,-0.163880,0.000000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.035080,0.035080,0.035080,,,,,,,1.108220,1.108220,1.108220,,,,,,,,,,0.545331,0.545331,0.545331,,,
4,1013,Chicopee,MA,Springfield,Hampden County,-1.342998,2014,5,2014-05-01,0.499028,-0.157962,1.848102,-0.933520,-1.631316,-1.526109,-1.584105,1.184847,1.391605,-0.997278,-0.956363,-1.458900,-1.002052,-1.378641,-0.403910,1.207872,1.116495,0.0,0.256345,1.575618,0.059427,,,,,,,,,,,,,,,,,,,,,0.035080,,,1.108220,,,,0.545331,,-1.342998,-1.269012,-1.313356,-1.357844,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.506500,0.000000,0.000000,0.000000,-0.119097,0.000000,0.000000,0.000000,1.974574,0.000000,0.000000,0.000000,-0.929914,0.000000,0.000000,0.000000,-1.631316,0.000000,0.000000,0.000000,-1.651916,0.000000,0.000000,0.000000,-1.694035,0.000000,0.000000,0.000000,1.136522,0.000000,0.000000,0.000000,1.362431,0.000000,0.000000,0.000000,-1.095684,0.000000,0.000000,0.000000,-0.959545,0.000000,0.000000,0.000000,-1.709761,0.000000,0.000000,0.000000,-0.955636,0.000000,0.000000,0.000000,-1.378641,0.000000,0.000000,0.000000,-0.229298,0.000000,0.000000,0.000000,1.142416,0.000000,0.000000,0.000000,1.236675,0.000000,0.000000,0.0,0.0,0.288835,0.000000,1.626584,0.000000,-0.052227,0.000000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.035080,0.035080,0.035080,,,,,,,1.108220,1.108220,1.108220,,,,,,,,,,0.545331,0.545331,0.545331,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93667,99654,Wasilla,AK,Anchorage,Matanuska Susitna Borough,0.007888,2019,8,2019-08-01,1.361233,-1.621183,-1.273137,1.333320,-2.488448,1.711605,-3.152414,-2.044123,2.650424,2.320791,-0.640881,-0.764931,-1.052928,2.500834,2.570895,-1.922102,-3.923321,0.0,2.096633,-0.960072,-2.539234,0.191792,0.0,-1.262634,-0.026856,-0.511872,-0.558584,-1.225972,0.028606,-1.229718,-0.347441,3.436105,-0.338743,0.0,-0.649002,0.288231,1.192079,1.916550,0.502895,0.808896,-0.590624,-0.210512,-0.204284,0.996066,0.520563,-0.875945,-1.264465,1.178159,0.332005,-0.474408,0.263069,0.317390,0.353534,0.551336,0.747480,0.800690,0.941994,1.152364,1.064940,1.047415,1.082451,-0.819382,-0.170170,1.452892,1.911189,1.623063,-0.027450,-1.642434,-1.748692,-1.614984,-0.075777,-1.274650,-1.282214,-1.198873,-0.147695,1.366186,1.530511,1.513881,-1.458016,-2.365089,-1.748294,-0.907073,0.384164,1.685994,1.557939,1.301830,-3.076951,-2.632065,-0.030319,0.444886,-0.381402,-2.034261,-1.984952,-1.652859,0.908990,2.602583,2.363375,1.693593,1.486661,2.220551,1.719349,0.733889,0.245645,-0.604935,-0.425206,-0.850579,-0.500164,-0.768740,-0.787783,-0.268576,-1.402054,-0.849373,0.168402,0.552681,2.800934,2.100700,0.100033,-0.700233,0.880857,2.502367,2.159724,1.621509,-0.310317,-2.000512,-2.392567,-1.690196,-1.697230,-3.811142,-3.250250,-2.113912,0.0,0.0,2.096633,0.552417,-0.612796,-1.971064,-2.539234,-1.132889,0.191792,-0.912930,0.191792,0.0,0.0,0.0,-0.475538,-1.262634,-0.475538,0.214847,-0.429695,-0.187991,-0.511872,-0.511872,-0.511872,-0.558584,-0.558584,-0.558584,-0.709773,-0.795806,-1.828204,-0.246012,1.676312,0.028606,-0.977468,-0.388885,-0.052552,-0.347441,-0.347441,-0.347441,0.087211,0.087211,1.761658,0.804514,-0.973886,-1.354972,0.0,0.0,0.0,0.092715,-0.649002,0.092715,0.288231,-0.480384,-0.480384,-0.397360,-0.397360,-0.397360,-1.040413,-1.040413,-1.040413,-1.106370,2.648582,2.648582,-0.082540,-0.181589,-0.280637,1.771873,-0.590624,-0.590624,-0.210512,-2.093527,0.612958,0.549995,-0.204284,-0.204284,1.436797,1.065768,0.542131,-1.212190,-1.212190,-0.172538,2.702385,-0.875945,0.465928,0.118881,-0.918628,-1.005087,0.266036,-0.342046,0.266036,0.453891,-0.090222,-0.634336,0.208740,-0.474408,-0.474408
93668,99654,Wasilla,AK,Anchorage,Matanuska Susitna Borough,-0.102332,2019,9,2019-09-01,1.269573,-1.599931,-1.271625,1.300455,-2.611808,1.737216,-3.672763,-2.053985,2.698266,2.421031,-0.676827,-0.761122,-1.256484,2.900967,2.639424,-1.843691,-4.035499,0.0,2.096633,-1.307347,-2.539234,1.296514,0.0,-1.262634,-0.671398,-0.511872,0.903891,-0.451674,-0.932556,-0.557052,-0.347441,-0.750013,-1.227943,0.0,1.576147,-0.480384,-0.397360,-1.040413,0.502895,2.393672,-0.590624,-1.544770,-0.204284,2.371056,-0.172538,0.465928,0.032422,0.570077,-0.997079,0.208740,0.007888,0.263069,0.317390,0.353534,0.551336,0.747480,0.800690,0.941994,1.152364,1.064940,1.047415,-1.074562,-0.379691,1.361233,1.819530,1.740924,0.037854,-1.621183,-1.727441,-1.659037,-0.052673,-1.273137,-1.280701,-1.220464,-0.201150,1.333320,1.497646,1.534471,-1.432750,-2.488448,-1.871653,-1.055699,0.358553,1.711605,1.583550,1.353052,-3.738673,-3.152414,-0.550668,0.586259,-0.313172,-2.044123,-1.994814,-1.730951,0.813307,2.650424,2.411216,1.837117,1.390657,2.320791,1.819589,0.930134,0.085382,-0.640881,-0.461152,-0.726263,-0.364649,-0.764931,-0.783974,-0.400282,-1.611317,-1.052928,-0.035153,0.558389,3.201067,2.500834,0.500167,-0.700233,0.849097,2.570895,2.228253,1.721799,-0.017108,-1.922102,-2.314156,-1.904994,-1.581414,-3.923321,-3.362428,-2.341907,0.0,0.0,2.096633,0.441933,-0.960072,-2.433306,-2.539234,-0.906311,0.191792,0.191792,-0.912930,0.0,0.0,0.0,-1.262634,-0.475538,-1.262634,-0.026856,0.214847,-0.429695,-0.511872,-0.511872,-0.511872,-0.558584,-0.558584,-0.558584,-1.225972,-0.709773,-0.795806,0.028606,-0.246012,1.676312,-1.229718,-0.977468,-0.388885,-0.347441,-0.347441,-0.347441,3.436105,0.087211,0.087211,-0.338743,0.804514,-0.973886,0.0,0.0,0.0,-0.649002,0.092715,-0.649002,0.288231,0.288231,-0.480384,1.192079,-0.397360,-0.397360,1.916550,-1.040413,-1.040413,0.502895,-1.106370,2.648582,0.808896,-0.082540,-0.181589,-0.590624,1.771873,-0.590624,-0.210512,-0.210512,-2.093527,-0.204284,0.549995,-0.204284,0.996066,1.436797,1.065768,0.520563,-1.212190,-1.212190,-0.875945,2.702385,-0.875945,-1.264465,0.118881,-0.918628,1.178159,0.266036,-0.342046,0.332005,0.453891,-0.090222,-0.474408,0.208740,-0.474408
93669,99654,Wasilla,AK,Anchorage,Matanuska Susitna Borough,0.407646,2019,10,2019-10-01,1.177914,-1.578679,-1.270112,1.267590,-2.735167,1.762827,-4.193113,-2.063847,2.746108,2.521272,-0.712772,-0.757313,-1.460039,3.301101,2.707952,-1.765280,-4.147678,0.0,2.096633,-1.654623,-2.539234,-0.912930,0.0,-0.475538,-0.913101,-0.511872,-0.558584,-0.451674,0.989768,0.788281,0.887905,0.087211,-0.338743,0.0,-0.649002,-0.480384,-1.986799,0.602344,1.039317,0.511750,0.354375,-2.093527,0.298568,2.709389,-1.212190,0.465928,-0.659251,0.266036,0.332005,-0.474408,-0.102332,0.007888,0.263069,0.317390,0.353534,0.551336,0.747480,0.800690,0.941994,1.152364,1.064940,-1.149747,-0.589212,1.269573,1.727871,1.858785,0.103159,-1.599931,-1.706189,-1.703090,-0.029569,-1.271625,-1.279188,-1.242056,-0.254606,1.300455,1.464781,1.555061,-1.407483,-2.611808,-1.995012,-1.204324,0.332942,1.737216,1.609161,1.404274,-4.400396,-3.672763,-1.071017,0.727632,-0.244942,-2.053985,-2.004676,-1.809044,0.717624,2.698266,2.459058,1.980642,1.294653,2.421031,1.919829,1.126379,-0.074880,-0.676827,-0.497097,-0.601947,-0.229134,-0.761122,-0.780166,-0.531988,-1.820580,-1.256484,-0.238708,0.564097,3.601201,2.900967,0.900300,-0.700233,0.817336,2.639424,2.296781,1.822088,0.276101,-1.843691,-2.235745,-2.119792,-1.465597,-4.035499,-3.474607,-2.569903,0.0,0.0,2.096633,0.331450,-1.307347,-2.895548,-2.539234,-0.679734,1.296514,0.191792,0.191792,0.0,0.0,0.0,-1.262634,-1.262634,-0.475538,-0.671398,-0.026856,0.214847,-0.511872,-0.511872,-0.511872,0.903891,-0.558584,-0.558584,-0.451674,-1.225972,-0.709773,-0.932556,0.028606,-0.246012,-0.557052,-1.229718,-0.977468,-0.347441,-0.347441,-0.347441,-0.750013,3.436105,0.087211,-1.227943,-0.338743,0.804514,0.0,0.0,0.0,1.576147,-0.649002,0.092715,-0.480384,0.288231,0.288231,-0.397360,1.192079,-0.397360,-1.040413,1.916550,-1.040413,0.502895,0.502895,-1.106370,2.393672,0.808896,-0.082540,-0.590624,-0.590624,1.771873,-1.544770,-0.210512,-0.210512,-0.204284,-0.204284,0.549995,2.371056,0.996066,1.436797,-0.172538,0.520563,-1.212190,0.465928,-0.875945,2.702385,0.032422,-1.264465,0.118881,0.570077,1.178159,0.266036,-0.997079,0.332005,0.453891,0.208740,-0.474408,0.208740
93670,99654,Wasilla,AK,Anchorage,Matanuska Susitna Borough,-0.277916,2019,11,2019-11-01,1.086254,-1.557428,-1.268599,1.234725,-2.858526,1.788438,-4.713462,-2.073709,2.793949,2.621512,-0.748718,-0.753505,-1.663594,3.701234,2.776481,-1.686869,-4.259856,0.0,2.096633,-2.001899,-2.539234,0.191792,0.0,-1.262634,-0.268559,-0.511872,0.416399,-0.709773,-0.795247,-0.557052,-0.347441,-0.750013,0.804514,0.0,0.092715,0.288231,-0.397360,-1.040413,-0.033526,-0.280637,2.716872,1.606467,-0.204284,4.197635,-0.519089,2.255093,-0.745710,0.874118,-0.534222,-0.474408,0.407646,-0.102332,0.007888,0.263069,0.317390,0.353534,0.551336,0.747480,0.800690,0.941994,1.152364,-0.657293,-0.798733,1.177914,1.636211,1.976647,0.168463,-1.578679,-1.684937,-1.747143,-0.006465,-1.270112,-1.277676,-1.263648,-0.308061,1.267590,1.431916,1.575652,-1.382216,-2.735167,-2.118371,-1.352950,0.307331,1.762827,1.634772,1.455496,-5.062118,-4.193113,-1.591367,0.869006,-0.176711,-2.063847,-2.014538,-1.887136,0.621941,2.746108,2.506900,2.124167,1.198649,2.521272,2.020070,1.322623,-0.235142,-0.712772,-0.533043,-0.477630,-0.093619,-0.757313,-0.776357,-0.663694,-2.029843,-1.460039,-0.442263,0.569805,4.001334,3.301101,1.300434,-0.700233,0.785575,2.707952,2.365310,1.922378,0.569310,-1.765280,-2.157334,-2.334590,-1.349780,-4.147678,-3.586785,-2.797898,0.0,0.0,2.096633,0.220967,-1.654623,-3.357790,-2.539234,-0.453156,-0.912930,1.296514,0.191792,0.0,0.0,0.0,-0.475538,-1.262634,-1.262634,-0.913101,-0.671398,-0.026856,-0.511872,-0.511872,-0.511872,-0.558584,0.903891,-0.558584,-0.451674,-0.451674,-1.225972,0.989768,-0.932556,0.028606,0.788281,-0.557052,-1.229718,0.887905,-0.347441,-0.347441,0.087211,-0.750013,3.436105,-0.338743,-1.227943,-0.338743,0.0,0.0,0.0,-0.649002,1.576147,-0.649002,-0.480384,-0.480384,0.288231,-1.986799,-0.397360,1.192079,0.602344,-1.040413,1.916550,1.039317,0.502895,0.502895,0.511750,2.393672,0.808896,0.354375,-0.590624,-0.590624,-2.093527,-1.544770,-0.210512,0.298568,-0.204284,-0.204284,2.709389,2.371056,0.996066,-1.212190,-0.172538,0.520563,0.465928,0.465928,-0.875945,-0.659251,0.032422,-1.264465,0.266036,0.570077,1.178159,0.332005,-0.997079,0.332005,-0.474408,0.208740,-0.474408


In [49]:
df_zri_all = df_zri_all[df_zri_all['year-month']>='2015-04-01']

In [100]:
df = df_zri_all[df_zri_all['year-month']>='2015-04-01']
train = df[(df['year'] < 2019) & (df['year']>=2015)]
test = df[df['year']==2019]
save_train = train.copy()
save_test = test.copy()

train = train.drop(['zip','City','State','Metro','CountyName','year','month','year-month'],axis=1)
train_y = train['zri']
train_X = train.drop(['zri'],axis=1)

test_y = test['zri']
test_X = test
#test_X = test.drop(['zri'],axis=1)

In [101]:
train

Unnamed: 0,zri,percent_associates,percent_highschool,percent_units_owner_occupied,percent_buildings_10_19_units,percent_native_am,income_per_capita,percent_rental_units_occupied,percent_buildings_20_49_units,median_age,percent_work_from_home,percent_workforce_unemployed,percent_buildings_50+_units,percent_commute_public_transport,median_building_age,total_pop,poverty_rate,percent_white,bs_total_stations,startup_firms,state_local_perc,net_job_rate,flight tickets,political correctness,hashtag,fire,same sex marriage,layoff,unemployment,thrift,retirement,make america great again,pronouns,lululemon,job opportunities,fake news,wildfire,flood,getaway,lgbt,hurricane,euthanasia,gun_searches,places to go,starbucks,apartment_for_rent_searches,trader joe's,twitter,whole foods,multifamily_for_rent_searches,black lives matter,zri_lag1,zri_lag2,zri_lag3,zri_lag4,zri_lag5,zri_lag6,zri_lag7,zri_lag8,zri_lag9,zri_lag10,zri_lag11,zri_diff_lag1_lag12,percent_associates_diff_lag1_lag12,percent_associates_lag1,percent_associates_lag6,percent_associates_lag12,percent_highschool_diff_lag1_lag12,percent_highschool_lag1,percent_highschool_lag6,percent_highschool_lag12,percent_units_owner_occupied_diff_lag1_lag12,percent_units_owner_occupied_lag1,percent_units_owner_occupied_lag6,percent_units_owner_occupied_lag12,percent_buildings_10_19_units_diff_lag1_lag12,percent_buildings_10_19_units_lag1,percent_buildings_10_19_units_lag6,percent_buildings_10_19_units_lag12,percent_native_am_diff_lag1_lag12,percent_native_am_lag1,percent_native_am_lag6,percent_native_am_lag12,income_per_capita_diff_lag1_lag12,income_per_capita_lag1,income_per_capita_lag6,income_per_capita_lag12,percent_rental_units_occupied_diff_lag1_lag12,percent_rental_units_occupied_lag1,percent_rental_units_occupied_lag6,percent_rental_units_occupied_lag12,percent_buildings_20_49_units_diff_lag1_lag12,percent_buildings_20_49_units_lag1,percent_buildings_20_49_units_lag6,percent_buildings_20_49_units_lag12,median_age_diff_lag1_lag12,median_age_lag1,median_age_lag6,median_age_lag12,percent_work_from_home_diff_lag1_lag12,percent_work_from_home_lag1,percent_work_from_home_lag6,percent_work_from_home_lag12,percent_workforce_unemployed_diff_lag1_lag12,percent_workforce_unemployed_lag1,percent_workforce_unemployed_lag6,percent_workforce_unemployed_lag12,percent_buildings_50+_units_diff_lag1_lag12,percent_buildings_50+_units_lag1,percent_buildings_50+_units_lag6,percent_buildings_50+_units_lag12,percent_commute_public_transport_diff_lag1_lag12,percent_commute_public_transport_lag1,percent_commute_public_transport_lag6,percent_commute_public_transport_lag12,median_building_age_diff_lag1_lag12,median_building_age_lag1,median_building_age_lag6,median_building_age_lag12,total_pop_diff_lag1_lag12,total_pop_lag1,total_pop_lag6,total_pop_lag12,poverty_rate_diff_lag1_lag12,poverty_rate_lag1,poverty_rate_lag6,poverty_rate_lag12,percent_white_diff_lag1_lag12,percent_white_lag1,percent_white_lag6,percent_white_lag12,bs_total_stations_lag1,bs_total_stations_diff_lag1_lag12,startup_firms_lag1,startup_firms_diff_lag1_lag12,state_local_perc_lag1,state_local_perc_diff_lag1_lag12,net_job_rate_lag1,net_job_rate_diff_lag1_lag12,flight tickets_lag1,flight tickets_lag2,flight tickets_lag3,political correctness_lag1,political correctness_lag2,political correctness_lag3,hashtag_lag1,hashtag_lag2,hashtag_lag3,fire_lag1,fire_lag2,fire_lag3,same sex marriage_lag1,same sex marriage_lag2,same sex marriage_lag3,layoff_lag1,layoff_lag2,layoff_lag3,unemployment_lag1,unemployment_lag2,unemployment_lag3,thrift_lag1,thrift_lag2,thrift_lag3,retirement_lag1,retirement_lag2,retirement_lag3,make america great again_lag1,make america great again_lag2,make america great again_lag3,pronouns_lag1,pronouns_lag2,pronouns_lag3,lululemon_lag1,lululemon_lag2,lululemon_lag3,job opportunities_lag1,job opportunities_lag2,job opportunities_lag3,fake news_lag1,fake news_lag2,fake news_lag3,wildfire_lag1,wildfire_lag2,wildfire_lag3,flood_lag1,flood_lag2,flood_lag3,getaway_lag1,getaway_lag2,getaway_lag3,lgbt_lag1,lgbt_lag2,lgbt_lag3,hurricane_lag1,hurricane_lag2,hurricane_lag3,euthanasia_lag1,euthanasia_lag2,euthanasia_lag3,gun_searches_lag1,gun_searches_lag2,gun_searches_lag3,places to go_lag1,places to go_lag2,places to go_lag3,starbucks_lag1,starbucks_lag2,starbucks_lag3,apartment_for_rent_searches_lag1,apartment_for_rent_searches_lag2,apartment_for_rent_searches_lag3,trader joe's_lag1,trader joe's_lag2,trader joe's_lag3,twitter_lag1,twitter_lag2,twitter_lag3,whole foods_lag1,whole foods_lag2,whole foods_lag3,multifamily_for_rent_searches_lag1,multifamily_for_rent_searches_lag2,multifamily_for_rent_searches_lag3,black lives matter_lag1,black lives matter_lag2,black lives matter_lag3
15,-0.919302,-0.517894,0.510601,0.442235,-0.784162,-0.626863,-0.696082,-0.703862,1.102507,0.953994,-0.429803,-0.374189,0.390487,-0.513948,-1.053298,-1.268994,1.002036,0.456043,0.0,-0.120538,0.811218,0.090480,-0.227195,0.0,0.281387,-0.096077,-0.440574,-0.553454,1.341937,0.671317,-0.275328,-0.603434,-1.325838,-0.576335,-1.025955,-0.638096,-0.512989,-0.497701,-1.031018,-1.136990,-0.276915,-0.104189,-0.057413,-1.487107,-0.605309,-0.613506,2.120461,2.799236,2.198006,-0.017017,-0.858034,-0.919302,-1.034889,-1.093048,-1.107626,-1.093048,-1.093048,-1.093048,-1.180749,-1.328169,-1.342998,-1.342998,0.423697,-0.783240,-0.276740,0.461667,0.506500,0.394545,0.275447,-0.352283,-0.119097,-1.402199,0.572375,1.215740,1.974574,0.102101,-0.827813,-0.951552,-0.929914,0.753340,-0.877976,-1.631316,-1.631316,0.968490,-0.683427,-0.897075,-1.651916,0.962491,-0.731544,-1.034452,-1.694035,0.071139,1.207661,1.426474,1.136522,-0.247980,1.114451,1.537476,1.362431,0.696224,-0.399460,-0.505246,-1.095684,0.445382,-0.514163,-0.940451,-0.959545,2.076907,0.367146,-0.204598,-1.709761,0.238433,-0.717203,-1.234136,-0.955636,0.244007,-1.134634,-1.378641,-1.378641,-1.128997,-1.358295,-1.276973,-0.229298,0.025629,1.168044,1.535155,1.142416,-0.825833,0.410842,0.515597,1.236675,0.0,0.0,-0.081550,-0.370385,0.913127,-0.713456,0.377871,0.430098,-0.227195,-0.227195,-0.954217,0.0,0.0,0.0,0.281387,0.281387,0.281387,1.441153,-1.633307,1.056846,0.163642,-0.440574,-0.440574,-0.553454,-0.553454,-0.553454,0.828175,1.855700,3.525428,0.464758,0.051640,0.671317,1.083301,-0.769375,-1.016399,-0.603434,-0.603434,-0.603434,0.947027,-1.325838,-1.325838,-0.953573,0.178140,-0.199098,-0.069725,1.364620,0.408390,0.587048,-0.638096,-0.638096,-0.512989,-0.512989,-0.512989,-0.497701,-1.824904,0.829502,-0.253706,1.041814,0.264502,-1.580693,-1.136990,-0.249583,-0.208751,-0.345079,-0.208751,1.396136,-0.104189,-0.104189,0.559572,1.098260,-1.771898,-1.023593,0.135192,-1.023593,-1.359315,-1.320676,-1.996372,-0.480670,0.894880,-1.647238,-0.546320,0.787071,-0.546320,2.055871,1.498346,1.126664,0.521434,1.479475,0.641189,0.298128,-0.017017,0.354973,-0.858034,-0.858034,-0.858034
16,-0.962533,-0.759049,0.745754,0.312095,-0.740511,-0.375750,-0.708737,-0.676179,0.997352,0.793536,-0.460145,-0.234214,0.413828,-0.310693,-0.971962,-1.179693,0.836029,0.501245,0.0,-0.159525,0.709308,-0.196912,0.136317,0.0,-1.219346,1.056846,0.163642,-0.553454,1.791479,-0.154919,1.453836,-0.603434,-1.325838,-0.324843,0.886505,0.587048,1.949359,0.829502,-0.771914,0.637823,-0.208751,-1.104406,0.435016,0.598706,0.104674,-1.231182,-0.101856,1.312505,-0.316852,-0.684141,-0.858034,-0.919302,-0.919302,-1.034889,-1.093048,-1.107626,-1.093048,-1.093048,-1.093048,-1.180749,-1.328169,-1.342998,0.423697,-1.016922,-0.517894,0.454195,0.499028,0.668562,0.510601,-0.391148,-0.157962,-1.405867,0.442235,1.089268,1.848102,0.149358,-0.784162,-0.955159,-0.933520,1.004453,-0.626863,-1.631316,-1.631316,0.830028,-0.696082,-0.771268,-1.526109,0.880243,-0.703862,-0.924522,-1.584105,-0.082341,1.102507,1.474799,1.184847,-0.437612,0.953994,1.566650,1.391605,0.567475,-0.429803,-0.406840,-0.997278,0.582174,-0.374189,-0.937269,-0.956363,1.849387,0.390487,0.046262,-1.458900,0.488105,-0.513948,-1.280552,-1.002052,0.325343,-1.053298,-1.378641,-1.378641,-0.865084,-1.268994,-1.451585,-0.403910,-0.205836,1.002036,1.600612,1.207872,-0.660452,0.456043,0.395417,1.116495,0.0,0.0,-0.120538,-0.376883,0.811218,-0.764400,0.090480,0.031053,-0.227195,-0.227195,-0.227195,0.0,0.0,0.0,0.281387,0.281387,0.281387,-0.096077,1.441153,-1.633307,-0.440574,0.163642,-0.440574,-0.553454,-0.553454,-0.553454,1.341937,0.828175,1.855700,0.671317,0.464758,0.051640,-0.275328,1.083301,-0.769375,-0.603434,-0.603434,-0.603434,-1.325838,0.947027,-1.325838,-0.576335,-0.953573,0.178140,-1.025955,-0.069725,1.364620,-0.638096,0.587048,-0.638096,-0.512989,-0.512989,-0.512989,-0.497701,-0.497701,-1.824904,-1.031018,-0.253706,1.041814,-1.136990,-1.580693,-1.136990,-0.276915,-0.208751,-0.345079,-0.104189,1.396136,-0.104189,-0.057413,0.559572,1.098260,-1.487107,-1.023593,0.135192,-0.605309,-1.359315,-1.320676,-0.613506,-0.480670,0.894880,2.120461,-0.546320,0.787071,2.799236,2.055871,1.498346,2.198006,0.521434,1.479475,-0.017017,0.298128,-0.017017,-0.858034,-0.858034,-0.858034
17,-1.034889,-1.000203,0.980907,0.181954,-0.696860,-0.124636,-0.721392,-0.648497,0.892198,0.633078,-0.490488,-0.094240,0.437169,-0.107437,-0.890627,-1.090392,0.670021,0.546446,0.0,-0.198513,0.607399,-0.484303,-0.954217,0.0,0.281387,-0.864692,-0.440574,-0.553454,0.699734,-2.220510,-1.016399,-0.603434,0.378811,0.052394,1.364620,-0.638096,-0.512989,-0.497701,1.300917,-0.693286,-0.208751,-1.104406,-0.346881,0.135192,-1.367675,0.717490,0.120376,0.383298,0.401679,1.337241,-0.858034,-0.962533,-0.919302,-0.919302,-1.034889,-1.093048,-1.107626,-1.093048,-1.093048,-1.093048,-1.180749,-1.328169,0.380465,-1.250605,-0.759049,0.446723,0.491556,0.942580,0.745754,-0.430012,-0.196826,-1.409535,0.312095,0.962796,1.721630,0.196616,-0.740511,-0.958765,-0.937127,1.255566,-0.375750,-1.631316,-1.631316,0.691566,-0.708737,-0.645461,-1.400302,0.797995,-0.676179,-0.814591,-1.474174,-0.235820,0.997352,1.523125,1.233173,-0.627243,0.793536,1.595824,1.420779,0.438726,-0.460145,-0.308433,-0.898871,0.718966,-0.234214,-0.934087,-0.953180,1.621868,0.413828,0.297123,-1.208040,0.737776,-0.310693,-1.326969,-1.048469,0.406679,-0.971962,-1.378641,-1.378641,-0.601170,-1.179693,-1.626198,-0.578523,-0.437300,0.836029,1.666068,1.273329,-0.495071,0.501245,0.275238,0.996315,0.0,0.0,-0.159525,-0.383381,0.709308,-0.815343,-0.196912,-0.367991,0.136317,-0.227195,-0.227195,0.0,0.0,0.0,-1.219346,0.281387,0.281387,1.056846,-0.096077,1.441153,0.163642,-0.440574,0.163642,-0.553454,-0.553454,-0.553454,1.791479,1.341937,0.828175,-0.154919,0.671317,0.464758,1.453836,-0.275328,1.083301,-0.603434,-0.603434,-0.603434,-1.325838,-1.325838,0.947027,-0.324843,-0.576335,-0.953573,0.886505,-1.025955,-0.069725,0.587048,-0.638096,0.587048,1.949359,-0.512989,-0.512989,0.829502,-0.497701,-0.497701,-0.771914,-1.031018,-0.253706,0.637823,-1.136990,-1.580693,-0.208751,-0.276915,-0.208751,-1.104406,-0.104189,1.396136,0.435016,-0.057413,0.559572,0.598706,-1.487107,-1.023593,0.104674,-0.605309,-1.359315,-1.231182,-0.613506,-0.480670,-0.101856,2.120461,-0.546320,1.312505,2.799236,2.055871,-0.316852,2.198006,0.521434,-0.684141,-0.017017,0.298128,-0.858034,-0.858034,-0.858034
18,-1.063937,-1.241357,1.216060,0.051814,-0.653209,0.126477,-0.734047,-0.620815,0.787043,0.472621,-0.520830,0.045735,0.460510,0.095818,-0.809291,-1.001091,0.504013,0.591648,0.0,-0.237501,0.505489,-0.771694,2.680895,0.0,1.782120,1.441153,-0.440574,-0.553454,-0.070910,-1.394274,-0.769375,-0.603434,-1.325838,-1.456556,-0.069725,-0.638096,-0.512989,-0.497701,0.264502,-0.249583,-0.004260,0.395919,-0.990001,-0.791836,-0.530543,-0.723307,-0.768551,1.591267,0.401679,-1.152992,-0.858034,-1.034889,-0.962533,-0.919302,-0.919302,-1.034889,-1.093048,-1.107626,-1.093048,-1.093048,-1.093048,-1.180749,0.293281,-1.484287,-1.000203,0.205569,0.484084,1.216597,0.980907,-0.194859,-0.235690,-1.413203,0.181954,0.832656,1.595157,0.243873,-0.696860,-0.915114,-0.940733,1.506680,-0.124636,-1.380203,-1.631316,0.553104,-0.721392,-0.658116,-1.274495,0.715747,-0.648497,-0.786909,-1.364244,-0.389300,0.892198,1.417970,1.281498,-0.816875,0.633078,1.435366,1.449954,0.309978,-0.490488,-0.338776,-0.800465,0.855758,-0.094240,-0.794112,-0.949998,1.394348,0.437169,0.320464,-0.957179,0.987448,-0.107437,-1.123714,-1.094886,0.488015,-0.890627,-1.297305,-1.378641,-0.337257,-1.090392,-1.536897,-0.753135,-0.668765,0.670021,1.500060,1.338785,-0.329690,0.546446,0.320439,0.876136,0.0,0.0,-0.198513,-0.389879,0.607399,-0.866287,-0.484303,-0.767036,-0.954217,0.136317,-0.227195,0.0,0.0,0.0,0.281387,-1.219346,0.281387,-0.864692,1.056846,-0.096077,-0.440574,0.163642,-0.440574,-0.553454,-0.553454,-0.553454,0.699734,1.791479,1.341937,-2.220510,-0.154919,0.671317,-1.016399,1.453836,-0.275328,-0.603434,-0.603434,-0.603434,0.378811,-1.325838,-1.325838,0.052394,-0.324843,-0.576335,1.364620,0.886505,-1.025955,-0.638096,0.587048,-0.638096,-0.512989,1.949359,-0.512989,-0.497701,0.829502,-0.497701,1.300917,-0.771914,-1.031018,-0.693286,0.637823,-1.136990,-0.208751,-0.208751,-0.276915,-1.104406,-1.104406,-0.104189,-0.346881,0.435016,-0.057413,0.135192,0.598706,-1.487107,-1.367675,0.104674,-0.605309,0.717490,-1.231182,-0.613506,0.120376,-0.101856,2.120461,0.383298,1.312505,2.799236,0.401679,-0.316852,2.198006,1.337241,-0.684141,-0.017017,-0.858034,-0.858034,-0.858034
19,-1.049405,-1.482511,1.451213,-0.078326,-0.609559,0.377590,-0.746702,-0.593132,0.681889,0.312163,-0.551172,0.185709,0.483851,0.299073,-0.727955,-0.911790,0.338005,0.636849,0.0,-0.276489,0.403580,-1.059085,-0.954217,0.0,1.782120,-0.096077,-0.440574,-0.553454,1.020836,0.258199,1.083301,-0.603434,-0.757622,-0.450589,-0.069725,-0.638096,1.949359,-1.824904,-0.771914,0.194120,-0.276915,0.395919,-1.154913,-0.791836,0.157405,0.126786,-1.657478,2.613395,-0.197097,-1.132984,-0.858034,-1.063937,-1.034889,-0.962533,-0.919302,-0.919302,-1.034889,-1.093048,-1.107626,-1.093048,-1.093048,-1.093048,0.116811,-1.717969,-1.241357,-0.035586,0.476612,1.490615,1.216060,0.040294,-0.274555,-1.416871,0.051814,0.702515,1.468685,0.291130,-0.653209,-0.871463,-0.944339,1.757793,0.126477,-1.129090,-1.631316,0.414642,-0.734047,-0.670771,-1.148689,0.633498,-0.620815,-0.759226,-1.254313,-0.542780,0.787043,1.312816,1.329823,-1.006507,0.472621,1.274909,1.479128,0.181229,-0.520830,-0.369118,-0.702059,0.992550,0.045735,-0.654138,-0.946816,1.166829,0.460510,0.343805,-0.706319,1.237120,0.095818,-0.920458,-1.141302,0.569350,-0.809291,-1.215970,-1.378641,-0.073343,-1.001091,-1.447596,-0.927748,-0.900229,0.504013,1.334052,1.404242,-0.164308,0.591648,0.365640,0.755956,0.0,0.0,-0.237501,-0.396377,0.505489,-0.917230,-0.771694,-1.166081,2.680895,-0.954217,0.136317,0.0,0.0,0.0,1.782120,0.281387,-1.219346,1.441153,-0.864692,1.056846,-0.440574,-0.440574,0.163642,-0.553454,-0.553454,-0.553454,-0.070910,0.699734,1.791479,-1.394274,-2.220510,-0.154919,-0.769375,-1.016399,1.453836,-0.603434,-0.603434,-0.603434,-1.325838,0.378811,-1.325838,-1.456556,0.052394,-0.324843,-0.069725,1.364620,0.886505,-0.638096,-0.638096,0.587048,-0.512989,-0.512989,1.949359,-0.497701,-0.497701,0.829502,0.264502,1.300917,-0.771914,-0.249583,-0.693286,0.637823,-0.004260,-0.208751,-0.208751,0.395919,-1.104406,-1.104406,-0.990001,-0.346881,0.435016,-0.791836,0.135192,0.598706,-0.530543,-1.367675,0.104674,-0.723307,0.717490,-1.231182,-0.768551,0.120376,-0.101856,1.591267,0.383298,1.312505,0.401679,0.401679,-0.316852,-1.152992,1.337241,-0.684141,-0.858034,-0.858034,-0.858034
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93655,1.082451,1.623063,-1.614984,-1.198873,1.513881,-0.907073,1.301830,0.444886,-1.652859,1.693593,0.733889,-0.850579,-0.268576,0.552681,-0.700233,1.621509,-1.690196,-2.113912,0.0,1.544217,1.358268,-1.406345,0.191792,0.0,-1.262634,-0.187991,-0.511872,-0.558584,-1.398038,0.028606,0.367864,-0.347441,1.761658,1.058572,0.000000,1.576147,0.288231,1.192079,-0.054759,0.502895,1.601284,0.826874,-1.152020,-0.204284,1.215276,-0.172538,0.465928,-1.350924,-0.342046,0.876119,2.485898,0.994764,1.029878,1.047415,0.871449,0.658528,0.497556,0.371585,0.461633,0.694149,0.836096,0.889105,-0.209898,1.456086,1.505201,0.915894,0.049115,-1.204123,-1.570932,-1.350668,-0.366808,-0.353705,-1.177281,-1.069324,-0.823576,0.586019,1.493290,1.390339,0.907271,-1.458087,-0.758447,-0.015319,0.699640,0.365618,1.250608,0.994499,0.884990,0.185894,0.303512,-0.403355,0.117618,-0.967797,-1.574767,-1.184305,-0.606969,1.004674,1.550068,0.832444,0.545394,1.729665,0.537645,-0.443578,-1.192020,-0.082131,-0.974896,-1.596477,-0.892765,-1.026990,-0.136870,0.521661,0.890120,-0.146194,0.546973,0.518433,0.693167,0.000000,-0.700233,-0.700233,-0.700233,0.859423,1.521220,1.019772,0.661797,-2.002706,-1.475397,-0.401407,0.527309,-2.055303,-1.885917,-0.745940,0.169387,0.0,0.0,1.433734,1.002849,1.243302,1.289928,-1.179767,-1.889482,1.296514,0.191792,-0.912930,0.0,0.0,0.0,1.098656,1.098656,-0.475538,-0.590830,0.537118,-0.349127,-0.511872,-0.511872,0.210771,-0.558584,2.366367,-0.558584,-1.398038,-0.365641,0.494690,2.225548,-0.520629,1.264386,-1.481968,0.536031,-0.725218,1.505578,-0.347441,-0.347441,-0.750013,0.087211,-0.750013,-0.846857,-0.084686,-0.973886,0.000000,0.000000,0.000000,0.092715,1.576147,0.092715,0.288231,1.056846,0.288231,-0.397360,-0.397360,1.192079,-0.054759,-0.054759,-0.054759,-1.106370,2.648582,-1.106370,-0.280637,-0.181589,-0.280637,-0.590624,-0.590624,-0.590624,-0.210512,0.338244,-1.152020,3.064255,0.549995,-0.204284,0.397690,0.559939,1.970003,1.906765,-0.172538,1.560214,0.465928,3.149676,-0.875945,0.637636,2.107441,-0.054037,1.482201,0.570077,1.178159,-2.085306,-2.085306,-0.634336,-0.474408,0.891887,-0.474408
93656,1.082451,1.740924,-1.659037,-1.220464,1.534471,-1.055699,1.353052,0.586259,-1.730951,1.837117,0.930134,-0.726263,-0.400282,0.558389,-0.700233,1.721799,-1.904994,-2.341907,0.0,1.654700,1.473235,-1.632922,0.191792,0.0,-1.262634,0.053712,-0.511872,-0.558584,-0.795806,-0.246012,1.208697,-0.347441,0.087211,0.169371,0.000000,-0.649002,-0.480384,2.781518,-0.054759,0.502895,6.256562,-0.590624,-0.210512,2.058551,-0.027309,-0.172538,0.465928,-0.486333,-0.342046,0.391490,0.891887,1.082451,0.994764,1.029878,1.047415,0.871449,0.658528,0.497556,0.371585,0.461633,0.694149,0.836096,0.193345,1.416183,1.623063,1.033755,0.206879,-1.024238,-1.614984,-1.394720,-0.590747,-0.324656,-1.198873,-1.090915,-0.874217,0.496138,1.513881,1.410929,1.017743,-1.502286,-0.907073,-0.163945,0.595213,0.415074,1.301830,1.045721,0.886756,0.528197,0.444886,-0.261981,-0.083312,-0.940602,-1.652859,-1.262397,-0.712257,1.148198,1.693593,0.975969,0.545394,1.836921,0.733889,-0.247333,-1.103032,0.280272,-0.850579,-1.472161,-1.130851,-1.132434,-0.268576,0.389955,0.863859,-0.093949,0.552681,0.524141,0.646630,0.000000,-0.700233,-0.700233,-0.700233,0.920363,1.621509,1.120062,0.701146,-2.092725,-1.690196,-0.616205,0.402529,-2.168465,-2.113912,-0.973935,0.054553,0.0,0.0,1.544217,1.045342,1.358268,1.283604,-1.406345,-2.010057,0.191792,1.296514,0.191792,0.0,0.0,0.0,-1.262634,1.098656,1.098656,-0.187991,-0.590830,0.537118,-0.511872,-0.511872,-0.511872,-0.558584,-0.558584,2.366367,-1.398038,-1.398038,-0.365641,0.028606,2.225548,-0.520629,0.367864,-1.481968,0.536031,-0.347441,1.505578,-0.347441,1.761658,-0.750013,0.087211,1.058572,-0.846857,-0.084686,0.000000,0.000000,0.000000,1.576147,0.092715,1.576147,0.288231,0.288231,1.056846,1.192079,-0.397360,-0.397360,-0.054759,-0.054759,-0.054759,0.502895,-1.106370,2.648582,1.601284,-0.280637,-0.181589,0.826874,-0.590624,-0.590624,-1.152020,-0.210512,0.338244,-0.204284,3.064255,0.549995,1.215276,0.397690,0.559939,-0.172538,1.906765,-0.172538,0.465928,0.465928,3.149676,-1.350924,0.637636,2.107441,-0.342046,1.482201,0.570077,0.876119,-2.085306,-2.085306,2.485898,-0.474408,0.891887
93657,1.047415,1.858785,-1.703090,-1.242056,1.555061,-1.204324,1.404274,0.727632,-1.809044,1.980642,1.126379,-0.601947,-0.531988,0.564097,-0.700233,1.822088,-2.119792,-2.569903,0.0,1.765184,1.588201,-1.859500,-0.912930,0.0,-1.262634,0.053712,0.210771,0.903891,-0.021508,0.028606,1.881364,-0.347441,1.761658,0.423429,0.000000,1.576147,0.288231,-0.397360,0.930896,-1.106370,0.115557,-0.590624,0.612958,-0.958562,-1.200849,0.520563,-0.875945,0.291800,-1.254170,-1.178450,-0.474408,1.082451,1.082451,0.994764,1.029878,1.047415,0.871449,0.658528,0.497556,0.371585,0.461633,0.694149,0.246354,1.376281,1.740924,1.151617,0.364643,-0.844352,-1.659037,-1.438773,-0.814685,-0.295606,-1.220464,-1.112507,-0.924859,0.406256,1.534471,1.431520,1.128215,-1.546485,-1.055699,-0.312570,0.490786,0.464529,1.353052,1.096943,0.888523,0.870501,0.586259,-0.120608,-0.284242,-0.913406,-1.730951,-1.340490,-0.817545,1.291723,1.837117,1.119493,0.545394,1.944178,0.930134,-0.051089,-1.014044,0.642674,-0.726263,-1.347845,-1.368938,-1.237879,-0.400282,0.258249,0.837597,-0.041704,0.558389,0.529849,0.600092,0.000000,-0.700233,-0.700233,-0.700233,0.981303,1.721799,1.220351,0.740495,-2.182743,-1.904994,-0.831003,0.277749,-2.281626,-2.341907,-1.201931,-0.060281,0.0,0.0,1.654700,1.087836,1.473235,1.277279,-1.632922,-2.130632,0.191792,0.191792,1.296514,0.0,0.0,0.0,-1.262634,-1.262634,1.098656,0.053712,-0.187991,-0.590830,-0.511872,-0.511872,-0.511872,-0.558584,-0.558584,-0.558584,-0.795806,-1.398038,-1.398038,-0.246012,0.028606,2.225548,1.208697,0.367864,-1.481968,-0.347441,-0.347441,1.505578,0.087211,1.761658,-0.750013,0.169371,1.058572,-0.846857,0.000000,0.000000,0.000000,-0.649002,1.576147,0.092715,-0.480384,0.288231,0.288231,2.781518,1.192079,-0.397360,-0.054759,-0.054759,-0.054759,0.502895,0.502895,-1.106370,6.256562,1.601284,-0.280637,-0.590624,0.826874,-0.590624,-0.210512,-1.152020,-0.210512,2.058551,-0.204284,3.064255,-0.027309,1.215276,0.397690,-0.172538,-0.172538,1.906765,0.465928,0.465928,0.465928,-0.486333,-1.350924,0.637636,-0.342046,-0.342046,1.482201,0.391490,0.876119,-2.085306,0.891887,2.485898,-0.474408
93658,1.064940,1.976647,-1.747143,-1.263648,1.575652,-1.352950,1.455496,0.869006,-1.887136,2.124167,1.322623,-0.477630,-0.663694,0.569805,-0.700233,1.922378,-2.334590,-2.797898,0.0,1.875667,1.703167,-2.086078,-0.912930,0.0,-0.475538,-0.349127,-0.511872,-0.558584,-0.107541,-0.246012,0.536031,-0.347441,-0.750013,4.361315,0.000000,0.092715,-0.480384,1.192079,-1.040413,-1.106370,-0.181589,0.826874,-1.544770,1.304273,-0.012337,0.867113,0.465928,-1.956138,-1.254170,-1.541193,-0.474408,1.047415,1.082451,1.082451,0.994764,1.029878,1.047415,0.871449,0.658528,0.497556,0.371585,0.461633,0.353266,1.336378,1.858785,1.269478,0.522407,-0.664466,-1.703090,-1.482826,-1.038624,-0.266556,-1.242056,-1.134098,-0.975500,0.316375,1.555061,1.452110,1.238687,-1.590684,-1.204324,-0.461196,0.386359,0.513985,1.404274,1.148164,0.890289,1.212804,0.727632,0.020765,-0.485172,-0.886211,-1.809044,-1.418582,-0.922833,1.435248,1.980642,1.263018,0.545394,2.051434,1.126379,0.145156,-0.925055,1.005077,-0.601947,-1.223528,-1.607024,-1.343323,-0.531988,0.126543,0.811335,0.010542,0.564097,0.535557,0.553555,0.000000,-0.700233,-0.700233,-0.700233,1.042244,1.822088,1.320641,0.779844,-2.272761,-2.119792,-1.045801,0.152970,-2.394787,-2.569903,-1.429926,-0.175115,0.0,0.0,1.765184,1.130329,1.588201,1.270954,-1.859500,-2.251207,-0.912930,0.191792,0.191792,0.0,0.0,0.0,-1.262634,-1.262634,-1.262634,0.053712,0.053712,-0.187991,0.210771,-0.511872,-0.511872,0.903891,-0.558584,-0.558584,-0.021508,-0.795806,-1.398038,0.028606,-0.246012,0.028606,1.881364,1.208697,0.367864,-0.347441,-0.347441,-0.347441,1.761658,0.087211,1.761658,0.423429,0.169371,1.058572,0.000000,0.000000,0.000000,1.576147,-0.649002,1.576147,0.288231,-0.480384,0.288231,-0.397360,2.781518,1.192079,0.930896,-0.054759,-0.054759,-1.106370,0.502895,0.502895,0.115557,6.256562,1.601284,-0.590624,-0.590624,0.826874,0.612958,-0.210512,-1.152020,-0.958562,2.058551,-0.204284,-1.200849,-0.027309,1.215276,0.520563,-0.172538,-0.172538,-0.875945,0.465928,0.465928,0.291800,-0.486333,-1.350924,-1.254170,-0.342046,-0.342046,-1.178450,0.391490,0.876119,-0.474408,0.891887,2.485898


In [52]:
#Lasso Grid Search
fold=TimeSeriesSplit(n_splits=5)
lasso = Lasso()
grid = dict()
grid['alpha'] = [1e-5,1e-4, 1e-3,1e-2, 1, 10]
lasso_grid = GridSearchCV(lasso, grid, cv=fold, n_jobs=-1).fit(train_X,train_y)
lasso_best = lasso_grid.best_estimator_



In [53]:
lasso_best.fit(train_X, train_y)

Lasso(alpha=0.0001)

In [87]:
lasso_best = Lasso(alpha= 0.01)
lasso_best.fit(train_X, train_y)

Lasso(alpha=0.1)

In [88]:
predictor_table=df.copy()
X_test = test_X.copy()

#loop through all month in 2019
for month in list(X_test['year-month'].drop_duplicates()):
    
    #run prediction for one month
    X_test=X_test[X_test['year-month']==month]
    X_test=X_test.drop(['zip','year','month','City','State','Metro','CountyName','year-month','zri'],axis=1)
    val=lasso_best.predict(X_test)

    #write current month prediction into predictor_table

    predictor_table.loc[predictor_table['year-month']==month,'zri']=val
        
    predictor_table=lag_gen(predictor_table)

    X_test=predictor_table[predictor_table['year']==2019]

scaled_predictions_y = X_test['zri']
temp = pd.concat([save_test['zip'],test_y],axis=1)
temp.reset_index(drop=True, inplace=True)
scaled_predictions_y.reset_index(drop=True, inplace=True)
rstable = pd.concat([temp,scaled_predictions_y],axis=1)
rstable.columns = ['zip','zri_test','zri_predicted']

for zipcode in rstable['zip'].unique():
    rstable_filtered=rstable[rstable['zip']==zipcode]
    rstable.loc[rstable['zip']==zipcode,'zri_test']=globals()[f"scaler_{zipcode}"].inverse_transform(rstable_filtered[['zri_test']])
    rstable.loc[rstable['zip']==zipcode,'zri_predicted']=globals()[f"scaler_{zipcode}"].inverse_transform(rstable_filtered[['zri_predicted']])  
rstable.loc[:,'zri_test'] = np.exp(rstable.loc[:,'zri_test'])
rstable.loc[:,'zri_predicted'] = np.exp(rstable.loc[:,'zri_predicted'])
rstable


Unnamed: 0,zip,zri_test,zri_predicted
0,1013,0.973309,0.969190
1,1013,0.960762,0.870002
2,1013,1.060825,0.793721
3,1013,1.160167,0.735057
4,1013,1.184891,0.689940
...,...,...,...
15607,99654,0.007888,0.614669
15608,99654,-0.102332,0.597355
15609,99654,0.407646,0.584039
15610,99654,-0.277916,0.573799


In [90]:
for zipcode in rstable['zip'].unique():
    rstable_filtered=rstable[rstable['zip']==zipcode]
    rstable.loc[rstable['zip']==zipcode,'zri_test']=globals()[f"scaler_{zipcode}"].inverse_transform(rstable_filtered[['zri_test']])
    rstable.loc[rstable['zip']==zipcode,'zri_predicted']=globals()[f"scaler_{zipcode}"].inverse_transform(rstable_filtered[['zri_predicted']])  
rstable.loc[:,'zri_test'] = np.exp(rstable.loc[:,'zri_test'])
rstable.loc[:,'zri_predicted'] = np.exp(rstable.loc[:,'zri_predicted'])
rstable.isna().sum()

zip              0
zri_test         0
zri_predicted    0
dtype: int64

In [91]:
rstable

Unnamed: 0,zip,zri_test,zri_predicted
0,1013,1099.0,1098.671591
1,1013,1098.0,1090.793915
2,1013,1106.0,1084.773960
3,1013,1114.0,1080.166869
4,1013,1116.0,1076.637057
...,...,...,...
15607,99654,1277.0,1310.541726
15608,99654,1271.0,1309.572524
15609,99654,1299.0,1308.827639
15610,99654,1261.5,1308.255066


In [92]:
 rstable.loc[rstable['zri_predicted'] > 1e4,'zip']

Series([], Name: zip, dtype: int64)

In [93]:
rstable.loc[(rstable['zip'] == 78521) | (rstable['zip'] ==  78550),:]

Unnamed: 0,zip,zri_test,zri_predicted
11604,78521,762.0,754.25318
11605,78521,765.0,755.9906
11606,78521,770.0,757.329501
11607,78521,777.0,758.360808
11608,78521,787.0,759.1549
11609,78521,793.0,759.766169
11610,78521,790.0,760.236606
11611,78521,782.0,760.598598
11612,78521,780.0,760.877108
11613,78521,806.5,761.091369


In [94]:
# rstable.replace([np.inf, - np.inf], np.nan, inplace = True)
# rstable = rstable.fillna(0)

In [97]:
#RESULTS 
r2 = r2_score(rstable['zri_test'],rstable['zri_predicted'])
rmse = sqrt(mean_squared_error(rstable['zri_test'],rstable['zri_predicted']))
print(f'R2: {r2*100}')
print(f'RMSE: {rmse}')

R2: 97.75747275351809
RMSE: 99.63068458898006


In [184]:
importance = np.abs(lasso_best.coef_)
coef = lasso_best.coef_
feature_names = train_X.columns
feature_importances= pd.DataFrame([feature_names,importance,coef]).T
feature_importances.columns = ['feature_names','coef_abs','coef']

feature_importances = feature_importances.sort_values(by='coef_abs',ascending=False)
feature_importances['Parent_feature']=feature_importances['feature_names'].apply(lambda x: x[0:x.find('_lag')] if x.find('_lag')!=-1 else x)
feature_importances['Parent_feature']=feature_importances['Parent_feature'].apply(lambda x: x[0:x.find('_diff')] if x.find('_diff')!=-1 else x)

In [185]:
feature_importances

Unnamed: 0,feature_names,coef_abs,coef,Parent_feature
50,zri_lag1,1.470077,1.470077,zri
51,zri_lag2,0.579239,-0.579239,zri
61,zri_diff_lag1_lag12,0.049631,0.049631,zri
52,zri_lag3,0.042993,-0.042993,zri
60,zri_lag11,0.030877,0.030877,zri
27,unemployment,0.015324,-0.015324,unemployment
54,zri_lag5,0.011284,0.011284,zri
53,zri_lag4,0.011181,0.011181,zri
56,zri_lag7,0.010477,0.010477,zri
59,zri_lag10,0.009003,0.009003,zri


In [186]:
feats_agg = feature_importances[['coef_abs','Parent_feature']]
feats_agg['coef_abs'] = feats_agg['coef_abs'].astype('float')
feats_agg_abs = feats_agg.groupby('Parent_feature').agg('mean').sort_values('coef_abs',ascending=False)
feats_agg_abs

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  feats_agg['coef_abs'] = feats_agg['coef_abs'].astype('float')


Unnamed: 0_level_0,coef_abs
Parent_feature,Unnamed: 1_level_1
zri,0.184563
net_job_rate,0.004358
state_local_perc,0.003033
unemployment,0.003032
startup_firms,0.002899
percent_white,0.002428
poverty_rate,0.002181
percent_highschool,0.001714
hurricane,0.001711
retirement,0.001562


In [187]:
feats_agg = feature_importances[['coef','Parent_feature']]
feats_agg['coef'] = feats_agg['coef'].astype('float')
feats_agg_sign = feats_agg.groupby('Parent_feature').agg('mean').sort_values('coef',ascending=False)
feats_agg_sign

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  feats_agg['coef'] = feats_agg['coef'].astype('float')


Unnamed: 0_level_0,coef
Parent_feature,Unnamed: 1_level_1
zri,0.080858
poverty_rate,0.002181
percent_highschool,0.001714
hurricane,0.001711
startup_firms,0.001464
percent_work_from_home,0.00129
percent_associates,0.001161
percent_rental_units_occupied,0.00115
job opportunities,0.000876
lgbt,0.000749


In [None]:
# Lasso(alpha=0.0001)
# R2: 93.61898611976116
# RMSE: 168.06183261166848
    
# R2: 98.75195418606042
# RMSE: 74.32578662880445
# Lasso(alpha=0.01)

In [188]:
title = '03_predicted_Final_Auto_All'
fi = title+'_FI'
fi2 = title+'_FI_Agg_sign'
rstable.to_csv(f'../../../zillow_orientation/Residuals/{title}.csv')
feature_importances.to_csv(f'../../../zillow_orientation/Residuals/{fi}.csv')
feats_agg_sign.to_csv(f'../../../zillow_orientation/Residuals/{fi2}.csv')

In [1]:
acs_cols_keep

NameError: name 'acs_cols_keep' is not defined