In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error

In [2]:
df = pd.read_csv("./data/total_df.csv")

In [3]:
df.head()

Unnamed: 0,zip_code,Yearly_Avg_Zip,FL_Unemployment,year,Encoded_Zip,total_pop,households,male_pop,female_pop,median_age,...,occupation_sales_office,occupation_services,management_business_sci_arts_employed,sales_office_employed,in_grades_1_to_4,in_grades_5_to_8,in_grades_9_to_12,in_school,in_undergrad_college,FLSTHPI_Yearly_Avg
0,33160,1866.833333,7.533333,2013,73,37674,19247,18472,19202,49.6,...,4713.0,2773.0,7911.0,4713.0,935,1346,965,6471,2019,291.655
1,33025,1341.083333,7.533333,2013,41,57766,21206,27852,29914,32.9,...,8948.0,5448.0,10806.0,8948.0,2963,3682,3345,18163,4987,291.655
2,33139,1842.666667,7.533333,2013,61,38066,20883,22090,15976,38.1,...,4865.0,7164.0,9995.0,4865.0,685,649,379,5387,1906,291.655
3,32256,947.75,7.533333,2013,10,40024,18039,18865,21159,32.7,...,6247.0,2597.0,11070.0,6247.0,1638,1598,1629,10962,3429,291.655
4,33009,1443.416667,7.533333,2013,36,39889,19125,19256,20633,47.1,...,4958.0,3977.0,5087.0,4958.0,1335,1020,1249,6495,1885,291.655


In [4]:
df.year.unique()

array([2013, 2014, 2015, 2016, 2017, 2018])

In [5]:
df.shape

(645, 219)

In [6]:
df.columns.tolist()

['zip_code',
 'Yearly_Avg_Zip',
 'FL_Unemployment',
 'year',
 'Encoded_Zip',
 'total_pop',
 'households',
 'male_pop',
 'female_pop',
 'median_age',
 'male_under_5',
 'male_5_to_9',
 'male_10_to_14',
 'male_15_to_17',
 'male_18_to_19',
 'male_20',
 'male_21',
 'male_22_to_24',
 'male_25_to_29',
 'male_30_to_34',
 'male_35_to_39',
 'male_40_to_44',
 'male_45_to_49',
 'male_50_to_54',
 'male_55_to_59',
 'male_65_to_66',
 'male_67_to_69',
 'male_70_to_74',
 'male_75_to_79',
 'male_80_to_84',
 'male_85_and_over',
 'female_under_5',
 'female_5_to_9',
 'female_10_to_14',
 'female_15_to_17',
 'female_18_to_19',
 'female_20',
 'female_21',
 'female_22_to_24',
 'female_25_to_29',
 'female_30_to_34',
 'female_35_to_39',
 'female_40_to_44',
 'female_45_to_49',
 'female_50_to_54',
 'female_55_to_59',
 'female_60_to_61',
 'female_62_to_64',
 'female_65_to_66',
 'female_67_to_69',
 'female_70_to_74',
 'female_75_to_79',
 'female_80_to_84',
 'female_85_and_over',
 'population_1_year_and_over',
 'popu

# Remove Features That Can Cause Leakage

In [7]:
df=df.drop([
    'renter_occupied_housing_units_paying_cash_median_gross_rent', 
    'median_rent', 
    'percent_income_spent_on_rent', 
    'rent_burden_not_computed', 
    'rent_over_50_percent',
    'rent_40_to_50_percent',
    'rent_35_to_40_percent',
    'rent_30_to_35_percent',
    'rent_25_to_30_percent',
    'rent_20_to_25_percent',
    'rent_15_to_20_percent',
    'rent_10_to_15_percent',
    'rent_under_10_percent'], axis=1
)

## Get a list of duplicated columns

In [25]:
def getDuplicateColumns(df):
    '''
    Get a list of duplicate columns.
    It will iterate over all the columns in dataframe and find the columns whose contents are duplicate.
    :param df: Dataframe object
    :return: List of columns whose contents are duplicates.
    '''
    duplicateColumnNames = set()
    # Iterate over all the columns in dataframe
    for x in range(df.shape[1]):
        # Select column at xth index.
        col = df.iloc[:, x]
        # Iterate over all the columns in DataFrame from (x+1)th index till end
        for y in range(x + 1, df.shape[1]):
            # Select column at yth index.
            otherCol = df.iloc[:, y]
            # Check if two columns at x 7 y index are equal
            if col.equals(otherCol):
                duplicateColumnNames.add(df.columns.values[y])
    return list(duplicateColumnNames)
#from https://thispointer.com/how-to-find-drop-duplicate-columns-in-a-dataframe-python-pandas/

In [26]:
getDuplicateColumns(df)

['management_business_sci_arts_employed',
 'occupied_housing_units',
 'sales_office_employed']

# Standardization/Standard Scaling

In [8]:
df_preprocess=df.drop(["zip_code", "year", "Yearly_Avg_Zip"], axis=1)

In [9]:
df_train=pd.DataFrame()

In [10]:
from sklearn.preprocessing import StandardScaler
standard=StandardScaler() #give each column the same range of values and centered around 0
standard.fit(df_preprocess)
df_train=pd.DataFrame(standard.transform(df_preprocess), 
                      index=df_preprocess.index, 
                      columns=df_preprocess.columns)

In [11]:
df_train.head()

Unnamed: 0,FL_Unemployment,Encoded_Zip,total_pop,households,male_pop,female_pop,median_age,male_under_5,male_5_to_9,male_10_to_14,...,occupation_sales_office,occupation_services,management_business_sci_arts_employed,sales_office_employed,in_grades_1_to_4,in_grades_5_to_8,in_grades_9_to_12,in_school,in_undergrad_college,FLSTHPI_Yearly_Avg
0,2.005619,0.068493,0.094607,1.184796,0.11919,0.071695,1.608587,-0.790637,-0.602021,0.123111,...,0.028752,-0.462638,0.624245,0.028752,-0.731641,-0.28278,-0.695893,-0.481108,-0.180243,-1.675376
1,2.005619,-0.719515,1.416572,1.604604,1.419377,1.400375,-1.041677,1.69104,1.69819,2.346499,...,1.984141,0.905133,1.605867,1.984141,1.357065,2.146299,1.566783,1.933062,1.853966,-1.675376
2,2.005619,-0.22701,0.120399,1.535386,0.620691,-0.328448,-0.216445,-0.209889,-0.826005,-1.20014,...,0.098934,1.782551,1.330877,0.098934,-0.989125,-1.007552,-1.253006,-0.704932,-0.257691,-1.675376
3,2.005619,-1.482898,0.249227,0.925925,0.173665,0.314434,-1.073416,-0.04139,0.144594,0.165586,...,0.737032,-0.55263,1.695383,0.737032,-0.007598,-0.020739,-0.064626,0.446196,0.786143,-1.675376
4,2.005619,-0.842641,0.240344,1.158652,0.227863,0.249191,1.211841,0.169643,-0.322437,-0.373517,...,0.141874,0.152987,-0.333303,0.141874,-0.319668,-0.62177,-0.425893,-0.476152,-0.272084,-1.675376


In [12]:
df_train["zip_code"]=df["zip_code"]
df_train["year"]=df["year"]
df_train["Yearly_Avg_Zip"]=df["Yearly_Avg_Zip"]

In [13]:
df_train.head()

Unnamed: 0,FL_Unemployment,Encoded_Zip,total_pop,households,male_pop,female_pop,median_age,male_under_5,male_5_to_9,male_10_to_14,...,sales_office_employed,in_grades_1_to_4,in_grades_5_to_8,in_grades_9_to_12,in_school,in_undergrad_college,FLSTHPI_Yearly_Avg,zip_code,year,Yearly_Avg_Zip
0,2.005619,0.068493,0.094607,1.184796,0.11919,0.071695,1.608587,-0.790637,-0.602021,0.123111,...,0.028752,-0.731641,-0.28278,-0.695893,-0.481108,-0.180243,-1.675376,33160,2013,1866.833333
1,2.005619,-0.719515,1.416572,1.604604,1.419377,1.400375,-1.041677,1.69104,1.69819,2.346499,...,1.984141,1.357065,2.146299,1.566783,1.933062,1.853966,-1.675376,33025,2013,1341.083333
2,2.005619,-0.22701,0.120399,1.535386,0.620691,-0.328448,-0.216445,-0.209889,-0.826005,-1.20014,...,0.098934,-0.989125,-1.007552,-1.253006,-0.704932,-0.257691,-1.675376,33139,2013,1842.666667
3,2.005619,-1.482898,0.249227,0.925925,0.173665,0.314434,-1.073416,-0.04139,0.144594,0.165586,...,0.737032,-0.007598,-0.020739,-0.064626,0.446196,0.786143,-1.675376,32256,2013,947.75
4,2.005619,-0.842641,0.240344,1.158652,0.227863,0.249191,1.211841,0.169643,-0.322437,-0.373517,...,0.141874,-0.319668,-0.62177,-0.425893,-0.476152,-0.272084,-1.675376,33009,2013,1443.416667


# Feature Selection (Forward Stepwise)

In [14]:
def forward_selection(data, target, significance_level=0.05):
    import statsmodels.api as sm
    initial_features = data.columns.tolist()
    best_features = []
    while (len(initial_features)>0):
        remaining_features = list(set(initial_features)-set(best_features))
        new_pval = pd.Series(index=remaining_features)
        for new_column in remaining_features:
            model = sm.OLS(target, sm.add_constant(data[best_features+[new_column]])).fit()
            new_pval[new_column] = model.pvalues[new_column]
        min_p_value = new_pval.min()
        if(min_p_value<significance_level):
            best_features.append(new_pval.idxmin())
        else:
            break
    return best_features
#from here 
#https://www.analyticsvidhya.com/blog/2020/10/a-comprehensive-guide-to-feature-selection-using-wrapper-methods-in-python/

In [15]:
forward_selected_features=forward_selection(df_train.drop(columns = 'Yearly_Avg_Zip'), 
                                            df_train.Yearly_Avg_Zip, 
                                            significance_level=0.05)

  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Seri

  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Series(index=remaining_features)
  new_pval = pd.Seri

In [16]:
forward_selected_features=sorted(forward_selected_features)

In [18]:
df_train_final=pd.DataFrame()

In [19]:
for feature in forward_selected_features:
    df_train_final[feature]=df_train[feature]

In [20]:
from statsmodels.stats.outliers_influence import variance_inflation_factor
def get_vif(df):
    vif = pd.DataFrame()
    vif['VIF'] = [variance_inflation_factor(df.values, i) for i in range(df.shape[1])]
    vif['variable'] = df.columns
    return vif.sort_values("VIF", ascending=False)   
#writing a function to get VIF among remaining features 
# using VIF to confirm if there's still multicollinearity exists 

In [21]:
get_vif(df_train_final)

  vif = 1. / (1. - r_squared_i)


Unnamed: 0,VIF,variable
52,inf,sales_office_employed
48,inf,occupation_sales_office
51,268.509029,pop_25_years_over
2,213.219181,children
44,86.664728,married_households
49,75.945527,owner_occupied_housing_units
53,60.709623,some_college_and_associates_degree
46,59.313197,mortgaged_housing_units
0,53.497692,FLSTHPI_Yearly_Avg
1,52.6559,FL_Unemployment


In [22]:
df_train["occupation_sales_office"]

0      0.028752
1      1.984141
2      0.098934
3      0.737032
4      0.141874
         ...   
640    1.669709
641    1.115644
642   -1.745181
643   -0.174405
644   -1.325015
Name: occupation_sales_office, Length: 645, dtype: float64

In [23]:
df_train["sales_office_employed"]

0      0.028752
1      1.984141
2      0.098934
3      0.737032
4      0.141874
         ...   
640    1.669709
641    1.115644
642   -1.745181
643   -0.174405
644   -1.325015
Name: sales_office_employed, Length: 645, dtype: float64

In [None]:
df_train=df_train.drop(["sales_office_employed", ], axis=1)

In [491]:
forward_selected_features=sorted(forward_selected_features)
forward_selected_features.extend(
    ["zip_code", "year", "Yearly_Avg_Zip"])

In [492]:
forward_selected_features

['FLSTHPI_Yearly_Avg',
 'FL_Unemployment',
 'children',
 'children_in_single_female_hh',
 'commute_10_14_mins',
 'commute_35_44_mins',
 'commute_45_59_mins',
 'commute_less_10_mins',
 'commuters_by_bus',
 'different_house_year_ago_same_city',
 'dwellings_20_to_49_units',
 'dwellings_50_or_more_units',
 'dwellings_5_to_9_units',
 'employed_arts_entertainment_recreation_accommodation_food',
 'employed_public_administration',
 'employed_transportation_warehousing_utilities',
 'female_50_to_54',
 'female_5_to_9',
 'female_60_to_61',
 'female_62_to_64',
 'female_65_to_66',
 'gini_index',
 'group_quarters',
 'households_public_asst_or_food_stamps',
 'housing_units',
 'in_grades_5_to_8',
 'income_100000_124999',
 'income_125000_149999',
 'income_150000_199999',
 'income_15000_19999',
 'income_200000_or_more',
 'income_30000_34999',
 'income_40000_44999',
 'income_50000_59999',
 'less_one_year_college',
 'male_15_to_17',
 'male_25_to_29',
 'male_35_to_39',
 'male_45_64_associates_degree',
 'ma

In [495]:
df_train_final.head()

Unnamed: 0,FLSTHPI_Yearly_Avg,FL_Unemployment,children,children_in_single_female_hh,commute_10_14_mins,commute_35_44_mins,commute_45_59_mins,commute_less_10_mins,commuters_by_bus,different_house_year_ago_same_city,...,sales_office_employed,some_college_and_associates_degree,two_parents_father_in_labor_force_families_with_young_children,two_parents_mother_in_labor_force_families_with_young_children,unemployed_pop,vacant_housing_units_for_rent,walked_to_work,zip_code,year,Yearly_Avg_Zip
0,-1.675376,2.005619,-0.604674,-0.787676,-0.226279,2.314692,0.775059,-0.283637,-0.042198,-0.761251,...,0.028752,0.383085,0.33959,-0.248349,-0.412005,1.612705,0.303524,33160,2013,1866.833333
1,-1.675376,2.005619,1.82154,1.470348,0.892614,2.283387,2.167135,0.289622,-0.120738,0.660936,...,1.984141,1.883253,0.692808,-0.21312,1.801816,2.143672,-0.503603,33025,2013,1341.083333
2,-1.675376,2.005619,-0.851691,-0.942912,3.052789,0.157246,-0.599016,3.642101,1.428648,1.816176,...,0.098934,0.157775,-0.02912,0.720441,-0.469948,2.936081,9.543007,33139,2013,1842.666667
3,-1.675376,2.005619,-0.009587,-0.380379,2.120162,-0.884955,-0.875031,1.67379,-0.677661,2.861721,...,0.737032,0.256348,1.538673,-0.036977,0.216179,0.569637,-0.41227,32256,2013,947.75
4,-1.675376,2.005619,-0.399941,-0.187782,-0.93254,0.564213,0.041019,-0.704511,0.329083,0.312677,...,0.141874,0.471171,-0.317272,0.051095,1.025553,0.014412,-0.225357,33009,2013,1443.416667


In [496]:
df_2013_to_2017=df_train_final[df_train_final["year"].isin(
    [2013,2014,2015,2016,2017])]

In [497]:
df_2018=df_train_final[df_train_final["year"]==2018]

In [498]:
df_2013_to_2017.shape

(518, 62)

In [499]:
df_2018.shape

(127, 62)

## Linear Models on the 4 Biggest Metro Areas in FL

In [500]:
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, cross_val_score, RandomizedSearchCV, GridSearchCV


In [501]:
from sklearn.model_selection import KFold
kfold = KFold(n_splits=3, shuffle=True, random_state=0)

In [502]:
from sklearn import linear_model
from sklearn.linear_model import LinearRegression 

In [503]:
%store -r miami_zip
%store -r orlando_zip
%store -r tampa_zip
%store -r jax_zip

## Miami

In [565]:
df_2013_to_2017_miami=df_2013_to_2017[
    df_2013_to_2017["zip_code"].isin(miami_zip)]

df_2013_to_2017_miami=df_2013_to_2017_miami.drop(["zip_code","year"],axis=1)
#dropping unique identifiers

In [566]:
df_2018_miami=df_2018[df_2018["zip_code"].isin(miami_zip)]
df_2018_miami=df_2018_miami.drop(["zip_code","year"],axis=1)

### Linear Regression

In [567]:
ols_miami = linear_model.LinearRegression()
ols_miami.fit(df_2013_to_2017_miami.drop(columns = 'Yearly_Avg_Zip'), 
              df_2013_to_2017_miami.Yearly_Avg_Zip)

LinearRegression()

In [568]:
ols_miami.score(df_2018_miami.drop(columns = 'Yearly_Avg_Zip'), 
               df_2018_miami.Yearly_Avg_Zip)

0.7439390242269595

In [569]:
rmse_test_ols_miami=np.sqrt(
    mean_squared_error
    (df_2018_miami.Yearly_Avg_Zip, 
    ols_miami.predict(df_2018_miami.drop(columns = 'Yearly_Avg_Zip'))
    )
)
print(rmse_test_ols_miami)

122.13801400677313


### Lasso Regression

In [570]:
lasso_miami=Lasso()

In [571]:
lasso_miami.fit(df_2013_to_2017_miami.drop(columns = 'Yearly_Avg_Zip'), 
              df_2013_to_2017_miami.Yearly_Avg_Zip)

  model = cd_fast.enet_coordinate_descent(


Lasso()

In [572]:
rmse_test_lasso_miami=np.sqrt(
    mean_squared_error
    (df_2018_miami.Yearly_Avg_Zip, 
    lasso_miami.predict(df_2018_miami.drop(columns = 'Yearly_Avg_Zip'))
    )
)
print(rmse_test_lasso_miami)

140.25559175342505


In [573]:
from sklearn.model_selection import GridSearchCV

lasso_params = {
    "alpha": list(np.logspace(-8,2,11))
    ,"max_iter": [10, 100, 1000, 10000]
    , "tol": list(np.logspace(-8,0,9))
    
}

lasso_miami_gs = GridSearchCV(lasso_miami, lasso_params, cv=kfold)
lasso_miami_gs.fit(df_2013_to_2017_miami.drop(columns = 'Yearly_Avg_Zip'), 
              df_2013_to_2017_miami.Yearly_Avg_Zip)
print("The best parameters are: ", lasso_miami_gs.best_params_)
lasso_miami_gs.cv_results_['mean_test_score']

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

The best parameters are:  {'alpha': 0.1, 'max_iter': 10000, 'tol': 0.01}


array([0.81818037, 0.81818037, 0.81818037, 0.81818037, 0.81818037,
       0.81818037, 0.81818037, 0.80040537, 0.77977921, 0.83607534,
       0.83607534, 0.83607534, 0.83607534, 0.83607534, 0.83607534,
       0.83607534, 0.80040537, 0.77977921, 0.8410742 , 0.8410742 ,
       0.8410742 , 0.8410742 , 0.8410742 , 0.8410742 , 0.8410742 ,
       0.80040537, 0.77977921, 0.84367913, 0.84367913, 0.84367913,
       0.84367913, 0.84367913, 0.84367913, 0.84367913, 0.80040537,
       0.77977921, 0.81818037, 0.81818037, 0.81818037, 0.81818037,
       0.81818037, 0.81818037, 0.81818037, 0.80040537, 0.77977921,
       0.83607534, 0.83607534, 0.83607534, 0.83607534, 0.83607534,
       0.83607534, 0.83607534, 0.80040537, 0.77977921, 0.8410742 ,
       0.8410742 , 0.8410742 , 0.8410742 , 0.8410742 , 0.8410742 ,
       0.8410742 , 0.80040537, 0.77977921, 0.84367913, 0.84367913,
       0.84367913, 0.84367913, 0.84367913, 0.84367913, 0.84367913,
       0.80040537, 0.77977921, 0.81818038, 0.81818038, 0.81818

In [577]:
print("The best parameters are: ", lasso_miami_gs.best_params_)

The best parameters are:  {'alpha': 0.1, 'max_iter': 10000, 'tol': 0.01}


In [578]:
lasso_miami_gs.score(df_2018_miami.drop(columns = 'Yearly_Avg_Zip'), 
               df_2018_miami.Yearly_Avg_Zip)

0.7404640701944775

In [579]:
lasso_miami_tuned=Lasso(**lasso_miami_gs.best_params_)

In [580]:
lasso_miami_tuned.fit(df_2013_to_2017_miami.drop(columns = 'Yearly_Avg_Zip'), 
              df_2013_to_2017_miami.Yearly_Avg_Zip)

Lasso(alpha=0.1, max_iter=10000, tol=0.01)

In [581]:
rmse_test_lasso_miami=np.sqrt(
    mean_squared_error
    (df_2018_miami.Yearly_Avg_Zip, 
    lasso_miami_tuned.predict(df_2018_miami.drop(columns = 'Yearly_Avg_Zip'))
    )
)
print(rmse_test_lasso_miami)

122.9639769000837


In [582]:
df_2018_miami.Yearly_Avg_Zip.describe()

count      66.000000
mean     1705.161780
std       243.217290
min      1316.166667
25%      1513.875000
50%      1677.041667
75%      1815.833333
max      2294.250000
Name: Yearly_Avg_Zip, dtype: float64

### Cross Validation on the train set (data in 2013-2017)

In [635]:
X_train, X_test, y_train, y_test = train_test_split(
    df_2013_to_2017_miami.drop(columns = 'Yearly_Avg_Zip'), 
    df_2013_to_2017_miami.Yearly_Avg_Zip, 
    test_size=0.3, random_state=0)

In [636]:
lasso_miami_tuned_scores = cross_val_score(lasso_miami_tuned, 
                                           X_train, y_train, cv=kfold)
print(lasso_miami_tuned_scores)
np.mean(lasso_miami_tuned_scores)

[0.82849333 0.63764471 0.78222631]


0.749454784151424

In [639]:
lasso_miami_tuned.score(X_test, y_test)

0.9041034804787595

### Random Forest

In [596]:
rf_miami = RandomForestRegressor(random_state=0)
rf_miami.fit(df_2013_to_2017_miami.drop(columns = 'Yearly_Avg_Zip'), 
             df_2013_to_2017_miami.Yearly_Avg_Zip)

RandomForestRegressor(random_state=0)

In [597]:
feature_importances_miami = pd.Series(rf_miami.feature_importances_, 
                                index=df_2013_to_2017_miami.drop(columns = 'Yearly_Avg_Zip').columns)
feature_importances_miami=feature_importances_miami.sort_values(ascending=False)
feature_importances_miami.head(10)

owner_occupied_housing_units_median_value    0.455664
income_200000_or_more                        0.042689
commute_less_10_mins                         0.031588
male_male_households                         0.028170
not_us_citizen_pop                           0.027658
gini_index                                   0.025275
dwellings_50_or_more_units                   0.025146
dwellings_5_to_9_units                       0.022776
children_in_single_female_hh                 0.022239
male_35_to_39                                0.019940
dtype: float64

## Orlando

In [620]:
df_2013_to_2017_orlando=df_2013_to_2017[
    df_2013_to_2017["zip_code"].isin(orlando_zip)]

df_2013_to_2017_orlando=df_2013_to_2017_orlando.drop(["zip_code","year"],axis=1)
#dropping unique identifiers

In [621]:
df_2018_orlando=df_2018[df_2018["zip_code"].isin(orlando_zip)]
df_2018_orlando=df_2018_orlando.drop(["zip_code","year"],axis=1)

## Linear Regression

In [622]:
ols_orlando = linear_model.LinearRegression()
ols_orlando.fit(df_2013_to_2017_orlando.drop(columns = 'Yearly_Avg_Zip'), 
              df_2013_to_2017_orlando.Yearly_Avg_Zip)

LinearRegression()

In [623]:
ols_orlando.score(df_2018_orlando.drop(columns = 'Yearly_Avg_Zip'), 
               df_2018_orlando.Yearly_Avg_Zip)

0.003973089319027068

In [625]:
rmse_test_ols_orlando=np.sqrt(
    mean_squared_error
    (df_2018_orlando.Yearly_Avg_Zip, 
    ols_orlando.predict(df_2018_orlando.drop(columns = 'Yearly_Avg_Zip'))
    )
)
print(rmse_test_ols_orlando)

107.19703811779303


## Lasso Regression

In [626]:
lasso_orlando=Lasso()

In [627]:
lasso_orlando.fit(df_2013_to_2017_orlando.drop(columns = 'Yearly_Avg_Zip'), 
              df_2013_to_2017_orlando.Yearly_Avg_Zip)

Lasso()

In [628]:
rmse_test_lasso_orlando=np.sqrt(
    mean_squared_error
    (df_2018_orlando.Yearly_Avg_Zip, 
    lasso_orlando.predict(df_2018_orlando.drop(columns = 'Yearly_Avg_Zip'))
    )
)
print(rmse_test_lasso_orlando)

86.31817190456071


In [629]:
from sklearn.model_selection import GridSearchCV

lasso_params = {
    "alpha": list(np.logspace(-8,2,11))
    ,"max_iter": [10, 100, 1000, 10000]
    , "tol": list(np.logspace(-8,0,9))
    
}

lasso_orlando_gs = GridSearchCV(lasso_orlando, lasso_params, cv=kfold)
lasso_orlando_gs.fit(df_2013_to_2017_orlando.drop(columns = 'Yearly_Avg_Zip'), 
              df_2013_to_2017_orlando.Yearly_Avg_Zip)
lasso_orlando_gs.cv_results_['mean_test_score']

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

array([0.92734596, 0.92734596, 0.92734596, 0.92734596, 0.92734596,
       0.92734596, 0.92734596, 0.93565483, 0.92852877, 0.92608783,
       0.92608783, 0.92608783, 0.92608783, 0.92608783, 0.92608783,
       0.93035608, 0.93565483, 0.92852877, 0.67509922, 0.67509922,
       0.67509922, 0.67509922, 0.67509922, 0.74461111, 0.93035608,
       0.93565483, 0.92852877, 0.52510326, 0.52510326, 0.52510326,
       0.52510326, 0.58088119, 0.74461111, 0.93035608, 0.93565483,
       0.92852877, 0.92734596, 0.92734596, 0.92734596, 0.92734596,
       0.92734596, 0.92734596, 0.92734596, 0.93565483, 0.92852877,
       0.92608785, 0.92608785, 0.92608785, 0.92608785, 0.92608785,
       0.92608785, 0.93035608, 0.93565483, 0.92852877, 0.67509976,
       0.67509976, 0.67509976, 0.67509976, 0.67509976, 0.74461136,
       0.93035608, 0.93565483, 0.92852877, 0.52510567, 0.52510567,
       0.52510567, 0.52510567, 0.58087546, 0.74461136, 0.93035608,
       0.93565483, 0.92852877, 0.92734596, 0.92734596, 0.92734

In [630]:
print("The best parameters are: ", lasso_orlando_gs.best_params_)

The best parameters are:  {'alpha': 1e-08, 'max_iter': 10, 'tol': 0.1}


In [631]:
lasso_orlando_gs.score(df_2018_orlando.drop(columns = 'Yearly_Avg_Zip'), 
               df_2018_orlando.Yearly_Avg_Zip)

0.27833827569822733

In [632]:
lasso_orlando_tuned=Lasso(**lasso_orlando_gs.best_params_)

In [633]:
lasso_orlando_tuned.fit(df_2013_to_2017_orlando.drop(columns = 'Yearly_Avg_Zip'), 
              df_2013_to_2017_orlando.Yearly_Avg_Zip)

Lasso(alpha=1e-08, max_iter=10, tol=0.1)

In [634]:
rmse_test_lasso_orlando=np.sqrt(
    mean_squared_error
    (df_2018_orlando.Yearly_Avg_Zip, 
    lasso_orlando_tuned.predict(df_2018_orlando.drop(columns = 'Yearly_Avg_Zip'))
    )
)
print(rmse_test_lasso_orlando)

91.24605260195403


### Cross Validation on the train set (data in 2013-2017)

In [640]:
X_train, X_test, y_train, y_test = train_test_split(
    df_2013_to_2017_orlando.drop(columns = 'Yearly_Avg_Zip'), 
    df_2013_to_2017_orlando.Yearly_Avg_Zip, 
    test_size=0.3, random_state=0)

In [642]:
lasso_orlando_tuned_scores = cross_val_score(lasso_orlando_tuned, 
                                           X_train, y_train, cv=kfold)
print(lasso_orlando_tuned_scores)
np.mean(lasso_orlando_tuned_scores)

[0.82214155 0.71027804 0.938578  ]


0.8236658627405234

In [643]:
lasso_orlando_tuned.score(X_test, y_test)

0.9728991216349766

### Random Forest

In [644]:
rf_orlando = RandomForestRegressor(random_state=0)
rf_orlando.fit(df_2013_to_2017_orlando.drop(columns = 'Yearly_Avg_Zip'), 
             df_2013_to_2017_orlando.Yearly_Avg_Zip)

RandomForestRegressor(random_state=0)

In [645]:
feature_importances_orlando = pd.Series(rf_orlando.feature_importances_, 
                                index=df_2013_to_2017_orlando.drop(columns = 'Yearly_Avg_Zip').columns)
feature_importances_orlando=feature_importances_orlando.sort_values(ascending=False)
feature_importances_orlando.head(10)

owner_occupied_housing_units_median_value                         0.258367
FLSTHPI_Yearly_Avg                                                0.184349
FL_Unemployment                                                   0.171718
unemployed_pop                                                    0.061277
male_85_and_over                                                  0.025457
income_15000_19999                                                0.024772
two_parents_mother_in_labor_force_families_with_young_children    0.020358
male_male_households                                              0.017856
employed_arts_entertainment_recreation_accommodation_food         0.016793
vacant_housing_units_for_rent                                     0.016375
dtype: float64

## Tampa

In [646]:
df_2013_to_2017_tampa=df_2013_to_2017[
    df_2013_to_2017["zip_code"].isin(tampa_zip)]

df_2013_to_2017_tampa=df_2013_to_2017_tampa.drop(["zip_code","year"],axis=1)
#dropping unique identifiers

In [647]:
df_2018_tampa=df_2018[df_2018["zip_code"].isin(tampa_zip)]
df_2018_tampa=df_2018_tampa.drop(["zip_code","year"],axis=1)

### Linear Regression

In [648]:
ols_tampa = linear_model.LinearRegression()
ols_tampa.fit(df_2013_to_2017_tampa.drop(columns = 'Yearly_Avg_Zip'), 
              df_2013_to_2017_tampa.Yearly_Avg_Zip)

LinearRegression()

In [649]:
ols_tampa.score(df_2018_tampa.drop(columns = 'Yearly_Avg_Zip'), 
               df_2018_tampa.Yearly_Avg_Zip)

0.7556507826905067

In [650]:
rmse_test_ols_tampa=np.sqrt(
    mean_squared_error
    (df_2018_tampa.Yearly_Avg_Zip, 
    ols_tampa.predict(df_2018_tampa.drop(columns = 'Yearly_Avg_Zip'))
    )
)
print(rmse_test_ols_tampa)

86.96108497175207


### Lasso Regression

In [651]:
lasso_tampa=Lasso()

In [652]:
lasso_tampa.fit(df_2013_to_2017_tampa.drop(columns = 'Yearly_Avg_Zip'), 
              df_2013_to_2017_tampa.Yearly_Avg_Zip)

Lasso()

In [653]:
rmse_test_lasso_tampa=np.sqrt(
    mean_squared_error
    (df_2018_tampa.Yearly_Avg_Zip, 
    lasso_tampa.predict(df_2018_tampa.drop(columns = 'Yearly_Avg_Zip'))
    )
)
print(rmse_test_lasso_tampa)

102.7709851037753


In [654]:
from sklearn.model_selection import GridSearchCV

lasso_params = {
    "alpha": list(np.logspace(-8,2,11))
    ,"max_iter": [10, 100, 1000, 10000]
    , "tol": list(np.logspace(-8,0,9))
    
}

lasso_tampa_gs = GridSearchCV(lasso_tampa, lasso_params, cv=kfold)
lasso_tampa_gs.fit(df_2013_to_2017_tampa.drop(columns = 'Yearly_Avg_Zip'), 
              df_2013_to_2017_tampa.Yearly_Avg_Zip)
print("The best parameters are: ", lasso_tampa_gs.best_params_)
lasso_tampa_gs.cv_results_['mean_test_score']

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


The best parameters are:  {'alpha': 0.1, 'max_iter': 10000, 'tol': 1e-08}


array([ 0.82636364,  0.82636364,  0.82636364,  0.82636364,  0.82636364,
        0.82636364,  0.82636364,  0.79086128,  0.75121536,  0.87561802,
        0.87561802,  0.87561802,  0.87561802,  0.87561802,  0.87561802,
        0.85835217,  0.79086128,  0.75121536,  0.82425836,  0.82425836,
        0.82425836,  0.82425836,  0.82425836,  0.82425836,  0.84587354,
        0.79086128,  0.75121536,  0.70155455,  0.70155455,  0.70155455,
        0.70155455,  0.70155455,  0.70155455,  0.84587354,  0.79086128,
        0.75121536,  0.82636364,  0.82636364,  0.82636364,  0.82636364,
        0.82636364,  0.82636364,  0.82636364,  0.79086128,  0.75121536,
        0.87561802,  0.87561802,  0.87561802,  0.87561802,  0.87561802,
        0.87561802,  0.85835218,  0.79086128,  0.75121536,  0.82425847,
        0.82425847,  0.82425847,  0.82425847,  0.82425847,  0.82425847,
        0.84587356,  0.79086128,  0.75121536,  0.70155545,  0.70155545,
        0.70155545,  0.70155545,  0.70155545,  0.70155545,  0.84

In [655]:
print("The best parameters are: ", lasso_tampa_gs.best_params_)

The best parameters are:  {'alpha': 0.1, 'max_iter': 10000, 'tol': 1e-08}


In [656]:
lasso_tampa_gs.score(df_2018_tampa.drop(columns = 'Yearly_Avg_Zip'), 
               df_2018_tampa.Yearly_Avg_Zip)

0.7325601135546038

In [657]:
lasso_tampa_tuned=Lasso(**lasso_tampa_gs.best_params_)

In [658]:
lasso_tampa_tuned.fit(df_2013_to_2017_tampa.drop(columns = 'Yearly_Avg_Zip'), 
              df_2013_to_2017_tampa.Yearly_Avg_Zip)

Lasso(alpha=0.1, max_iter=10000, tol=1e-08)

In [659]:
rmse_test_lasso_tampa=np.sqrt(
    mean_squared_error
    (df_2018_tampa.Yearly_Avg_Zip, 
    lasso_tampa_tuned.predict(df_2018_tampa.drop(columns = 'Yearly_Avg_Zip'))
    )
)
print(rmse_test_lasso_tampa)

90.97719929178346


### Cross Validation on the train set (data in 2013-2017)

In [660]:
X_train, X_test, y_train, y_test = train_test_split(
    df_2013_to_2017_tampa.drop(columns = 'Yearly_Avg_Zip'), 
    df_2013_to_2017_tampa.Yearly_Avg_Zip, 
    test_size=0.3, random_state=0)

In [661]:
lasso_tampa_tuned_scores = cross_val_score(lasso_tampa_tuned, 
                                           X_train, y_train, cv=kfold)
print(lasso_tampa_tuned_scores)
np.mean(lasso_tampa_tuned_scores)

[ 0.89949114 -0.46682757  0.44224387]


  model = cd_fast.enet_coordinate_descent(


0.2916358156811291

In [662]:
lasso_tampa_tuned.score(X_test, y_test)

0.983001105259894

### Random Forest

In [663]:
rf_tampa = RandomForestRegressor(random_state=0)
rf_tampa.fit(df_2013_to_2017_tampa.drop(columns = 'Yearly_Avg_Zip'), 
             df_2013_to_2017_tampa.Yearly_Avg_Zip)

RandomForestRegressor(random_state=0)

In [665]:
feature_importances_tampa = pd.Series(rf_tampa.feature_importances_, 
                                index=df_2013_to_2017_tampa.drop(columns = 'Yearly_Avg_Zip').columns)
feature_importances_tampa=feature_importances_tampa.sort_values(ascending=False)
feature_importances_tampa.head(10)

unemployed_pop                               0.211922
income_40000_44999                           0.070736
income_30000_34999                           0.069585
income_200000_or_more                        0.064871
FL_Unemployment                              0.061388
owner_occupied_housing_units_median_value    0.058847
FLSTHPI_Yearly_Avg                           0.056798
male_45_64_grade_9_12                        0.033159
income_15000_19999                           0.030899
dwellings_50_or_more_units                   0.026100
dtype: float64

## Jacksonville

In [666]:
df_2013_to_2017_jax=df_2013_to_2017[
    df_2013_to_2017["zip_code"].isin(jax_zip)]

df_2013_to_2017_jax=df_2013_to_2017_jax.drop(["zip_code","year"],axis=1)
#dropping unique identifiers

In [667]:
df_2018_jax=df_2018[df_2018["zip_code"].isin(jax_zip)]
df_2018_jax=df_2018_jax.drop(["zip_code","year"],axis=1)

### Linear Regression

In [668]:
ols_jax = linear_model.LinearRegression()
ols_jax.fit(df_2013_to_2017_jax.drop(columns = 'Yearly_Avg_Zip'), 
              df_2013_to_2017_jax.Yearly_Avg_Zip)

LinearRegression()

In [669]:
ols_jax.score(df_2018_jax.drop(columns = 'Yearly_Avg_Zip'), 
               df_2018_jax.Yearly_Avg_Zip)

0.8997533588545343

In [670]:
rmse_test_ols_jax=np.sqrt(
    mean_squared_error
    (df_2018_jax.Yearly_Avg_Zip, 
    ols_jax.predict(df_2018_jax.drop(columns = 'Yearly_Avg_Zip'))
    )
)
print(rmse_test_ols_jax)

66.27851721048069


### Lasso Regression

In [671]:
lasso_jax=Lasso()

In [672]:
lasso_jax.fit(df_2013_to_2017_jax.drop(columns = 'Yearly_Avg_Zip'), 
              df_2013_to_2017_jax.Yearly_Avg_Zip)

Lasso()

In [673]:
rmse_test_lasso_jax=np.sqrt(
    mean_squared_error
    (df_2018_jax.Yearly_Avg_Zip, 
    lasso_jax.predict(df_2018_jax.drop(columns = 'Yearly_Avg_Zip'))
    )
)
print(rmse_test_lasso_jax)

37.8609401530369


In [674]:
from sklearn.model_selection import GridSearchCV

lasso_params = {
    "alpha": list(np.logspace(-8,2,11))
    ,"max_iter": [10, 100, 1000, 10000]
    , "tol": list(np.logspace(-8,0,9))
    
}

lasso_jax_gs = GridSearchCV(lasso_jax, lasso_params, cv=kfold)
lasso_jax_gs.fit(df_2013_to_2017_jax.drop(columns = 'Yearly_Avg_Zip'), 
              df_2013_to_2017_jax.Yearly_Avg_Zip)
print("The best parameters are: ", lasso_jax_gs.best_params_)
lasso_jax_gs.cv_results_['mean_test_score']

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

The best parameters are:  {'alpha': 0.1, 'max_iter': 10000, 'tol': 0.001}


array([0.72014823, 0.72014823, 0.72014823, 0.72014823, 0.72014823,
       0.72014823, 0.72014823, 0.70266617, 0.5924276 , 0.62401555,
       0.62401555, 0.62401555, 0.62401555, 0.62401555, 0.62401555,
       0.69474167, 0.70266617, 0.5924276 , 0.45626205, 0.45626205,
       0.45626205, 0.45626205, 0.4610602 , 0.56613192, 0.69474167,
       0.70266617, 0.5924276 , 0.44836488, 0.44837099, 0.44844484,
       0.4493507 , 0.4610602 , 0.56613192, 0.69474167, 0.70266617,
       0.5924276 , 0.72014824, 0.72014824, 0.72014824, 0.72014824,
       0.72014824, 0.72014824, 0.72014824, 0.70266618, 0.5924276 ,
       0.62401588, 0.62401588, 0.62401588, 0.62401588, 0.62401588,
       0.62401588, 0.69474171, 0.70266618, 0.5924276 , 0.45626588,
       0.45626588, 0.45626588, 0.45626588, 0.46106302, 0.56613285,
       0.69474171, 0.70266618, 0.5924276 , 0.44838144, 0.44837938,
       0.44845157, 0.44935528, 0.46106302, 0.56613285, 0.69474171,
       0.70266618, 0.5924276 , 0.72014838, 0.72014838, 0.72014

In [675]:
print("The best parameters are: ", lasso_jax_gs.best_params_)

The best parameters are:  {'alpha': 0.1, 'max_iter': 10000, 'tol': 0.001}


In [676]:
lasso_jax_gs.score(df_2018_jax.drop(columns = 'Yearly_Avg_Zip'), 
               df_2018_jax.Yearly_Avg_Zip)

0.9671651264200801

In [677]:
lasso_jax_tuned=Lasso(**lasso_jax_gs.best_params_)

In [678]:
lasso_jax_tuned.fit(df_2013_to_2017_jax.drop(columns = 'Yearly_Avg_Zip'), 
              df_2013_to_2017_jax.Yearly_Avg_Zip)

Lasso(alpha=0.1, max_iter=10000, tol=0.001)

In [679]:
rmse_test_lasso_jax=np.sqrt(
    mean_squared_error
    (df_2018_jax.Yearly_Avg_Zip, 
    lasso_jax_tuned.predict(df_2018_jax.drop(columns = 'Yearly_Avg_Zip'))
    )
)
print(rmse_test_lasso_jax)

37.93198262730631


### Cross Validation on the train set (data in 2013-2017)

In [680]:
X_train, X_test, y_train, y_test = train_test_split(
    df_2013_to_2017_jax.drop(columns = 'Yearly_Avg_Zip'), 
    df_2013_to_2017_jax.Yearly_Avg_Zip, 
    test_size=0.3, random_state=0)

In [681]:
lasso_jax_tuned_scores = cross_val_score(lasso_jax_tuned, 
                                           X_train, y_train, cv=kfold)
print(lasso_jax_tuned_scores)
np.mean(lasso_jax_tuned_scores)

[0.6774788  0.39114405 0.6033645 ]


0.5573291192012199

In [682]:
lasso_jax_tuned.score(X_test, y_test)

0.9983197604003861

### Random Forest

In [683]:
rf_jax = RandomForestRegressor(random_state=0)
rf_jax.fit(df_2013_to_2017_jax.drop(columns = 'Yearly_Avg_Zip'), 
             df_2013_to_2017_jax.Yearly_Avg_Zip)

RandomForestRegressor(random_state=0)

In [684]:
feature_importances_jax = pd.Series(rf_jax.feature_importances_, 
                                index=df_2013_to_2017_jax.drop(columns = 'Yearly_Avg_Zip').columns)
feature_importances_jax=feature_importances_jax.sort_values(ascending=False)
feature_importances_jax.head(10)

households_public_asst_or_food_stamps        0.478428
income_200000_or_more                        0.076912
owner_occupied_housing_units_median_value    0.065419
commuters_by_bus                             0.056402
median_year_structure_built                  0.042046
walked_to_work                               0.027739
income_125000_149999                         0.024417
income_30000_34999                           0.017101
FL_Unemployment                              0.016970
income_150000_199999                         0.015011
dtype: float64