# Chicago COVID mobility data

## Set up

In [2]:
import pandas as pd
import numpy as np

### Load features

In [3]:
df_features_census = pd.read_csv('./data/census_processed.csv', dtype={'geo_12': 'str'})
df_features_census["geo_12"] = df_features_census["GEO_ID"].map(lambda x: str(x)[-12:])
df_features_census.drop(["GEO_ID"], axis=1, inplace=True)
df_features_census.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3993 entries, 0 to 3992
Data columns (total 16 columns):
 #   Column                                   Non-Null Count  Dtype  
---  ------                                   --------------  -----  
 0   Median_Income                            3836 non-null   float64
 1   Median_Age                               3993 non-null   float64
 2   Percent_NonCitizen                       3989 non-null   float64
 3   Percent_SpeakEngl_Poorly                 3989 non-null   float64
 4   Percent_less_than_HS                     3989 non-null   float64
 5   Percent_HS                               3989 non-null   float64
 6   Percent_SomeCollege                      3989 non-null   float64
 7   Percent_Bach                             3989 non-null   float64
 8   Percent_Grad                             3989 non-null   float64
 9   Percent_No_vehicals                      3989 non-null   float64
 10  Percent_Received_SNAP                    3989 no

In [4]:
df_features_places = pd.read_csv('./data/places_count_by_census_block.csv', dtype={'geo_12': 'str'})
df_features_places.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2194 entries, 0 to 2193
Data columns (total 11 columns):
 #   Column                                               Non-Null Count  Dtype  
---  ------                                               --------------  -----  
 0   geo_12                                               2194 non-null   object 
 1   automotive_repair_and_maintenance                    2194 non-null   float64
 2   child_day_care_services                              2194 non-null   float64
 3   elementary_and_secondary_schools                     2194 non-null   float64
 4   grocery_stores                                       2194 non-null   float64
 5   health_and_personal_care_stores                      2194 non-null   float64
 6   museums,_historical_sites,_and_similar_institutions  2194 non-null   float64
 7   offices_of_physicians                                2194 non-null   float64
 8   other_amusement_and_recreation_industries            2194 non-null  

In [5]:
df_features = df_features_places.merge(df_features_census, on='geo_12')
df_features.head()

Unnamed: 0,geo_12,automotive_repair_and_maintenance,child_day_care_services,elementary_and_secondary_schools,grocery_stores,health_and_personal_care_stores,"museums,_historical_sites,_and_similar_institutions",offices_of_physicians,other_amusement_and_recreation_industries,religious_organizations,...,Percent_HS,Percent_SomeCollege,Percent_Bach,Percent_Grad,Percent_No_vehicals,Percent_Received_SNAP,Percent_Men_Usually_Fulltime_Employed,Percent_Women_Usually_Fulltime_Employed,Percent_No_Internet_Access,Percent_Computing_Device
0,170310101001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.168511,0.255138,0.289598,0.17926,0.344589,0.18696,0.885906,0.382653,0.217742,0.782258
1,170310101002,0.0,1.0,1.0,0.0,0.0,3.0,1.0,2.0,1.0,...,0.168511,0.255138,0.289598,0.17926,0.344589,0.18696,0.507064,0.452071,0.330517,0.829989
2,170310101003,0.0,1.0,0.0,0.0,0.0,3.0,0.0,1.0,1.0,...,0.168511,0.255138,0.289598,0.17926,0.344589,0.18696,0.765318,0.663338,0.074041,0.950045
3,170310102011,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.24611,0.246314,0.193898,0.114251,0.140014,0.316592,0.559184,0.437107,0.10503,0.921598
4,170310102012,1.0,4.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.24611,0.246314,0.193898,0.114251,0.140014,0.316592,0.495495,0.3208,0.202247,0.822868


## Targets - diff data 2019-2020
### Load targets

In [6]:
df_targets_diff = pd.read_csv('./data/COVID_mobility_targets_adjusted.csv', dtype={'geo_12': 'str'})
df_targets_diff.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 58062 entries, 0 to 58061
Data columns (total 6 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   geo_12                        58062 non-null  object 
 1   Week                          58062 non-null  int64  
 2   fraction_of_devices_home_adj  58062 non-null  float64
 3   fraction_of_devices_work_adj  58062 non-null  float64
 4   fraction_time_away_all_adj    58062 non-null  float64
 5   fraction_time_away_leave_adj  58062 non-null  float64
dtypes: float64(4), int64(1), object(1)
memory usage: 2.7+ MB


### Create dataframe

In [7]:
df_diff = df_features.merge(df_targets_diff, on='geo_12')
df_diff.columns

Index(['geo_12', 'automotive_repair_and_maintenance',
       'child_day_care_services', 'elementary_and_secondary_schools',
       'grocery_stores', 'health_and_personal_care_stores',
       'museums,_historical_sites,_and_similar_institutions',
       'offices_of_physicians', 'other_amusement_and_recreation_industries',
       'religious_organizations', 'restaurants_and_other_eating_places',
       'Median_Income', 'Median_Age', 'Percent_NonCitizen',
       'Percent_SpeakEngl_Poorly', 'Percent_less_than_HS', 'Percent_HS',
       'Percent_SomeCollege', 'Percent_Bach', 'Percent_Grad',
       'Percent_No_vehicals', 'Percent_Received_SNAP',
       'Percent_Men_Usually_Fulltime_Employed',
       'Percent_Women_Usually_Fulltime_Employed', 'Percent_No_Internet_Access',
       'Percent_Computing_Device', 'Week', 'fraction_of_devices_home_adj',
       'fraction_of_devices_work_adj', 'fraction_time_away_all_adj',
       'fraction_time_away_leave_adj'],
      dtype='object')

## Feature & Target selection

### Look at correlations between features and targets to get an idea about which features might be important

In [7]:
df_diff.corr()

Unnamed: 0,automotive_repair_and_maintenance,child_day_care_services,elementary_and_secondary_schools,grocery_stores,health_and_personal_care_stores,"museums,_historical_sites,_and_similar_institutions",offices_of_physicians,other_amusement_and_recreation_industries,religious_organizations,restaurants_and_other_eating_places,...,Percent_Received_SNAP,Percent_Men_Usually_Fulltime_Employed,Percent_Women_Usually_Fulltime_Employed,Percent_No_Internet_Access,Percent_Computing_Device,Week,fraction_of_devices_home_adj,fraction_of_devices_work_adj,fraction_time_away_all_adj,fraction_time_away_leave_adj
automotive_repair_and_maintenance,1.0,0.089246,0.043666,0.108608,0.071862,0.030947,0.014925,0.08091,-0.005233,0.055909,...,-0.029272,0.05315894,-0.010992,0.01662934,-0.01114432,-4.953872e-05,-0.013703,0.048549,0.045345,0.049201
child_day_care_services,0.089246,1.0,0.166462,0.120419,0.15996,0.202457,0.054236,0.14237,0.154368,0.192267,...,0.046516,-0.04779458,-0.001662,0.02017015,-0.02424201,-5.931032e-05,0.039487,0.01892,-0.064054,-0.047576
elementary_and_secondary_schools,0.043666,0.166462,1.0,0.034539,0.032242,0.072436,0.060065,0.035593,0.189967,0.008721,...,0.075651,-0.08904497,-0.068671,0.02584205,-0.0123293,-6.893552e-05,0.067352,-0.002546,-0.033289,0.003425
grocery_stores,0.108608,0.120419,0.034539,1.0,0.355827,0.258992,0.027259,0.305795,-0.01788,0.481652,...,-0.018075,0.05612708,0.005673,-0.02143545,0.02017757,-5.297937e-05,0.059446,0.032216,-0.061237,-0.037467
health_and_personal_care_stores,0.071862,0.15996,0.032242,0.355827,1.0,0.402859,0.31526,0.507238,-0.03406,0.70113,...,-0.196355,0.1437923,0.142086,-0.1585893,0.1441897,7.131526e-05,-0.006413,0.065427,-0.114838,-0.132697
"museums,_historical_sites,_and_similar_institutions",0.030947,0.202457,0.072436,0.258992,0.402859,1.0,0.116484,0.426369,-0.02098,0.519153,...,-0.138718,0.08443492,0.095723,-0.1330151,0.1154594,0.0007323064,0.016536,0.050762,-0.119953,-0.126229
offices_of_physicians,0.014925,0.054236,0.060065,0.027259,0.31526,0.116484,1.0,0.146924,0.008118,0.175234,...,-0.112313,0.07606267,0.072904,-0.08121441,0.06991075,0.0008567673,-0.003934,0.031176,-0.039926,-0.04369
other_amusement_and_recreation_industries,0.08091,0.14237,0.035593,0.305795,0.507238,0.426369,0.146924,1.0,-0.043326,0.551663,...,-0.272237,0.2380633,0.256404,-0.2423743,0.2176391,2.957543e-05,-0.057431,0.122403,-0.130112,-0.196238
religious_organizations,-0.005233,0.154368,0.189967,-0.01788,-0.03406,-0.02098,0.008118,-0.043326,1.0,-0.05011,...,0.29404,-0.274231,-0.137731,0.2057544,-0.1575955,-7.195416e-05,0.14731,-0.076534,-0.03428,0.044237
restaurants_and_other_eating_places,0.055909,0.192267,0.008721,0.481652,0.70113,0.519153,0.175234,0.551663,-0.05011,1.0,...,-0.197815,0.1685919,0.147573,-0.1808712,0.1650446,-3.163242e-05,0.012552,0.069365,-0.162599,-0.178266


Looks like a lot of correlations that are not very strong.

### Split into training and testing

In [8]:
from sklearn.model_selection import train_test_split

train, test = train_test_split(df_diff, test_size=0.2)
train.is_copy = None
test.is_copy = None


### Impute missing

In [9]:
train['Median_Income'].fillna((train['Median_Income'].median()), inplace=True)
test['Median_Income'].fillna((train['Median_Income'].median()), inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._update_inplace(new_data)


In [10]:
train.dropna(inplace=True)
test.dropna(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


### Regression model - feature & target selection

In [11]:
from sklearn.preprocessing import StandardScaler
from sklearn import linear_model
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

features = ['automotive_repair_and_maintenance',
            'child_day_care_services', 'elementary_and_secondary_schools',
            'grocery_stores', 'health_and_personal_care_stores',
            'museums,_historical_sites,_and_similar_institutions',
            'offices_of_physicians', 'other_amusement_and_recreation_industries',
            'religious_organizations', 'restaurants_and_other_eating_places',
            'Median_Income', 'Median_Age', 'Percent_NonCitizen',
            'Percent_SpeakEngl_Poorly', 'Percent_less_than_HS', 'Percent_HS',
            'Percent_SomeCollege', 'Percent_Bach', 'Percent_Grad',
            'Percent_No_vehicals', 'Percent_Received_SNAP',
            'Percent_Men_Usually_Fulltime_Employed',
            'Percent_Women_Usually_Fulltime_Employed', 'Percent_No_Internet_Access',
            'Percent_Computing_Device']

targets = ['fraction_of_devices_home_adj','fraction_of_devices_work_adj',
           'fraction_time_away_all_adj','fraction_time_away_leave_adj']
    
def run_linear_regression(target, verbose=False):
    X_train, X_test = train[features].values, test[features].values
    y_train, y_test = train[target].values, test[target].values
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test) 
    regr = linear_model.LinearRegression()

    regr.fit(X_train_scaled, y_train)
    y_pred = regr.predict(X_test_scaled)

    MSE = mean_squared_error(y_pred, y_test)
    R2 = regr.score(X_train_scaled, y_train)
    if verbose:
        print("\nRegression with {} target".format(target))
        print("MSE: ", MSE, "R2: ", R2)
    return regr


def feature_importance(regr_model, df):
    return pd.DataFrame(regr_model.coef_, index = df[features].columns,
            columns=['importance']).sort_values('importance', ascending=False)

In [12]:
for target in targets:
    regr = run_linear_regression(target, True)
    print(feature_importance(regr, df_diff))
    print('\n')


Regression with fraction_of_devices_home_adj target
MSE:  0.008240938236253208 R2:  0.21671011806225104
                                                    importance
Percent_NonCitizen                                    0.024664
Percent_less_than_HS                                  0.020279
Percent_No_vehicals                                   0.008010
restaurants_and_other_eating_places                   0.004679
grocery_stores                                        0.004224
elementary_and_secondary_schools                      0.003301
Percent_HS                                            0.003112
religious_organizations                               0.002544
offices_of_physicians                                 0.002454
museums,_historical_sites,_and_similar_institut...    0.001782
Percent_No_Internet_Access                            0.000907
Percent_Computing_Device                              0.000799
Percent_Women_Usually_Fulltime_Employed               0.000271
health_and_pe

So it looks like the differenced data explains more of the variance in the models than the raw data.

Targets: diff data

It seems as though abs(0.002) is a sensible cut off for feature selection 

### Feature selection

In [13]:
def create_selected_dfs(target):
    regr_diff = run_linear_regression(target)
    selected_features = list(pd.Series(features)[list(abs(regr_diff.coef_) > 0.002)])
    return train[selected_features], train[target], test[selected_features], test[target]

In [14]:
fodh_features_train, fodh_target_train, fodh_features_test, fodh_target_test = create_selected_dfs('fraction_of_devices_home_adj')

fodw_features_train, fodw_target_train, fodw_features_test, fodw_target_test = create_selected_dfs('fraction_of_devices_work_adj')

ftaa_features_train, ftaa_target_train, ftaa_features_test, ftaa_target_test = create_selected_dfs('fraction_time_away_all_adj')

ftal_features_train, ftal_target_train, ftal_features_test, ftal_target_test = create_selected_dfs('fraction_time_away_leave_adj')

## Random Forest Model

### Hyperparameter tuning set up

In [15]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from pprint import pprint

# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 10, stop = 400, num = 5)]
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(10, 200, num = 5)]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [2, 5]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2]
# Method of selecting samples for training each tree
bootstrap = [True, False]


In [31]:
# Create the random grid
random_grid = {'n_estimators': n_estimators,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}

In [32]:
pprint(random_grid)

{'bootstrap': [True, False],
 'max_depth': [10, 57, 105, 152, 200, None],
 'min_samples_leaf': [1, 2],
 'min_samples_split': [2, 5],
 'n_estimators': [10, 107, 205, 302, 400]}


In [33]:
def random_grid_search(train_X, train_y):
    # Use the random grid to search for best hyperparameters
    # First create the base model to tune
    rf = RandomForestRegressor()
    # Random search of parameters, using 3 fold cross validation, 
    # search across 100 different combinations, and use all available cores
    rf_random = RandomizedSearchCV(estimator = rf, param_distributions = random_grid, n_iter = 100, cv = 3, verbose=True, random_state=42, n_jobs = -1)

    # Fit the random search model
    rf_random.fit(train_X, train_y)

    #Print best parameters
    print(rf_random.best_params_)

### Model 1 - Fraction of devices home

In [None]:
# Best parameters from random grid search
random_grid_search(fodh_features_train, fodh_target_train)

In [None]:
# run grid search to find best model
param_grid = {'bootstrap': [True],
              'min_samples_leaf': [1],
              'min_samples_split': [3,5,7],
              'n_estimators': [300,400,500]}


rf = RandomForestRegressor()# Instantiate the grid search model
grid_search = GridSearchCV(estimator = rf, param_grid = param_grid, 
                          cv = 3, n_jobs = -1, verbose = 2)

In [None]:
grid_search.fit(fodh_features_train, fodh_target_train)

grid_search.best_params_

grid_search.best_estimator_

In [16]:
rf = RandomForestRegressor(n_jobs=-1, 
                           n_estimators=500,
                           min_samples_split=7,
                           min_samples_leaf=1,
                           bootstrap=True
                            )
model = rf.fit(fodh_features_train,fodh_target_train)

In [17]:
fodh_y_pred=rf.predict(fodh_features_test)
mean_squared_error(fodh_y_pred, fodh_target_test)

0.003934931406348667

In [None]:
# #Show how much each feature contributes to the model prediction
# feature_importances = pd.DataFrame(rf.feature_importances_,
#                                    index = df[features].columns,
#                                     columns=['importance']).sort_values('importance', ascending=False)
# print(feature_importances)

In [None]:
# pd.DataFrame.from_dict(rf_random.cv_results_)

In [None]:
# best_random = rf_random.best_estimator_

### Model 2 - Fraction of devices work

In [35]:
# Best parameters from random grid search
random_grid_search(fodw_features_train, fodw_target_train)

Fitting 3 folds for each of 100 candidates, totalling 300 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   52.3s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:  3.1min
[Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed:  4.9min finished


{'n_estimators': 302, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_depth': 57, 'bootstrap': True}


In [37]:
# Run grid search to find best model
param_grid = {'bootstrap': [True],
              'min_samples_leaf': [2],
              'min_samples_split': [3,5,7],
              'max_depth': [45, 55, 65],
              'n_estimators': [300]}


rf_fodw = RandomForestRegressor()# Instantiate the grid search model
grid_search_fodw = GridSearchCV(estimator = rf_fodw, param_grid = param_grid, 
                          cv = 3, n_jobs = -1, verbose = 2)

In [38]:
grid_search_fodw.fit(fodw_features_train, fodw_target_train)

grid_search_fodw.best_params_

grid_search_fodw.best_estimator_

Fitting 3 folds for each of 9 candidates, totalling 27 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:   37.4s finished


RandomForestRegressor(max_depth=45, min_samples_leaf=2, min_samples_split=5,
                      n_estimators=300)

In [18]:
# Run best model
rf_best_fodw = RandomForestRegressor(n_jobs=-1, 
                                   n_estimators=300,
                                   min_samples_split=5,
                                   min_samples_leaf=2,
                                   bootstrap=True
                            )
model = rf_best_fodw.fit(fodw_features_train,fodw_target_train)

In [19]:
fodw_y_pred=rf_best_fodw.predict(fodw_features_test)
mean_squared_error(fodw_y_pred, fodw_target_test)

0.0014635345274303933

### Model 3 - Fraction of time away all

In [25]:
# Best parameters from random grid search
random_grid_search(ftaa_features_train, ftaa_target_train)

Fitting 3 folds for each of 100 candidates, totalling 300 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:  3.8min
[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed: 11.5min
[Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed: 18.1min finished


{'n_estimators': 302, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_depth': 57, 'bootstrap': True}


In [27]:
# Run grid search to find best model
param_grid = {'bootstrap': [True],
              'min_samples_leaf': [2],
              'min_samples_split': [3,5,7],
              'max_depth': [45, 55, 65],
              'n_estimators': [300]}


rf_ftaa = RandomForestRegressor()# Instantiate the grid search model
grid_search_ftaa = GridSearchCV(estimator = rf_ftaa, param_grid = param_grid, 
                          cv = 3, n_jobs = -1, verbose = 2)

In [28]:
grid_search_ftaa.fit(ftaa_features_train, ftaa_target_train)

grid_search_ftaa.best_params_

grid_search_ftaa.best_estimator_

Fitting 3 folds for each of 9 candidates, totalling 27 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:  2.4min finished


RandomForestRegressor(max_depth=65, min_samples_leaf=2, min_samples_split=7,
                      n_estimators=300)

In [44]:
# Run best model
rf_best_ftaa = RandomForestRegressor(n_jobs=-1, 
                                    n_estimators=300,
                                    min_samples_split=7,
                                    min_samples_leaf=2,
                                    bootstrap=True
                            )
model = rf_best_ftaa.fit(ftaa_features_train,ftaa_target_train)

In [52]:
# Evaluate model
ftaa_y_pred=rf_best_ftaa.predict(ftaa_features_test)
mean_squared_error(ftaa_y_pred, ftaa_target_test)

0.0012245857177477306

### Model 4 - Fraction time away leave

In [39]:
# Best parameters from random grid search
random_grid_search(ftal_features_train, ftal_target_train)

Fitting 3 folds for each of 100 candidates, totalling 300 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:  6.1min
[Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed: 10.0min finished


{'n_estimators': 400, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_depth': 152, 'bootstrap': True}


In [40]:
# Run grid search to find best model
param_grid = {'bootstrap': [True],
              'min_samples_leaf': [2],
              'min_samples_split': [3,5,7],
              'n_estimators': [400]}

rf_ftal = RandomForestRegressor()# Instantiate the grid search model
grid_search_ftal = GridSearchCV(estimator = rf_ftal, param_grid = param_grid, 
                          cv = 3, n_jobs = -1, verbose = 2)

In [41]:
grid_search_ftal.fit(ftal_features_train, ftal_target_train)

grid_search_ftal.best_params_

grid_search_ftal.best_estimator_

Fitting 3 folds for each of 3 candidates, totalling 9 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   4 out of   9 | elapsed:   24.5s remaining:   30.6s
[Parallel(n_jobs=-1)]: Done   9 out of   9 | elapsed:   38.7s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   9 out of   9 | elapsed:   38.7s finished


RandomForestRegressor(min_samples_leaf=2, min_samples_split=7, n_estimators=400)

In [42]:
# Run best model
rf_best_ftal = RandomForestRegressor(n_jobs=-1, 
                                    n_estimators=400,
                                    min_samples_split=7,
                                    min_samples_leaf=2,
                                    bootstrap=True
                            )
model = rf_best_ftal.fit(ftal_features_train,ftal_target_train)

In [43]:
# Evaluate model
ftal_y_pred=rf_best_ftal.predict(ftal_features_test)
mean_squared_error(ftal_y_pred, ftal_target_test)

0.0026647819860131793