----------------------------------------------------------------------------------------------------------------------

# Building Models with Interactions - Manually Created Algorithm
## (Best 10, Best 25, Best 10 + Categorical 10)

----------------------------------------------------------------------------------------------------------------------

In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_squared_error
from math import sqrt
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.metrics import make_scorer, r2_score
from sklearn.svm import LinearSVR, SVR
from xgboost import XGBRegressor
from sklearn import tree
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, RFE, f_regression, SelectFromModel
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.model_selection import cross_val_score, cross_validate, GridSearchCV
from sklearn.ensemble import RandomForestRegressor

In [2]:
def score_to_stats(scores):
    
    return {score:round(values.mean(),4) for score, values in scores.items()}

In [3]:
def rmsle_custom(y_actual, y_predicted):
    return sqrt(mean_squared_error(y_actual, y_predicted))
rmsle_score = make_scorer(score_func=rmsle_custom)

In [4]:
import warnings
warnings.filterwarnings("ignore")

In [5]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

### Data Importing

In [6]:
df_train = pd.read_csv('train_master.csv')
df_test = pd.read_csv('test_master.csv')

### Output variable logarithmic transformation

In [7]:
df_train['SalePrice_log'] = np.log1p(df_train['SalePrice'])

In [8]:
df_train = df_train.drop(['Id','SalePrice'], axis = 1)

In [9]:
df_test = df_test.drop('Id', axis = 1)

### Pre-processing and data cleaning

In [10]:
outlier1 = df_train[df_train['GrLivArea'] > 4500].index
outlier2 = df_train[df_train["1stFlrSF"] > 4000].index
outlier3 = df_train[df_train["TotalBsmtSF"] > 4000].index

In [11]:
# 523, 1298
df_train = df_train.drop(outlier1)

In [12]:
missing_values_attribute = ['PoolQC','MiscFeature', 'Alley', 'Fence', 'FireplaceQu']
correlated_attributes = ['GarageArea', '1stFlrSF']

In [13]:
df_train = df_train.drop(missing_values_attribute, axis = 1)
df_test = df_test.drop(missing_values_attribute, axis = 1)

In [14]:
df_train = df_train.drop(correlated_attributes, axis = 1)
df_test = df_test.drop(correlated_attributes, axis = 1)

### Attributes mapping and dummy coding

In [15]:
df_train['train'] = 1
df_test['train'] = 0

In [16]:
df_combined = pd.concat([df_train, df_test])

In [17]:
df_combined = df_combined.reset_index(drop = True)

In [18]:
cat_mapping = {'NA':0, 'Po':1, 'Fa':2, 'TA':3, 'Gd':4, 'Ex':5}
scale_attributes = ['ExterQual', 'ExterCond', 'BsmtQual', 'BsmtCond', 'HeatingQC', 'KitchenQual', 'GarageQual', 'GarageCond']
for i in df_combined[scale_attributes]:
    df_combined[i] = df_combined[i].map(cat_mapping)

In [19]:
df_cat = df_combined.select_dtypes(include = ['object'])

In [20]:
df_cat_columns = df_cat.columns

In [21]:
df_cat_dummies = pd.get_dummies(df_cat)

In [22]:
df_combined = df_combined.drop(df_cat_columns, axis = 1)

In [23]:
df_combined = pd.concat([df_combined, df_cat_dummies], axis = 1)

In [24]:
df_train = df_combined[df_combined['train'] == 1]
df_train = df_train.drop(['train'], axis = 1)

In [25]:
df_train = df_train.fillna(df_train.mean())

In [26]:
X = df_train.drop('SalePrice_log', axis = 1)
y = df_train['SalePrice_log']

In [27]:
scoring = {'rmsle': rmsle_score,
           'r2': 'r2'}

### Interaction Examining - Manually Created Algorithm

In [28]:
from sklearn import tree
from matplotlib.pyplot import figure

In [29]:
def get_interactions(tree_model, feature_names, plot_tree = False):
    if plot_tree:
        figure(figsize=(15,15))
        tree.plot_tree(tree_model, feature_names = feature_names, class_names = tree_model.classes_, filled = True,
                       rounded = True, fontsize = 10)
    children_left = tree_model.tree_.children_left
    children_right = tree_model.tree_.children_right
    feature = tree_model.tree_.feature
    #treshold = tree_model.tree_.treshold
    
    def get_paths(node):
        if feature[node] < 0: return []
        feat = feature_names[feature[node]]
        left = get_paths(children_left[node])
        right = get_paths(children_right[node])
        left = [feat + '___' + l for l in left] if left else [feat]
        right = [feat + '___' + r for r in right] if right else [feat]
        #return left+right if left+right else [feat]
        return left + right
    
    paths = [path.split('___') for path in set(['___'.join(set(path.split('___'))) for path in get_paths(0)])]
    return paths

In [30]:
from random import sample

In [31]:
interaction_list = []
for i in range(1,10001):
    df_rows = X.sample(frac=0.25)
    all_columns_index = list(range(0,239))
    sample_columns = sample(all_columns_index, 30)
    df_sample = df_rows.iloc[:, sample_columns]
    df_sample['SalePrice_log'] = y
    X_sample = df_sample.drop('SalePrice_log', axis = 1)
    y_sample = df_sample['SalePrice_log']
    model = tree.DecisionTreeRegressor(max_depth = 2)
    model = model.fit(X_sample, y_sample)
    interactions = get_interactions(model, feature_names = X_sample.columns, plot_tree = False)
    #print(i)
    #print(interactions)
    interaction_list.append(interactions)
    
interaction_list_final = [i[0] for i in interaction_list if len(i) < 2] + [i[0] for i in interaction_list if len(i) > 1] + [i[1] for i in interaction_list if len(i) > 1]
sorted_interaction_list_final = sorted(interaction_list_final)

In [32]:
for i in sorted_interaction_list_final:
    if len(i) < 2:
        #print(i)
        try:
            while True:
                sorted_interaction_list_final.remove(i)
        except ValueError:
            pass

In [33]:
from itertools import groupby

In [34]:
interaction_count = [" " + str(len(list(group))) + " - " + str(key) for key, group in groupby(sorted_interaction_list_final)]

In [35]:
print("\n".join(interaction_count))

 2 - ['2ndFlrSF', 'BsmtExposure_No']
 9 - ['2ndFlrSF', 'BsmtFinSF1']
 48 - ['2ndFlrSF', 'BsmtQual']
 1 - ['2ndFlrSF', 'BsmtUnfSF']
 6 - ['2ndFlrSF', 'CentralAir_N']
 5 - ['2ndFlrSF', 'CentralAir_Y']
 1 - ['2ndFlrSF', 'Electrical_SBrkr']
 55 - ['2ndFlrSF', 'ExterQual']
 1 - ['2ndFlrSF', 'Exterior1st_VinylSd']
 3 - ['2ndFlrSF', 'Exterior2nd_VinylSd']
 14 - ['2ndFlrSF', 'Fireplaces']
 2 - ['2ndFlrSF', 'Foundation_BrkTil']
 19 - ['2ndFlrSF', 'Foundation_PConc']
 17 - ['2ndFlrSF', 'GarageCars']
 4 - ['2ndFlrSF', 'GarageCond']
 13 - ['2ndFlrSF', 'GarageFinish_Unf']
 8 - ['2ndFlrSF', 'GarageQual']
 16 - ['2ndFlrSF', 'GarageType_Attchd']
 4 - ['2ndFlrSF', 'GarageType_Detchd']
 38 - ['2ndFlrSF', 'GarageYrBlt']
 16 - ['2ndFlrSF', 'HeatingQC']
 35 - ['2ndFlrSF', 'KitchenQual']
 3 - ['2ndFlrSF', 'LotArea']
 3 - ['2ndFlrSF', 'LotFrontage']
 8 - ['2ndFlrSF', 'MasVnrArea']
 6 - ['2ndFlrSF', 'MasVnrType_None']
 5 - ['2ndFlrSF', 'MasVnrType_Stone']
 1 - ['2ndFlrSF', 'MoSold']
 1 - ['2ndFlrSF', 'Neighbo

----------------------------------------------------------------------------------------------------------------------
## Dataset Expanded with Best 10 Interactions
----------------------------------------------------------------------------------------------------------------------

### Feature engineering

In [36]:
X['ExterQual_GarageCars'] = X['ExterQual'] * X['GarageCars']
X['GrLivArea_YearBuilt'] = X['GrLivArea'] * X['YearBuilt']
X['GrLivArea_OverallQual'] = X['GrLivArea'] * X['OverallQual']
X['GrLivArea_KitchenQual'] = X['GrLivArea'] * X['KitchenQual']
X['ExterQual_TotalBsmtSF'] = X['ExterQual'] * X['TotalBsmtSF']
X['BsmtQual_GrLivArea'] = X['BsmtQual'] * X['GrLivArea']
X['GrLivArea_GarageCars'] = X['GrLivArea'] * X['GarageCars']
X['TotalBsmtSF_YearBuilt'] = X['TotalBsmtSF'] * X['YearBuilt']
X['Fireplaces_YearBuilt'] = X['Fireplaces'] * X['YearBuilt']
X['KitchenQual_GarageCars'] = X['KitchenQual'] * X['GarageCars']

### Models

#### Linear Regression

In [37]:
param_grid = {'f_regression__k':[20,50,80,100,120,150,200,239,244]}

In [38]:
pipe_lr = Pipeline([('f_regression', SelectKBest(f_regression)), ('lr', LinearRegression())])
grid = GridSearchCV(pipe_lr, param_grid = param_grid, cv = 10)
grid.fit(X,y)
print(grid.best_params_)

{'f_regression__k': 200}


##### Best params

In [39]:
pipe_lr = Pipeline([('f_regression', SelectKBest(k=200)), ('lr', LinearRegression())])
cv_scores = cross_validate(pipe_lr, X, y, scoring = scoring, cv = 10)
print(score_to_stats(cv_scores))

{'fit_time': 0.0949, 'score_time': 0.0035, 'test_rmsle': 0.1155, 'test_r2': 0.9154}


#### Ridge

In [40]:
param_grid = {'alpha':[0.01,0,0.1,1,5,10,20,100]}

In [41]:
rid = Ridge()
grid = GridSearchCV(rid, param_grid = param_grid, cv = 10)
grid.fit(X,y)
print(grid.best_params_)

{'alpha': 10}


##### Best params

In [42]:
rid = Ridge(alpha = 10)
cv_scores = cross_validate(rid, X, y, scoring = scoring, cv = 10)
print(score_to_stats(cv_scores))

{'fit_time': 0.0137, 'score_time': 0.0031, 'test_rmsle': 0.1114, 'test_r2': 0.9212}


#### Lasso

In [43]:
param_grid = {'alpha':[0.01,0,0.1,1,5,10,20,100]}

In [44]:
lasso = Lasso()
grid = GridSearchCV(lasso, param_grid = param_grid, cv = 10)
grid.fit(X,y)
print(grid.best_params_)

{'alpha': 0}


##### Best params

In [45]:
lasso = Lasso(alpha = 0)
cv_scores = cross_validate(lasso, X, y, scoring = scoring, cv = 10)
print(score_to_stats(cv_scores))

{'fit_time': 0.3807, 'score_time': 0.0047, 'test_rmsle': 0.119, 'test_r2': 0.9103}


#### ElasticNet

In [46]:
param_grid = {'alpha':[0.01,0.1,1,5,10,20,100],
              'l1_ratio':[0,0.01,0.1,0.5,0.8,1]}

In [47]:
enet = ElasticNet()
grid = GridSearchCV(enet, param_grid = param_grid, cv = 10)
grid.fit(X,y)
print(grid.best_params_)

{'alpha': 0.01, 'l1_ratio': 0.01}


##### Best params

In [48]:
enet = ElasticNet(alpha = 0.01, l1_ratio = 0.01)
cv_scores = cross_validate(enet, X, y, scoring = scoring, cv = 10)
print(score_to_stats(cv_scores))

{'fit_time': 0.3402, 'score_time': 0.0033, 'test_rmsle': 0.1114, 'test_r2': 0.9213}


#### Xgboost

In [49]:
xgboost = XGBRegressor(eta = 0.01, subsample = 0.5)
cv_scores = cross_validate(xgboost, X, y, scoring = scoring, cv = 10)
print(score_to_stats(cv_scores))

{'fit_time': 1.8179, 'score_time': 0.0113, 'test_rmsle': 0.1229, 'test_r2': 0.9038}


#### RandomForest

In [50]:
rf = RandomForestRegressor(max_depth=20, min_samples_leaf = 1, n_estimators = 150)
cv_scores = cross_validate(rf, X, y, scoring = scoring, cv = 10)
print(score_to_stats(cv_scores))

{'fit_time': 5.9269, 'score_time': 0.0307, 'test_rmsle': 0.1297, 'test_r2': 0.8931}


----------------------------------------------------------------------------------------------------------------------
## Dataset Expanded with Best 25 Interactions
----------------------------------------------------------------------------------------------------------------------

In [51]:
X = df_train.drop('SalePrice_log', axis = 1)
y = df_train['SalePrice_log']

### Feature engineering

In [52]:
X['ExterQual_GarageCars'] = X['ExterQual'] * X['GarageCars']
X['GrLivArea_YearBuilt'] = X['GrLivArea'] * X['YearBuilt']
X['GrLivArea_OverallQual'] = X['GrLivArea'] * X['OverallQual']
X['GrLivArea_KitchenQual'] = X['GrLivArea'] * X['KitchenQual']
X['ExterQual_TotalBsmtSF'] = X['ExterQual'] * X['TotalBsmtSF']
X['BsmtQual_GrLivArea'] = X['BsmtQual'] * X['GrLivArea']
X['GrLivArea_GarageCars'] = X['GrLivArea'] * X['GarageCars']
X['TotalBsmtSF_YearBuilt'] = X['TotalBsmtSF'] * X['YearBuilt']
X['Fireplaces_YearBuilt'] = X['Fireplaces'] * X['YearBuilt']
X['KitchenQual_GarageCars'] = X['KitchenQual'] * X['GarageCars']
X['GarageCars_YearBuilt'] = X['GarageCars'] * X['YearBuilt']
X['ExterQual_GrLivArea'] = X['ExterQual'] * X['GrLivArea']
X['KitchenQual_TotalBsmtSF'] = X['KitchenQual'] * X['TotalBsmtSF']
X['OverallQual_GarageCars'] = X['OverallQual'] * X['GarageCars']
X['BsmtQual_ExterQual'] = X['BsmtQual'] * X['ExterQual']
X['ExterQual_LotArea'] = X['ExterQual'] * X['LotArea']
X['LotArea_YearBuilt'] = X['LotArea'] * X['YearBuilt']
X['ExterQual_Fireplaces'] = X['ExterQual'] * X['Fireplaces']
X['GrLivArea_TotalBsmtSF'] = X['GrLivArea'] * X['TotalBsmtSF']
X['BsmtQual_TotalBsmtSF'] = X['BsmtQual'] * X['TotalBsmtSF']
X['GrLivArea_YearRemodAdd'] = X['GrLivArea'] * X['YearRemodAdd']
X['BsmtQual_Fireplaces'] = X['BsmtQual'] * X['Fireplaces']
X['BsmtQual_KitchenQual'] = X['BsmtQual'] * X['KitchenQual']
X['KitchenQual_YearBuilt'] = X['KitchenQual'] * X['YearBuilt']
X['BsmtQual_GarageCars'] = X['BsmtQual'] * X['GarageCars']

### Models

#### Linear Regression

In [53]:
param_grid = {'f_regression__k':[20,50,80,100,120,150,200,239,244,264]}

In [54]:
pipe_lr = Pipeline([('f_regression', SelectKBest(f_regression)), ('lr', LinearRegression())])
grid = GridSearchCV(pipe_lr, param_grid = param_grid, cv = 10)
grid.fit(X,y)
print(grid.best_params_)

{'f_regression__k': 150}


##### Best params

In [55]:
pipe_lr = Pipeline([('f_regression', SelectKBest(k=200)), ('lr', LinearRegression())])
cv_scores = cross_validate(pipe_lr, X, y, scoring = scoring, cv = 10)
print(score_to_stats(cv_scores))

{'fit_time': 0.1146, 'score_time': 0.0056, 'test_rmsle': 0.1151, 'test_r2': 0.916}


#### Ridge

In [56]:
param_grid = {'alpha':[0.01,0,0.1,1,5,10,20,100]}

In [57]:
rid = Ridge()
grid = GridSearchCV(rid, param_grid = param_grid, cv = 10)
grid.fit(X,y)
print(grid.best_params_)

{'alpha': 10}


##### Best params

In [58]:
rid = Ridge(alpha = 10)
cv_scores = cross_validate(rid, X, y, scoring = scoring, cv = 10)
print(score_to_stats(cv_scores))

{'fit_time': 0.0143, 'score_time': 0.003, 'test_rmsle': 0.1125, 'test_r2': 0.9198}


#### Lasso

In [59]:
param_grid = {'alpha':[0.01,0,0.1,1,5,10,20,100]}

In [60]:
lasso = Lasso()
grid = GridSearchCV(lasso, param_grid = param_grid, cv = 10)
grid.fit(X,y)
print(grid.best_params_)

{'alpha': 0}


##### Best params

In [61]:
lasso = Lasso(alpha = 0)
cv_scores = cross_validate(lasso, X, y, scoring = scoring, cv = 10)
print(score_to_stats(cv_scores))

{'fit_time': 0.6152, 'score_time': 0.0151, 'test_rmsle': 0.1193, 'test_r2': 0.9099}


#### Elastic Net

In [62]:
param_grid = {'alpha':[0.01,0.1,1,5,10,20,100],
              'l1_ratio':[0,0.01,0.1,0.5,0.8,1]}

In [63]:
enet = ElasticNet()
grid = GridSearchCV(enet, param_grid = param_grid, cv = 10)
grid.fit(X,y)
print(grid.best_params_)

{'alpha': 0.01, 'l1_ratio': 0.01}


##### Best params

In [64]:
enet = ElasticNet(alpha = 0.01, l1_ratio = 0.01)
cv_scores = cross_validate(enet, X, y, scoring = scoring, cv = 10)
print(score_to_stats(cv_scores))

{'fit_time': 0.3654, 'score_time': 0.0037, 'test_rmsle': 0.1117, 'test_r2': 0.9209}


#### XGBOOST

In [65]:
xgboost = XGBRegressor(eta = 0.01, subsample = 0.5)
cv_scores = cross_validate(xgboost, X, y, scoring = scoring, cv = 10)
print(score_to_stats(cv_scores))

{'fit_time': 1.6094, 'score_time': 0.0121, 'test_rmsle': 0.1231, 'test_r2': 0.9032}


#### Random Forest Regressor

In [66]:
rf = RandomForestRegressor(max_depth=20, min_samples_leaf = 1, n_estimators = 150)
cv_scores = cross_validate(rf, X, y, scoring = scoring, cv = 10)
print(score_to_stats(cv_scores))

{'fit_time': 7.185, 'score_time': 0.0295, 'test_rmsle': 0.1293, 'test_r2': 0.894}


----------------------------------------------------------------------------------------------------------------------
## Dataset Expanded with Best 10 + Best 10 Categorical Interactions
----------------------------------------------------------------------------------------------------------------------

In [68]:
X = df_train.drop('SalePrice_log', axis = 1)
y = df_train['SalePrice_log']

### Feature Engineering

In [69]:
# BEST 10
X['ExterQual_GarageCars'] = X['ExterQual'] * X['GarageCars']
X['GrLivArea_YearBuilt'] = X['GrLivArea'] * X['YearBuilt']
X['GrLivArea_OverallQual'] = X['GrLivArea'] * X['OverallQual']
X['GrLivArea_KitchenQual'] = X['GrLivArea'] * X['KitchenQual']
X['ExterQual_TotalBsmtSF'] = X['ExterQual'] * X['TotalBsmtSF']
X['BsmtQual_GrLivArea'] = X['BsmtQual'] * X['GrLivArea']
X['GrLivArea_GarageCars'] = X['GrLivArea'] * X['GarageCars']
X['TotalBsmtSF_YearBuilt'] = X['TotalBsmtSF'] * X['YearBuilt']
X['Fireplaces_YearBuilt'] = X['Fireplaces'] * X['YearBuilt']
X['KitchenQual_GarageCars'] = X['KitchenQual'] * X['GarageCars']

In [70]:
# BEST 10 CATEGORICAL
X['Foundation_PConc_GarageType_Attchd'] = X['Foundation_PConc'] * X['GarageType_Attchd']
X['GarageFinish_Unf_Foundation_PConc'] = X['GarageFinish_Unf'] * X['Foundation_PConc']
X['MasVnrType_None_Foundation_PConc'] = X['MasVnrType_None'] * X['Foundation_PConc']
X['Foundation_PConc_MSZoning_RL'] = X['Foundation_PConc'] * X['MSZoning_RL']
X['Foundation_PConc_CentralAir_N'] = X['Foundation_PConc'] * X['CentralAir_N']
X['CentralAir_Y_Foundation_PConc'] = X['CentralAir_Y'] * X['Foundation_PConc']
X['GarageFinish_Fin_GarageType_Attchd'] = X['GarageFinish_Fin'] * X['GarageType_Attchd']
X['Foundation_CBlock_GarageType_Attchd'] = X['Foundation_CBlock'] * X['GarageType_Attchd']
X['GarageFinish_Fin_GarageFinish_RFn'] = X['GarageFinish_Fin'] * X['GarageFinish_RFn']
X['BsmtFinType1_GLQ_GarageFinish_Unf'] = X['BsmtFinType1_GLQ'] * X['GarageFinish_Unf']

### Models

#### Linear Regression

In [71]:
pipe_lr = Pipeline([('f_regression', SelectKBest(k=200)), ('lr', LinearRegression())])
cv_scores = cross_validate(pipe_lr, X, y, scoring = scoring, cv = 10)
print(score_to_stats(cv_scores))

{'fit_time': 0.2027, 'score_time': 0.0129, 'test_rmsle': 0.1144, 'test_r2': 0.917}


#### Ridge

In [72]:
param_grid = {'alpha':[0.01,0,0.1,1,5,10,20,100]}

In [73]:
rid = Ridge()
grid = GridSearchCV(rid, param_grid = param_grid, cv = 10)
grid.fit(X,y)
print(grid.best_params_)

{'alpha': 10}


##### Best pamars

In [74]:
rid = Ridge(alpha = 10)
cv_scores = cross_validate(rid, X, y, scoring = scoring, cv = 10)
print(score_to_stats(cv_scores))

{'fit_time': 0.0151, 'score_time': 0.0038, 'test_rmsle': 0.1107, 'test_r2': 0.9222}


#### Lasso

In [75]:
param_grid = {'alpha':[0.01,0,0.1,1,5,10,20,100]}

In [76]:
lasso = Lasso()
grid = GridSearchCV(lasso, param_grid = param_grid, cv = 10)
grid.fit(X,y)
print(grid.best_params_)

{'alpha': 0}


##### Best params

In [77]:
lasso = Lasso(alpha = 0)
cv_scores = cross_validate(lasso, X, y, scoring = scoring, cv = 10)
print(score_to_stats(cv_scores))

{'fit_time': 0.3813, 'score_time': 0.0038, 'test_rmsle': 0.1185, 'test_r2': 0.911}


#### Elastic Net

In [78]:
param_grid = {'alpha':[0.01,0.1,1,5,10,20,100],
              'l1_ratio':[0,0.01,0.1,0.5,0.8,1]}

In [79]:
enet = ElasticNet()
grid = GridSearchCV(enet, param_grid = param_grid, cv = 10)
grid.fit(X,y)
print(grid.best_params_)

{'alpha': 0.01, 'l1_ratio': 0.01}


##### Best params

In [80]:
enet = ElasticNet(alpha = 0.01, l1_ratio = 0.01)
cv_scores = cross_validate(enet, X, y, scoring = scoring, cv = 10)
print(score_to_stats(cv_scores))

{'fit_time': 0.3627, 'score_time': 0.0037, 'test_rmsle': 0.1108, 'test_r2': 0.922}


#### XGBOOST

In [81]:
xgboost = XGBRegressor(eta = 0.01, subsample = 0.5)
cv_scores = cross_validate(xgboost, X, y, scoring = scoring, cv = 10)
print(score_to_stats(cv_scores))

{'fit_time': 1.8319, 'score_time': 0.0157, 'test_rmsle': 0.1229, 'test_r2': 0.9039}


#### Random Forest Regressor

In [82]:
rf = RandomForestRegressor(max_depth=20, min_samples_leaf = 1, n_estimators = 150)
cv_scores = cross_validate(rf, X, y, scoring = scoring, cv = 10)
print(score_to_stats(cv_scores))

{'fit_time': 6.9672, 'score_time': 0.0317, 'test_rmsle': 0.1304, 'test_r2': 0.892}
