In [7]:
from sklearn import ensemble
import pandas as pd
import numpy as np
# Raise pandas' display limit so up to 100 columns are shown for a DataFrame.
MAX_DISPLAY_COLUMNS = 100
pd.options.display.max_columns = MAX_DISPLAY_COLUMNS

In [29]:

# Load the housing train and test tables from disk.
# NOTE(review): a later cell calls to_csv on these same two paths; confirm the
# files on disk are still the intended inputs before re-running this cell.
housetrain_tree = pd.read_csv('../Data/housetrain_tree.csv')
housetest_tree = pd.read_csv('../Data/housetest_tree.csv')

# Report (rows, columns) for each table, train first.
for loaded_frame in (housetrain_tree, housetest_tree):
    print(loaded_frame.shape)

(1168, 86)
(292, 86)
(1460, 86)
(1459, 86)


In [35]:
# Stack the train rows on top of the test rows, with a fresh 0..n-1 index,
# so that both tables go through one-hot encoding together.
house_combined = pd.concat(
    objs=[housetrain_tree, housetest_tree],
    ignore_index=True,
)

In [52]:
# One-hot encode the combined table. drop_first omits the first level of each
# encoded column; dummy_na adds an indicator column for missing values.
encoded_house_combined = pd.get_dummies(
    data=house_combined,
    dummy_na=True,
    drop_first=True,
)

In [68]:
# Split the encoded table back into its train and test rows.
# The combined table was built train-then-test, so the train rows come first.
# The boundary is taken from the train table's row count instead of a
# hard-coded 1460; that count is the same before and after this cell runs,
# so re-running the cell gives the same split.
n_train_rows = len(housetrain_tree)
assert 0 < n_train_rows <= len(encoded_house_combined), (
    "train row count does not fit inside the encoded combined table"
)

housetrain_tree = encoded_house_combined.iloc[:n_train_rows]
housetest_tree = encoded_house_combined.iloc[n_train_rows:]
print(housetrain_tree.shape)
print(housetest_tree.shape)


In [70]:
# Hold out 20% of the training rows as a private test set; the fixed
# random_state makes the partition repeatable.
from sklearn.model_selection import train_test_split

privtrain_tree, privtest_tree = train_test_split(
    housetrain_tree,
    test_size=0.2,
    random_state=0,
)

# Report (rows, columns) for the private train split, then the private test split.
for private_part in (privtrain_tree, privtest_tree):
    print(private_part.shape)

In [4]:
# Base random-forest regressor with out-of-bag scoring enabled.
randomForest = ensemble.RandomForestRegressor(
    oob_score=True,
)

In [5]:
def _log_target_and_features(frame, target='SalePrice'):
    """Return (natural log of the target column, frame without that column)."""
    return np.log(frame[target]), frame.drop(target, axis=1)


# Private training split: the target is log(SalePrice), the rest are features.
y_train, x_train = _log_target_and_features(privtrain_tree)
print(y_train.shape)
print(x_train.shape)

# Private hold-out split, transformed the same way.
y_test, x_test = _log_target_and_features(privtest_tree)
print(y_test.shape)
print(x_test.shape)

(1168,)
(1168, 192)
(292,)
(292, 192)


In [13]:
from sklearn.model_selection import GridSearchCV
param_grid = { "n_estimators"      : [100, 250, 500],
           "max_features"      : [30, 40, 50, 60],
           "max_depth"         : [5, 10, 15, 20, 25]}
randomForest.set_params(random_state=67)
grid_search_tree = GridSearchCV(randomForest, param_grid, cv=5, n_jobs=-1, verbose=10, scoring='neg_mean_squared_error')
%time grid_search_tree.fit(x_train, y_train)

Fitting 5 folds for each of 60 candidates, totalling 300 fits
[CV] n_estimators=100, max_depth=5, max_features=30 ..................
[CV] n_estimators=100, max_depth=5, max_features=30 ..................
[CV] n_estimators=100, max_depth=5, max_features=30 ..................
[CV] n_estimators=100, max_depth=5, max_features=30 ..................
[CV] n_estimators=100, max_depth=5, max_features=30 ..................
[CV] n_estimators=250, max_depth=5, max_features=30 ..................
[CV] n_estimators=250, max_depth=5, max_features=30 ..................
[CV] n_estimators=250, max_depth=5, max_features=30 ..................
[CV]  n_estimators=100, max_depth=5, max_features=30, score=-0.01868918428563502, total=   0.4s
[CV] n_estimators=250, max_depth=5, max_features=30 ..................
[CV]  n_estimators=100, max_depth=5, max_features=30, score=-0.022214206830979768, total=   0.5s
[CV] n_estimators=250, max_depth=5, max_features=30 ..................
[CV]  n_estimators=100, max_depth=5

[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.6s


[CV]  n_estimators=250, max_depth=5, max_features=30, score=-0.018246267986539614, total=   0.8s
[CV] n_estimators=500, max_depth=5, max_features=30 ..................
[CV]  n_estimators=250, max_depth=5, max_features=30, score=-0.021391438276695145, total=   0.7s
[CV]  n_estimators=250, max_depth=5, max_features=30, score=-0.031208213775557027, total=   0.8s
[CV] n_estimators=500, max_depth=5, max_features=30 ..................
[CV] n_estimators=100, max_depth=5, max_features=40 ..................
[CV]  n_estimators=250, max_depth=5, max_features=30, score=-0.027082363952242478, total=   0.7s
[CV] n_estimators=100, max_depth=5, max_features=40 ..................


[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:    1.4s


[CV]  n_estimators=100, max_depth=5, max_features=40, score=-0.02987431010268297, total=   0.4s
[CV]  n_estimators=250, max_depth=5, max_features=30, score=-0.02199716990382877, total=   0.8s
[CV] n_estimators=100, max_depth=5, max_features=40 ..................
[CV] n_estimators=100, max_depth=5, max_features=40 ..................
[CV]  n_estimators=100, max_depth=5, max_features=40, score=-0.020314339590421865, total=   0.6s
[CV] n_estimators=100, max_depth=5, max_features=40 ..................
[CV]  n_estimators=500, max_depth=5, max_features=30, score=-0.01823313173714044, total=   1.3s
[CV] n_estimators=250, max_depth=5, max_features=40 ..................
[CV]  n_estimators=500, max_depth=5, max_features=30, score=-0.030782318395265178, total=   1.4s
[CV] n_estimators=250, max_depth=5, max_features=40 ..................
[CV]  n_estimators=500, max_depth=5, max_features=30, score=-0.02142300912178157, total=   1.6s
[CV] n_estimators=250, max_depth=5, max_features=40 ...............

[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:    2.5s


[CV]  n_estimators=100, max_depth=5, max_features=40, score=-0.022302830199863795, total=   0.3s
[CV] n_estimators=500, max_depth=5, max_features=40 ..................
[CV]  n_estimators=500, max_depth=5, max_features=30, score=-0.022144884171908536, total=   1.5s
[CV] n_estimators=500, max_depth=5, max_features=40 ..................
[CV]  n_estimators=250, max_depth=5, max_features=40, score=-0.030484533914460748, total=   0.9s
[CV] n_estimators=500, max_depth=5, max_features=40 ..................
[CV]  n_estimators=250, max_depth=5, max_features=40, score=-0.019649251837845293, total=   0.9s
[CV]  n_estimators=250, max_depth=5, max_features=40, score=-0.017816855290981717, total=   0.9s
[CV] n_estimators=500, max_depth=5, max_features=40 ..................
[CV] n_estimators=100, max_depth=5, max_features=50 ..................
[CV]  n_estimators=250, max_depth=5, max_features=40, score=-0.02192273981103615, total=   0.9s
[CV]  n_estimators=250, max_depth=5, max_features=40, score=-0.0

[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    3.6s


[CV]  n_estimators=500, max_depth=5, max_features=40, score=-0.03018820213317681, total=   1.6s
[CV] n_estimators=100, max_depth=5, max_features=50 ..................
[CV]  n_estimators=100, max_depth=5, max_features=50, score=-0.018056123095380443, total=   0.6s
[CV] n_estimators=100, max_depth=5, max_features=50 ..................
[CV]  n_estimators=100, max_depth=5, max_features=50, score=-0.029771053708661982, total=   0.8s
[CV]  n_estimators=100, max_depth=5, max_features=50, score=-0.0187223067267202, total=   0.7s
[CV] n_estimators=250, max_depth=5, max_features=50 ..................
[CV] n_estimators=250, max_depth=5, max_features=50 ..................
[CV]  n_estimators=500, max_depth=5, max_features=40, score=-0.0199683208023956, total=   1.6s
[CV] n_estimators=250, max_depth=5, max_features=50 ..................
[CV]  n_estimators=500, max_depth=5, max_features=40, score=-0.01798559815513293, total=   1.6s
[CV] n_estimators=250, max_depth=5, max_features=50 .................

[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    5.3s


[CV]  n_estimators=250, max_depth=5, max_features=50, score=-0.01874635483117207, total=   1.1s
[CV] n_estimators=500, max_depth=5, max_features=50 ..................
[CV]  n_estimators=250, max_depth=5, max_features=50, score=-0.03030949075088794, total=   1.1s
[CV] n_estimators=500, max_depth=5, max_features=50 ..................
[CV]  n_estimators=250, max_depth=5, max_features=50, score=-0.01752590827697005, total=   1.0s
[CV] n_estimators=100, max_depth=5, max_features=60 ..................
[CV]  n_estimators=250, max_depth=5, max_features=50, score=-0.02486176434817575, total=   0.9s
[CV] n_estimators=100, max_depth=5, max_features=60 ..................
[CV]  n_estimators=250, max_depth=5, max_features=50, score=-0.02187300221525224, total=   1.0s
[CV] n_estimators=100, max_depth=5, max_features=60 ..................
[CV]  n_estimators=100, max_depth=5, max_features=60, score=-0.01891077663196104, total=   0.6s
[CV] n_estimators=100, max_depth=5, max_features=60 .................

[Parallel(n_jobs=-1)]: Done  45 tasks      | elapsed:    7.3s


[CV]  n_estimators=100, max_depth=5, max_features=60, score=-0.02177037568862678, total=   0.6s
[CV] n_estimators=500, max_depth=5, max_features=60 ..................
[CV]  n_estimators=500, max_depth=5, max_features=50, score=-0.021759350265429155, total=   1.9s
[CV] n_estimators=500, max_depth=5, max_features=60 ..................
[CV]  n_estimators=500, max_depth=5, max_features=50, score=-0.02490255890256217, total=   2.1s
[CV] n_estimators=500, max_depth=5, max_features=60 ..................
[CV]  n_estimators=250, max_depth=5, max_features=60, score=-0.03013115052899905, total=   1.1s
[CV] n_estimators=500, max_depth=5, max_features=60 ..................
[CV]  n_estimators=250, max_depth=5, max_features=60, score=-0.018623307112980464, total=   1.1s
[CV] n_estimators=500, max_depth=5, max_features=60 ..................
[CV]  n_estimators=250, max_depth=5, max_features=60, score=-0.02478368370187635, total=   1.0s
[CV] n_estimators=100, max_depth=10, max_features=30 ..............

[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    9.6s


[CV]  n_estimators=500, max_depth=5, max_features=60, score=-0.029972302627668188, total=   1.9s
[CV] n_estimators=250, max_depth=10, max_features=30 .................
[CV]  n_estimators=500, max_depth=5, max_features=60, score=-0.021588511098824946, total=   2.0s
[CV]  n_estimators=500, max_depth=5, max_features=60, score=-0.018747383173323195, total=   2.2s
[CV] n_estimators=250, max_depth=10, max_features=30 .................
[CV] n_estimators=250, max_depth=10, max_features=30 .................
[CV]  n_estimators=500, max_depth=5, max_features=60, score=-0.018043136169398566, total=   2.1s
[CV] n_estimators=250, max_depth=10, max_features=30 .................
[CV]  n_estimators=500, max_depth=5, max_features=60, score=-0.02441046536255094, total=   2.1s
[CV] n_estimators=500, max_depth=10, max_features=30 .................
[CV]  n_estimators=100, max_depth=10, max_features=30, score=-0.0209880484902117, total=   0.8s
[CV] n_estimators=500, max_depth=10, max_features=30 ............

[Parallel(n_jobs=-1)]: Done  69 tasks      | elapsed:   11.8s


[CV]  n_estimators=100, max_depth=10, max_features=40, score=-0.023043264170712348, total=   1.1s
[CV] n_estimators=100, max_depth=10, max_features=40 .................
[CV]  n_estimators=100, max_depth=10, max_features=40, score=-0.015629474207032597, total=   1.1s
[CV] n_estimators=100, max_depth=10, max_features=40 .................
[CV]  n_estimators=500, max_depth=10, max_features=30, score=-0.023614052057324038, total=   2.5s
[CV] n_estimators=250, max_depth=10, max_features=40 .................
[CV]  n_estimators=100, max_depth=10, max_features=40, score=-0.015683958044539696, total=   1.2s
[CV] n_estimators=250, max_depth=10, max_features=40 .................
[CV]  n_estimators=500, max_depth=10, max_features=30, score=-0.01467838413623121, total=   2.5s
[CV] n_estimators=250, max_depth=10, max_features=40 .................
[CV]  n_estimators=500, max_depth=10, max_features=30, score=-0.016178763848502687, total=   2.6s
[CV] n_estimators=250, max_depth=10, max_features=40 .....

[Parallel(n_jobs=-1)]: Done  82 tasks      | elapsed:   14.8s


[CV]  n_estimators=250, max_depth=10, max_features=40, score=-0.015002480703936293, total=   1.3s
[CV] n_estimators=100, max_depth=10, max_features=50 .................
[CV]  n_estimators=250, max_depth=10, max_features=40, score=-0.01986059638238524, total=   1.6s
[CV]  n_estimators=250, max_depth=10, max_features=40, score=-0.018022469715337785, total=   1.5s
[CV] n_estimators=100, max_depth=10, max_features=50 .................
[CV] n_estimators=100, max_depth=10, max_features=50 .................
[CV]  n_estimators=100, max_depth=10, max_features=50, score=-0.023812106818709572, total=   1.1s
[CV] n_estimators=100, max_depth=10, max_features=50 .................
[CV]  n_estimators=100, max_depth=10, max_features=50, score=-0.014461946225799585, total=   1.1s
[CV] n_estimators=100, max_depth=10, max_features=50 .................
[CV]  n_estimators=100, max_depth=10, max_features=50, score=-0.015627818356645288, total=   1.3s
[CV] n_estimators=250, max_depth=10, max_features=50 .....

[Parallel(n_jobs=-1)]: Done  97 tasks      | elapsed:   18.9s


[CV] n_estimators=100, max_depth=10, max_features=60 .................
[CV]  n_estimators=250, max_depth=10, max_features=50, score=-0.01849907409002097, total=   1.8s
[CV] n_estimators=100, max_depth=10, max_features=60 .................
[CV]  n_estimators=100, max_depth=10, max_features=60, score=-0.022422864564222836, total=   1.0s
[CV] n_estimators=100, max_depth=10, max_features=60 .................
[CV]  n_estimators=100, max_depth=10, max_features=60, score=-0.015459671018177567, total=   1.1s
[CV] n_estimators=100, max_depth=10, max_features=60 .................
[CV]  n_estimators=100, max_depth=10, max_features=60, score=-0.015365702272833175, total=   1.4s
[CV] n_estimators=250, max_depth=10, max_features=60 .................
[CV]  n_estimators=500, max_depth=10, max_features=50, score=-0.023135434336102025, total=   3.5s
[CV] n_estimators=250, max_depth=10, max_features=60 .................
[CV]  n_estimators=100, max_depth=10, max_features=60, score=-0.01944701517836308, to

[Parallel(n_jobs=-1)]: Done 112 tasks      | elapsed:   23.5s


[CV]  n_estimators=250, max_depth=10, max_features=60, score=-0.015344709852159586, total=   2.0s
[CV] n_estimators=100, max_depth=15, max_features=30 .................
[CV]  n_estimators=250, max_depth=10, max_features=60, score=-0.019036361507663527, total=   2.1s
[CV] n_estimators=100, max_depth=15, max_features=30 .................
[CV]  n_estimators=250, max_depth=10, max_features=60, score=-0.018459614192321224, total=   2.0s
[CV] n_estimators=100, max_depth=15, max_features=30 .................
[CV]  n_estimators=100, max_depth=15, max_features=30, score=-0.01583305247379809, total=   1.0s
[CV]  n_estimators=100, max_depth=15, max_features=30, score=-0.023123576758165024, total=   1.2s
[CV] n_estimators=100, max_depth=15, max_features=30 .................
[CV] n_estimators=100, max_depth=15, max_features=30 .................
[CV]  n_estimators=100, max_depth=15, max_features=30, score=-0.01487954597610153, total=   1.1s
[CV] n_estimators=250, max_depth=15, max_features=30 ......

[Parallel(n_jobs=-1)]: Done 129 tasks      | elapsed:   28.0s


[CV]  n_estimators=250, max_depth=15, max_features=30, score=-0.01803940900157615, total=   1.4s
[CV] n_estimators=100, max_depth=15, max_features=40 .................
[CV]  n_estimators=100, max_depth=15, max_features=40, score=-0.02366613479864311, total=   0.9s
[CV] n_estimators=100, max_depth=15, max_features=40 .................
[CV]  n_estimators=100, max_depth=15, max_features=40, score=-0.015318565576094043, total=   1.2s
[CV] n_estimators=100, max_depth=15, max_features=40 .................
[CV]  n_estimators=100, max_depth=15, max_features=40, score=-0.015202596874114281, total=   1.1s
[CV] n_estimators=250, max_depth=15, max_features=40 .................
[CV]  n_estimators=500, max_depth=15, max_features=30, score=-0.022916095210949068, total=   2.8s
[CV] n_estimators=250, max_depth=15, max_features=40 .................
[CV]  n_estimators=500, max_depth=15, max_features=30, score=-0.015802741631702002, total=   2.9s
[CV] n_estimators=250, max_depth=15, max_features=40 ......

[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:   33.7s


[CV]  n_estimators=100, max_depth=15, max_features=50, score=-0.014136028016589754, total=   1.4s
[CV] n_estimators=250, max_depth=15, max_features=50 .................
[CV]  n_estimators=500, max_depth=15, max_features=40, score=-0.014488465273835472, total=   3.2s
[CV] n_estimators=250, max_depth=15, max_features=50 .................
[CV]  n_estimators=500, max_depth=15, max_features=40, score=-0.022657589053337094, total=   3.7s
[CV] n_estimators=250, max_depth=15, max_features=50 .................
[CV]  n_estimators=500, max_depth=15, max_features=40, score=-0.015200998398838069, total=   3.7s
[CV]  n_estimators=100, max_depth=15, max_features=50, score=-0.019023787382946345, total=   1.2s
[CV] n_estimators=250, max_depth=15, max_features=50 .................
[CV] n_estimators=250, max_depth=15, max_features=50 .................
[CV]  n_estimators=100, max_depth=15, max_features=50, score=-0.017884127669043074, total=   1.0s
[CV] n_estimators=500, max_depth=15, max_features=50 ....

[Parallel(n_jobs=-1)]: Done 165 tasks      | elapsed:   40.1s


[CV] n_estimators=250, max_depth=15, max_features=60 .................
[CV]  n_estimators=500, max_depth=15, max_features=50, score=-0.01441558905397915, total=   4.0s
[CV] n_estimators=500, max_depth=15, max_features=60 .................
[CV]  n_estimators=500, max_depth=15, max_features=50, score=-0.01910712099608059, total=   4.1s
[CV] n_estimators=500, max_depth=15, max_features=60 .................
[CV]  n_estimators=250, max_depth=15, max_features=60, score=-0.022955068870843972, total=   2.2s
[CV] n_estimators=500, max_depth=15, max_features=60 .................
[CV]  n_estimators=250, max_depth=15, max_features=60, score=-0.015152441504675372, total=   2.2s
[CV] n_estimators=500, max_depth=15, max_features=60 .................
[CV]  n_estimators=500, max_depth=15, max_features=50, score=-0.017927866934080414, total=   5.1s
[CV] n_estimators=500, max_depth=15, max_features=60 .................
[CV]  n_estimators=250, max_depth=15, max_features=60, score=-0.018994603614554625, to

[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:   46.8s


[CV]  n_estimators=250, max_depth=20, max_features=30, score=-0.023239990567699806, total=   2.0s
[CV] n_estimators=500, max_depth=20, max_features=30 .................
[CV]  n_estimators=250, max_depth=20, max_features=30, score=-0.015958874745534434, total=   1.8s
[CV] n_estimators=500, max_depth=20, max_features=30 .................
[CV]  n_estimators=250, max_depth=20, max_features=30, score=-0.014256914374978188, total=   1.8s
[CV] n_estimators=500, max_depth=20, max_features=30 .................
[CV]  n_estimators=250, max_depth=20, max_features=30, score=-0.020396595254842723, total=   1.8s
[CV]  n_estimators=500, max_depth=15, max_features=60, score=-0.018303698821218316, total=   4.6s
[CV] n_estimators=100, max_depth=20, max_features=40 .................
[CV] n_estimators=100, max_depth=20, max_features=40 .................
[CV]  n_estimators=250, max_depth=20, max_features=30, score=-0.017961045702213036, total=   1.5s
[CV] n_estimators=100, max_depth=20, max_features=40 ....

[Parallel(n_jobs=-1)]: Done 205 tasks      | elapsed:   52.5s


[CV]  n_estimators=100, max_depth=20, max_features=50, score=-0.02281934282197715, total=   1.4s
[CV] n_estimators=100, max_depth=20, max_features=50 .................
[CV]  n_estimators=100, max_depth=20, max_features=50, score=-0.01511408982257566, total=   1.3s
[CV] n_estimators=100, max_depth=20, max_features=50 .................
[CV]  n_estimators=100, max_depth=20, max_features=50, score=-0.014832627599065515, total=   1.3s
[CV] n_estimators=250, max_depth=20, max_features=50 .................
[CV]  n_estimators=500, max_depth=20, max_features=40, score=-0.014420344735676612, total=   3.8s
[CV] n_estimators=250, max_depth=20, max_features=50 .................
[CV]  n_estimators=500, max_depth=20, max_features=40, score=-0.022509074646536897, total=   4.0s
[CV] n_estimators=250, max_depth=20, max_features=50 .................
[CV]  n_estimators=100, max_depth=20, max_features=50, score=-0.01945347652811508, total=   1.1s
[CV] n_estimators=250, max_depth=20, max_features=50 .......

[Parallel(n_jobs=-1)]: Done 226 tasks      | elapsed:  1.0min


[CV]  n_estimators=500, max_depth=20, max_features=50, score=-0.015052887567727833, total=   4.8s
[CV] n_estimators=500, max_depth=20, max_features=60 .................
[CV]  n_estimators=500, max_depth=20, max_features=50, score=-0.01775344555887272, total=   4.3s
[CV]  n_estimators=500, max_depth=20, max_features=50, score=-0.01939970453157779, total=   4.6s
[CV] n_estimators=500, max_depth=20, max_features=60 .................
[CV] n_estimators=500, max_depth=20, max_features=60 .................
[CV]  n_estimators=250, max_depth=20, max_features=60, score=-0.023001105554913123, total=   2.5s
[CV] n_estimators=500, max_depth=20, max_features=60 .................
[CV]  n_estimators=250, max_depth=20, max_features=60, score=-0.018279242398273592, total=   2.1s
[CV]  n_estimators=250, max_depth=20, max_features=60, score=-0.014727624513652207, total=   2.4s
[CV] n_estimators=500, max_depth=20, max_features=60 .................
[CV] n_estimators=100, max_depth=25, max_features=30 ......

[Parallel(n_jobs=-1)]: Done 249 tasks      | elapsed:  1.1min


[CV]  n_estimators=250, max_depth=25, max_features=30, score=-0.017867801529990842, total=   1.4s
[CV] n_estimators=100, max_depth=25, max_features=40 .................
[CV]  n_estimators=100, max_depth=25, max_features=40, score=-0.023191418436490082, total=   1.0s
[CV] n_estimators=100, max_depth=25, max_features=40 .................
[CV]  n_estimators=100, max_depth=25, max_features=40, score=-0.016081158517002817, total=   1.0s
[CV] n_estimators=100, max_depth=25, max_features=40 .................
[CV]  n_estimators=100, max_depth=25, max_features=40, score=-0.01522354701007644, total=   1.2s
[CV] n_estimators=250, max_depth=25, max_features=40 .................
[CV]  n_estimators=100, max_depth=25, max_features=40, score=-0.020023996303265387, total=   1.1s
[CV] n_estimators=250, max_depth=25, max_features=40 .................
[CV]  n_estimators=100, max_depth=25, max_features=40, score=-0.018277846516828577, total=   0.9s
[CV] n_estimators=250, max_depth=25, max_features=40 .....

[Parallel(n_jobs=-1)]: Done 272 tasks      | elapsed:  1.2min


[CV]  n_estimators=250, max_depth=25, max_features=50, score=-0.022802074770792748, total=   1.6s
[CV] n_estimators=500, max_depth=25, max_features=50 .................
[CV]  n_estimators=500, max_depth=25, max_features=40, score=-0.017857072237444753, total=   4.3s
[CV] n_estimators=500, max_depth=25, max_features=50 .................
[CV]  n_estimators=250, max_depth=25, max_features=50, score=-0.014705616154966667, total=   2.3s
[CV]  n_estimators=250, max_depth=25, max_features=50, score=-0.017918977654693843, total=   2.1s
[CV] n_estimators=500, max_depth=25, max_features=50 .................
[CV] n_estimators=100, max_depth=25, max_features=60 .................
[CV]  n_estimators=250, max_depth=25, max_features=50, score=-0.014519059021868256, total=   2.2s
[CV] n_estimators=100, max_depth=25, max_features=60 .................
[CV]  n_estimators=250, max_depth=25, max_features=50, score=-0.019391976721480592, total=   2.4s
[CV] n_estimators=100, max_depth=25, max_features=60 ....

[Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed:  1.4min finished


CPU times: user 4.3 s, sys: 152 ms, total: 4.46 s
Wall time: 1min 26s


GridSearchCV(cv=5, error_score='raise',
       estimator=RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=25,
           max_features=100, max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=1000, n_jobs=-1,
           oob_score=True, random_state=67, verbose=0, warm_start=False),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid={'n_estimators': [100, 250, 500], 'max_depth': [5, 10, 15, 20, 25], 'max_features': [30, 40, 50, 60]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='neg_mean_squared_error', verbose=10)

In [14]:
# Show the hyper-parameter combination the grid search selected as best.
grid_search_tree.best_params_

{'max_depth': 15, 'max_features': 50, 'n_estimators': 250}

In [15]:
# RMSE of the grid search's refit best estimator on the private hold-out set.
# Predictions and targets are both log(SalePrice), so this is a log-scale RMSE.
pred_test = grid_search_tree.predict(x_test)
squared_errors = (pred_test - y_test) ** 2
np.sqrt(np.mean(squared_errors))

0.14031845164520926

In [9]:
# Refit the stand-alone forest with hand-picked settings.
# NOTE(review): n_estimators=1000 and max_features=100 lie outside the grid
# searched earlier, and the settings differ from its reported best_params_.
# Confirm this is intentional.
forest_settings = {
    "random_state": 55,
    "n_jobs": -1,
    "max_features": 100,
    "max_depth": 25,
    "n_estimators": 1000,
}
randomForest.set_params(**forest_settings)
randomForest.fit(x_train, y_train)

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=25,
           max_features=100, max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=1000, n_jobs=-1,
           oob_score=True, random_state=55, verbose=0, warm_start=False)

In [16]:
# Log-scale RMSE of the manually configured forest on the private hold-out set.
pred_test = randomForest.predict(x_test)
residuals = pred_test - y_test
np.sqrt(np.mean(residuals ** 2))

0.14146642506083301

In [17]:
# Rank the features by the fitted forest's feature_importances_, highest
# first, and save the ranking to a CSV next to the notebook.
# The bare `forest_importance` expression that used to sit mid-cell was removed:
# only a cell's last expression is displayed, so it had no effect.
forest_importance = (
    pd.DataFrame({
        "feature": x_train.columns.tolist(),
        "importance": randomForest.feature_importances_,
    })
    .sort_values("importance", ascending=False)
)
forest_importance.to_csv('rf_importance.csv')

In [12]:
# Show only the ten most important features rather than dumping the whole
# ranking, which has one row per feature.
forest_importance.head(10)

Unnamed: 0,feature,importance
56,OverallQual_clean,0.333073
41,Neighborhood_catN,0.151295
130,GrLivArea_clean,0.119154
189,TotalBath_clean,0.037530
118,TotalBsmtSF_clean,0.036326
58,YearBuilt_clean,0.032248
128,X1stFlrSF_clean,0.026871
185,Garage.interaction_catN,0.016420
111,BsmtFinSF1_clean,0.015992
159,GarageArea_clean,0.015420


In [72]:
# Persist the train/test tables and the private train/test split as CSVs.
# NOTE(review): the first two paths are the same files this notebook reads as
# input, and by this point those frames hold the one-hot encoded data, so the
# inputs on disk are replaced. Confirm this is intended. to_csv also writes the
# row index as an extra leading column by default.
csv_outputs = [
    (housetrain_tree, '../Data/housetrain_tree.csv'),
    (housetest_tree, '../Data/housetest_tree.csv'),
    (privtrain_tree, '../Data/privtrain_tree.csv'),
    (privtest_tree, '../Data/privtest_tree.csv'),
]
for frame_to_save, csv_path in csv_outputs:
    frame_to_save.to_csv(csv_path)