In [1]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

from tools.common_methods import get_train_test_data, print_results

In [2]:
X_train, X_test, y_train, y_test = get_train_test_data()

In [20]:
random_forest = RandomForestRegressor(random_state=1, n_jobs=-1)
random_forest.fit(X_train, y_train)

y_pred = random_forest.predict(X_test)

print_results(y_pred, y_test)

MSE: 53.05728123282293 	R2 score: 0.9613631159140079


In [4]:
rf_params = {'max_depth': [None, 5, 10, 13, 15, 17],
             'random_state': [1],
             'n_estimators': [2, 10, 20, 50, 87, 100],
             'n_jobs': [-1],
             'max_features': [3, 5, 10, 15, 20, 24]
             }

rf_grid = GridSearchCV(RandomForestRegressor(), rf_params, cv=5, n_jobs=-1, scoring='r2')
rf_grid.fit(X_train, y_train)
rf_grid.best_params_

{'max_depth': 17,
 'max_features': 15,
 'n_estimators': 100,
 'n_jobs': -1,
 'random_state': 1}

In [5]:
y_pred = rf_grid.best_estimator_.predict(X_test)

print_results(y_test, y_pred)

MSE: 51.539236703532055 	R2 score: 0.9667849500468195


In [7]:
rf_params = {'max_depth': [15, 16, 17, 18, 19, 20],
             'random_state': [1],
             'n_estimators': [87, 100, 110, 120, 150],
             'n_jobs': [-1],
             'max_features': [13, 14, 15, 16, 17, 18]
             }

rf_grid = GridSearchCV(RandomForestRegressor(), rf_params, cv=5, n_jobs=-1, scoring='r2')
rf_grid.fit(X_train, y_train)
print(rf_grid.best_params_)

y_pred = rf_grid.best_estimator_.predict(X_test)
print_results(y_test, y_pred)

{'max_depth': 16, 'max_features': 13, 'n_estimators': 150, 'n_jobs': -1, 'random_state': 1}
MSE: 47.76823004320308 	R2 score: 0.9692152183745617


In [8]:
rf_params = {'max_depth': [15, 16, 17, 18],
             'random_state': [1],
             'n_estimators': [120, 150, 170, 200, 220],
             'n_jobs': [-1],
             'max_features': [12, 13, 14, 15, 16]
             }

rf_grid = GridSearchCV(RandomForestRegressor(), rf_params, cv=5, n_jobs=-1, scoring='r2')
rf_grid.fit(X_train, y_train)
print(rf_grid.best_params_)

y_pred = rf_grid.best_estimator_.predict(X_test)
print_results(y_test, y_pred)

{'max_depth': 16, 'max_features': 13, 'n_estimators': 150, 'n_jobs': -1, 'random_state': 1}
MSE: 47.76823004320308 	R2 score: 0.9692152183745617


In [9]:
rf_params = {'max_depth': [15, 16, 17],
             'random_state': [1],
             'n_estimators': [140, 145, 150, 155, 160],
             'n_jobs': [-1],
             'max_features': [12, 13, 14]
             }

rf_grid = GridSearchCV(RandomForestRegressor(), rf_params, cv=5, n_jobs=-1, scoring='r2')
rf_grid.fit(X_train, y_train)
print(rf_grid.best_params_)

y_pred = rf_grid.best_estimator_.predict(X_test)
print_results(y_test, y_pred)

{'max_depth': 16, 'max_features': 13, 'n_estimators': 150, 'n_jobs': -1, 'random_state': 1}
MSE: 47.76823004320308 	R2 score: 0.9692152183745617


In [10]:
rf_params = {'max_depth': [16],
             'random_state': [1],
             'n_estimators': [146, 147, 148, 149, 150, 151, 152, 153],
             'n_jobs': [-1],
             'max_features': [13, 'auto']
             }

rf_grid = GridSearchCV(RandomForestRegressor(), rf_params, cv=5, n_jobs=-1, scoring='r2')
rf_grid.fit(X_train, y_train)
print(rf_grid.best_params_)

y_pred = rf_grid.best_estimator_.predict(X_test)
print_results(y_test, y_pred)

{'max_depth': 16, 'max_features': 13, 'n_estimators': 151, 'n_jobs': -1, 'random_state': 1}
MSE: 47.67195155832302 	R2 score: 0.9692772661441688


In [19]:
best_random_forest = RandomForestRegressor(random_state=1, n_jobs=-1, n_estimators=151, max_depth=16, max_features=13)
best_random_forest.fit(X_train, y_train)

y_pred = best_random_forest.predict(X_test)

print_results(y_pred, y_test)

MSE: 47.67195155832302 	R2 score: 0.9652607765396858
