In [1]:
import pandas as pd
import numpy as np

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV

from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [2]:
from sklearn.datasets import load_boston

In [6]:
import warnings
warnings.filterwarnings("ignore")

In [7]:
data_dict = load_boston()

In [8]:
x = pd.DataFrame(data_dict.data,columns=data_dict.feature_names)

In [9]:
y = data_dict.target

In [10]:
x

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.0900,1.0,296.0,15.3,396.90,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.90,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.90,5.33
...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0.0,0.573,6.593,69.1,2.4786,1.0,273.0,21.0,391.99,9.67
502,0.04527,0.0,11.93,0.0,0.573,6.120,76.7,2.2875,1.0,273.0,21.0,396.90,9.08
503,0.06076,0.0,11.93,0.0,0.573,6.976,91.0,2.1675,1.0,273.0,21.0,396.90,5.64
504,0.10959,0.0,11.93,0.0,0.573,6.794,89.3,2.3889,1.0,273.0,21.0,393.45,6.48


### Train Test Split

In [12]:
x_train, x_test, y_train, y_test = train_test_split(x,y)

### Model Training

In [13]:
rf_model = RandomForestRegressor()
rf_model.fit(x_train, y_train)

### Model Evaluation

#### On Test Dataset

In [14]:
y_pred = rf_model.predict(x_test)

mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error: ", mse, "\n")
rmse = np.sqrt(mse)
print("Root Mean Squared Error: ", rmse, "\n")
mae = mean_absolute_error(y_test,y_pred)
print("Mean Absolute Error: ", mae, "\n")
r2 = r2_score(y_test,y_pred)
print("R2 Score: ", r2)

Mean Squared Error:  7.383837259842525 

Root Mean Squared Error:  2.7173217070936824 

Mean Absolute Error:  2.0709448818897656 

R2 Score:  0.9036301106679056


#### On Train Dataset

In [15]:
y_pred_train = rf_model.predict(x_train)

mse = mean_squared_error(y_train, y_pred_train)
print("Mean Squared Error: ", mse, "\n")
rmse = np.sqrt(mse)
print("Root Mean Squared Error: ", rmse, "\n")
mae = mean_absolute_error(y_train, y_pred_train)
print("Mean Absolute Error: ", mae, "\n")
r2 = r2_score(y_train, y_pred_train)
print("R2 Score: ", r2)

Mean Squared Error:  1.6140130976253295 

Root Mean Squared Error:  1.2704381518300407 

Mean Absolute Error:  0.860791556728231 

R2 Score:  0.9813978401952618


### Hyperparameter Tuning

In [16]:
rf_model = RandomForestRegressor()

param_grid = {"n_estimators": np.arange(80,200),
             "criterion":["mse","mae"],
             "max_depth":np.arange(3,10),
             "min_samples_split":np.arange(10,15),
             "min_samples_leaf": np.arange(3,8),
             "random_state":[11]}

rscv_rf_model = RandomizedSearchCV(rf_model, param_grid, cv=5)
rscv_rf_model.fit(x_train,y_train)
rscv_rf_model.best_estimator_

In [19]:
rscv_rf_model

In [17]:
new_rf_model = rscv_rf_model.best_estimator_

### Evaluation of New Model

### On Test Dataset

In [18]:
y_pred = new_rf_model.predict(x_test)

mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error: ", mse, "\n")
rmse = np.sqrt(mse)
print("Root Mean Squared Error: ", rmse, "\n")
mae = mean_absolute_error(y_test,y_pred)
print("Mean Absolute Error: ", mae, "\n")
r2 = r2_score(y_test,y_pred)
print("R2 Score: ", r2)

Mean Squared Error:  8.117208324116342 

Root Mean Squared Error:  2.8490714845570904 

Mean Absolute Error:  2.098385140199601 

R2 Score:  0.8940585443106948


#### On Train Dataset

In [20]:
y_pred_train = new_rf_model.predict(x_train)

mse = mean_squared_error(y_train, y_pred_train)
print("Mean Squared Error: ", mse, "\n")
rmse = np.sqrt(mse)
print("Root Mean Squared Error: ", rmse, "\n")
mae = mean_absolute_error(y_train, y_pred_train)
print("Mean Absolute Error: ", mae, "\n")
r2 = r2_score(y_train, y_pred_train)
print("R2 Score: ", r2)

Mean Squared Error:  5.332386434318135 

Root Mean Squared Error:  2.309196058007664 

Mean Absolute Error:  1.4644102329175193 

R2 Score:  0.9385420696165683
