In [76]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

from sklearn.metrics import mean_squared_error 
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor


### Decision Tree Regressor Algorithm

In [77]:
# Fetch the California Housing dataset and unpack it into the feature
# matrix (X) and the regression target (y).
california = fetch_california_housing()
X, y = california.data, california.target



Train Test Split

In [78]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [79]:
# Baseline decision tree with default hyperparameters.
# random_state is pinned because tree construction is randomized (feature
# order is permuted at each split), so an unseeded tree gives slightly
# different test scores on every run and the reported metrics drift.
regressor = DecisionTreeRegressor(random_state=42)
regressor.fit(X_train, y_train)

In [80]:
# Predict the target for the held-out test rows with the baseline tree
# and print the resulting array.
Y_predict = regressor.predict(X_test)
print(Y_predict)

[0.425   1.203   5.00001 ... 5.00001 0.656   2.071  ]


In [81]:
# Mean squared error of the baseline tree on the test split.
mse = mean_squared_error(y_test, Y_predict)
print(mse)

# RMSE is the square root of the MSE. The `squared=False` keyword of
# mean_squared_error was deprecated in scikit-learn 1.4 and removed in 1.6,
# so derive the value with NumPy instead; this works on every version.
rmse = np.sqrt(mse)
print(rmse)

0.4939142129400193
0.7027903050981987


Random Forest Regression 

In [82]:
# Random forest of 50 trees, seeded so the fit is repeatable.
model = RandomForestRegressor(
    n_estimators=50,
    random_state=42,
)

model.fit(X_train, y_train)

In [83]:
# Test-set MSE of the untuned random forest; shown as the cell output.
y_pred = model.predict(X_test)
Random_Forest_MSE = mean_squared_error(y_true=y_test, y_pred=y_pred)
Random_Forest_MSE

0.2572979293772426

Hyperparameter Tuning

In [84]:
# NOTE(review): this is the same call, with the same arguments and seed, as the
# split in the "Train Test Split" section above. It presumably yields the
# identical partition, so this cell looks redundant — confirm before removing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [85]:
# Search space for the decision-tree grid search: every combination of the
# values below is a candidate.
param_grid = dict(
    max_depth=[2, 5, 10, 20],
    min_samples_split=[3, 2, 5, 10],
    min_samples_leaf=[3, 1, 2, 4],
    max_features=[1.0, 'sqrt'],
)

In [86]:
# Unfitted tree that serves as the base estimator for the grid search below.
# It is deliberately not fitted here: the GridSearchCV object built from it
# is what gets fitted on the training split.
# NOTE: this rebinds `regressor`, replacing the baseline tree fitted earlier.
regressor = DecisionTreeRegressor(random_state=41)

In [87]:
# 5-fold cross-validated grid search over the decision-tree search space.
grid_search = GridSearchCV(estimator=regressor, param_grid=param_grid, cv=5)


In [88]:
# Run the grid search using the training split only; the test split is
# kept aside for the final evaluation.
grid_search.fit(X_train, y_train)

In [89]:
# Report the parameter combination selected by the decision-tree search.
print("Best Hyperparameters:", grid_search.best_params_)

Best Hyperparameters: {'max_depth': 10, 'max_features': 1.0, 'min_samples_leaf': 4, 'min_samples_split': 10}


In [90]:
# Keep the estimator selected by the search so it can be scored on the
# test split.
best_model = grid_search.best_estimator_

In [91]:
# Test-set MSE of the tuned decision tree; shown as the cell output.
# Y_predict is rebound here to the tuned tree's predictions.
Y_predict = best_model.predict(X_test)
DTR_mse = mean_squared_error(y_true=y_test, y_pred=Y_predict)
DTR_mse

0.40917439790955007

### Hyperparameter Tuning for Random Forest

In [92]:
# Base random forest (10 trees) for the grid search below.
# random_state is pinned because bootstrapping and feature sub-sampling are
# random: without a seed the cross-validation scores, the selected
# hyperparameters and the final test MSE change from run to run.
rfr_regressor = RandomForestRegressor(n_estimators=10, random_state=42)

In [93]:
# Search space for the random-forest grid search (same candidate values as
# the decision-tree search earlier in the notebook).
param_grid = dict(
    max_depth=[2, 5, 10, 20],
    min_samples_split=[3, 2, 5, 10],
    min_samples_leaf=[3, 1, 2, 4],
    max_features=[1.0, 'sqrt'],
)

In [94]:
# 5-fold cross-validated grid search over the random-forest search space.
grid_search_RFR = GridSearchCV(estimator=rfr_regressor, param_grid=param_grid, cv=5)

In [95]:
# Run the random-forest grid search using the training split only.
grid_search_RFR.fit(X_train, y_train)

In [None]:
# Report the parameter combination selected by the random-forest search.
print("Best Hyperparameters:", grid_search_RFR.best_params_)

Best Hyperparameters: {'max_depth': 20, 'max_features': 'sqrt', 'min_samples_leaf': 3, 'min_samples_split': 2}


In [None]:
# Rebinds best_model to the forest selected by the search; it previously
# held the tuned decision tree.
best_model = grid_search_RFR.best_estimator_

In [None]:
# Test-set MSE of the tuned random forest; shown as the cell output.
# Y_predict is rebound here to the tuned forest's predictions.
Y_predict = best_model.predict(X_test)
RFR_mse = mean_squared_error(y_true=y_test, y_pred=Y_predict)
RFR_mse

0.276639958024201

### QNo-04 Making a table showing the evaluation metrics

In [75]:
# Gather every metric computed in the notebook into one summary table:
# one row per metric, with its label in "ERRORS" and its score in "Values".
metric_names = [
    "DTR_MSE",
    "DTR_RMSE",
    "Random_Forest_MSE",
    "DTR_HPT_MSE",
    "Random_Forest_HPT_MSE",
]
metric_values = [mse, rmse, Random_Forest_MSE, DTR_mse, RFR_mse]

data = {"ERRORS": metric_names, "Values": metric_values}
result = pd.DataFrame(data)
result

Unnamed: 0,ERRORS,Values
0,DTR_MSE,0.498092
1,DTR_RMSE,0.705756
2,Random_Forest_MSE,0.257298
3,DTR_HPT_MSE,0.409174
4,Random_Forest_HPT_MSE,0.27664
