In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
import joblib


In [2]:
# Load the persisted feature matrix and target vector (features first, then
# target — same order as the tuple of paths).
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; these artifacts are presumably produced by an earlier notebook in this
# project — confirm they are trusted before loading.
X, y = (
    joblib.load(artifact_path)
    for artifact_path in ("../models/X_processed.pkl", "../models/y.pkl")
)

# Quick sanity check that features and target were loaded with the expected sizes.
print("X shape:", X.shape)
print("y shape:", y.shape)


X shape: (1338, 11)
y shape: (1338,)


In [3]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Report the resulting partition sizes.
print("Train size:", X_train.shape)
print("Test size:", X_test.shape)


Train size: (1070, 11)
Test size: (268, 11)


In [4]:
# Random forest with library-default hyperparameters; only the seed is fixed
# so repeated runs build the same ensemble.
rf_model = RandomForestRegressor(random_state=42)

# Train on the training partition. fit() is left as the cell's final
# expression so the fitted estimator is displayed as the cell output.
rf_model.fit(X_train, y_train)


In [5]:
# Score the random forest on the held-out test partition.
rf_preds = rf_model.predict(X_test)

# MSE is computed first because RMSE is derived from it.
rf_mse = mean_squared_error(y_test, rf_preds)
rf_rmse = np.sqrt(rf_mse)
rf_mae = mean_absolute_error(y_test, rf_preds)
rf_r2 = r2_score(y_test, rf_preds)

# One record per model; key order determines the column order of the
# comparison table built from these records.
rf_metrics = dict(
    zip(
        ("Model", "MAE", "MSE", "RMSE", "R2"),
        ("Random Forest", rf_mae, rf_mse, rf_rmse, rf_r2),
    )
)

rf_metrics


{'Model': 'Random Forest',
 'MAE': 2540.9934754008714,
 'MSE': 21051201.598936763,
 'RMSE': 4588.1588463060825,
 'R2': 0.8644033906321982}

In [6]:
# Gradient boosting with library-default hyperparameters; only the seed is
# fixed so repeated runs build the same ensemble.
gb_model = GradientBoostingRegressor(random_state=42)

# Train on the training partition. fit() is left as the cell's final
# expression so the fitted estimator is displayed as the cell output.
gb_model.fit(X_train, y_train)


In [7]:
# Score the gradient boosting model on the held-out test partition.
gb_preds = gb_model.predict(X_test)

# MSE is computed first because RMSE is derived from it.
gb_mse = mean_squared_error(y_test, gb_preds)
gb_rmse = np.sqrt(gb_mse)
gb_mae = mean_absolute_error(y_test, gb_preds)
gb_r2 = r2_score(y_test, gb_preds)

# One record per model; key order determines the column order of the
# comparison table built from these records.
gb_metrics = dict(
    zip(
        ("Model", "MAE", "MSE", "RMSE", "R2"),
        ("Gradient Boosting", gb_mae, gb_mse, gb_rmse, gb_r2),
    )
)

gb_metrics


{'Model': 'Gradient Boosting',
 'MAE': 2405.961837013303,
 'MSE': 18798533.400645092,
 'RMSE': 4335.727551477963,
 'R2': 0.8789134492758081}

In [8]:
# Stack the per-model metric records into one table: one row per model,
# one column per metric, in the order the records' keys were inserted.
comparison_df = pd.DataFrame.from_records([rf_metrics, gb_metrics])
comparison_df


Unnamed: 0,Model,MAE,MSE,RMSE,R2
0,Random Forest,2540.993475,21051200.0,4588.158846,0.864403
1,Gradient Boosting,2405.961837,18798530.0,4335.727551,0.878913


In [9]:
# Persist both fitted estimators next to the input artifacts so they can be
# reloaded later without retraining. Only the second dump's return value
# (the list of written file names) is shown as the cell output.
joblib.dump(value=rf_model, filename="../models/random_forest_regressor.pkl")
joblib.dump(value=gb_model, filename="../models/gradient_boosting_regressor.pkl")


['../models/gradient_boosting_regressor.pkl']