In [1]:
# 4_Model_Evaluation.ipynb

import joblib
from sklearn.metrics import mean_squared_error, r2_score

# Location of the serialized train/test split, kept in one place so it is easy to retarget.
SPLIT_DATA_PATH = 'data/split_data.pkl'

# The file must deserialize to exactly four objects, otherwise the unpacking below raises.
# Presumably these are train/test features followed by train/test targets — confirm
# against the notebook that wrote the file.
# NOTE(review): joblib.load unpickles the file, so only load artifacts from a trusted source.
X_train, X_test, y_train, y_test = joblib.load(SPLIT_DATA_PATH)

# Display name -> on-disk artifact for every model to be compared.
# Insertion order is the order in which models are loaded and later reported.
MODEL_PATHS = {
    'Adaboost': 'models/adaboost_model.pkl',
    'DecisionTree': 'models/decisiontree_model.pkl',
    'GradientBoosting': 'models/gradientboosting_model.pkl',
    'KNeighbors': 'models/kneighbors_model.pkl',
    'Lasso': 'models/lasso_model.pkl',
    'Ridge': 'models/ridge_model.pkl',
    'XGBoost': 'models/xgboost_model.pkl',
}

# Deserialize each artifact; a missing or unreadable file aborts the cell before
# `models` is bound, so a partially populated dict is never left behind.
# NOTE(review): joblib.load unpickles the file, so only load artifacts from a trusted source.
models = {name: joblib.load(path) for name, path in MODEL_PATHS.items()}

# Metric tables keyed by the same display names used in `models`
mse_results, r2_results = {}, {}

# Score every loaded model against (X_test, y_test) and report one line per model
for model_name in models:
    model = models[model_name]
    y_pred = model.predict(X_test)

    # Compute both metrics from the same predictions before recording either
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    mse_results[model_name], r2_results[model_name] = mse, r2

    # Values are rounded for display only; the dictionaries keep full precision
    print(f"{model_name} - Mean Squared Error: {mse:.2f}, R² Score: {r2:.2f}")

# Pick the model whose R² is highest. Comparison uses the unrounded scores, and on an
# exact tie the first name in insertion order wins (the behaviour of max()).
best_model_name = max(r2_results, key=lambda name: r2_results[name])

# Keep a handle on the winning estimator itself, not just its name
best_model = models[best_model_name]

print(f"\nBest Model: {best_model_name} with R² Score: {r2_results[best_model_name]:.2f}")


Adaboost - Mean Squared Error: 3.96, R² Score: 0.81
DecisionTree - Mean Squared Error: 2.18, R² Score: 0.90
GradientBoosting - Mean Squared Error: 1.56, R² Score: 0.93
KNeighbors - Mean Squared Error: 1.74, R² Score: 0.92
Lasso - Mean Squared Error: 2.53, R² Score: 0.88
Ridge - Mean Squared Error: 2.32, R² Score: 0.89
XGBoost - Mean Squared Error: 1.54, R² Score: 0.93

Best Model: XGBoost with R² Score: 0.93
