# Model Evaluations


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from sklearn.metrics import  accuracy_score ,mean_squared_error , mean_absolute_error ,r2_score

# Load Linear Model


In [None]:
# Load the saved linear regression model from disk. @sabinvankathmandu
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code, so only load model artifacts that come from a trusted source.
linear_model = joblib.load("../model/linear_regression_model.pkl")

In [None]:
# Load the preprocessed train/test splits (features and targets). @sabinvankathmandu
# The files are read in the same order as the names on the left-hand side.
X_train, X_test, y_train, y_test = map(
    pd.read_pickle,
    (
        "../data/X_train.pkl",
        "../data/X_test.pkl",
        "../data/y_train.pkl",
        "../data/y_test.pkl",
    ),
)

In [None]:
# Predict with the linear model on both splits (train first, then test).
y_train_pred, y_test_pred = (
    linear_model.predict(X_train),
    linear_model.predict(X_test),
)



In [None]:
# Bare expression: in a notebook this renders the test-set predictions as the
# cell output for a quick visual check.
y_test_pred

# Evaluate Linear Regression

In [None]:
# Test-set error metrics for the linear regression model.
mse_lr = mean_squared_error(y_test, y_test_pred)
rmse = np.sqrt(mse_lr)  # square root of the MSE
mae_lr = mean_absolute_error(y_test, y_test_pred)
r2_lr = r2_score(y_test, y_test_pred)

print(
    f"Linear Regression MAE {mae_lr:.4f}, MSE: {mse_lr:.4f}, R²: {r2_lr:.4f} RMSE {rmse:.4f}"
)

# Visualize actual vs predicted (Linear Regression)

In [None]:
# Scatter of actual vs predicted prices for the linear model. @sabinvankathmandu
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(x=y_test, y=y_test_pred, alpha=0.7, ax=ax)

# Dashed red identity line: a point lying on it was predicted exactly.
actual_bounds = [y_test.min(), y_test.max()]
ax.plot(actual_bounds, actual_bounds, "--r", lw=2)

ax.set_xlabel("Actual Prices")
ax.set_ylabel("Predicted Prices")
ax.set_title("Actual vs Predicted House Prices")
plt.show()

# Load Lasso and Ridge models


In [None]:
# Restore the saved Ridge and Lasso models (Ridge is loaded first).
ridge_best, lasso_best = (
    joblib.load("../model/ridge_model.pkl"),
    joblib.load("../model/lasso_model.pkl"),
)

print("Ridge and Lasso models loaded!")

In [None]:
# Test-set predictions from both regularised models (Ridge, then Lasso).
y_pred_ridge, y_pred_lasso = (
    ridge_best.predict(X_test),
    lasso_best.predict(X_test),
)


# Evaluate Ridge and Lasso Models


In [None]:
# Score both regularised models on the test split: MSE and R² for each.
mse_ridge, r2_ridge = (
    mean_squared_error(y_test, y_pred_ridge),
    r2_score(y_test, y_pred_ridge),
)
mse_lasso, r2_lasso = (
    mean_squared_error(y_test, y_pred_lasso),
    r2_score(y_test, y_pred_lasso),
)

# One line per model, Ridge first.
print(
    f"Ridge Model MSE: {mse_ridge:.4f}, R²: {r2_ridge:.4f}",
    f"Lasso Model MSE: {mse_lasso:.4f}, R²: {r2_lasso:.4f}",
    sep="\n",
)


# Visualize Actual vs Predicted Values

In [None]:
plt.figure(figsize=(12, 6))

# One scatter layer per regularised model, drawn Ridge first and then Lasso.
for model_label, model_predictions in (
    ("Ridge", y_pred_ridge),
    ("Lasso", y_pred_lasso),
):
    sns.scatterplot(x=y_test, y=model_predictions, label=model_label, alpha=0.7)

# Dashed red identity line as the "perfect prediction" reference.
actual_bounds = [y_test.min(), y_test.max()]
plt.plot(actual_bounds, actual_bounds, "--r", lw=2)

# Axis labels, title and legend.
plt.xlabel("Actual Prices")
plt.ylabel("Predicted Prices")
plt.title("Actual vs Predicted Prices (Ridge & Lasso)")
plt.legend()
plt.show()

In [None]:
plt.figure(figsize=(12, 6))

# Overlay the residual (actual - predicted) histograms of all three models.
# Layers are drawn in this order: Linear Regression, Ridge, Lasso.
for model_predictions, hist_color, model_label in (
    (y_test_pred, "blue", "Linear Regression"),
    (y_pred_ridge, "green", "Ridge"),
    (y_pred_lasso, "orange", "Lasso"),
):
    sns.histplot(
        y_test - model_predictions,
        bins=30,
        kde=True,
        color=hist_color,
        label=model_label,
        alpha=0.6,
    )

plt.xlabel("Residuals (Error)")
plt.title("Distribution of Residuals (Linear, Ridge & Lasso)")
plt.legend()
plt.show()


# Residual analysis

In [None]:
plt.figure(figsize=(12, 6))

# Residual (actual - predicted) histograms for the two regularised models only,
# Ridge drawn first and Lasso on top.
for model_predictions, hist_color, model_label in (
    (y_pred_ridge, "green", "Ridge Residuals"),
    (y_pred_lasso, "orange", "Lasso Residuals"),
):
    sns.histplot(
        y_test - model_predictions,
        bins=30,
        kde=True,
        color=hist_color,
        label=model_label,
        alpha=0.6,
    )

plt.xlabel("Residuals")
plt.title("Distribution of Residuals (Ridge & Lasso)")
plt.legend()
plt.show()


# Feature Importance (Ridge and Lasso Models)

In [None]:
# Compare the coefficient magnitudes of the Ridge and Lasso models per feature.
# NOTE(review): |coefficient| is only a fair "importance" measure when the
# features share a common scale -- confirm against the preprocessing step.
feature_names = X_train.columns

# scikit-learn linear models expose coef_ as (n_features,) for a 1-D target and
# (1, n_features) for a 2-D single-column target. np.ravel turns the latter
# into the 1-D vector a DataFrame column requires and leaves a 1-D coef_ as is.
ridge_coefs = np.ravel(ridge_best.coef_)
lasso_coefs = np.ravel(lasso_best.coef_)

# One row per feature with both models' signed coefficients.
coefs_df = pd.DataFrame({
    "Feature": feature_names,
    "Ridge Coefficients": ridge_coefs,
    "Lasso Coefficients": lasso_coefs,
})

# Importance = absolute coefficient; rows are ordered by the Ridge magnitude.
coefs_df["Ridge Importance"] = np.abs(coefs_df["Ridge Coefficients"])
coefs_df["Lasso Importance"] = np.abs(coefs_df["Lasso Coefficients"])
coefs_df_sorted = coefs_df.sort_values(by="Ridge Importance", ascending=False)

# Reshape to long form (one row per feature/model pair) so seaborn can draw the
# two models as side-by-side bars. Drawing two separate barplots on the same
# axes stacks them on identical positions, so one model's bars hide the other's.
importance_long = coefs_df_sorted.rename(
    columns={"Ridge Importance": "Ridge", "Lasso Importance": "Lasso"}
).melt(
    id_vars="Feature",
    value_vars=["Ridge", "Lasso"],
    var_name="Model",
    value_name="Importance",
)

# Plot Feature Importance (Ridge & Lasso); `hue` also supplies the legend.
plt.figure(figsize=(12, 8))
sns.barplot(
    data=importance_long,
    x="Importance",
    y="Feature",
    hue="Model",
    palette={"Ridge": "blue", "Lasso": "orange"},
)
plt.xlabel("Absolute Coefficient")
plt.title("Feature Importance (Ridge & Lasso Models)")
plt.show()


# Model evaluation metrics comparisons


In [None]:
# Side-by-side comparison of all three models on the test split.
# The Linear Regression values (mae_lr, mse_lr, r2_lr, rmse) come from its own
# evaluation cell; the Ridge and Lasso scores are (re)computed here.
mse_ridge = mean_squared_error(y_test, y_pred_ridge)
r2_ridge = r2_score(y_test, y_pred_ridge)
mae_ridge = mean_absolute_error(y_test, y_pred_ridge)

mse_lasso = mean_squared_error(y_test, y_pred_lasso)
r2_lasso = r2_score(y_test, y_pred_lasso)
mae_lasso = mean_absolute_error(y_test, y_pred_lasso)

# Display Results. MAE and RMSE are reported for the linear model, so they are
# included for Ridge and Lasso too; the new columns are appended after the
# existing ones so "Model", "MSE" and "R²" keep their positions.
results_df = pd.DataFrame({
    "Model": ["Linear Regression", "Ridge", "Lasso"],
    "MSE": [mse_lr, mse_ridge, mse_lasso],
    "R²": [r2_lr, r2_ridge, r2_lasso],
    "MAE": [mae_lr, mae_ridge, mae_lasso],
    "RMSE": [rmse, np.sqrt(mse_ridge), np.sqrt(mse_lasso)],
})

print(results_df)


# Save evaluation results


In [None]:
# Tabulate the Ridge and Lasso test scores, one row per model.
# NOTE: this rebinds results_df, replacing the three-model comparison table.
results_df = pd.DataFrame(
    [
        ("Ridge", mse_ridge, r2_ridge),
        ("Lasso", mse_lasso, r2_lasso),
    ],
    columns=["Model", "MSE", "R²"],
)

# Persist the table next to the model artifacts, without the index column.
results_df.to_csv("../model/ridge_lasso_evaluation_results.csv", index=False)
print("Evaluation results saved to CSV!")
