# Model Evaluation

In this notebook, we evaluate the performance of our trained linear regression model. We calculate the Mean Squared Error (MSE) and the R-squared ($R^2$) score, then visualize the residuals and the actual-versus-predicted values to better understand where the model's predictions deviate from the true prices.

In [None]:
import sys
import os
# Ensure the models directory exists. exist_ok=True makes this a no-op when
# the directory is already present and removes the check-then-create race of
# a separate os.path.exists() test. Note: os.makedirs still raises
# FileExistsError if '../models' exists but is not a directory.
os.makedirs('../models', exist_ok=True)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_squared_error, r2_score

# Add the src directory to the Python path. The membership check keeps
# sys.path free of duplicate entries when this cell is re-run in the same
# kernel.
src_dir = os.path.abspath('../src')
if src_dir not in sys.path:
    sys.path.append(src_dir)

# Import the load_model function from the project's `model` module
# (expected to be importable from ../src — verify against the repo layout).
from model import load_model

# Read the processed dataset, then split it into the target column and
# the feature matrix (every column except 'Price').
data = pd.read_csv('../data/processed/processed_data.csv')
y = data['Price']  # Target variable
X = data.drop(columns='Price')  # Features

# Load the trained model through the project's load_model helper
model = load_model('../models/linear_regression_model.pkl')

In [None]:
# Predict the target for every row of the feature matrix.
# NOTE(review): this scores the model on the full processed dataset; whether
# that includes rows used for training cannot be told from this notebook.
y_pred = model.predict(X)

# Score the predictions against the true targets: MSE first, then R-squared
mse, r2 = (scorer(y, y_pred) for scorer in (mean_squared_error, r2_score))

# Report both metrics, one per line
print(f'Mean Squared Error: {mse}', f'R-squared: {r2}', sep='\n')

In [3]:
# Residual plot: prediction error (actual minus predicted) against the
# predicted value.
residuals = y - y_pred

fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(x=y_pred, y=residuals, ax=ax)
ax.axhline(0, color='red', linestyle='--')  # zero-error reference line
ax.set_title('Residuals vs Predicted Values')
ax.set_xlabel('Predicted Values')
ax.set_ylabel('Residuals')
plt.show()

In [None]:
# Visualize actual vs predicted values on an explicit Axes so the figure
# does not depend on pyplot's implicit "current axes" state.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y, y_pred)

# Identity line (predicted == actual). Span the combined range of actual and
# predicted values so the reference line is not cut short when predictions
# fall outside the range of the actual values.
line_min = min(y.min(), np.min(y_pred))
line_max = max(y.max(), np.max(y_pred))
ax.plot([line_min, line_max], [line_min, line_max], 'r--')

ax.set_title('Actual vs Predicted Values')
ax.set_xlabel('Actual Values')
ax.set_ylabel('Predicted Values')
plt.show()