# 04 - Model Training
This notebook trains regression models and evaluates performance on validation data.

In [31]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
%matplotlib inline

In [32]:
# Load processed features and targets
X_train = np.load("/Users/alaminbinshafiq/Desktop/Projects/housing-price-prediction/data/X_train.npy")
X_val = np.load("/Users/alaminbinshafiq/Desktop/Projects/housing-price-prediction/data/X_val.npy")
y_train = pd.read_csv("/Users/alaminbinshafiq/Desktop/Projects/housing-price-prediction/data/y_train.csv").values.ravel()
y_val = pd.read_csv("/Users/alaminbinshafiq/Desktop/Projects/housing-price-prediction/data/y_val.csv").values.ravel()

In [33]:
# Train Linear Regression
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)
lr_preds = lr_model.predict(X_val)

In [34]:
# Train Random Forest
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
rf_preds = rf_model.predict(X_val)

In [35]:
# Train XGBoost
xgb_model = XGBRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
xgb_model.fit(X_train, y_train)
xgb_preds = xgb_model.predict(X_val)

In [36]:
# Evaluation Function
def evaluate_model(name, y_true, y_pred):
    rmse = mean_squared_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    print(f"{name} RMSE: {rmse:.2f}")
    print(f"{name} R^2 Score: {r2:.2f}\n")
    return rmse, r2

In [37]:
# Evaluate All Models
evaluate_model("Linear Regression", y_val, lr_preds)
evaluate_model("Random Forest", y_val, rf_preds)
evaluate_model("XGBoost", y_val, xgb_preds)

Linear Regression RMSE: 886560482.18
Linear Regression R^2 Score: 0.88

Random Forest RMSE: 864614578.20
Random Forest R^2 Score: 0.89

XGBoost RMSE: 631114048.00
XGBoost R^2 Score: 0.92



(631114048.0, 0.9177200794219971)