# Hyperparameter Tuning
This notebook tunes the hyperparameters of a random forest regressor with an exhaustive grid search, then evaluates and saves the best model found.


In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import pickle
import streamlit as st
import os
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


## Load Processed Data
We load the processed training dataset, separate the features from the `SalePrice` target, and hold out 20% of the rows as a test set for the final evaluation.



In [None]:
# Read the processed training table from disk
data = pd.read_csv("../data/processed_train.csv")

# Column the model is trained to predict
target = "SalePrice"

# Features are every column except the target; the target column is the label
X, y = data.drop(columns=[target]), data[target]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

print("Processed dataset loaded successfully.")


## Define Hyperparameter Grid
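We define a grid of hyperparameter values to test different model configurations. The grid below contains 3 × 4 × 3 × 3 = 108 combinations, each of which is scored with 3-fold cross-validation.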


In [None]:
# Candidate values for each random forest hyperparameter; the grid search
# evaluates every combination of them
param_grid = {
    "n_estimators": [50, 100, 200],
    "max_depth": [None, 10, 20, 30],
    "min_samples_split": [2, 5, 10],
    "min_samples_leaf": [1, 2, 4],
}

# Base estimator, seeded so repeated runs are reproducible
rf = RandomForestRegressor(random_state=42)

# Exhaustive search over the grid: 3-fold cross-validation scored by R²,
# run with n_jobs=-1 (all available processors) and verbose progress output
grid_search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    scoring="r2",
    cv=3,
    n_jobs=-1,
    verbose=2,
)


## Train with Hyperparameter Tuning
We fit GridSearchCV on the training split to find the hyperparameter combination in the grid with the best cross-validated R² score.


In [None]:
# Run the grid search on the training split. fit() returns the fitted search
# object, so the winning parameter set can be read directly from the result.
best_params = grid_search.fit(X_train, y_train).best_params_

# Report the best-scoring hyperparameter combination
print(f"Best Hyperparameters: {best_params}")


## Evaluate the Optimized Model
We take the best model found during hyperparameter tuning and evaluate its performance on the held-out test set.


In [None]:
# Get the best estimator from GridSearchCV (the model fitted with the
# best-scoring hyperparameter combination)
best_model = grid_search.best_estimator_

# Generate predictions on the held-out test split
y_pred = best_model.predict(X_test)

# Compute evaluation metrics comparing the predictions with the true targets
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Display metrics, each rounded to two decimals.
# The header is a plain string because it has no placeholders to format.
print("Optimized Model Metrics:")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"R-squared Score (R²): {r2:.2f}")


## Save Optimized Model
We save the best model from hyperparameter tuning for deployment.


In [None]:
# Create the output directory if it is missing; an existing one is left as is
os.makedirs("../models", exist_ok=True)

# Serialize the tuned estimator to disk with pickle (binary write mode)
with open("../models/optimized_model.pkl", "wb") as model_file:
    pickle.dump(best_model, model_file)

print("Optimized model saved successfully.")


## Summary
- Loaded processed training data.
- Defined a grid of hyperparameters.
- Used GridSearchCV for model tuning.
- Evaluated the best-tuned model.
- Saved the optimized model.
