# Final Model Training
This notebook trains the final model on the entire processed dataset and saves it for deployment.



In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import pickle
import os
import streamlit as st
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


## Load Processed Data
We load the cleaned and processed dataset to train the final model.


In [None]:
# Read the preprocessed training table from disk (path is relative to this notebook)
data = pd.read_csv("../data/processed_train.csv")

# Name of the column the model learns to predict
target = "SalePrice"

# Split the table: y is the label column, X is every remaining column
y = data[target]
X = data.drop(columns=target)

print("Processed dataset loaded successfully.")


## Train the Final Model
We train the final model using the entire dataset with the best hyperparameters.



In [None]:
# Best hyperparameters, written out here as literals (nothing is read from disk).
# NOTE(review): presumably copied from the tuning step — confirm they are current.
best_params = {
    'max_depth': 20,
    'min_samples_leaf': 2,
    'min_samples_split': 2,
    'n_estimators': 100,
}

# Build the regressor from those settings; the fixed seed keeps runs repeatable
final_model = RandomForestRegressor(random_state=42, **best_params)

# Fit on every available row — no hold-out split is made in this cell
final_model.fit(X, y)

print("Final model training completed.")



## Evaluate the Final Model
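We evaluate the final model on the same dataset it was trained on. These are therefore training-set (in-sample) metrics and will be optimistic compared with performance on unseen data.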


In [None]:
# Generate predictions on X, the same rows the model was fitted on.
# The numbers below are therefore in-sample (training) metrics and will look
# better than performance on unseen data; use a held-out set or
# cross-validation to estimate generalization.
y_pred = final_model.predict(X)

# Compute evaluation metrics
mae = mean_absolute_error(y, y_pred)  # mean absolute error, in units of the target
mse = mean_squared_error(y, y_pred)   # mean squared error, in squared target units
r2 = r2_score(y, y_pred)              # coefficient of determination

# Display metrics
print("Final Model Metrics:")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Mean Squared Error (MSE): {mse:.2f}")
# "\u00b2" is the superscript-two character. The label was previously garbled
# ("RÂ²") by an encoding round-trip; the escape keeps it intact regardless
# of how the file is saved or decoded.
print(f"R-squared Score (R\u00b2): {r2:.2f}")


## Save Final Model
We save the trained final model for deployment.


In [None]:
# Create the output folder if it is missing; exist_ok makes this a no-op otherwise
os.makedirs("../models", exist_ok=True)

# Serialize the fitted estimator to disk in binary mode.
# The context manager closes the file even if pickling raises.
with open("../models/final_model.pkl", "wb") as model_file:
    pickle.dump(final_model, model_file)

print("Final model saved successfully.")


## Summary
- Loaded the cleaned and processed dataset.
- Trained the final model using the entire dataset.
- Evaluated the model's performance on the training data.
- Saved the final model for deployment.
