# Model Training
Training multiple machine learning models to predict housing prices.


In [None]:
import pandas as pd
import streamlit as st
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error


## Load Processed Data
Loading the transformed dataset for model training.


In [None]:
# Page heading for this step of the Streamlit app.
st.title("Model Training")

# Read the processed training table from disk.
data = pd.read_csv("../data/processed_train.csv")

# "SalePrice" is the prediction target; every remaining column is a feature.
y = data["SalePrice"]
X = data.drop("SalePrice", axis=1)

# Show the head of the loaded table so it can be inspected in the app.
st.write("Processed Data Sample", data.head())



## Splitting Data into Training and Testing Sets
Dividing data into 80% training and 20% testing.


In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

st.write("Training and Testing Data Split Completed!")


## Training Multiple Models
Comparing Linear Regression and Random Forest Regression by mean absolute error (MAE) on the test set.


In [None]:
# Candidate regressors, keyed by the label used when reporting results.
models = {
    "Linear Regression": LinearRegression(),
    "Random Forest": RandomForestRegressor(n_estimators=100, random_state=42),
}


def _fit_and_score(estimator):
    """Fit *estimator* on the training split and return its MAE on the test split."""
    estimator.fit(X_train, y_train)
    predictions = estimator.predict(X_test)
    return mean_absolute_error(y_test, predictions)


# Label -> mean absolute error on the held-out split, in the order the
# candidates are listed above.
model_performance = {
    label: _fit_and_score(estimator) for label, estimator in models.items()
}

st.write("Model Performance (Lower MAE is better)", model_performance)


## Save Best Model
Saving the best-performing model (the one with the lowest MAE) for later use.


In [None]:
import pickle
from pathlib import Path

# Select the label with the smallest MAE recorded in model_performance and
# look up the corresponding fitted estimator.
best_model = min(model_performance, key=model_performance.get)
final_model = models[best_model]

# open(..., "wb") does not create missing parent directories, so make sure
# the output folder exists; otherwise the save fails with FileNotFoundError
# after all the training work has already been done.
model_path = Path("../models/trained_model.pkl")
model_path.parent.mkdir(parents=True, exist_ok=True)

# Serialize the chosen estimator so it can be reloaded later.
with open(model_path, "wb") as f:
    pickle.dump(final_model, f)

st.write(f"Best Model '{best_model}' Saved Successfully!")
