# House Price Prediction - Model Development

## 1. Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib

## 2. Load Dataset
Load the Ames Housing dataset from OpenML, where it is published under the name `house_prices` (the "House Prices" dataset).

In [None]:
# Retrieve the "house_prices" dataset from OpenML in DataFrame form, keep the
# full frame (it contains the columns selected later, including SalePrice),
# and preview its first five rows as the cell output.
housing = fetch_openml("house_prices", as_frame=True)
df = housing.frame
df.head(5)

## 3. Data Preprocessing
We select six specific features from the dataset to use as model inputs.
Selected features:
1. `OverallQual`
2. `GrLivArea`
3. `GarageCars`
4. `FullBath`
5. `YearBuilt`
6. `TotalBsmtSF`

The target variable is `SalePrice`.

In [None]:
# Column to predict, and the six predictor columns used by the model.
target = 'SalePrice'
features = [
    'OverallQual',
    'GrLivArea',
    'GarageCars',
    'FullBath',
    'YearBuilt',
    'TotalBsmtSF',
]

# Work on an independent copy of the predictors so that later modifications
# to X do not write back into df.
X = df.loc[:, features].copy()
y = df[target]

# Report how many entries are missing in each selected feature.
print(X.isna().sum())

### Handling Missing Values
Impute missing values with each feature's median.

In [None]:
# Fill every missing entry with the median of its own column, then print the
# per-column missing counts again to confirm nothing is left unfilled.
# NOTE(review): the medians are computed over all rows, i.e. before any
# train/test split — confirm this is acceptable for the evaluation.
X = X.fillna(value=X.median())
print("Missing values after imputation:")
print(X.isna().sum())

### Feature Scaling and Train/Test Split

In [None]:
# Split into train and test sets BEFORE scaling. Fitting the scaler on the
# full dataset would let the held-out test rows influence the mean/variance
# used for standardisation (data leakage), which can bias the evaluation and
# bakes test-set statistics into the scaler that is saved later.
# The same test_size/random_state as before selects the same rows.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training partition only, then apply
# that single fitted transform to both partitions.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept for backward compatibility with any cell that still reads X_scaled:
# the full feature matrix transformed with the train-fitted scaler.
X_scaled = scaler.transform(X)

## 4. Model Training
Train a Random Forest regressor on the training set.

In [None]:
# Hyperparameters for the forest: 100 trees, with a fixed seed so repeated
# runs of this cell build the same model.
rf_params = {"n_estimators": 100, "random_state": 42}

# Build the regressor and fit it on the training partition. The fit call is
# left as the final expression so the notebook still displays the estimator.
model = RandomForestRegressor(**rf_params)
model.fit(X=X_train, y=y_train)

## 5. Evaluation

In [None]:
# Predict on the held-out test partition.
y_pred = model.predict(X_test)

# Error metrics comparing predictions with the true test targets.
# RMSE is the square root of the mean squared error.
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)

# Emit one metric per line: MAE, RMSE, then R2.
print(
    f"MAE: {mae:.2f}",
    f"RMSE: {rmse:.2f}",
    f"R2 Score: {r2:.4f}",
    sep="\n",
)

## 6. Save Model

In [None]:
# Persist the fitted model and the fitted scaler to the working directory,
# each under its own file name, then confirm on stdout.
for artifact, filename in (
    (model, 'house_price_model.pkl'),
    (scaler, 'scaler.pkl'),
):
    joblib.dump(artifact, filename)
print("Model and scaler saved successfully.")