In [1]:
# Import Libraries
import numpy as np
import pandas as pd
import joblib
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [2]:
# Fetch the California housing data and assemble a single frame:
# one column per feature, plus the regression target stored as 'Price'.
data = fetch_california_housing()
df = pd.DataFrame(data.data, columns=data.feature_names).assign(Price=data.target)

In [3]:
# Handle missing values (if any) by filling each column with its own median.
# The result is reassigned rather than mutated with `inplace=True`, so the cell
# stays idempotent and no other reference to the frame is changed behind its back.
# NOTE(review): the medians are computed on the full frame, i.e. before the
# train/test split below. If the data ever does contain gaps, impute after the
# split (fit on train only) to avoid leaking test-set statistics.
df = df.fillna(df.median())

In [4]:
# Separate the target column from the predictors, then hold out 20% of the
# rows as a test set. The fixed random_state makes the split reproducible.
y = df['Price']
X = df.drop(columns=['Price'])
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Feature scaling: the scaler is fitted on the training split only, and the
# fitted transformation is then applied to both splits, so nothing is learned
# from the test rows.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [6]:
# Train a RandomForest model with hyperparameter tuning.
# The grid has 3 x 3 = 9 candidates; with cv=5 that is 45 model fits, scored by
# negative mean squared error (higher is better).
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [10, 20, None],
}
rf = RandomForestRegressor(random_state=42)
grid_search = GridSearchCV(
    rf,
    param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
    n_jobs=-1,  # run the independent fits on all cores; results are unchanged because random_state is fixed
)
grid_search.fit(X_train_scaled, y_train)

In [7]:
# Best Model
# Estimator chosen by the grid search above; it is the object evaluated on the
# test split and persisted to disk in the cells that follow.
best_model = grid_search.best_estimator_

In [8]:
# Model evaluation on the held-out test split.
y_pred = best_model.predict(X_test_scaled)

# RMSE is computed explicitly as sqrt(MSE). The `squared=False` argument of
# mean_squared_error was deprecated in scikit-learn 1.4 and removed in 1.6, so
# relying on it makes this cell fail on current releases.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print(f"RMSE: {rmse}")
print(f"MAE: {mean_absolute_error(y_test, y_pred)}")
print(f"R² Score: {r2_score(y_test, y_pred)}")

RMSE: 0.5044841750221551
MAE: 0.3271167686567187
R² Score: 0.8057825556190614


In [9]:
# Persist both artifacts needed at inference time: the tuned model and the
# scaler that was fitted on the training features. They are written to the
# current working directory.
joblib.dump(value=best_model, filename='house_price_model.pkl')
joblib.dump(value=scaler, filename='scaler.pkl')

['scaler.pkl']

In [10]:
from fastapi import FastAPI
import joblib
import numpy as np
from pydantic import BaseModel

# Restore the trained model and its matching scaler from disk.
# joblib.load unpickles the file contents, so only load artifacts from a
# trusted source (here: the files written by the training cells above).
model = joblib.load(filename="house_price_model.pkl")
scaler = joblib.load(filename="scaler.pkl")

In [11]:
# Initialize FastAPI
# `app` is the application object on which the route decorators below
# (@app.get, @app.post) register their handlers.
app = FastAPI()

In [12]:
# Define request body schema
# Request body schema for POST /predict: eight float fields describing one record.
# predict_price reads these fields in exactly this order to build the model's
# input row, so the declaration order here should stay aligned with it.
# NOTE(review): the names presumably mirror `data.feature_names` of the training
# dataset — confirm before renaming or reordering anything.
class HouseFeatures(BaseModel):
    MedInc: float
    HouseAge: float
    AveRooms: float
    AveBedrms: float
    Population: float
    AveOccup: float
    Latitude: float
    Longitude: float

@app.get("/")
def home():
    """Root endpoint: return a static message confirming the API is running."""
    status = {"message": "House Price Prediction API is running"}
    return status

@app.post("/predict")
def predict_price(features: HouseFeatures):
    """Predict the target value for a single record.

    Args:
        features: Validated request body carrying the eight model inputs.

    Returns:
        A dict with one key, ``"predicted_price"``, holding the model's
        prediction as a plain Python float (JSON-serialisable).
    """
    # Build a one-row DataFrame with named columns instead of a bare ndarray.
    # The scaler was fitted on a DataFrame, so scikit-learn warns on every call
    # when it receives unnamed input; named columns silence that warning and let
    # scikit-learn verify that the columns arrive in the training order.
    row = {
        "MedInc": features.MedInc,
        "HouseAge": features.HouseAge,
        "AveRooms": features.AveRooms,
        "AveBedrms": features.AveBedrms,
        "Population": features.Population,
        "AveOccup": features.AveOccup,
        "Latitude": features.Latitude,
        "Longitude": features.Longitude,
    }
    data = pd.DataFrame([row])

    # Apply the same scaling that was used at training time
    data_scaled = scaler.transform(data)

    # The model returns an array with one prediction per input row; take the only one
    prediction = model.predict(data_scaled)
    return {"predicted_price": float(prediction[0])}