<a href="https://colab.research.google.com/github/Hadia-git-sketch/Syntecxhub_HousePricePrediction/blob/main/house_price_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import joblib
import gradio as gr
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


# 1. DATA LOADING & CLEANING
# Small in-notebook sample of listings: one list per column, one position per house.
raw_data = {
    'area_sqft': [900, 1200, 1500, 1800, 2250, 2700, 3500, 4500, 5400, 1100],
    'bedrooms': [2, 2, 3, 3, 4, 4, 5, 5, 6, 2],
    'bathrooms': [1, 2, 2, 3, 3, 4, 4, 5, 5, 1],
    'age_years': [15, 10, 8, 5, 3, 2, 1, 0, 1, 12],
    'price_pkr': [
        8_500_000, 12_000_000, 15_500_000, 19_000_000, 25_000_000,
        31_000_000, 42_000_000, 55_000_000, 68_000_000, 10_500_000,
    ],
}

# Build the frame and discard any row that contains a missing value.
df = pd.DataFrame(raw_data).dropna()


# 2. FEATURE SELECTION & MODEL TRAINING
# Predictor columns, in the order the model is trained on; the price is the target.
feature_columns = ['area_sqft', 'bedrooms', 'bathrooms', 'age_years']
features = df[feature_columns]
target = df['price_pkr']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

# Ordinary least-squares fit on the training rows only.
house_model = LinearRegression().fit(x_train, y_train)


# 3. EVALUATION & INTERPRETATION
# Score the model on the held-out rows.
y_pred = house_model.predict(x_test)

# RMSE is the square root of the mean squared error, so it is in PKR like the target.
mse_val = mean_squared_error(y_test, y_pred)
rmse_val = np.sqrt(mse_val)
r2_val = r2_score(y_test, y_pred)

print("--- Project Evaluation Metrics ---")
print(f"RMSE: PKR {rmse_val:,.2f}")
print(f"R² Score: {r2_val:.4f}")

# One fitted coefficient per feature, labelled with the feature's column name.
impact_study = pd.DataFrame(
    {'PKR Impact': house_model.coef_},
    index=features.columns,
)
print("\n--- Feature Coefficients (Price change per unit) ---")
print(impact_study)

# Persist the fitted model so the prediction function can reload it from disk.
joblib.dump(house_model, 'pkr_house_predictor.pkl')


# 4. INTERACTIVE INTERFACE (Gradio)
def estimate_price(area, beds, baths, age):
    """Estimate a house price in PKR using the saved regression model.

    Args:
        area: Total area in square feet.
        beds: Number of bedrooms.
        baths: Number of bathrooms.
        age: Age of the house in years.

    Returns:
        The prediction formatted as a string such as ``"PKR 19,000,000"``.
        Negative model outputs are clamped to zero before formatting.

    Raises:
        ValueError: If any of the four inputs is ``None`` (for example a
            form field that was left empty).
    """
    values = (area, beds, baths, age)

    # Fail early with a clear message instead of an opaque conversion error
    # from inside the model; this also avoids touching the disk needlessly.
    if any(value is None for value in values):
        raise ValueError(
            "All four inputs (area, bedrooms, bathrooms, age) are required."
        )

    # Load the model that the training step saved to disk.
    predictor = joblib.load('pkr_house_predictor.pkl')

    # The model was fitted on a DataFrame with named columns. Predicting on a
    # frame with the same names (in the same order) avoids scikit-learn's
    # "X does not have valid feature names" warning and lets it verify that
    # the columns line up with what it was trained on.
    input_data = pd.DataFrame(
        [values],
        columns=['area_sqft', 'bedrooms', 'bathrooms', 'age_years'],
    )
    prediction = predictor.predict(input_data)[0]

    # A linear model can extrapolate below zero; never show a negative price.
    return f"PKR {max(0, prediction):,.0f}"



# (label, default value) for each numeric field, listed in the same order as
# the parameters of estimate_price(area, beds, baths, age).
_NUMBER_FIELDS = [
    ("Total Area (Sq Ft)", 1800),
    ("Number of Bedrooms", 3),
    ("Number of Bathrooms", 2),
    ("House Age (Years)", 5),
]

# Web form: four numeric inputs feeding estimate_price, one text box for the result.
app = gr.Interface(
    fn=estimate_price,
    inputs=[
        gr.Number(label=field_label, value=field_default)
        for field_label, field_default in _NUMBER_FIELDS
    ],
    outputs=gr.Textbox(label="Estimated Property Value"),
    title="Pakistan Real Estate Price Estimator",
    description="Professional tool to predict house prices in PKR using trained Linear Regression analysis.",
)

# Only start the server when this file is run directly, not when it is imported.
if __name__ == "__main__":
    app.launch()

--- Project Evaluation Metrics ---
RMSE: PKR 404,787.24
R² Score: 0.9998

--- Feature Coefficients (Price change per unit) ---
              PKR Impact
area_sqft   13223.959120
bedrooms   609707.748074
bathrooms  166798.821930
age_years  236992.146201
It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://17e40ce282f04546ec.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
