<a href="https://colab.research.google.com/github/Shreyansh-023/House-price-predicting-model/blob/master/AI_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics import mean_absolute_error
import gradio as gr
import re
from tensorflow.keras.callbacks import EarlyStopping


# Registry of supported cities, keyed by display name. Each entry starts with
# just the CSV path under "file"; the load/train loop further down stores the
# cleaned frame, encoder, models and scaler in the same per-city dict.
city_data = {
    city_name: {"file": csv_name}
    for city_name, csv_name in (
        ("Bengaluru", "bengaluru_converted.csv"),
        ("Ahmedabad", "ahmedabad_converted.csv"),
        ("Mumbai", "mumbai_converted.csv"),
        ("Delhi", "delhi_converted.csv"),
    )
}


def preprocess_data(df, city):
    """Normalise one city's raw listing table into the shared model schema.

    Each supported city ships its CSV with different column names. This
    function maps them onto the common columns ``location``, ``total_sqft``,
    ``bath``, ``balcony`` and ``bhk``, drops rows that lack any of
    ``total_sqft``, ``bath``, ``bhk`` or ``Price_Lakhs``, and label-encodes
    ``location``.

    Args:
        df: Raw DataFrame as read from the city's CSV. It is not modified;
            all work happens on a copy.
        city: One of "Bengaluru", "Mumbai", "Delhi" or "Ahmedabad". Any other
            value skips the renaming step, so the frame must then already
            carry the common columns.

    Returns:
        A tuple ``(df, le, location_mapping)``: the cleaned frame with
        ``location`` replaced by integer codes, the fitted ``LabelEncoder``,
        and a dict mapping each original location name to its code.

    Raises:
        KeyError: If a column expected for ``city`` (or one of the common
            columns) is missing from ``df``.
    """
    model_columns = ['location', 'total_sqft', 'bath', 'balcony', 'bhk', 'Price_Lakhs']
    # Compiled once per call instead of being searched twice for every row.
    bhk_pattern = re.compile(r'(\d+)\s*BHK')

    def convert_sqft_to_number(x):
        # Accepts plain numbers and "low-high" range strings (averaged);
        # anything unparseable becomes None so the row is dropped later.
        try:
            if isinstance(x, str):
                tokens = x.split('-')
                if len(tokens) == 2:
                    return (float(tokens[0]) + float(tokens[1])) / 2
                return float(x)
            return float(x) if pd.notna(x) else None
        except (ValueError, TypeError):
            return None

    def bhk_from_size(size):
        # Takes the leading integer of strings such as "3 BHK".
        if isinstance(size, str):
            first_token = size.split(' ')[0]
            if first_token.isdigit():
                return int(first_token)
        return None

    def bhk_from_title(title):
        # Pulls the number that precedes "BHK" out of a free-text title.
        match = bhk_pattern.search(str(title))
        return int(match.group(1)) if match else None

    def keep_model_columns(frame):
        # Discards every column that is not part of the shared schema.
        extra = [col for col in frame.columns if col not in model_columns]
        return frame.drop(columns=extra, errors='ignore')

    # The branches below assign new columns; copying first keeps the caller's
    # frame untouched for every city, not only for Bengaluru.
    df = df.copy()

    if city == "Bengaluru":
        df = df.drop(columns=['area_type', 'availability', 'society'], errors='ignore')
        df['location'] = df['location'].fillna('Unknown')
        df['total_sqft'] = df['total_sqft'].apply(convert_sqft_to_number)
        df['bath'] = pd.to_numeric(df['bath'], errors='coerce')
        df['balcony'] = pd.to_numeric(df['balcony'], errors='coerce').fillna(0)
        df['bhk'] = df['size'].apply(bhk_from_size)
        df = df.drop(columns=['size'], errors='ignore')
    elif city == "Mumbai":
        df['location'] = df['locality'].fillna('Unknown')
        df['total_sqft'] = df['area'].apply(convert_sqft_to_number)
        df['bath'] = pd.to_numeric(df['bathroom_num'], errors='coerce')
        df['balcony'] = pd.to_numeric(df['balcony_num'], errors='coerce').fillna(0)
        df['bhk'] = pd.to_numeric(df['bedroom_num'], errors='coerce')
        df = keep_model_columns(df)
    elif city == "Delhi":
        df['location'] = df['Address'].fillna('Unknown')
        df['total_sqft'] = df['area'].apply(convert_sqft_to_number)
        df['bath'] = pd.to_numeric(df['Bathrooms'], errors='coerce')
        df['balcony'] = pd.to_numeric(df['Balcony'], errors='coerce').fillna(0)
        df['bhk'] = pd.to_numeric(df['Bedrooms'], errors='coerce')
        df = keep_model_columns(df)
    elif city == "Ahmedabad":
        df['location'] = df['Location'].fillna('Unknown')
        df['total_sqft'] = df['Total_Area'].apply(convert_sqft_to_number)
        df['bath'] = pd.to_numeric(df['Baths'], errors='coerce')
        # Balcony is a Yes/No flag here; anything else counts as no balcony.
        df['balcony'] = df['Balcony'].map({'Yes': 1, 'No': 0}).fillna(0)
        df['bhk'] = df['Property Title'].apply(bhk_from_title)
        df = keep_model_columns(df)

    # Missing balcony values were already filled with 0 above, so balcony is
    # deliberately not part of the required subset.
    df = df.dropna(subset=['total_sqft', 'bath', 'bhk', 'Price_Lakhs'])
    le = LabelEncoder()
    df['location'] = le.fit_transform(df['location'])
    location_mapping = dict(zip(le.classes_, le.transform(le.classes_)))

    return df, le, location_mapping


def train_models(X, y):
    """Train a dense network and a Conv1D network on the same split.

    The data is split 80/20 with a fixed seed, features are min-max scaled
    using statistics from the training part only, and both models are fitted
    for at most 20 epochs with early stopping on validation loss.

    Args:
        X: Feature table.
        y: Target values aligned with ``X``.

    Returns:
        ``(ff_model, cnn_model, scaler, X_test_scaled, y_test)`` - the two
        fitted models, the fitted scaler, and the scaled held-out features
        with their targets.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    scaler = MinMaxScaler()
    train_features = scaler.fit_transform(X_train)
    test_features = scaler.transform(X_test)
    n_features = train_features.shape[1]

    stopper = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    # Training settings shared by both models.
    fit_settings = {"epochs": 20, "batch_size": 32, "verbose": 0, "validation_split": 0.2}

    # Feedforward model: two hidden layers and a single linear output.
    ff_layers = [
        keras.layers.Input(shape=(n_features,)),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(32, activation='relu'),
        keras.layers.Dense(1),
    ]
    ff_model = keras.Sequential(ff_layers)
    ff_model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    ff_model.fit(train_features, y_train, callbacks=[stopper], **fit_settings)

    # Convolutional model: each row is fed as a length-n_features sequence
    # with one channel, hence the trailing axis of size 1.
    cnn_input = tf.keras.Input(shape=(n_features, 1))
    hidden = tf.keras.layers.Conv1D(32, kernel_size=2, activation='relu')(cnn_input)
    hidden = tf.keras.layers.Flatten()(hidden)
    hidden = tf.keras.layers.Dense(16, activation='relu')(hidden)
    cnn_output = tf.keras.layers.Dense(1)(hidden)
    cnn_model = tf.keras.Model(inputs=cnn_input, outputs=cnn_output)
    cnn_model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    cnn_train = train_features.reshape(-1, n_features, 1)
    cnn_model.fit(cnn_train, y_train, callbacks=[stopper], **fit_settings)

    return ff_model, cnn_model, scaler, test_features, y_test


# Load, clean and train for every registered city. A failure in any step marks
# only that city as unusable (its "df" entry becomes None); the remaining
# cities are still processed.
all_cities_valid = True
for city, entry in city_data.items():
    print(f"Processing {city}...")
    try:
        df = pd.read_csv(entry["file"])
        df, le, location_mapping = preprocess_data(df, city)
        entry.update(df=df, le=le, location_mapping=location_mapping)
        print(f"{city} data loaded with {len(df)} rows.")

        feature_columns = ['location', 'total_sqft', 'bath', 'balcony', 'bhk']
        X = df[feature_columns]
        y = df['Price_Lakhs']

        print(f"Training models for {city}...")
        ff_model, cnn_model, scaler, X_test_scaled, y_test = train_models(X, y)
        entry.update(
            ff_model=ff_model,
            cnn_model=cnn_model,
            scaler=scaler,
            X_test_scaled=X_test_scaled,
            y_test=y_test,
        )
        print(f"{city} models trained successfully.")
    except FileNotFoundError:
        # A missing CSV gets its own, friendlier message.
        print(f"Dataset for {city} not found.")
        entry["df"] = None
        all_cities_valid = False
    except Exception as e:
        # Top-level boundary: report the problem and carry on with the next city.
        print(f"Error processing {city}: {str(e)}")
        entry["df"] = None
        all_cities_valid = False

if not all_cities_valid:
    print("Warning: Some cities failed to load. Interface may be limited to successful cities.")


def predict_models(city, location, sqft, bath, balcony, bhk):
    """Predict a price with both trained models for one described property.

    Args:
        city: City name; must be a key of ``city_data`` whose data loaded.
        location: Location name; must be a key of that city's
            ``location_mapping``.
        sqft: Total area in square feet.
        bath: Number of bathrooms.
        balcony: Number of balconies.
        bhk: Number of bedrooms (BHK).

    Returns:
        A pair of display strings ``(feedforward, cnn)``. On success each is
        formatted as a price in lakhs; otherwise both carry the same message:
        "Invalid City", "Invalid Location" or "Invalid Input".
    """
    # .get() also covers an entry that was never populated with a "df" key.
    entry = city_data.get(city)
    if entry is None or entry.get("df") is None:
        return "Invalid City", "Invalid City"

    location_mapping = entry["location_mapping"]
    if location not in location_mapping:
        return "Invalid Location", "Invalid Location"

    # An emptied numeric field arrives as None; report it instead of letting
    # the scaler raise on it.
    if any(value is None for value in (sqft, bath, balcony, bhk)):
        return "Invalid Input", "Invalid Input"

    # Built as a DataFrame with the training column names so the scaler
    # receives the same feature layout it was fitted on.
    input_data = pd.DataFrame(
        [[location_mapping[location], sqft, bath, balcony, bhk]],
        columns=['location', 'total_sqft', 'bath', 'balcony', 'bhk'],
    )
    input_scaled = entry["scaler"].transform(input_data)

    ff_price = entry["ff_model"].predict(input_scaled, verbose=0)[0][0]
    # The CNN expects a trailing channel axis of size 1.
    cnn_input = input_scaled.reshape(-1, input_scaled.shape[1], 1)
    cnn_price = entry["cnn_model"].predict(cnn_input, verbose=0)[0][0]
    return f"₹{ff_price:.2f} Lakhs", f"₹{cnn_price:.2f} Lakhs"


def check_accuracy(city):
    """Report each model's mean absolute error on the city's held-out split.

    Args:
        city: City name; must be a key of ``city_data`` whose data loaded.

    Returns:
        A pair of display strings ``(feedforward, cnn)``. Each states the
        model's mean absolute error, or "Invalid City" when the city is
        unknown or failed to load.
    """
    # .get() also covers an entry that was never populated with a "df" key.
    entry = city_data.get(city)
    if entry is None or entry.get("df") is None:
        return "Invalid City", "Invalid City"

    X_test_scaled = entry["X_test_scaled"]
    y_test = entry["y_test"]

    ff_pred = entry["ff_model"].predict(X_test_scaled, verbose=0)
    # The CNN expects a trailing channel axis of size 1.
    cnn_pred = entry["cnn_model"].predict(
        X_test_scaled.reshape(-1, X_test_scaled.shape[1], 1), verbose=0
    )

    ff_mae = mean_absolute_error(y_test, ff_pred)
    cnn_mae = mean_absolute_error(y_test, cnn_pred)
    # The figure is an error in the target's unit (lakhs), not an accuracy
    # score, so the message names the metric and its direction explicitly.
    return (
        f"Feedforward MAE: {ff_mae:.2f} Lakhs (lower is better)",
        f"CNN MAE: {cnn_mae:.2f} Lakhs (lower is better)",
    )


def update_locations(city):
    """Return the location names to offer in the dropdown for ``city``.

    Returns ``[""]`` when the city is unknown or its data failed to load, and
    ``["No locations available"]`` when the city has no locations, so the
    result is never an empty list.
    """
    city_is_usable = city in city_data and city_data[city]["df"] is not None
    if not city_is_usable:
        return [""]

    names = list(city_data[city]["location_mapping"])
    if not names:
        return ["No locations available"]
    return names


# Custom stylesheet for the app. It is plain CSS handed to gr.Blocks(css=...);
# it must not be wrapped in HTML <style> tags.
APP_CSS = """
    body, .gradio-container {
        background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%);
        min-height: 100vh;
        padding: 20px;
        font-family: 'Inter', sans-serif;
    }

    h1 {
        font-size: 2.5rem;
        font-weight: bold;
        margin-bottom: 0.5rem;
        text-align: center;
        color: #f6e05e;
    }

    h3 {
        font-size: 1.25rem;
        text-align: center;
        color: #e2e8f0;
        margin-bottom: 1.5rem;
    }

    .input-group {
        background-color: #2d3748;
        color: white;
        padding: 1.5rem;
        border-radius: 1rem;
        box-shadow: 0 4px 10px rgba(0, 0, 0, 0.2);
        margin-bottom: 1.5rem;
    }

    .gr-button {
        background-color: #f6e05e;
        color: #1a202c;
        font-weight: 600;
        padding: 0.75rem 1.5rem;
        border-radius: 0.5rem;
        transition: all 0.2s ease-in-out;
    }

    .gr-button:hover {
        background-color: #ecc94b;
        transform: scale(1.05);
    }

    .gr-dropdown, .gr-slider, .gr-number, .gr-textbox {
        background-color: #4a5568;
        color: white;
        border: none;
        padding: 0.5rem;
        border-radius: 0.5rem;
        font-weight: 500;
    }

    .gr-slider .range-label, .gr-number input, .gr-dropdown select {
        color: white;
    }
"""

# Build the two-panel UI: property inputs and predictions on top, the
# held-out error check below.
with gr.Blocks(theme=gr.themes.Monochrome(), title="India House Price Prediction", css=APP_CSS) as iface:
    gr.Markdown("""
    <h1>🏡 India House Price Prediction</h1>
    <h3>Compare predictions from Feedforward and CNN models across cities</h3>
    """)

    # Pre-select the first location of the default city, mirroring what the
    # city-change handler does, so "Predict" works without an extra click.
    default_city = "Bengaluru"
    default_locations = update_locations(default_city)

    with gr.Row():
        with gr.Column(scale=1, elem_classes=["input-group"]):
            city_input = gr.Dropdown(choices=list(city_data.keys()), label="City", value=default_city)
            location_input = gr.Dropdown(choices=default_locations, label="Location",
                                         value=default_locations[0] if default_locations else None)
            sqft_input = gr.Number(label="Total Square Feet", value=1000)
            bath_input = gr.Slider(1, 5, step=1, label="Bathrooms", value=2)
            balcony_input = gr.Slider(0, 3, step=1, label="Balcony", value=1)
            bhk_input = gr.Slider(1, 5, step=1, label="BHK", value=2)
            submit_btn = gr.Button("Predict", variant="primary")

        with gr.Column(scale=1, elem_classes=["input-group"]):
            ff_output = gr.Textbox(label="Feedforward Prediction", interactive=False)
            cnn_output = gr.Textbox(label="CNN Prediction", interactive=False)

    with gr.Row():
        with gr.Column(scale=1, elem_classes=["input-group"]):
            acc_btn = gr.Button("Check Accuracy", variant="secondary")
            ff_acc = gr.Textbox(label="Feedforward Accuracy", interactive=False)
            cnn_acc = gr.Textbox(label="CNN Accuracy", interactive=False)

    def update_location_dropdown(city):
        """Refresh the location choices for ``city`` and select the first one."""
        locs = update_locations(city)
        return gr.update(choices=locs, value=locs[0] if locs else None)

    # Changing the city repopulates the location dropdown.
    city_input.change(fn=update_location_dropdown, inputs=city_input, outputs=location_input)

    submit_btn.click(fn=predict_models,
                     inputs=[city_input, location_input, sqft_input, bath_input, balcony_input, bhk_input],
                     outputs=[ff_output, cnn_output])
    acc_btn.click(fn=check_accuracy, inputs=[city_input], outputs=[ff_acc, cnn_acc])

# share=True requests a public link; debug=True keeps the cell running so
# errors and logs stay visible.
iface.launch(share=True, inline=False, debug=True)

Processing Bengaluru...
Bengaluru data loaded with 13201 rows.
Training models for Bengaluru...
Bengaluru models trained successfully.
Processing Ahmedabad...
Ahmedabad data loaded with 15156 rows.
Training models for Ahmedabad...
Ahmedabad models trained successfully.
Processing Mumbai...
Mumbai data loaded with 71938 rows.
Training models for Mumbai...
Mumbai models trained successfully.
Processing Delhi...
Delhi data loaded with 7738 rows.
Training models for Delhi...
Delhi models trained successfully.
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://97b2a09f8af3631c66.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
