In [None]:
# =========================
# 1. Imports
# =========================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import gradio as gr

from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error

# =========================
# 2. Load Dataset
# =========================
# Pull the California housing data as a pandas-backed bunch; its `frame`
# attribute carries the feature columns together with the target column.
data = fetch_california_housing(as_frame=True)
df = data.frame

# Every column other than the target is treated as a model input.
TARGET = "MedHouseVal"
FEATURES = df.columns.drop(TARGET)

X, y = df[FEATURES], df[TARGET]

# =========================
# 3. Train / Test Split
# =========================
# Hold out a quarter of the rows for evaluation; the fixed seed keeps the
# partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# =========================
# 4. Baseline Linear Model
# =========================
# Plain linear regression acts as the reference point for the random forest.
# `fit` returns the estimator itself, so construction and training chain.
lr = LinearRegression().fit(X_train, y_train)

# Score the baseline on the held-out rows.
y_pred_lr = lr.predict(X_test)
rmse_lr = np.sqrt(mean_squared_error(y_test, y_pred_lr))
r2_lr = r2_score(y_test, y_pred_lr)

# =========================
# 5. Random Forest Model
# =========================
# 200 depth-capped trees trained on all available cores (n_jobs=-1); the
# seed makes the fitted forest reproducible.
rf = RandomForestRegressor(
    n_estimators=200, max_depth=20, random_state=42, n_jobs=-1
).fit(X_train, y_train)

# Same held-out evaluation as the linear baseline, for a like-for-like
# comparison of the two models.
y_pred_rf = rf.predict(X_test)
rmse_rf = np.sqrt(mean_squared_error(y_test, y_pred_rf))
r2_rf = r2_score(y_test, y_pred_rf)

# =========================
# 6. Feature Importance (CLEANED)
# =========================
# Importances reported by the fitted forest, labelled by feature name and
# ordered from most to least important.
feature_importance = pd.Series(
    rf.feature_importances_, index=FEATURES
).sort_values(ascending=False)

# Two-column table form of the same ranking, as consumed by the bar plot.
fi_df = pd.DataFrame(
    {
        "feature": feature_importance.index,
        "importance": feature_importance.values,
    }
)

# =========================
# 7. Prediction Function
# =========================
def predict_price(*inputs):
    """Predict a house price in US dollars from one set of feature values.

    Args:
        *inputs: One numeric value per entry of ``FEATURES``, in the same
            order as ``FEATURES``.

    Returns:
        float: The random-forest prediction multiplied by 100000 and rounded
        to two decimals.

    Raises:
        ValueError: If any value is missing (``None``), or if the number of
            values does not match the number of feature columns.
    """
    # An empty numeric field reaches the handler as None; fail early with a
    # readable message instead of an opaque error from inside the model.
    if any(value is None for value in inputs):
        raise ValueError("All feature values must be provided.")

    # The forest was fitted on a DataFrame, so predict with the same column
    # names; a bare ndarray makes scikit-learn warn about missing feature
    # names on every call.
    row = pd.DataFrame([inputs], columns=FEATURES, dtype=float)
    pred = rf.predict(row)[0]

    # Plain float (not a NumPy scalar) so the result serializes cleanly.
    return round(float(pred) * 100000, 2)  # convert to USD

# =========================
# 8. Visualization Functions
# =========================
def price_distribution():
    """Build a histogram with a KDE overlay of the target scaled to dollars.

    Returns:
        The matplotlib figure holding the plot.
    """
    fig, ax = plt.subplots()
    sns.histplot(df[TARGET] * 100000, bins=30, kde=True, ax=ax)
    ax.set_title("House Price Distribution")
    ax.set_xlabel("Price ($)")
    # Deregister the figure from pyplot so repeated calls do not pile up open
    # figures; the returned Figure object can still be rendered by the caller.
    plt.close(fig)
    return fig

def correlation_heatmap():
    """Build a heatmap of the pairwise correlations between all columns.

    Returns:
        The matplotlib figure holding the plot.
    """
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(df.corr(), cmap="coolwarm", ax=ax)
    ax.set_title("Correlation Heatmap")
    # Deregister the figure from pyplot so repeated calls do not pile up open
    # figures; the returned Figure object can still be rendered by the caller.
    plt.close(fig)
    return fig

def geo_price_map():
    """Build a longitude/latitude scatter plot coloured by the target value.

    Returns:
        The matplotlib figure holding the plot.
    """
    fig, ax = plt.subplots(figsize=(8, 6))
    sc = ax.scatter(
        df["Longitude"],
        df["Latitude"],
        c=df[TARGET],
        cmap="viridis",
        s=10
    )
    # Attach the colorbar through the figure rather than the pyplot global,
    # and use a real multiplication sign (the label was mis-encoded).
    fig.colorbar(sc, ax=ax, label="Median Price (×$100k)")
    ax.set_title("California Housing Prices (Geo)")
    ax.set_xlabel("Longitude")
    ax.set_ylabel("Latitude")
    # Deregister the figure from pyplot so repeated calls do not pile up open
    # figures; the returned Figure object can still be rendered by the caller.
    plt.close(fig)
    return fig

def feature_importance_plot():
    """Build a horizontal bar chart of the random-forest feature importances.

    Returns:
        The matplotlib figure holding the plot.
    """
    fig, ax = plt.subplots()
    sns.barplot(data=fi_df, x="importance", y="feature", ax=ax)
    ax.set_title("Feature Importance (Random Forest)")
    # Deregister the figure from pyplot so repeated calls do not pile up open
    # figures; the returned Figure object can still be rendered by the caller.
    plt.close(fig)
    return fig

def render_plot(choice):
    """Build the figure for the visualization named by ``choice``.

    Args:
        choice: One of the chart names offered by the visualization dropdown.

    Returns:
        Whatever the matching plotting function returns.

    Raises:
        KeyError: If ``choice`` is not one of the known chart names.
    """
    plotters = {
        "Price Distribution": price_distribution,
        "Correlation Heatmap": correlation_heatmap,
        "Geo Price Map": geo_price_map,
        "Feature Importance": feature_importance_plot,
    }
    make_figure = plotters[choice]
    return make_figure()

# =========================
# 9. Gradio App
# =========================
# Three-tab UI: single-row prediction, exploratory charts, and the held-out
# metrics of both models. Emoji and the superscript in "R²" are written as
# real Unicode characters (they were previously mis-encoded).
with gr.Blocks(title="🏠 House Price Prediction") as demo:
    gr.Markdown("""
    <div style="text-align:center">
        <h1>🏠 California House Price Prediction</h1>
        <p>ML-powered price estimation with visual insights</p>
    </div>
    """)

    # ---- Prediction Tab ----
    with gr.Tab("🔮 Predict Price"):
        # Pre-fill every field with the dataset median so the form is valid
        # as soon as it loads; an empty gr.Number is passed to the handler
        # as None.
        inputs = [
            gr.Number(label=col, value=round(float(X[col].median()), 4))
            for col in FEATURES
        ]
        output = gr.Number(label="Predicted Price ($)")
        gr.Button("Predict").click(
            fn=predict_price,
            inputs=inputs,
            outputs=output
        )

    # ---- Visual Insights Tab ----
    with gr.Tab("📊 Visual Insights"):
        chart = gr.Dropdown(
            choices=[
                "Price Distribution",
                "Correlation Heatmap",
                "Geo Price Map",
                "Feature Importance"
            ],
            value="Price Distribution",
            label="Select Visualization"
        )
        plot_output = gr.Plot()
        chart.change(render_plot, chart, plot_output)

    # ---- Model Metrics Tab ----
    with gr.Tab("📈 Model Metrics"):
        gr.Markdown(f"""
        **Linear Regression**
        - R²: `{r2_lr:.3f}`
        - RMSE: `{rmse_lr:.2f}`

        **Random Forest**
        - R²: `{r2_rf:.3f}`
        - RMSE: `{rmse_rf:.2f}`
        """)

    # `change` only fires when the selection changes, so the preselected
    # chart would otherwise never be drawn; render it once on page load.
    demo.load(render_plot, chart, plot_output)

# =========================
# 10. Launch
# =========================
# Start the Gradio app only when this file is executed as the main module.
if __name__ == "__main__":
    demo.launch()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://533fc2420e19086e3d.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
