In [9]:
import gradio as gr
import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt

# Artefact locations; relative paths are resolved against the working directory.
MODEL_PATH = 'xgb_model_log.pkl'
DATASET_PATH = 'dublin_aggregated_df.csv'

# Deserialise the fitted estimator once, at import time, so every request reuses it.
# NOTE(review): joblib.load unpickles the file -- only load artefacts you trust.
saved_model = joblib.load(MODEL_PATH)

# Frequency-encoded listings table used for feature lookup and price comparison.
# Must include 'neighbourhood', 'neighbourhood_freq', 'property_type',
# 'property_type_freq', and 'price'.
dublin_aggregated_df = pd.read_csv(DATASET_PATH)

# Prediction function
def predict_and_plot(
    host_id,
    host_response_rate,
    host_is_superhost,
    host_listings_count,
    accommodates,
    bathrooms,
    bedrooms,
    beds,
    avg_rating,
    number_of_reviews,
    neighbourhood,
    property_type
):
    """Predict a listing price and chart it against dataset averages.

    The neighbourhood and property type are frequency-encoded by looking them
    up in ``dublin_aggregated_df``; values that are not found fall back to the
    dataset-wide mean frequency. The assembled single-row frame is passed to
    ``saved_model.predict`` and the result is exponentiated to undo the log
    transform.

    Args:
        host_id: Host identifier; converted with ``int()``.
        host_response_rate: Numeric response rate.
        host_is_superhost: ``"yes"`` (any case) marks a superhost; anything
            else, including ``None``, is encoded as 0.
        host_listings_count, accommodates, bathrooms, bedrooms, beds,
        avg_rating, number_of_reviews: Numeric features. ``None`` (an empty
            input field) is passed to the model as NaN.
        neighbourhood: Neighbourhood name to look up in the dataset.
        property_type: Property type to look up in the dataset.

    Returns:
        A ``(text, figure)`` tuple. On success ``text`` is the formatted
        price and ``figure`` is a matplotlib bar chart. On any failure
        ``text`` is ``"Error: <message>"`` and ``figure`` is ``None``.
    """
    try:
        # Free-text boxes easily pick up stray whitespace, which would make the
        # exact-match lookups below silently miss.
        neighbourhood = '' if neighbourhood is None else str(neighbourhood).strip()
        property_type = '' if property_type is None else str(property_type).strip()

        # Filter once; the same rows feed both the frequency and the price lookups.
        neigh_rows = dublin_aggregated_df[
            dublin_aggregated_df['neighbourhood'] == neighbourhood
        ]
        type_rows = dublin_aggregated_df[
            dublin_aggregated_df['property_type'] == property_type
        ]

        # Frequency encode with fallback if value is not found
        # (the mean of an empty selection is NaN).
        neighbourhood_freq = neigh_rows['neighbourhood_freq'].mean()
        if pd.isna(neighbourhood_freq):
            neighbourhood_freq = dublin_aggregated_df['neighbourhood_freq'].mean()

        property_type_freq = type_rows['property_type_freq'].mean()
        if pd.isna(property_type_freq):
            property_type_freq = dublin_aggregated_df['property_type_freq'].mean()

        # An unselected radio button arrives as None; treat it as "no".
        is_superhost = str(host_is_superhost or '').strip().lower() == 'yes'

        # Key order is the column order handed to the model -- keep it stable.
        features = {
            'host_id': int(host_id),
            'host_response_rate': host_response_rate,
            'host_is_superhost': 1 if is_superhost else 0,
            'host_listings_count': host_listings_count,
            'accommodates': accommodates,
            'bathrooms': bathrooms,
            'bedrooms': bedrooms,
            'beds': beds,
            'avg_rating': avg_rating,
            'number_of_reviews': number_of_reviews,
            'neighbourhood_freq': neighbourhood_freq,
            'property_type_freq': property_type_freq,
        }
        # Empty numeric fields arrive as None, which would yield an object-dtype
        # column; NaN keeps the column numeric.
        property_details = pd.DataFrame({
            column: [np.nan if value is None else value]
            for column, value in features.items()
        })

        # Predict log price and transform back
        # NOTE(review): assumes the model target was np.log(price) -- confirm.
        log_price = saved_model.predict(property_details)
        predicted_price = np.exp(log_price[0])

        # Compare with averages (if price column exists)
        if 'price' in dublin_aggregated_df.columns:
            avg_neigh_price = neigh_rows['price'].mean()
            avg_type_price = type_rows['price'].mean()
        else:
            avg_neigh_price = np.nan
            avg_type_price = np.nan

        # Bar chart: the prediction first, then whichever averages are available.
        labels = ['Predicted Price']
        values = [predicted_price]
        colors = ['blue']

        if not pd.isna(avg_neigh_price):
            labels.append(f'Avg {neighbourhood}')
            values.append(avg_neigh_price)
            colors.append('green')

        if not pd.isna(avg_type_price):
            labels.append(f'Avg {property_type}')
            values.append(avg_type_price)
            colors.append('orange')

        fig, ax = plt.subplots()
        try:
            ax.bar(labels, values, color=colors)
            # NOTE(review): Dublin listings are normally priced in EUR --
            # confirm the currency used in the 'price' column.
            ax.set_ylabel('Price in GBP')
            ax.set_title('Predicted vs Average Prices')
            # Rotate via the Axes rather than plt.xticks(): pyplot's "current
            # axes" is global state and may belong to a concurrent request.
            ax.tick_params(axis='x', labelrotation=15)
        finally:
            # Deregister the figure from pyplot so repeated calls do not
            # accumulate open figures; the Figure object itself stays usable.
            plt.close(fig)

        return f"GBP {predicted_price:.2f}", fig

    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing the app.
        return f"Error: {str(e)}", None


# Gradio UI
# Input widgets, listed in the same order as predict_and_plot's parameters
# (Gradio passes their values to the function positionally).
input_components = [
    gr.Textbox(label="Host ID"),
    gr.Number(label="Host Response Rate (e.g., 0.95)"),
    gr.Radio(choices=["yes", "no"], label="Is Superhost?"),
    gr.Number(label="Number of Host Listings", precision=0),
    gr.Number(label="Accommodates", precision=0),
    gr.Number(label="Bathrooms"),
    gr.Number(label="Bedrooms"),
    gr.Number(label="Beds"),
    gr.Number(label="Average Rating"),
    gr.Number(label="Number of Reviews", precision=0),
    gr.Textbox(label="Neighbourhood"),
    gr.Textbox(label="Property Type"),
]

# Output widgets matching the (text, figure) pair returned by predict_and_plot.
output_components = [
    gr.Text(label="Predicted Price"),
    gr.Plot(label="Comparison Chart"),
]

interface = gr.Interface(
    fn=predict_and_plot,
    inputs=input_components,
    outputs=output_components,
    title="Dublin Airbnb Price Estimator",
    description="This tool uses an XGBoost model to predict Airbnb prices in Dublin based on your inputs.",
)

# Start the web app.
interface.launch()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://7b56b31eaf853bea72.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


