# ***House Price Prediction and Geospatial Analysis***

In [1]:
pip install dash dash_bootstrap_components


Collecting dash
  Downloading dash-2.18.1-py3-none-any.whl.metadata (10 kB)
Collecting dash_bootstrap_components
  Downloading dash_bootstrap_components-1.6.0-py3-none-any.whl.metadata (5.2 kB)
Collecting dash-html-components==2.0.0 (from dash)
  Downloading dash_html_components-2.0.0-py3-none-any.whl.metadata (3.8 kB)
Collecting dash-core-components==2.0.0 (from dash)
  Downloading dash_core_components-2.0.0-py3-none-any.whl.metadata (2.9 kB)
Collecting dash-table==5.0.0 (from dash)
  Downloading dash_table-5.0.0-py3-none-any.whl.metadata (2.4 kB)
Collecting retrying (from dash)
  Downloading retrying-1.3.4-py3-none-any.whl.metadata (6.9 kB)
Downloading dash-2.18.1-py3-none-any.whl (7.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.5/7.5 MB[0m [31m17.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dash_core_components-2.0.0-py3-none-any.whl (3.8 kB)
Downloading dash_html_components-2.0.0-py3-none-any.whl (4.1 kB)
Downloading dash_table-5.0.0-py3-none-any.whl 

In [14]:

# Import necessary libraries
import dash
from dash import dcc, html, Input, Output
import dash_bootstrap_components as dbc
import plotly.express as px
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler

In [15]:
# Read the housing CSV straight from the handson-ml2 GitHub repository
url = "https://raw.githubusercontent.com/ageron/handson-ml2/master/datasets/housing/housing.csv"
df = pd.read_csv(url)
# Remove, in place, every row that has at least one missing value
df.dropna(axis=0, how="any", inplace=True)

In [16]:
# Feature engineering: add three ratio columns derived from the raw counts
df = df.assign(
    rooms_per_household=df['total_rooms'] / df['households'],
    bedrooms_per_room=df['total_bedrooms'] / df['total_rooms'],
    population_per_household=df['population'] / df['households'],
)

# Model inputs: the four raw counts first, then the three derived ratios.
# predict_price relies on this ordering (features[:4] are the raw counts).
features = [
    'total_rooms',
    'total_bedrooms',
    'population',
    'households',
    'rooms_per_household',
    'bedrooms_per_room',
    'population_per_household',
]
X = df[features]
y = df['median_house_value']


In [17]:
# Preview the first five rows, including the engineered ratio columns
df.head(n=5)

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity,rooms_per_household,bedrooms_per_room,population_per_household
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0,NEAR BAY,6.984127,0.146591,2.555556
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0,NEAR BAY,6.238137,0.155797,2.109842
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,NEAR BAY,8.288136,0.129516,2.80226
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0,NEAR BAY,5.817352,0.184458,2.547945
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0,NEAR BAY,6.281853,0.172096,2.181467


In [18]:
# Hold out 20% of the rows for testing (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training rows only, then apply them to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [19]:
# Tune a random forest with an exhaustive grid search (2 x 2 x 2 candidates, 3-fold CV)
param_grid = dict(
    n_estimators=[100, 200],
    max_depth=[10, 20],
    min_samples_split=[2, 5],
)
model = GridSearchCV(
    estimator=RandomForestRegressor(random_state=42),
    param_grid=param_grid,
    cv=3,
)
model.fit(X_train_scaled, y_train)

In [20]:
# Function to predict house prices based on input features
def predict_price(total_rooms, total_bedrooms, population, households):
    """Predict `median_house_value` for a single set of raw count inputs.

    The three ratio features used in training are recomputed from the raw
    counts, the row is scaled with the fitted module-level `scaler`, and the
    fitted module-level `model` produces the prediction.

    Args:
        total_rooms: Room count; must be positive (denominator of bedrooms_per_room).
        total_bedrooms: Bedroom count.
        population: Population count.
        households: Household count; must be positive (denominator of the
            per-household ratios).

    Returns:
        The first (and only) element of `model.predict` for the input row.

    Raises:
        ValueError: If `total_rooms` or `households` is not positive. Without
            this check the ratios become inf/NaN and the failure only surfaces
            later, inside the scaler, with a message unrelated to the cause.
    """
    if total_rooms <= 0 or households <= 0:
        raise ValueError(
            "total_rooms and households must be positive to compute the ratio features"
        )

    input_df = pd.DataFrame(
        [[total_rooms, total_bedrooms, population, households]],
        columns=features[:4],
    )
    input_df['rooms_per_household'] = input_df['total_rooms'] / input_df['households']
    input_df['bedrooms_per_room'] = input_df['total_bedrooms'] / input_df['total_rooms']
    input_df['population_per_household'] = input_df['population'] / input_df['households']

    # Select by `features` so the column order always matches what the scaler was fitted on.
    scaled_input = scaler.transform(input_df[features])
    predicted_price = model.predict(scaled_input)[0]
    return predicted_price

# Create the Dash application, styled with the CYBORG Bootstrap theme
app = dash.Dash(name=__name__, external_stylesheets=[dbc.themes.CYBORG])

# Inline CSS for the cards that wrap the graphs
card_style = dict(
    padding='20px',
    borderRadius='20px',
    boxShadow='0 8px 12px rgba(0, 0, 0, 0.15)',
    backgroundColor='#f0f4f7',
)

# Inline CSS for the cards that wrap the sliders
slider_style = dict(
    padding='20px',
    borderRadius='15px',
    boxShadow='0 4px 8px rgba(0, 0, 0, 0.1)',
    backgroundColor='#ffffff',
)


In [21]:

# Create layout components
def _slider_card(label, slider_id, maximum, step, value, mark_every):
    """Return a card containing a caption and a slider running from 1 to `maximum`.

    Marks are placed every `mark_every` units starting at 0, and the current
    value is always shown in a tooltip below the handle.
    """
    return dbc.Card([
        html.Label(label, className='text-secondary', style={'fontSize': '1.2em'}),
        dcc.Slider(id=slider_id, min=1, max=maximum, step=step, value=value,
                   marks={tick: str(tick) for tick in range(0, maximum + 1, mark_every)},
                   tooltip={'placement': 'bottom', 'always_visible': True})
    ], style=slider_style)


def _graph_card(graph_id):
    """Return a card containing an (initially empty) graph with the given id."""
    return dbc.Card([
        dcc.Graph(id=graph_id, className='mt-4')
    ], style=card_style)


# Page heading
title_row = dbc.Row([
    dbc.Col(html.H1("House Price Prediction Dashboard", className='text-center text-primary mb-4',
                    style={'fontFamily': 'Arial', 'fontWeight': 'bold', 'fontSize': '2.5em'}), width=12)
])

# Left column: the four feature sliders, the button and the prediction text
controls_col = dbc.Col([
    _slider_card("Total Rooms", 'total-rooms-slider', maximum=10000, step=100, value=2000, mark_every=2000),
    _slider_card("Total Bedrooms", 'total-bedrooms-slider', maximum=2000, step=50, value=500, mark_every=500),
    _slider_card("Population", 'population-slider', maximum=10000, step=100, value=3000, mark_every=2000),
    _slider_card("Households", 'households-slider', maximum=2000, step=50, value=500, mark_every=500),

    dbc.Button("Predict House Price", id='update-button', color='primary', size='lg', className='mt-4',
               style={'borderRadius': '25px', 'backgroundColor': '#007bff', 'fontSize': '1.2em'}),
    html.Div(id='prediction-output', className='mt-4 text-primary', style={'fontSize': '28px'})
], width=4, className='mb-4')

# Right column: the main map visualization
map_col = dbc.Col([_graph_card('map-graph')], width=8)

# Bottom row: two additional charts side by side
charts_row = dbc.Row([
    dbc.Col([_graph_card('price-distribution')], width=6),
    dbc.Col([_graph_card('price-trend')], width=6)
])

app.layout = dbc.Container([
    title_row,
    dbc.Row([controls_col, map_col]),
    charts_row
], fluid=True, className='bg-light')


def _build_static_figures():
    """Build the three figures that depend only on `df`, never on the slider values.

    Returns:
        A tuple ``(map_fig, price_distribution_fig, price_trend_fig)``.
    """
    # Map of every row, coloured by house value and sized by room count
    map_fig = px.scatter_mapbox(df, lat="latitude", lon="longitude", color="median_house_value",
                                size="total_rooms", zoom=5, height=500, title="House Prices Distribution",
                                mapbox_style="carto-positron")

    # Histogram of the target column
    price_distribution_fig = px.histogram(df, x="median_house_value", nbins=50, title="Price Distribution",
                                          color_discrete_sequence=["#007bff"])

    # Scatter of value against room count with an OLS trendline
    price_trend_fig = px.scatter(df, x="total_rooms", y="median_house_value", trendline="ols",
                                 title="Price Trend by Total Rooms", color_discrete_sequence=["#6c757d"])

    return map_fig, price_distribution_fig, price_trend_fig


def _triggered_by_input_change():
    """Return True when the running callback was fired by a changed input.

    Returns False on the initial page-load call (no triggering component) and
    when the function is invoked outside of any Dash callback context.
    """
    try:
        return dash.callback_context.triggered_id is not None
    except dash.exceptions.MissingCallbackContextException:
        return False


# Define callback to update the house price prediction and visualizations
@app.callback(
    [Output('prediction-output', 'children'),
     Output('map-graph', 'figure'),
     Output('price-distribution', 'figure'),
     Output('price-trend', 'figure')],
    [Input('total-rooms-slider', 'value'),
     Input('total-bedrooms-slider', 'value'),
     Input('population-slider', 'value'),
     Input('households-slider', 'value')]
)
def update_dashboard(total_rooms, total_bedrooms, population, households):
    """Refresh the prediction text and, on first load, populate the three graphs.

    Args:
        total_rooms, total_bedrooms, population, households: Current slider values.

    Returns:
        A 4-tuple matching the declared outputs: the prediction text followed by
        the map, price-distribution and price-trend figures. After the initial
        call the figure slots are ``dash.no_update``: the figures are built from
        `df` alone, so rebuilding and re-sending them on every slider move (an
        OLS fit plus a full-dataset map) would be wasted work.
    """
    # Update house price prediction
    predicted_price = predict_price(total_rooms, total_bedrooms, population, households)
    prediction_text = f"Predicted House Price: ${predicted_price:,.2f}"

    if _triggered_by_input_change():
        # A slider moved: only the text changes, leave the graphs as they are.
        return prediction_text, dash.no_update, dash.no_update, dash.no_update

    # Initial call (or direct invocation): build and return everything.
    map_fig, price_distribution_fig, price_trend_fig = _build_static_figures()
    return prediction_text, map_fig, price_distribution_fig, price_trend_fig


# Run the Dash app.
# `app.run` is the supported entry point; `run_server` is the deprecated alias that
# Dash 3 removed. The reloader stays off because it restarts the process, which a
# notebook kernel cannot do.
if __name__ == '__main__':
    app.run(debug=True, use_reloader=False)

<IPython.core.display.Javascript object>

In [13]:
# Stand-alone preview of the map: markers coloured by house value and sized by room count
map_fig = px.scatter_mapbox(
    data_frame=df,
    lat="latitude",
    lon="longitude",
    color="median_house_value",
    size="total_rooms",
    zoom=5,
    height=500,
    mapbox_style="carto-positron",
)

map_fig


# **HAPPY CODING**