Libraries

In [None]:
import pandas as pd
import numpy as np
import geopandas as gpd
from geopandas import GeoDataFrame
import plotly.express as px
import plotly.graph_objects as go
import dash
from dash import dcc, html, Input, Output, State, ctx
import dash_bootstrap_components as dbc
import os
import joblib
import sklearn
import openrouteservice
from openrouteservice import convert


Data

In [None]:
# --- Configuration ---
# Input locations for the dashboard.
# The Data/Analyzed folder must be downloaded before running. See README for more information
# One analyzed trip CSV per year, 2011 through 2024 inclusive, in chronological order.
CSV_FILE_PATH = tuple(
    f"Data/Analyzed/TripData_{year}.csv" for year in range(2011, 2025)
)
# Shapefile holding the taxi zone polygons
SHAPEFILE_PATH = "taxi_zones/taxi_zones.shp"

Code

In [None]:
# Initialize the openrouteservice (ORS) client.
# The MLP model will not operate without an active key. Visit https://api.openrouteservice.org to
# sign up and get a free API key.

# Insert key here, or keep the placeholder and set the ORS_API_KEY environment
# variable instead so the key never has to be saved inside the notebook.
ors_key = os.environ.get("ORS_API_KEY", "YOUR KEY HERE")

# Bind the name before trying to connect: when initialization fails the name
# still exists (as None) instead of being undefined for the rest of the notebook.
ors_client = None
try:
    client = openrouteservice.Client(key=ors_key)
    # Issue one small routing request to confirm the service accepts the key
    coords = [[8.681495, 49.41461], [8.687872, 49.420318]]
    client.directions(coordinates=coords, profile="driving-car")
    ors_client = client
    ors_status_msg = "✅ ORS Client successfully initialized"
except Exception as e:
    # Deliberately broad: any failure (bad key, no network, quota) is reported in
    # the status message shown on the dashboard rather than stopping the notebook.
    ors_status_msg = f"❌ ORS Client failed to initialize: {e}. Please enter valid ORS key before proceeding"

print(ors_status_msg)

In [None]:
# --- Load in MLP Models ---

# Load in MLP models for the scatter map.
# The map callback looks models up as "mlp_fare_<year>" and "mlp_time_<year>",
# so every selectable year (2011-2024) needs BOTH entries. Generating the keys
# guarantees that; a hand-written listing makes it easy to drop one
# (e.g. "mlp_time_2012"), which then surfaces as a KeyError on selection.
# A missing model file fails here, at load time, with FileNotFoundError.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code - only load model files that come from a trusted source.
mlp_models = {
    f"mlp_{target}_{year}": joblib.load(f"Models/model_{year}_{target}.pkl")
    for target in ("fare", "time")  # fare models first, then time models
    for year in range(2011, 2025)
}



In [None]:
# --- Data Loading and Preprocessing ---
def load_and_prepare_data(csv_path, shp_path):
    """
    Loads taxi trip CSV files and the taxi zone shapefile, merges them, and
    builds the data sets used by the dashboard.

    Args:
        csv_path (str | Iterable[str]): Path, or collection of paths, to the
            trip data CSV file(s). Each file must contain the columns
            'PULocationID', 'Trip_Time', 'Time Zone', 'trip_distance',
            'fare_amount' and 'cluster'.
        shp_path (str): Path to the taxi zones shapefile.

    Returns:
        tuple: A 4-tuple containing:
            - geopandas.GeoDataFrame: mean / count / median of 'Trip_Time' per
              zone and time zone, one group of rows per input file (for map/bars).
            - pd.DataFrame: per-trip columns used by the 3D scatter plot.
            - list: sorted unique time zones found across ALL input files.
            - geopandas.GeoDataFrame: one row per shapefile record, in the
              shapefile's own CRS, with added 'lat'/'lon' centroid columns
              (WGS84) for the scatter map.

    Raises:
        ValueError: If csv_path contains no files.
    """
    # A single path is iterable too (character by character), so wrap it first
    if isinstance(csv_path, (str, os.PathLike)):
        csv_paths = [csv_path]
    else:
        csv_paths = list(csv_path)
    if not csv_paths:
        raise ValueError("csv_path must contain at least one trip data file")

    # Columns read from every CSV; rows with NA in any of them are dropped
    required_cols = [
        'PULocationID', 'Trip_Time', 'Time Zone',
        'trip_distance', 'fare_amount', 'cluster'
    ]
    # Columns kept for filtering ('borough', 'Time Zone') and plotting the scatter
    scatter_cols = ['trip_distance', 'Trip_Time', 'fare_amount', 'cluster', 'borough', 'Time Zone']

    # --- Load Shapefile ---
    # The zones do not depend on the trip file, so read them once, outside the loop
    print(f"Loading shapefile from: {shp_path}")
    zone_gdf = gpd.read_file(shp_path)
    gdf = zone_gdf.to_crs('EPSG:4326')  # Use standard EPSG code for WGS84 lat/lon
    gdf = gdf[['LocationID', 'zone', 'borough', 'geometry']]  # Keep geometry for aggregated stats

    # --- Get latitude and longitude for every zone for multi-layer perceptron models
    # Step 1: Reproject to a projected CRS for accurate geometry operations
    projected = zone_gdf.to_crs(epsg=2263)  # NY State Plane (feet)
    # Step 2: Calculate centroids in projected CRS
    projected['centroid'] = projected.geometry.centroid
    # Step 3: Convert centroids back to lat/lon (WGS84)
    centroids = projected.set_geometry('centroid').to_crs(epsg=4326)
    # Step 4: Add lat/lon to zone_gdf (its geometry stays in the file's own CRS)
    zone_gdf['lat'] = centroids.geometry.y
    zone_gdf['lon'] = centroids.geometry.x

    # Per-file results are collected and concatenated once at the end
    scatter_parts = []
    stats_parts = []
    time_zones_seen = set()

    for path in csv_paths:
        # --- Load Trip Data ---
        print(f"Loading trip data from: {path}")
        trips = pd.read_csv(path, usecols=required_cols)
        print("Unique pickup locations: ", len(trips['PULocationID'].unique()))

        # --- Basic Cleaning (Original Data) ---
        trips = trips.dropna(subset=required_cols)
        print(f"Shape after dropping NA in essential columns: {trips.shape}")

        # Store cluster as strings so plots treat it as a discrete category
        if not pd.api.types.is_string_dtype(trips['cluster']):
            print("Converting 'cluster' column to string type for discrete coloring.")
            trips['cluster'] = trips['cluster'].astype(str)

        # Accumulate time zones over every file, not just the last one
        file_time_zones = trips['Time Zone'].unique().tolist()
        time_zones_seen.update(file_time_zones)
        print(f"Unique Time Zones found: {sorted(file_time_zones)}")

        # --- Merge Trip Data with Geometry/Zone Info ---
        print("Merging trip data and shapefile...")
        # Inner merge keeps only trips whose pickup zone exists in the shapefile
        merged_df = gdf.merge(trips, left_on='LocationID', right_on='PULocationID', how='inner')
        print(f"Merge complete. Shape after merge: {merged_df.shape}")
        merged_df = merged_df.drop(columns=['PULocationID', 'LocationID'])  # Keep zone, borough, geometry

        # --- Prepare Data for Scatter Plot ---
        file_scatter_df = merged_df[scatter_cols].copy()
        print(f"Scatter plot data prepared. Shape: {file_scatter_df.shape}")
        scatter_parts.append(file_scatter_df)

        # --- Prepare Aggregated Data for Map/Bar Chart ---
        print("Calculating aggregated statistics per zone and time zone...")
        zone_time_stats = (
            merged_df
            .groupby(['zone', 'borough', 'geometry', 'Time Zone'])['Trip_Time']
            .agg(['mean', 'count', 'median'])
            .reset_index()
        )
        print(f"Zone-Time stats calculated. Shape: {zone_time_stats.shape}")
        stats_parts.append(GeoDataFrame(zone_time_stats, geometry='geometry', crs='EPSG:4326'))
        print("Aggregated GeoDataFrame created.")

    scatter_df = pd.concat(scatter_parts, ignore_index=True)
    stats_gdf = pd.concat(stats_parts, ignore_index=True)
    unique_time_zones = sorted(time_zones_seen)

    # Return aggregated stats, scatter data, time zones and zone centroids
    return stats_gdf, scatter_df, unique_time_zones, zone_gdf

# --- Load Data ---
(zone_time_stats_gdf,
 scatter_data_df,
 time_zones,
 zone_gdf) = load_and_prepare_data(CSV_FILE_PATH, SHAPEFILE_PATH)

# Stop here if either data set needed by the main charts is missing
if any(frame is None for frame in (zone_time_stats_gdf, scatter_data_df)):
    print("Exiting: Data loading failed.")
    exit()

# Borough dropdown choices: 'All' first, then the boroughs in alphabetical order
all_boroughs = ['All', *sorted(zone_time_stats_gdf['borough'].unique().tolist())]

# Time zone dropdown choices, again with an 'All' entry in front
all_time_zones = ['All', *time_zones]

# Year dropdown choices for 2011-2024 (label is the year as text, value the int)
year_options = [dict(label=str(year), value=year) for year in range(2011, 2025)]


# --- Dash App Initialization ---
app = dash.Dash(
    __name__,
    external_stylesheets=[dbc.themes.BOOTSTRAP],
)
# Expose the underlying server object for deployment
server = app.server

# --- App Layout ---
# Single fluid container: a title row, then one row with two half-width columns.
# Component ids defined here are the ones referenced by the callbacks' Input /
# Output / State declarations.
app.layout = dbc.Container([
    # NOTE(review): the title and the alert below mention 2024 only, while
    # CSV_FILE_PATH lists files for 2011-2024 - confirm the wording is still accurate.
    dbc.Row(
        dbc.Col(html.H1("NYC Taxi Trip Analysis (2024)", className="text-center my-4"), width=12)
    ),
    dbc.Row([
        # Left Column: Map and Filters
        dbc.Col([
            dbc.Alert("Data limited to January 2024.", color="#e3e3e3", className="mb-4"),
            # Borough / time zone filters shared by the choropleth, 3D scatter and bar chart
            dbc.Card([
                dbc.CardHeader("Filters"),
                dbc.CardBody([
                    html.Label("Select Borough:"),
                    dcc.Dropdown(
                        id='borough-filter',
                        options=[{'label': b, 'value': b} for b in all_boroughs],
                        value='All', clearable=False, className="mb-3"
                    ),
                    html.Label("Select Time Zone:"),
                    dcc.Dropdown(
                        id='time-zone-filter',
                        options=[{'label': tz, 'value': tz} for tz in all_time_zones],
                        value='All', clearable=False, className="mb-3"
                    ),
                ]),
            ], className="mb-4"),
            # Choropleth of average trip time per zone
            dbc.Card([
                dbc.CardHeader("Average Trip Time by Zone"),
                dbc.CardBody([
                    dcc.Graph(id='choropleth-map', config={'displayModeBar': False})
                ])
            ]),
            # Interactive zone map: two clicked zones become pickup and dropoff
            # NOTE(review): the header says "Estimated Trip Distance", but the map
            # callback displays a predicted trip time - confirm the intended wording.
            dbc.Card([
                dbc.CardHeader("Multi Layer Perceptron Visualization for Estimated Fare per Mile and Estimated Trip Distance"),
                dbc.CardBody([
                    # First row: status + pickup/dropoff summary
                    dbc.Row([
                        dbc.Col(html.Div(id="ors-status", children=html.Div(ors_status_msg)), width=4),
                        dbc.Col(html.Div(id="pickup-output"), width=4),
                        dbc.Col(html.Div(id="dropoff-output"), width=4),
                    ], className="mb-3"),
                    # Dropdowns: year and time zone
                    # (neither sets clearable=False, so both can be cleared by the user)
                    dbc.Row([
                        dbc.Col([
                            html.Label("Select Year:"),
                            dcc.Dropdown(id='year-filter', options=year_options, value=2024)
                        ], width=6),
                        dbc.Col([
                            html.Label("Select Time Zone:"),
                            dcc.Dropdown(id='time-zone-mlp-filter', options=time_zones, value='Rush Hour')
                        ], width=6),
                    ], className="mb-3"),
                    # Reset button
                    dbc.Row([
                        dbc.Col(html.Button("Reset", id="reset-btn", n_clicks=0, className="btn btn-secondary"), width="auto")
                    ], className="mb-4"),
                    dcc.Graph(id='mlp-map', config={'displayModeBar': False}),
                    # Browser-side store for the clicked zones; starts as an empty list
                    dcc.Store(id="selection-store", data=[]),
                    ])
            ], className = "mb-4")
        ], md=6),

        # Right Column: Charts
        dbc.Col([
            # 3D plot
            dbc.Card([
                dbc.CardHeader("Trip Cluster Visualization (Sample)"), 
                dbc.CardBody([
                    dcc.Graph(id='scatter-3d-plot', config={'displayModeBar': True})
                ])
            ], className="mb-4"),

            # Bar Chart 
            dbc.Card([
                dbc.CardHeader("Number of Trips by Borough"),
                dbc.CardBody([
                    dcc.Graph(id='bar-chart-trip-count', config={'displayModeBar': False})
                ])
            ]),
        ], md=6),
    ])
], fluid=True)

# --- Callbacks ---
@app.callback(
    [Output('choropleth-map', 'figure'),
     Output('scatter-3d-plot', 'figure'),
     Output('bar-chart-trip-count', 'figure')],
    [Input('borough-filter', 'value'),
     Input('time-zone-filter', 'value')]
)
def update_visualizations(selected_borough, selected_time_zone):
    """
    Updates the choropleth map, 3D scatter plot, and trip count bar chart
    based on the selected borough and time zone.

    Args:
        selected_borough (str): The borough selected in the dropdown filter,
            or 'All' for no borough filtering.
        selected_time_zone (str): The time zone selected in the dropdown filter,
            or 'All' for no time zone filtering.

    Returns:
        tuple: (map figure, scatter figure, count bar chart figure). Each one is
        an empty titled placeholder when the filters leave no data for it.
    """
    def empty_figure(title):
        # Build a fresh layout per placeholder so no layout object is shared
        # (and mutated) between the three figures.
        return go.Figure(layout=go.Layout(
            title=title,
            xaxis={'visible': False}, yaxis={'visible': False},
            paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0)'
        ))

    def cluster_label(raw_cluster):
        # Cluster ids are stored as strings; go through float so that both
        # "-1" and "-1.0" are recognised as cluster -1.
        return "abnormal" if int(float(raw_cluster)) == -1 else "normal"

    # --- Filter Data ---
    # No up-front copies: boolean filtering and .assign() below both return new
    # frames, so the module-level data sets are never modified.
    filtered_agg_gdf = zone_time_stats_gdf
    filtered_scatter_df = scatter_data_df

    if selected_borough != 'All':
        filtered_agg_gdf = filtered_agg_gdf[filtered_agg_gdf['borough'] == selected_borough]
        filtered_scatter_df = filtered_scatter_df[filtered_scatter_df['borough'] == selected_borough]

    if selected_time_zone != 'All':
        filtered_agg_gdf = filtered_agg_gdf[filtered_agg_gdf['Time Zone'] == selected_time_zone]
        filtered_scatter_df = filtered_scatter_df[filtered_scatter_df['Time Zone'] == selected_time_zone]

    # --- Create Choropleth Map ---
    # Collapse the remaining rows to one row per zone
    if filtered_agg_gdf.empty:
        map_data_gdf = GeoDataFrame(columns=['zone', 'borough', 'geometry', 'total_count', 'weighted_mean_sum', 'median_time', 'final_mean_time'], geometry='geometry', crs='EPSG:4326')
    else:
        # mean * count restores each row's total trip time, so summing it and
        # dividing by the summed count gives a correctly weighted mean.
        filtered_agg_gdf = filtered_agg_gdf.assign(
            weighted_mean_time=filtered_agg_gdf['mean'] * filtered_agg_gdf['count']
        )
        # NOTE(review): 'first' reports the median of the first row in each zone
        # group only; when a zone has several rows this is not the median over
        # all of its trips - confirm whether that approximation is acceptable.
        map_data_agg = filtered_agg_gdf.groupby(['zone', 'borough', 'geometry']).agg(
            total_count=('count', 'sum'),
            weighted_mean_sum=('weighted_mean_time', 'sum'),
            median_time=('median', 'first')
        ).reset_index()
        map_data_agg['final_mean_time'] = map_data_agg['weighted_mean_sum'] / map_data_agg['total_count']
        map_data_agg['final_mean_time'] = map_data_agg['final_mean_time'].fillna(0)
        map_data_agg['median_time'] = map_data_agg['median_time'].fillna(0)
        map_data_gdf = GeoDataFrame(map_data_agg, geometry='geometry', crs='EPSG:4326')

    # Generate map figure or empty figure
    if map_data_gdf.empty or map_data_gdf['total_count'].sum() == 0:
        map_fig = empty_figure('No map data for selected filters')
    else:
        # Center the view on the centroid of all displayed zones combined
        center_point = map_data_gdf.geometry.union_all().centroid
        map_fig = px.choropleth_map(
            map_data_gdf,
            geojson=map_data_gdf.__geo_interface__,
            color="final_mean_time", locations="zone", featureidkey="properties.zone",
            center={"lat": center_point.y, "lon": center_point.x},
            zoom=9.8 if selected_borough == 'All' else 10.35,
            color_continuous_scale="Viridis",
            # Color range is fixed from the unfiltered data (95th percentile of
            # the per-row means) so colors stay comparable across filters
            range_color=[0, zone_time_stats_gdf['mean'].quantile(0.95) if not zone_time_stats_gdf.empty else 1],
            labels={"final_mean_time": "Avg. Trip Time (min)", "total_count": "Number of Trips", "median_time": "Median Trip Time (min)", "zone": "Zone", "borough": "Borough"},
            custom_data=['zone', 'borough', 'total_count', 'median_time']
        )
        map_fig.update_traces(
            hovertemplate="<b>Zone: %{customdata[0]}</b><br>Borough: %{customdata[1]}<br>Avg. Trip Time: %{z:.1f} min<br>Number of Trips: %{customdata[2]}<br>Median Trip Time: %{customdata[3]:.1f} min<extra></extra>",
        )
        map_fig.update_layout(
            coloraxis_colorbar=dict(title="Avg. Time<br>(min)"),
            margin={"r":0,"t":0,"l":0,"b":0},
        )
        map_fig.update_geos(visible=False, projection_type="mercator")

    # --- Create 3D Scatter Plot ---
    if filtered_scatter_df.empty:
        scatter_fig = empty_figure('No scatter data for selected filters')
    else:
        # Plot a random sample (up to 1000 points); fixed seed for reproducibility
        sample_size = min(1000, len(filtered_scatter_df))
        scatter_sample_df = filtered_scatter_df.sample(n=sample_size, random_state=42)
        # Relabel clusters: -1 is shown as "abnormal", everything else as "normal"
        scatter_sample_df = scatter_sample_df.assign(
            cluster=[cluster_label(x) for x in scatter_sample_df['cluster']]
        )
        scatter_fig = px.scatter_3d(
            scatter_sample_df,
            x='trip_distance',
            y='Trip_Time',
            z='fare_amount',
            color='cluster',
            labels={
                'trip_distance': 'Trip Distance (miles)',
                'Trip_Time': 'Trip Time (min)',
                'fare_amount': 'Fare Amount ($)',
                'cluster': 'Cluster ID'
            },
            title=f"Trip Characteristics by Cluster (Sample: {sample_size} trips)",
            hover_data=['borough', 'Time Zone']
        )
        scatter_fig.update_layout(
            margin=dict(l=0, r=0, b=0, t=40),  # Leave room at the top for the title
            legend_title_text='Cluster',
            scene_camera=dict(
                up=dict(x=0, y=0, z=1),         # z axis points up
                center=dict(x=0, y=0, z=-0.5),  # point the camera looks at
                eye=dict(x=1.5, y=1.5, z=0.5)   # camera position
            )
        )
        # Make markers smaller
        scatter_fig.update_traces(marker=dict(size=3.5))

    # --- Create Bar Chart (Trip Count) ---
    # Borough totals come from the filtered *aggregated* data
    if filtered_agg_gdf.empty:
        borough_stats = pd.DataFrame(columns=['borough', 'count', 'mean'])
    else:
        borough_agg = filtered_agg_gdf.groupby('borough').agg(
            total_count=('count', 'sum'),
            weighted_mean_sum=('weighted_mean_time', 'sum')
        ).reset_index()

        # Count-weighted average trip time per borough
        borough_agg['mean'] = borough_agg['weighted_mean_sum'] / borough_agg['total_count']
        borough_agg['mean'] = borough_agg['mean'].fillna(0)

        borough_stats = borough_agg[['borough', 'total_count', 'mean']].rename(columns={'total_count': 'count'})

    # Generate count bar chart or empty figure
    if borough_stats.empty or borough_stats['count'].sum() == 0:
        bar_count_fig = empty_figure('No count data for selected filters')
    else:
        bar_count_fig = px.bar(
            borough_stats,
            x='borough', y='count',
            labels={'count': 'Number of Trips', 'borough': 'Borough'},
            color='borough', text='count'
        )
        bar_count_fig.update_traces(texttemplate='%{text:,}', textposition='outside')
        bar_count_fig.update_layout(
            xaxis_title=None, yaxis_title="Number of Trips",
            showlegend=False, margin=dict(t=20, b=0, l=0, r=0)
        )
        bar_count_fig.update_traces(
            hovertemplate="<b>Borough: %{x}</b><br>Number of Trips: %{y:,}<extra></extra>"
        )

    return map_fig, scatter_fig, bar_count_fig

# Handle selection of pick-up/drop-off points and reset logic for mlp_map
@app.callback(
    Output("selection-store", "data"),
    Input("mlp-map", "clickData"),
    Input("reset-btn", "n_clicks"),
    Input("time-zone-mlp-filter", "value"),
    Input("year-filter", "value"),
    State("selection-store", "data"),
    prevent_initial_call=True
)
def store_selection(clickData, reset_clicks, time_zone_clicks, year_clicks, current_selections):
    """
    Maintains the list of zones clicked on the MLP map.

    The first stored entry is treated as the pickup and the second as the
    dropoff by the map callback, so at most two entries are kept.

    Args:
        clickData (dict | None): Click payload from the 'mlp-map' graph.
        reset_clicks, time_zone_clicks, year_clicks: Values of the reset button
            and the two dropdowns; only used as triggers.
        current_selections (list | None): Zones currently held in the store.

    Returns:
        list | dash.no_update: An empty list when the reset button or either
        dropdown fired, the extended list when a new zone was clicked, or
        dash.no_update when nothing needs to change.
    """
    trigger = ctx.triggered_id

    # All selected points are cleared when any of these controls is used
    if trigger in ("reset-btn", "time-zone-mlp-filter", "year-filter"):
        return []

    # Anything other than a usable map click leaves the store untouched
    if trigger != "mlp-map" or not clickData or not clickData.get("points"):
        return dash.no_update

    selected = list(current_selections or [])

    # Pickup and dropoff are already chosen. Writing to the store again would
    # re-run the map callback (and its routing request) for no visible change.
    if len(selected) >= 2:
        return dash.no_update

    # The clicked point's hover text is what gets stored as the selection
    loc_id = clickData["points"][0].get("hovertext")
    if not loc_id or loc_id in selected:
        return dash.no_update

    # Stored in click order: 1) pick-up, 2) drop-off
    return selected + [loc_id]

# Update map and display selected points
@app.callback(
    Output("mlp-map", "figure"),
    Output("pickup-output", "children"),
    Output("dropoff-output", "children"),
    Output("ors-status", "children"),
    Input("selection-store", "data"),
    Input("year-filter", "value"),
    Input("time-zone-mlp-filter", "value")

)
def update_map_and_output(selections, year_selection, shift):
    """
    Redraws the zone scatter map and, once a pickup and a dropoff are selected,
    routes between them and shows the model's fare and time predictions.

    Args:
        selections (list | None): Selected zone names; index 0 is the pickup,
            index 1 the dropoff.
        year_selection (int | None): Year whose models should be used.
        shift (str | None): Selected time zone ('Rush Hour', 'Mid Day',
            'Night Shift'); anything else is encoded as 0.

    Returns:
        tuple: (map figure, pickup text, dropoff text, status content). The
        status is the ORS initialization message, or a red error Div when
        routing fails or no model exists for the selected year.
    """
    error_style = {"color": "red", "fontWeight": "bold"}

    # The store may be empty or unset; treat both as "nothing selected"
    selections = selections or []
    pickup = selections[0] if len(selections) > 0 else None
    dropoff = selections[1] if len(selections) > 1 else None

    pickup_msg = f"🟦 Pickup: {pickup}" if pickup else "🟦 Click on map to select pickup"
    dropoff_msg = f"🟩 Dropoff: {dropoff}" if dropoff else "🟩 Click on map to select dropoff"

    # --- Create Scatter Map ---
    scatter_map_fig = px.scatter_map(
        zone_gdf,
        lat="lat",
        lon="lon",
        hover_name="zone",
        hover_data=["LocationID", "borough"],
        zoom=9,
        height=700
    )

    scatter_map_fig.update_layout(
        # px.scatter_map draws on the `map` subplot, so the basemap is chosen
        # with map_style; mapbox_style only applies to the mapbox subplot.
        map_style="carto-positron",
        margin={"r":0,"t":40,"l":0,"b":0},
        # Set before any early return so every returned figure behaves the same
        clickmode='event+select'
    )

    # Nothing more to draw until both points are chosen
    if not (pickup and dropoff):
        return scatter_map_fig, pickup_msg, dropoff_msg, ors_status_msg

    # Get pickup and dropoff zones (first matching row for each zone name)
    pickup_row = zone_gdf[zone_gdf["zone"] == pickup].iloc[0]
    dropoff_row = zone_gdf[zone_gdf["zone"] == dropoff].iloc[0]

    # Coordinates must be in (lon, lat) format
    pickup_coords = (pickup_row["lon"], pickup_row["lat"])
    dropoff_coords = (dropoff_row["lon"], dropoff_row["lat"])

    try:
        # Call ORS directions API
        route = ors_client.directions(
            coordinates=[pickup_coords, dropoff_coords],
            profile='driving-car',
            format='geojson'
        )
        # Parsed inside the try so an unexpected response shape is reported the
        # same way as a failed request instead of crashing the callback.
        distance_meters = route["features"][0]["properties"]["segments"][0]["distance"]
    except Exception:
        # Deliberately broad: covers a missing/uninitialized client, network or
        # quota errors, and point pairs that cannot be routed.
        return scatter_map_fig, pickup_msg, dropoff_msg, html.Div(
            "⚠️ Routing error: ORS is unavailable, please select points that can be routed to eachother by taxi",
            style=error_style
        )

    # Route distance is read in meters; convert to miles
    distance_miles = distance_meters * 0.000621371

    # Integer encoding of the shift passed to the models; unknown or cleared -> 0
    shift_int = {'Rush Hour': 1, 'Mid Day': 2, 'Night Shift': 3}.get(shift, 0)

    # A cleared year dropdown or a year without a loaded model must not crash
    fare_model = mlp_models.get(f"mlp_fare_{year_selection}")
    time_model = mlp_models.get(f"mlp_time_{year_selection}")
    if fare_model is None or time_model is None:
        return scatter_map_fig, pickup_msg, dropoff_msg, html.Div(
            f"⚠️ No fare/time model is available for year {year_selection}",
            style=error_style
        )

    # Features: pickup LocationID, dropoff LocationID, shift code, distance (miles)
    model_input = [[pickup_row["LocationID"], dropoff_row["LocationID"], shift_int, distance_miles]]
    # predict() returns an array; take its single element explicitly before
    # exponentiating (the exp is kept from the existing code path).
    pred_fare = float(np.exp(np.ravel(fare_model.predict(model_input))[0]))
    pred_time = float(np.exp(np.ravel(time_model.predict(model_input))[0]))

    # Display trip details for user
    hover_text = (
        f"<b>🚖 Trip Info</b><br>"
        f"📍 From: {pickup}<br>"
        f"➡️ To: {dropoff}<br>"
        f"🕐 Shift: {shift}<br>"
        f"📏 Distance: {distance_miles:.2f} mile(s)<br>"
        f"💵 Fare per Mile: ${pred_fare:.2f}<br>"
        f"🕒 Time: {pred_time:.1f} min"
    )

    # Draw a straight line between the two zone centroids
    scatter_map_fig.add_trace(go.Scattermap(
        mode="lines+markers",
        lat=[pickup_row["lat"], dropoff_row["lat"]],
        lon=[pickup_row["lon"], dropoff_row["lon"]],
        marker=dict(size=12, color="green"),
        line=dict(width=3, color="green"),
        hoverinfo="text",
        hovertext=hover_text,
        showlegend=False,
        name="Trip Route"
    ))

    return scatter_map_fig, pickup_msg, dropoff_msg, ors_status_msg

# --- Run the App ---
if __name__ == '__main__':
    # The app object must exist and be a Dash instance ...
    app_is_ready = 'app' in locals() and isinstance(app, dash.Dash)
    # ... and both data sets must be present (only checked once the app is confirmed)
    data_is_loaded = app_is_ready and zone_time_stats_gdf is not None and scatter_data_df is not None

    if not data_is_loaded:
        print("Dash server not started. Check console for data loading errors.")
    else:
        print("Starting Dash server...")
        print("Access the dashboard in your web browser at http://127.0.0.1:8053/")
        app.run(port=8053, debug=True)