## 4.2.5 Extracting the Planned Speed Over Segments - Genova

In [None]:
# Data handling and manipulation
import numpy as np
import pandas as pd
import geopandas as gpd
import gtfs_functions as gtfs

# Database interaction
import psycopg2
import psycopg2.extras as extras
from psycopg2 import Error
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
from sqlalchemy import create_engine

# Visualization and plotting
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import folium as fl
from IPython.display import display, HTML, clear_output


# Database connection
# Every setting can be overridden through an environment variable so that real
# credentials do not have to live in the notebook. The literals are only the
# fallbacks used when the corresponding variable is unset.
import os

db_host = os.environ.get("GENOVA_DB_HOST", "localhost")
db_port = os.environ.get("GENOVA_DB_PORT", "5432")
db_user = os.environ.get("GENOVA_DB_USER", "postgres")
# NOTE(review): prefer exporting GENOVA_DB_PASS over relying on this fallback.
db_pass = os.environ.get("GENOVA_DB_PASS", "bankmisr1420")
db_name = os.environ.get("GENOVA_DB_NAME", "genova")

def load_gtfs_feed(file_path, start_date, end_date):
    """
    Read a GTFS ZIP archive, plot its segments, and return them.

    Parameters:
    - file_path: Path to the GTFS zip file
    - start_date: Start date of the feed (format: YYYY-MM-DD)
    - end_date: End date of the feed (format: YYYY-MM-DD)

    Returns:
    - The feed's `segments` attribute (the same object that was plotted)
    """
    # Build the feed with the given date bounds and take its segments directly
    route_segments = gtfs.Feed(
        file_path,
        start_date=start_date,
        end_date=end_date,
    ).segments

    # Draw the segments before handing them back to the caller
    route_segments.plot()
    return route_segments

# Read the example feed for March 2024 and plot its segments
gtfs_file = "/home/mahmoud/Desktop/MobilityDataScience/Book/Urban Mobility Chapter/GTFS_AMT_example.zip"
gtfs_segments = load_gtfs_feed(
    file_path=gtfs_file,
    start_date='2024-03-01',
    end_date='2024-03-31',
)

In [None]:
from sqlalchemy import create_engine

def save_to_postgis(gdf, table_name, db_host, db_port, db_user, db_pass, db_name):
    """
    Saves a GeoDataFrame to a PostgreSQL/PostGIS database.

    The target table is replaced if it already exists (`if_exists='replace'`).

    Parameters:
    - gdf: GeoDataFrame to save
    - table_name: Name of the table where data will be stored
    - db_host: Database host
    - db_port: Database port (PostgreSQL conventionally listens on 5432)
    - db_user: Database user
    - db_pass: Database password
    - db_name: Name of the database

    Returns:
    - None; a confirmation line is printed after a successful write.
    """
    # Local import keeps this cell self-contained
    from urllib.parse import quote

    # Percent-encode the credentials so characters such as '@', ':' or '/'
    # in a user name or password are not misread as URL delimiters.
    user = quote(str(db_user), safe="")
    password = quote(str(db_pass), safe="")
    db_url = f"postgresql://{user}:{password}@{db_host}:{db_port}/{db_name}"

    # Create the SQLAlchemy engine using the constructed URL
    engine = create_engine(db_url)
    try:
        # Save the GeoDataFrame to PostGIS (PostgreSQL) using to_postgis()
        gdf.to_postgis(table_name, engine, if_exists='replace')
    finally:
        # Release the engine's pooled connections whether or not the write succeeded
        engine.dispose()

    print(f"Data saved to {table_name} table in the {db_name} database.")

# Persist the extracted segments into the "segments" table
save_to_postgis(
    gdf=gtfs_segments,
    table_name="segments",
    db_host=db_host,
    db_port=db_port,
    db_user=db_user,
    db_pass=db_pass,
    db_name=db_name,
)


In [None]:
import psycopg2
import pandas as pd
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.express as px
import dash_bootstrap_components as dbc

# Database connection settings for Genova
# Every setting can be overridden through an environment variable so that real
# credentials do not have to live in the notebook. The literals are only the
# fallbacks used when the corresponding variable is unset.
import os

db_host = os.environ.get("GENOVA_DB_HOST", "localhost")
db_port = os.environ.get("GENOVA_DB_PORT", "5432")
db_user = os.environ.get("GENOVA_DB_USER", "postgres")
# NOTE(review): prefer exporting GENOVA_DB_PASS over relying on this fallback.
db_pass = os.environ.get("GENOVA_DB_PASS", "bankmisr1420")
db_name = os.environ.get("GENOVA_DB_NAME", "genova")

# Fetch detailed segment speed data for Genova
def fetch_segment_speeds(start_date='2024-03-01', end_date='2024-03-31'):
    """
    Fetches the detailed speed values per segment from the Genova PostGIS database.

    Each row of `connections` is joined to the matching row of `segments`
    (same route, direction, start stop and end stop). Rows whose travel time
    is not strictly positive are excluded, which also avoids dividing by zero.

    Parameters:
    - start_date: First day of the date window, inclusive (format: YYYY-MM-DD)
    - end_date: Last day of the date window, inclusive (format: YYYY-MM-DD)

    Returns:
    - DataFrame containing speed values and segment details, plus a
      'segment' column of the form "<from_stop_name> -> <to_stop_name>"
    """
    # The alias is written unquoted, so PostgreSQL folds it to lowercase and
    # the resulting DataFrame column is named 'speedkmh'.
    # presumably distance_m is in meters, making the * 3.6 a m/s -> km/h
    # conversion; verify against the segments table.
    query = """
    SELECT (s.distance_m / EXTRACT(EPOCH FROM (c.t_arrival - c.t_departure)) * 3.6) AS speedKMH,
           c.from_stop_id, c.from_stop_name, c.to_stop_id, c.to_stop_name
    FROM connections AS c
    JOIN segments AS s
    ON c.route_id=s.route_id 
       AND c.direction_id=s.direction_id 
       AND c.from_stop_id=s.start_stop_id 
       AND c.to_stop_id=s.end_stop_id
    WHERE date between %(start_date)s and %(end_date)s
    AND EXTRACT(EPOCH FROM (c.t_arrival - c.t_departure)) > 0;
    """
    conn = psycopg2.connect(host=db_host, port=db_port, dbname=db_name, user=db_user, password=db_pass)
    try:
        # Dates are bound as query parameters rather than spliced into the SQL text
        df = pd.read_sql_query(
            query,
            conn,
            params={"start_date": start_date, "end_date": end_date},
        )
    finally:
        # Close the connection even when the query fails
        conn.close()

    # Add a column that concatenates 'from_stop_name' and 'to_stop_name' for easy selection
    df['segment'] = df['from_stop_name'] + " -> " + df['to_stop_name']

    return df

# Initialize Dash app
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Load data for Genova
speed_df = fetch_segment_speeds()

# Distinct segment labels in row order. Labels that are missing (a NULL stop
# name makes the concatenated label NaN) are dropped so that sorting only ever
# compares strings.
segment_names = speed_df['segment'].dropna().unique()

# Pre-select the first label when there is one; an empty query result leaves
# the dropdown without a selection instead of raising IndexError.
default_segment = segment_names[0] if len(segment_names) else None

# Layout of the Dash app
app.layout = dbc.Container([
    html.H1("Genova Speed Distribution per Segment"),

    # Dropdown for selecting segment
    dcc.Dropdown(
        id="segment-dropdown",
        options=[{"label": seg, "value": seg} for seg in sorted(segment_names)],
        value=default_segment,  # Default value
        clearable=False,
        style={"width": "80%"}
    ),

    # Graph to display speed distribution
    dcc.Graph(id="speed-distribution-plot")
])

# Callback to update the graph based on selected segment
@app.callback(
    Output("speed-distribution-plot", "figure"),
    Input("segment-dropdown", "value"),
)
def update_graph(selected_segment):
    """Build the speed histogram for the segment chosen in the dropdown.

    Parameters:
    - selected_segment: Current value of the "segment-dropdown" component

    Returns:
    - Plotly figure assigned to the "speed-distribution-plot" graph
    """
    # Keep only the rows whose label matches the dropdown selection
    is_selected = speed_df['segment'] == selected_segment

    # 30-bin histogram of the speeds observed on that segment
    histogram = px.histogram(
        speed_df[is_selected],
        x="speedkmh",
        nbins=30,
        title=f"Speed Distribution for {selected_segment}",
        labels={"speedkmh": "Speed (km/h)"},
    )
    histogram.update_layout(
        xaxis_title="Speed (km/h)",
        yaxis_title="Frequency",
    )
    return histogram

# Run the app
if __name__ == "__main__":
    # `run_server` is the legacy entry point: it was superseded by `run` and
    # is rejected by Dash 3. Prefer `run` when this Dash version provides it
    # and fall back to `run_server` on older releases.
    if hasattr(app, "run"):
        app.run(debug=True, port=8060)
    else:
        app.run_server(debug=True, port=8060)
