In [None]:
from pathlib import Path

import os
import numpy as np
import pandas as pd

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import branca.colormap as cm
from folium.features import GeoJsonTooltip

import geopandas as gpd
import datetime as dt

import osmnx as ox
from shapely import Point
import folium

# Questions to Explore

## Understand Land use relative to transport
- Where do trips start and end?
- For each hour of the day, which streets are destinations and which are origins? Should weekdays and weekends be analysed separately?
## Reduce Congestion
- Where are the congested zones? What time are they congested? Plot routes which pass through congested zones. Can they be diverted?
- Do the different weight classes have similar congestion zones?
## Reduce car use 
- Compare each trip to the shortest distance. How many trips are far less efficient than the shortest path?
- Which trips could have been replaced by public transport?
- Which trips could have been replaced by cycling?

In [None]:
# Load the processed trip table.
df_trips = pd.read_parquet("../data/processed/all_trips.parquet")

# Make sure both trip timestamps are proper datetimes before using the .dt accessor.
df_trips["StartDate"] = pd.to_datetime(df_trips["StartDate"])
df_trips["EndDate"] = pd.to_datetime(df_trips["EndDate"])

# Derived columns: hour of day for each end of the trip, and the calendar date of the start.
df_trips["StartHour"] = df_trips["StartDate"].dt.hour
df_trips["EndHour"] = df_trips["EndDate"].dt.hour
df_trips["Date"] = df_trips["StartDate"].dt.date

def classify_day(hour):
    """Label a weekday code as ``'weekend'`` or ``'weekday'``.

    Despite its name, ``hour`` receives a day-of-week code (this notebook
    applies the function to the ``StartWDay`` / ``EndWDay`` columns).
    Codes 1 and 7 map to ``'weekend'``; every other value maps to
    ``'weekday'``.
    NOTE(review): presumably 1 = Sunday and 7 = Saturday -- confirm against
    the data dictionary.
    """
    if hour in (1, 7):
        return 'weekend'
    return 'weekday'

# Classify the weekday code of each trip's start and end as 'weekday' or 'weekend'.
df_trips['StartDayType'] = df_trips['StartWDay'].apply(classify_day)
df_trips['EndDayType'] = df_trips['EndWDay'].apply(classify_day)

# A trip is a 'weekend' trip only when BOTH its start and its end fall on a weekend day;
# anything else (including trips that straddle the weekend boundary) counts as 'weekday'.
# Vectorised with np.where instead of a row-wise DataFrame.apply: same labels, much faster,
# and it also works when df_trips is empty.
df_trips['DayType'] = np.where(
    (df_trips['StartDayType'] == 'weekend') & (df_trips['EndDayType'] == 'weekend'),
    'weekend',
    'weekday',
)

# Trip starts and ends within the city

In [None]:
# Geocode the city: one-row GeoDataFrame with the boundary geometry plus lat/lon and bbox_* columns.
siegburg = ox.geocode_to_gdf("Siegburg")

# Pull scalars out of the single result row first: calling float() on a one-element
# Series is deprecated in pandas.
siegburg_lat = float(siegburg["lat"].iloc[0])
siegburg_lon = float(siegburg["lon"].iloc[0])

# `zoom_start` is the folium.Map keyword for the initial zoom (as used further down in this
# notebook); the previous `zoom_level` keyword is not a Map parameter.
m = folium.Map(location=(siegburg_lat, siegburg_lon), zoom_start=17)

# boundary
s_boundary = folium.GeoJson(data=siegburg.to_json())
s_boundary.add_to(m)

# centre of the bounding box (midpoint of the bbox edges, not the polygon centroid)
bbox_west = float(siegburg["bbox_west"].iloc[0])
bbox_east = float(siegburg["bbox_east"].iloc[0])
bbox_south = float(siegburg["bbox_south"].iloc[0])
bbox_north = float(siegburg["bbox_north"].iloc[0])
s_centroid = folium.GeoJson(
    data=Point(
        bbox_west + 0.5 * (bbox_east - bbox_west),
        bbox_south + 0.5 * (bbox_north - bbox_south),
    )
)
s_centroid.add_to(m)

#m

In [None]:
def add_geometry(df, p):
    """Return a copy of ``df`` with a ``{p}Point`` column of shapely Points.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain the columns ``{p}LocLat`` and ``{p}LocLon``.
    p : str
        Column prefix, e.g. ``"Start"`` or ``"End"``.

    Returns
    -------
    pd.DataFrame
        A copy of ``df`` (the input is not modified) with the extra column
        ``{p}Point`` holding ``Point(lon, lat)`` for every row.
    """
    # Build the points by zipping the two coordinate columns rather than with a
    # row-wise DataFrame.apply: apply(axis=1) is slow and returns an empty
    # DataFrame (not a Series) when df has no rows.
    points = [
        Point(lon, lat)
        for lon, lat in zip(df[f"{p}LocLon"], df[f"{p}LocLat"])
    ]
    df = df.copy()
    df[f"{p}Point"] = pd.Series(points, index=df.index, dtype=object)
    return df

def _within_bbox(df, prefix, bbox):
    """Boolean mask of rows whose ``{prefix}LocLat``/``{prefix}LocLon`` lie strictly inside ``bbox``."""
    lat = df[f"{prefix}LocLat"]
    lon = df[f"{prefix}LocLon"]
    return (
        (bbox["south"] < lat) & (lat < bbox["north"])
        & (bbox["west"] < lon) & (lon < bbox["east"])
    )


def _nearest_edges_projected(graph_proj, lons, lats):
    """Nearest edge and distance for lon/lat points, measured in the projected graph's CRS units."""
    # Reproject the WGS84 points into the CRS of the projected graph so that the
    # Euclidean distance computed by nearest_edges is in that CRS's units.
    points = gpd.GeoSeries(gpd.points_from_xy(lons, lats), crs="EPSG:4326").to_crs(graph_proj.graph["crs"])
    return ox.distance.nearest_edges(G=graph_proj, X=list(points.x), Y=list(points.y), return_dist=True)


def filter_origin_destination_in_location_bbox(df_trips:pd.DataFrame, location_name:str, max_distance=100) -> tuple:
    """Keep the trips that start or end close to the street network of a place.

    Two filtering stages are applied:

    1. keep trips whose start OR end coordinates lie strictly inside the
       bounding box returned by ``ox.geocode_to_gdf(location_name)``;
    2. match both trip ends to their nearest edge of the street graph built by
       ``ox.graph_from_address(location_name)`` (osmnx defaults) and keep trips
       whose start OR end is closer than ``max_distance`` to that edge.

    Parameters
    ----------
    df_trips : pd.DataFrame
        Needs the columns ``StartLocLat``, ``StartLocLon``, ``EndLocLat`` and
        ``EndLocLon``.
    location_name : str
        Place name passed to the osmnx geocoder.
    max_distance : float, default 100
        Maximum point-to-edge distance. The graph is projected with
        ``ox.project_graph`` before matching, so this is in the units of the
        projected CRS (metres for osmnx's default UTM projection).

    Returns
    -------
    tuple
        ``(trips_near_network, location_graph)``: the filtered trips with the
        added columns ``StartPoint``, ``EndPoint``, ``StartNearestEdge``,
        ``StartDistanceToEdge``, ``EndNearestEdge`` and ``EndDistanceToEdge``,
        and the unprojected street graph.
    """
    print(f"total trips: {df_trips.shape[0]}")
    location = ox.geocode_to_gdf(location_name)
    bbox = {side: location.loc[0, f"bbox_{side}"] for side in ("south", "north", "west", "east")}

    # First filter pass: trips that start or end inside the bounding box.
    # A single OR-ed mask keeps each trip once; concatenating the "starts" and
    # "ends" subsets would duplicate every trip that both starts and ends inside.
    in_bbox = _within_bbox(df_trips, "Start", bbox) | _within_bbox(df_trips, "End", bbox)
    trips_in_bbox = df_trips.loc[in_bbox, :].copy()
    for p in ["Start", "End"]:
        trips_in_bbox = add_geometry(trips_in_bbox, p)

    print(f"trips in bbox: {trips_in_bbox.shape[0]}")

    # Second stage of filtering: distance to the street network.
    location_graph = ox.graph_from_address(location_name)
    # nearest_edges reports distances in the graph's CRS units, which are degrees
    # for the unprojected graph; project first so max_distance is compared
    # against a metric distance. Edge ids (u, v, key) are unchanged by projecting.
    graph_proj = ox.project_graph(location_graph)

    for p in ["Start", "End"]:
        nearest_edge, dist_to_edge = _nearest_edges_projected(
            graph_proj, trips_in_bbox[f"{p}LocLon"], trips_in_bbox[f"{p}LocLat"]
        )
        trips_in_bbox[f"{p}NearestEdge"] = nearest_edge
        trips_in_bbox[f"{p}DistanceToEdge"] = dist_to_edge

    # Trips with a start or an end point within max_distance of a street edge
    near_network = (trips_in_bbox.StartDistanceToEdge < max_distance) | (trips_in_bbox.EndDistanceToEdge < max_distance)
    trips_near_network = trips_in_bbox.loc[near_network, :].copy()

    print(f"trips within {max_distance}m of network: {trips_near_network.shape[0]}")

    # Return the unprojected graph: callers plot it on lat/lon web maps.
    return trips_near_network, location_graph

# Trips touching Siegburg plus the street graph they were matched against.
# Note: this rebinds `siegburg`, which held the geocoded boundary in an earlier cell.
siegburg, siegburg_graph = filter_origin_destination_in_location_bbox(
    df_trips=df_trips,
    location_name="siegburg",
)

## Trips starting or ending on each street by time of day

In [None]:
def count_trips_per_street(data:pd.DataFrame, trip_starts=True):
    """Count trip starts (or ends) per street edge in four periods of the day.

    Parameters
    ----------
    data : pd.DataFrame
        Needs the columns ``{T}Hour``, ``{T}NearestEdge`` (``(u, v, key)``
        tuples) and ``{T}DistanceToEdge``, where ``{T}`` is ``"Start"`` or
        ``"End"``.
    trip_starts : bool, default True
        ``True`` counts trip starts, ``False`` counts trip ends.

    Returns
    -------
    pd.DataFrame
        Indexed by a ``(u, v, key)`` MultiIndex with one column per period:
        ``{T}Counts_0-6``, ``{T}Counts_7-12``, ``{T}Counts_13-18`` and
        ``{T}Counts_19-23``. Every row of ``data`` with a non-null
        ``{T}DistanceToEdge`` is counted, whatever that distance is.
    """
    trip_type = "Start" if trip_starts else "End"

    # rows: hour of day, columns: nearest edge, cells: number of trips
    nodes = pd.pivot_table(data=data, values=f"{trip_type}DistanceToEdge", columns=f"{trip_type}NearestEdge", index=f"{trip_type}Hour", aggfunc="count")

    # Label-based slices on the hour index are inclusive on both ends.
    trip_counts = pd.DataFrame(nodes.loc[:6, :].sum().rename("0-6"))
    trip_counts["7-12"] = nodes.loc[7:12, :].sum()
    trip_counts["13-18"] = nodes.loc[13:18, :].sum()
    # Start at 19: hour 18 already belongs to the "13-18" bucket and must not be counted twice.
    trip_counts["19-23"] = nodes.loc[19:, :].sum()
    trip_counts = trip_counts.rename(columns={c: f"{trip_type}Counts_{c}" for c in trip_counts.columns})
    # Turn the edge tuples into a MultiIndex so the result can be joined onto the osmnx edge GeoDataFrame.
    trip_counts.index = pd.MultiIndex.from_tuples(trip_counts.index, names=('u', 'v', 'key'))

    return trip_counts

def color_trips(trip_counts:pd.DataFrame, min_val:int = 0, max_val:int = 100) -> pd.DataFrame:
    """Map every count in ``trip_counts`` to a colour on a white-to-red scale.

    A branca ``LinearColormap`` running from white at ``min_val`` to red at
    ``max_val`` is applied to each value. The result has the same index as
    ``trip_counts``; column names have ``"Counts"`` replaced by ``"Colors"``.
    The input frame is left untouched.
    """
    white_to_red = cm.LinearColormap(colors=["white", "red"], vmin=min_val, vmax=max_val)

    renamed = {name: name.replace("Counts", "Colors") for name in trip_counts.columns}
    trip_colors = trip_counts.copy().rename(columns=renamed)

    # Replace each count by its colour, one column at a time.
    for name in trip_colors.columns:
        trip_colors[name] = trip_colors[name].map(white_to_red)

    return trip_colors

# Per-street trip starts, and the colours used to draw them.
start_counts = count_trips_per_street(data=siegburg, trip_starts=True)
start_colors = color_trips(trip_counts=start_counts)

# Same for trip ends.
end_counts = count_trips_per_street(data=siegburg, trip_starts=False)
end_colors = color_trips(trip_counts=end_counts)

In [None]:
# Edge GeoDataFrame of the street graph (indexed by u, v, key), enriched with the
# per-street counts and colours. Inner joins keep only edges present in all four tables.
nodes, streets = ox.graph_to_gdfs(siegburg_graph)
streets = (
    streets
    .join(start_counts, how="inner")
    .join(start_colors, how="inner")
    .join(end_counts, how="inner")
    .join(end_colors, how="inner")
)

In [None]:
# Preview the joined street table (edge attributes plus count and colour columns).
streets.head()

In [None]:
# What to draw: "Start" or "End" counts, for one of the periods "0-6", "7-12", "13-18", "19-23".
start_or_end = "End"
period = "7-12"


m = folium.Map(location=ox.geocode("siegburg"), zoom_start=16, tiles="CartoDb dark_matter")


def style_function(feature):
    """Colour each street by its precomputed colour for the selected trip end and period."""
    return {
        'color': feature['properties'][f'{start_or_end}Colors_{period}'],
        'weight': 3
    }


# Tooltip showing the trip count of the hovered street
tooltip = GeoJsonTooltip(
    fields=[f'{start_or_end}Counts_{period}'],
    aliases=[f'Trip {start_or_end} ({period}):'],
)

# Add the street layer exactly once. It used to be added twice (first without the
# tooltip), which rendered every street two times on top of itself.
gj = folium.GeoJson(
    streets,
    style_function=style_function,
    tooltip=tooltip
)
gj.add_to(m)


# Title rendered above the map
loc = f'Number of Trip {start_or_end}s during the period {period}'
title_html = '''
             <h3 align="center" style="font-size:16px"><b>{}</b></h3>
             '''.format(loc)

m.get_root().html.add_child(folium.Element(title_html))

m

# Weight Classes per Hour

In [None]:
def plot_trip_counts_by_weight_class(df, weight_classes=(1, 2, 3)):
    """Show one box plot of daily trip starts per hour for each vehicle weight class.

    Trips are counted per ``Date``, ``VehicleWeightClass``, ``StartHour`` and
    ``DayType``. For every weight class a plotly box plot is shown with the
    start hour on the x axis, the per-day trip count on the y axis, and one
    colour per ``DayType``.

    Parameters
    ----------
    df : pd.DataFrame
        Needs the columns ``Date``, ``VehicleWeightClass``, ``StartHour`` and
        ``DayType``.
    weight_classes : iterable, default (1, 2, 3)
        ``VehicleWeightClass`` values to plot, one figure each.
    """
    # Count rows with .size() instead of counting the non-null values of an
    # unrelated column ("Mode"), and give the count column a meaningful name.
    trip_counts = (
        df.groupby(["Date", "VehicleWeightClass", "StartHour", "DayType"])
        .size()
        .reset_index(name="TripCount")
    )

    for weight_class in weight_classes:
        # Daily counts for this weight class only
        filtered_data = trip_counts[trip_counts['VehicleWeightClass'] == weight_class]

        fig = px.box(
            filtered_data,
            x='StartHour',
            y='TripCount',
            color="DayType",
            labels={"StartHour": "Start hour", "TripCount": "Trips per day"},
            title=f"Trip starts for Vehicle Weight Class {weight_class}"
        )
        fig.show()

# One box plot per vehicle weight class, for all trips.
plot_trip_counts_by_weight_class(df=df_trips)

In [None]:
# H3 resolution that appears in the name of the processed file to load.
H3_RESOLUTION = 5
gdf_trips = gpd.read_parquet(
    path=f"../data/processed/all_trips_h3_{H3_RESOLUTION}.parquet",
)

In [None]:
# List the columns available in the H3 trip table.
gdf_trips.columns

In [None]:
import pydeck as pdk


# Ten-class diverging PuOr palette as [R, G, B] triplets.
COLOR_BREWER_MY_COLOR = [  ## https://colorbrewer2.org/#type=diverging&scheme=PuOr&n=10
    [127 , 59  , 8],
    [179 , 88  , 6],
    [224 , 130 , 20],
    [253 , 184 , 99],
    [254 , 224 , 182],
    [216 , 218 , 235],
    [178 , 171 , 210],
    [128 , 115 , 172],
    [84  , 39  , 136],
    [45  , 0   , 75],
]

# Heatmap of trip start locations, drawn from a random half of the trips.
# The sample is seeded so that re-running the notebook reproduces the same figure.
heatmap_layer = pdk.Layer(
    "HeatmapLayer",
    data=gdf_trips[["StartLocLon", "StartLocLat"]].sample(frac=0.5, random_state=42),
    opacity=0.7,
    intensity=12,
    get_position=["StartLocLon", "StartLocLat"],
    aggregation=pdk.types.String("SUM"),
    color_range=COLOR_BREWER_MY_COLOR,
    # threshold=1,
    # get_weight="count",
    pickable=True,
)

# Set the viewport location
view_state = pdk.ViewState(latitude=50,
                           longitude=8,
                           zoom=5,
                           bearing=0,
                           pitch=0)

# Render
r = pdk.Deck(layers=[heatmap_layer],
             initial_view_state=view_state,
             # map_style=pdk.map_styles.CARTO_ROAD,
             # tooltip={"html": """<b>Lat, Lon:</b> {lat}, {lon} <br /><b>Count:</b> {count}"""},
            )
r #.to_html("h3_centroids_ColumnLayer.html")