In [31]:
import pandas as pd
import geopandas as gpd
import numpy as np
import plotly.express as px

In [32]:
# Default locations used when the notebook is run as-is; the following cell
# replaces them when a `papermill` object is available in the namespace.
# - survey_path: directory from which persons/trips/legs parquet files are read
# - spatial_path: parquet file that is read with geopandas
# - output_path: parquet file written by the last cell of the notebook
survey_path = "../../results/surveys/egt_2010/cleaned"
spatial_path = "../../results/surveys/egt_2010/spatial.parquet"
output_path = "../../results/transit/reference.parquet"

In [33]:
# If a `papermill` object exists in the namespace (presumably injected by the
# pipeline runner -- confirm), take the input and output locations from it
# instead of the defaults defined above.
if "papermill" in locals():
    inputs = papermill.input
    survey_path, spatial_path = inputs["survey"], inputs["spatial"]
    output_path = papermill.output[0]

In [34]:
# Locations of the cleaned survey tables inside the survey directory
persons_file = "{}/persons.parquet".format(survey_path)
trips_file = "{}/trips.parquet".format(survey_path)
legs_file = "{}/legs.parquet".format(survey_path)

# Survey tables are plain parquet; the spatial table is read with geopandas
# because its geometry columns are used further below
df_persons = pd.read_parquet(persons_file)
df_trips = pd.read_parquet(trips_file)
df_legs = pd.read_parquet(legs_file)
df_spatial = gpd.read_parquet(spatial_path)

In [35]:
# Relevant transit modes: legs are matched against this list through their
# `transit_mode` column, both to select trips with a vehicular leg and to
# count legs per mode. One `legs_<mode>` column per entry ends up in the output.
modes = ["rail", "subway", "tram", "bus"]

In [36]:
# Keep only trips that are pt and flagged as valid, reduced to the columns
# that are needed in the rest of the notebook
is_valid_pt = (df_trips["mode"] == "pt") & df_trips["is_valid"]

df_trips = df_trips.loc[
    is_valid_pt, ["person_id", "trip_id", "departure_time"]
].copy()

In [37]:
# Identify legs that use a mode other than pt or walk. Trips containing such
# a leg (for instance, access by bike) are not routed.
is_pt_or_walk = df_legs["mode"].isin(["pt", "walk"])
df_remove = df_legs[~is_pt_or_walk]

# Only trips with at least one vehicular leg, i.e. a leg whose transit mode
# is listed in `modes`, are kept.
is_vehicular = df_legs["transit_mode"].isin(modes)
df_keep = df_legs[is_vehicular]

# Selected trips that have a vehicular leg and no leg with a foreign mode
relevant_trips = (
    set(df_trips["trip_id"])
    .intersection(df_keep["trip_id"])
    .difference(df_remove["trip_id"])
)

# Final selection of trips and legs
is_relevant_trip = df_trips["trip_id"].isin(relevant_trips)
is_relevant_leg = df_legs["trip_id"].isin(relevant_trips)

df_trips = df_trips[is_relevant_trip]
df_legs = df_legs[is_relevant_leg]

In [38]:
# Perform counting of legs by mode: one row per (trip, transit mode) holding
# the number of legs of that trip which use that mode
df_modes = df_legs.groupby(["transit_mode", "trip_id"]).size().reset_index(name = "legs")
df_modes = df_modes[df_modes["transit_mode"].isin(modes)]

# Every relevant trip gets a row for every mode, also for modes it does not use.
# `relevant_trips` is a set, so it is sorted here to obtain a row order that
# does not depend on set iteration order and is reproducible between runs.
full_index = pd.MultiIndex.from_product([
    sorted(relevant_trips), modes
], names = ["trip_id", "transit_mode"])

# Combinations that do not occur in the legs are filled with a count of zero
df_modes = df_modes.set_index(["trip_id", "transit_mode"])
df_modes = df_modes.reindex(full_index, fill_value = 0)

df_modes["legs"] = df_modes["legs"].astype(int)
df_modes = df_modes.reset_index()

In [39]:
# Total number of legs per transit mode over all selected trips. The survey
# weights are not applied here, the bars show raw leg counts.
df_plot = df_modes.groupby("transit_mode")["legs"].sum().reset_index(name = "count")
px.bar(
    df_plot, x = "transit_mode", y = "count",
    title = "Legs by transit mode (unweighted)",
    labels = { "transit_mode": "Transit mode", "count": "Number of legs" }
)

In [40]:
# Perform counting of transfers: the number of counted legs of a trip minus
# one, where everything above three transfers is put into the class 3
legs_per_trip = df_modes.groupby("trip_id")["legs"].sum()
df_transfers = (legs_per_trip - 1).clip(upper = 3).reset_index(name = "transfers")

# Every selected trip has at least one counted leg, so the count cannot be negative
assert (df_transfers["transfers"] >= 0).all()

In [41]:
# Number of trips per transfer class. The survey weights are not applied here.
# Transfers are capped at 3 when they are counted, so the last bar contains
# all trips with three or more transfers.
df_plot = df_transfers.groupby("transfers").size().reset_index(name = "count")
px.bar(
    df_plot, x = "transfers", y = "count",
    title = "Trips by number of transfers (unweighted, 3 = three or more)",
    labels = { "transfers": "Transfers", "count": "Number of trips" }
)

In [42]:
# Restructure modes for merging: one row per trip with one `legs_<mode>`
# column per transit mode
df_wide = df_modes.pivot(index = "trip_id", columns = "transit_mode")

# The pivot yields two-level column labels (value column, transit mode);
# flatten them using the transit mode only
df_wide.columns = ["legs_{}".format(mode) for _value, mode in df_wide.columns]

df_modes = df_wide.reset_index()

In [43]:
# Attach, in this order: the person weights, the leg counts per mode, and the
# transfer counts. All three are default (inner) merges, so trips without a
# match in one of the tables are dropped.
df_trips = (
    df_trips
    .merge(df_persons[["person_id", "weight"]], on = "person_id")
    .merge(df_modes, on = "trip_id")
    .merge(df_transfers, on = "trip_id")
)

In [44]:
# Prepare spatial data: read the x / y coordinates from the origin and
# destination geometry columns into plain columns
df_spatial["origin_x"] = df_spatial["origin_geometry"].x
df_spatial["origin_y"] = df_spatial["origin_geometry"].y
df_spatial["destination_x"] = df_spatial["destination_geometry"].x
df_spatial["destination_y"] = df_spatial["destination_geometry"].y

# Merge in spatial data. The key is given explicitly, as in the other merges
# of this notebook: without `on`, pandas joins on all columns the two frames
# have in common, which would silently change the key if the trips table
# ever gained a column with one of the coordinate names.
df_trips = pd.merge(df_trips, df_spatial[[
    "trip_id",
    "origin_x", "origin_y",
    "destination_x", "destination_y"
]], on = "trip_id")

In [45]:
# Output: number the remaining trips consecutively starting from zero and
# write the resulting table to the output location
request_count = len(df_trips)
df_trips = df_trips.assign(request_index = np.arange(request_count))

df_trips.to_parquet(output_path)