In [None]:
import geopandas as gpd
import gzip, re, pickle
import pandas as pd
import numpy as np

In [None]:
# Resolve the input and output locations. When the notebook is executed with a
# "snakemake" object in scope, the paths come from it; otherwise the local
# default paths below are used.
if "snakemake" not in locals():
    demand_path = "../../results/paris/demand/profile/demand_main_economy_1000.gpkg"
    simulation_path = "../../results/paris/matsim/output_main_economy_1000_100_14"
    output_path = "../../results/paris/analysis/main_economy_1000_100_14.pickle"

else:
    demand_path = snakemake.input["demand"]
    simulation_path = snakemake.input["simulation"]
    output_path = snakemake.output[0]

In [None]:
# Container for all result tables; it is filled by the analysis cells and
# written to disk at the end of the notebook
output = {}

In [None]:
# Load the demand from the configured path. The following cells rely on its
# "request_id" and "passenger_profile" columns.
df_demand = gpd.read_file(demand_path)

In [None]:
# Extract the timing of every request from the simulation events file.
#
# Each relevant event line carries a time="..." and a person="..." attribute.
# The request identifier is rebuilt from the second and third ":"-separated
# fields of the first person listed in the event.
time_regex = re.compile(rb"time=\"(.+?)\"")
request_regex = re.compile(rb"request=\"(.+?)\"")
person_regex = re.compile(rb"person=\"(.+?)\"")
unshared_ride_time_regex = re.compile(rb"unsharedRideTime=\"(.+?)\"")

# Columns derived from the events. They are always created, so the following
# cells can rely on them even if an event type (e.g. a rejection) never occurs.
event_columns = [
    "submission_time", "departure_time", "pickup_time", "wait_time",
    "dropoff_time", "travel_time", "detour_factor", "rejection_time",
]

# Values are collected per column as {request id: value} and written to the
# data frame once at the end, instead of one slow .loc write per event.
event_values = {column: {} for column in event_columns}

departure_times = {}
unshared_ride_time = {}


def parse_event(line):
    """Return the (time, request id) pair encoded in one raw event line."""
    time = float(time_regex.search(line).group(1))
    person = person_regex.search(line).group(1).split(b",")[0].decode()
    return time, ":".join(person.split(":")[1:3])


# Read events
with gzip.open("{}/output_events.xml.gz".format(simulation_path)) as f:
    for line in f:
        if b"submitted" in line:
            time, request = parse_event(line)
            event_values["submission_time"][request] = time
            unshared_ride_time[request] = float(unshared_ride_time_regex.search(line).group(1))

        if b"passenger waiting" in line:
            time, request = parse_event(line)
            event_values["departure_time"][request] = time
            departure_times[request] = time

        if b"passenger picked up" in line:
            time, request = parse_event(line)
            event_values["pickup_time"][request] = time
            event_values["wait_time"][request] = time - departure_times[request]

        if b"passenger dropped off" in line:
            time, request = parse_event(line)
            travel_time = time - departure_times[request]
            reference_time = unshared_ride_time[request]

            event_values["dropoff_time"][request] = time
            event_values["travel_time"][request] = travel_time

            # A zero unshared ride time would raise a ZeroDivisionError; the
            # detour factor is undefined in that case
            event_values["detour_factor"][request] = (
                travel_time / reference_time if reference_time != 0.0 else np.nan)

        if b"PassengerRequest rejected" in line:
            time, request = parse_event(line)
            event_values["rejection_time"][request] = time

# One row per demand request, indexed by request_id. Requests without a value
# for a column get NaN; events of requests that are not part of the demand
# are ignored.
df_events = df_demand[["request_id"]].copy().set_index("request_id")

for column in event_columns:
    values = event_values[column]
    df_events[column] = np.array(
        [values.get(request, np.nan) for request in df_events.index], dtype = float)

In [None]:
# Attach the event-derived columns to every request. The left join keeps all
# requests; those without a matching entry in df_events get missing values.
df_demand = df_demand[["request_id", "passenger_profile"]].merge(
    df_events, how = "left", on = "request_id")

In [None]:
# Append a copy of every request under the aggregate profile "all", so that
# statistics grouped by passenger_profile also yield overall figures
df_all_profiles = df_demand.copy()
df_all_profiles["passenger_profile"] = "all"

df_demand = pd.concat([df_demand, df_all_profiles])

In [None]:
# Daily indicators, one row per passenger profile
df_daily = pd.DataFrame(index = pd.Index(df_demand["passenger_profile"].unique(), name = "passenger_profile")).reset_index()

# Number of requests per profile
df_daily = pd.merge(df_daily,
    df_demand.groupby("passenger_profile").size().reset_index(name = "requests"),
    on = "passenger_profile", how = "left")

# Number of rejected requests per profile (requests that have a rejection time)
df_daily = pd.merge(df_daily,
    df_demand[~df_demand["rejection_time"].isna()].groupby("passenger_profile").size().reset_index(name = "rejections"),
    on = "passenger_profile", how = "left")

# Profiles without any rejection are absent from the grouped counts and come
# out of the left join as missing values; they have zero rejections. Without
# this, their rejection rate would be NaN instead of 0.
df_daily["rejections"] = df_daily["rejections"].fillna(0).astype(int)

df_daily["rejection_rate"] = df_daily["rejections"] / df_daily["requests"]

# Mean wait time, travel time and detour factor per profile; requests with a
# missing value do not contribute to the respective mean
df_mean = df_demand[["passenger_profile", "wait_time", "travel_time", "detour_factor"]].copy()
df_mean = df_mean.groupby("passenger_profile")[["wait_time", "travel_time", "detour_factor"]].mean().reset_index()
df_daily = pd.merge(df_daily, df_mean, how = "left", on = "passenger_profile")

output["daily"] = df_daily

In [None]:
# Hourly indicators: one row per combination of hour (0 to 23) and profile
hourly_keys = ["hour", "passenger_profile"]
mean_columns = ["wait_time", "travel_time", "detour_factor"]

df_hourly = pd.DataFrame(index = pd.MultiIndex.from_product(
    [np.arange(24), df_demand["passenger_profile"].unique()],
    names = hourly_keys)).reset_index()

# Count the events of each kind by the hour in which they happen
for slot in ["submission", "departure", "pickup", "dropoff", "rejection"]:
    time_column = slot + "_time"
    count_column = slot + "s"

    df_partial = (
        df_demand[[time_column, "passenger_profile"]]
        .assign(hour = lambda df: df[time_column] // 3600)
        .groupby(hourly_keys).size()
        .reset_index(name = count_column)
    )

    df_hourly = pd.merge(df_hourly, df_partial, how = "left", on = hourly_keys)

# Mean indicators, attributed to the hour of the departure time
df_mean = (
    df_demand[["departure_time", "passenger_profile"] + mean_columns]
    .assign(hour = lambda df: df["departure_time"] // 3600)
    .groupby(hourly_keys)[mean_columns].mean()
    .reset_index()
)

df_hourly = pd.merge(df_hourly, df_mean, how = "left", on = hourly_keys)

df_hourly["rejection_rate"] = df_hourly["rejections"] / df_hourly["submissions"]

# Combinations without any observation are reported as zero
df_hourly = df_hourly.fillna(0.0)

output["hourly"] = df_hourly

In [None]:
# Persist all collected result tables as a single pickle file
with open(output_path, "wb+") as output_file:
    pickle.dump(output, output_file)