In [None]:
import numpy as np
import pandas as pd
import geopandas as gpd
import scipy.stats as ss
import plotly.express as px
import shapely.geometry as sgeo
import yaml

In [None]:
# Defaults used when the notebook is run without a `snakemake` object in scope
airport_path = "../../resources/demand/cdg.yml"
homes_path = "../../resources/idf_homes.gpkg"
output_path = "../../results/demand/cdg.gpkg"
seed = 0
airport_name = "cdg"

# When a `snakemake` object is present, take paths and wildcards from it instead
if "snakemake" in locals():
    airport_path = snakemake.input["airport"]
    homes_path = snakemake.input["homes"]
    output_path = snakemake.output[0]

    seed = int(snakemake.wildcards["seed"])
    airport_name = snakemake.wildcards["airport"]

In [None]:
# Initialize the seeded random number generator. The same RandomState
# instance is handed to every sampling call further down in this notebook.
random_state = np.random.RandomState(seed = seed)

In [None]:
# Load the airport configuration from YAML
# NOTE(review): yaml.FullLoader is more permissive than yaml.safe_load;
# confirm that the airport file is always a trusted project resource.
with open(airport_path) as airport_file:
    airport = yaml.load(airport_file, Loader = yaml.FullLoader)

# Load homes and keep only the geometry column
df_homes = gpd.read_file(homes_path)
df_homes = df_homes[["geometry"]]

In [None]:
# Extract data: profile names in the order they appear in the configuration
profiles = list(airport["profiles"].keys())

# Total number of passengers as given in the configuration
passengers = airport["passengers"]

# Profile share, normalized to sum to one.
# The array is created as float explicitly: if the YAML file contains integer
# shares, np.array would produce an integer array and the in-place division
# below would fail because the float result cannot be cast back to integers.
profile_share = np.array([
    airport["profiles"][profile]["share"]
    for profile in profiles
], dtype = float)
profile_share /= profile_share.sum()

# Departure shares, normalized to sum to one (float for the same reason)
departure_share = np.array(airport["temporal"], dtype = float)

# The rest of the notebook pairs these shares with the hours 0..23, so fail
# early with a clear message when the configuration has a different length.
if departure_share.shape != (24,):
    raise ValueError(
        "Expected 24 hourly values in 'temporal', got shape %s" % (departure_share.shape,))

departure_share /= departure_share.sum()

In [None]:
# Bar chart of the normalized profile shares, expressed in percent
profile_share_percent = profile_share * 1e2
df_plot = pd.DataFrame({
    "Profile": profiles,
    "Percentage [%]": profile_share_percent,
})
px.bar(df_plot, x = "Profile", y = "Percentage [%]", title = "Share of user profiles")

In [None]:
# Estimate the mean group size per profile by simulation
estimation_samples = int(1e6)
mean_group_sizes = []

for profile in profiles:
    group_config = airport["profiles"][profile]["group_size"]

    # Draw from a Poisson distribution and discard draws above the upper bound
    draws = ss.poisson(mu = group_config["mean"]).rvs(estimation_samples, random_state = random_state)
    draws = draws[draws <= group_config["upper"]]

    # Mean of the retained (truncated) draws
    mean_size = np.mean(draws)

    print("Mean group size for", profile, "is", mean_size, "based on", len(draws), "samples")
    mean_group_sizes.append(mean_size)

group_size_per_profile = np.array(mean_group_sizes)

In [None]:
# Bar chart of the estimated mean group size of each profile
df_plot = pd.DataFrame({
    "Profile": profiles,
    "Avg. group size": group_size_per_profile,
})
px.bar(df_plot, x = "Profile", y = "Avg. group size", title = "Average group size by user profile")

In [None]:
# Build the hourly departure distribution of every profile.
# Each row holds one profile, each column one hour of the day.
departures_per_profile = np.zeros((len(profiles), 24))
hours = np.arange(24)

for profile_index, profile in enumerate(profiles):
    hourly_weights = np.zeros((24,))

    # Sum the normal densities of all configured components, evaluated at the full hours
    for component in airport["profiles"][profile]["temporal"]:
        density = ss.norm(loc = component["mean"], scale = component["std"])
        hourly_weights += density.pdf(hours)

    # Normalize so that the hourly weights of the profile sum to one
    hourly_weights /= np.sum(hourly_weights)
    departures_per_profile[profile_index, :] = hourly_weights

In [None]:
# One frame per profile with its hourly distribution in percent
hours = np.arange(24)
profile_frames = []

for profile_index, profile in enumerate(profiles):
    profile_frames.append(pd.DataFrame({
        "Profile": profile,
        "Hour": hours,
        "Percentage [%]": departures_per_profile[profile_index] * 1e2,
    }))

df_plot = pd.concat(profile_frames)

px.line(df_plot, color = "Profile", x = "Hour", y = "Percentage [%]", title = "Distribution of departures per hour for profiles")

In [None]:
# Line chart of the normalized overall departure share per hour, in percent
df_plot = pd.DataFrame({ "Hour": np.arange(24), "Percentage [%]": departure_share * 1e2 })
# Title spelling fixed ("deprature" -> "departure")
px.line(df_plot, x = "Hour", y = "Percentage [%]", title = "Overall hourly departure share for airport")

In [None]:
# Weight the profile-by-hour matrix, starting from the hourly distributions
# per profile. Note that the weight is given in *passengers*.
# A single pass is made: rows are scaled first, then columns.

weights = departures_per_profile.copy()

# Scale every profile row so that it sums to the profile's share
for profile_index, share in enumerate(profile_share):
    row_total = np.sum(weights[profile_index, :])
    weights[profile_index, :] *= share / row_total

# Scale every hour column so that it sums to the hour's departure share
for hour in range(24):
    column_total = np.sum(weights[:, hour])
    weights[:, hour] *= departure_share[hour] / column_total

In [None]:
# Convert the weight matrix into a long data frame with one row per
# (profile, hour) combination
hours = np.arange(24)
demand_frames = []

for profile_index, profile in enumerate(profiles):
    # Passengers in each hour for this profile
    hourly_passengers = weights[profile_index,:] * passengers

    demand_frames.append(pd.DataFrame({
        "hour": hours,
        "profile": profile,
        "passengers": hourly_passengers,
        # Requests are passengers divided by the profile's mean group size
        "requests": hourly_passengers / group_size_per_profile[profile_index],
    }))

df_demand = pd.concat(demand_frames)

In [None]:
# Stacked bar chart of passengers per hour, colored by profile
passenger_labels = { "hour": "Hour", "passengers": "Passengers", "profile": "Profile" }
df_plot = df_demand.rename(columns = passenger_labels)
px.bar(df_plot, x = "Hour", y = "Passengers", color = "Profile", title = "Passengers by hour")

In [None]:
# Stacked bar chart of requests per hour, colored by profile
request_labels = { "hour": "Hour", "requests": "Requests", "profile": "Profile" }
df_plot = df_demand.rename(columns = request_labels)
px.bar(df_plot, x = "Hour", y = "Requests", color = "Profile", title = "Requests by hour")

In [None]:
# Compare the configured hourly distribution of each profile ("Target")
# with the row-normalized weights ("Output")
hours = np.arange(24)
target_frames = []
output_frames = []

for profile_index, profile in enumerate(profiles):
    target_percentage = departures_per_profile[profile_index] * 1e2
    output_percentage = weights[profile_index] / weights[profile_index].sum() * 1e2

    target_frames.append(pd.DataFrame({
        "Profile": profile, "Hour": hours, "Percentage [%]": target_percentage, "Data": "Target" }))
    output_frames.append(pd.DataFrame({
        "Profile": profile, "Hour": hours, "Percentage [%]": output_percentage, "Data": "Output" }))

# All target frames come first, followed by all output frames
df_plot = pd.concat(target_frames + output_frames)

px.line(df_plot, color = "Profile", x = "Hour", y = "Percentage [%]", line_dash = "Data", title = "Hourly distribution comparison")

In [None]:
# Airport geometry: build a point from the configured coordinates and
# reproject it into the coordinate system of the homes
airport_location = airport["location"]
airport_point = sgeo.Point(*airport_location["coordinates"])

df_airport = gpd.GeoDataFrame(
    pd.DataFrame({ "geometry": [airport_point] }),
    crs = airport_location["crs"])
df_airport = df_airport.to_crs(df_homes.crs)

airport_geometry = df_airport["geometry"].values[0]

In [None]:
# Statistical rounding / TRS: turn the fractional number of requests per
# row into an integer by rounding up with a probability equal to the
# fractional part, and rounding down otherwise.
#
# A dedicated name is used here instead of `weights`, so the profile-by-hour
# weight matrix stays intact and the comparison plot can be re-run.
expected_requests = df_demand["requests"].values
fractional_part = np.mod(expected_requests, 1.0)

# random_sample draws from [0, 1); the strict comparison makes sure that a
# value without fractional part is never rounded up.
round_up = random_state.random_sample(len(expected_requests)) < fractional_part

df_demand["requests"] = (np.floor(expected_requests) + round_up).astype(int)

In [None]:
# Expand data set: repeat every row as many times as it has requests,
# addressing rows by position
request_counts = df_demand["requests"].values
row_positions = np.repeat(np.arange(len(df_demand)), request_counts)

# The aggregate columns are no longer meaningful on the expanded rows
df_demand = df_demand.iloc[row_positions].drop(columns = ["passengers", "requests"])

In [None]:
# Sample geometries: draw one home location (with replacement) per request
total_requests = len(df_demand)
half = total_requests // 2

geometries = df_homes["geometry"].sample(n = total_requests, replace = True, random_state = random_state)

# The first half of the rows goes from the home to the airport, the
# remaining rows go from the airport to the home.
# NOTE(review): the direction is decided by row position, not at random. If
# the rows are still ordered by profile and hour at this point, the direction
# is correlated with the profile -- confirm that this is intended.
request_lines = []

for position, geometry in enumerate(geometries):
    if position < half:
        request_lines.append(sgeo.LineString([geometry, airport_geometry]))
    else:
        request_lines.append(sgeo.LineString([airport_geometry, geometry]))

df_demand["geometry"] = request_lines
df_demand = gpd.GeoDataFrame(df_demand, crs = df_homes.crs)

In [None]:
# Sample group size: every request receives a draw from the profile's
# Poisson distribution, truncated at the configured upper bound.
# Number of draws per batch; draws above the upper bound are discarded.
assignment_samples = int(1e6)

for profile in profiles:
    config = airport["profiles"][profile]["group_size"]

    # Rows belonging to this profile and the number of values they need
    is_profile = df_demand["profile"] == profile
    required = np.count_nonzero(is_profile)

    # Poisson draws are never negative, so a negative bound rejects everything
    if config["upper"] < 0:
        raise ValueError("Upper group size bound must not be negative for profile %s" % profile)

    distribution = ss.poisson(mu = config["mean"])

    samples = distribution.rvs(assignment_samples, random_state = random_state)
    samples = samples[samples <= config["upper"]]

    # Top up with further batches when the first batch left fewer accepted
    # draws than there are requests; otherwise the assignment below would
    # fail on a length mismatch.
    while len(samples) < required:
        additional = distribution.rvs(assignment_samples, random_state = random_state)
        samples = np.concatenate([samples, additional[additional <= config["upper"]]])

    df_demand.loc[is_profile, "group_size"] = samples[:required]

In [None]:
# Sample departure time: start of the request's hour (hour * 3600) plus a
# uniformly drawn offset in [0, 3600)
offset_within_hour = random_state.random_sample(len(df_demand)) * 3600.0
hour_start = df_demand["hour"] * 3600

df_demand["departure_time"] = hour_start + offset_within_hour

# The hour is now contained in the departure time
df_demand = df_demand.drop(columns = ["hour"])

In [None]:
# Generate additional columns holding the coordinates of the first (origin)
# and second (destination) point of every request line
def _coordinate_getter(point_index, axis):
    # Returns a function extracting one coordinate of one point of a line
    return lambda line: line.coords[point_index][axis]

coordinate_columns = [
    ("origin_x", 0, 0),
    ("origin_y", 0, 1),
    ("destination_x", 1, 0),
    ("destination_y", 1, 1),
]

for column, point_index, axis in coordinate_columns:
    df_demand[column] = df_demand["geometry"].apply(_coordinate_getter(point_index, axis))

In [None]:
# Attach the airport name and a running index (0, 1, 2, ...) to every request
df_demand = df_demand.assign(
    airport = airport_name,
    request_index = np.arange(len(df_demand)),
)

In [None]:
# Write the generated requests to the configured output path
df_demand.to_file(filename = output_path)