In [102]:
import geopandas as gpd
import pandas as pd
import shapely.geometry as sgeo
import yaml
import numpy as np
import scipy.stats

In [103]:
# Resolve inputs, output and seed. Outside of a Snakemake run (no `snakemake`
# object in scope) fall back to the defaults used for interactive work;
# otherwise take everything from the workflow.
if "snakemake" not in locals():
    demand_path = "../../results/paris/demand/filtered/demand_main_1000.gpkg"
    enrichment_path = "../../resources/paris/enrichment.yml"
    output_path = "../../results/paris/demand/enriched/demand_main_1000.gpkg"
    seed = 1000

else:
    demand_path = snakemake.input["demand"]
    enrichment_path = snakemake.input["enrichment"]
    output_path = snakemake.output[0]
    seed = int(snakemake.params["seed"])

In [104]:
# Number of candidate values drawn from a distribution per batch when sampling
sample_size = 10000

# Initialize the RNG from the configured seed
random_state = np.random.RandomState(seed)

In [105]:
# Load the demand table from demand_path using geopandas
df_demand = gpd.read_file(demand_path)

In [106]:
# Load the enrichment definition (YAML) from enrichment_path.
# NOTE(review): yaml.load with FullLoader can construct more than plain data;
# if the file only holds mappings / lists / scalars, consider yaml.safe_load.
with open(enrichment_path) as f:
    enrichment = yaml.load(f, Loader = yaml.FullLoader)

In [107]:
# Define distribution factories
#
# Each factory takes a definition (a dict) and returns a frozen scipy.stats
# distribution. Factories are registered by name in distribution_factories;
# the name is looked up from the "distribution" entry of a definition.
distribution_factories = {}

def uniform_distribution_factory(definition):
    """Return a uniform distribution on [definition["lower"], definition["upper"]]."""
    return scipy.stats.uniform(
        loc = definition["lower"],
        scale = definition["upper"] - definition["lower"])

distribution_factories["uniform"] = uniform_distribution_factory

def poisson_distribution_factory(definition):
    """Return a Poisson distribution with rate definition["mean"]."""
    return scipy.stats.poisson(mu = definition["mean"])

distribution_factories["poisson"] = poisson_distribution_factory

def normal_distribution_factory(definition):
    """Return a normal distribution with definition["mean"] and definition["std"]."""
    return scipy.stats.norm(loc = definition["mean"], scale = definition["std"])

distribution_factories["normal"] = normal_distribution_factory

def truncated_normal_factory(definition):
    """Return a normal distribution truncated to optional "lower" / "upper" bounds.

    The bounds in the definition are given in the units of the variable;
    scipy.stats.truncnorm expects them standardized as (bound - mean) / std.
    A missing bound leaves that side unbounded.
    """
    mean, std = definition["mean"], definition["std"]

    # Use infinite bounds when a side is not restricted (None is not a valid
    # truncnorm bound).
    lower, upper = -np.inf, np.inf

    if "lower" in definition:
        lower = (definition["lower"] - mean) / std

    if "upper" in definition:
        upper = (definition["upper"] - mean) / std

    return scipy.stats.truncnorm(a = lower, b = upper, loc = mean, scale = std)

distribution_factories["truncated_normal"] = truncated_normal_factory

# Legacy alias: the factory used to be registered under this misspelled name,
# keep it so existing enrichment files continue to work.
distribution_factories["truncaed_normal"] = truncated_normal_factory

In [108]:
# Check profiles: every passenger profile found in the demand must have an
# entry in the enrichment definition.
profiles = df_demand["passenger_profile"].unique()

# Collect all offenders first so the failure names every missing profile
# instead of stopping silently at the first one.
missing_profiles = [
    profile for profile in profiles
    if profile not in enrichment["profiles"]
]

assert len(missing_profiles) == 0, \
    "Profiles missing from enrichment definition: {}".format(missing_profiles)

In [109]:
# Profile filters: one boolean row mask over df_demand per passenger profile
profile_filters = {
    profile: df_demand["passenger_profile"] == profile
    for profile in profiles
}

In [110]:
# Define sampling procedure
def sample(count, definition):
    """Draw `count` values according to `definition`.

    If `definition` is a dict with a "distribution" entry, the matching
    factory from `distribution_factories` builds the distribution and values
    are drawn in batches of `sample_size` using the seeded `random_state`.
    Optional "minimum" / "maximum" entries reject candidates outside of
    [minimum, maximum]; batches are drawn until `count` values were accepted.

    Any other `definition` (e.g. a constant) is returned unchanged.

    Returns a numpy array of length `count`, or `definition` itself.
    Raises ValueError if "minimum" is larger than "maximum".
    """
    if not (isinstance(definition, dict) and "distribution" in definition):
        return definition

    distribution = distribution_factories[definition["distribution"]](definition)

    minimum = definition.get("minimum")
    maximum = definition.get("maximum")

    if minimum is not None and maximum is not None and minimum > maximum:
        # No candidate could ever be accepted; fail instead of looping forever
        raise ValueError("minimum ({}) is larger than maximum ({})".format(minimum, maximum))

    accepted = []
    accepted_count = 0

    while accepted_count < count:
        # Pass the seeded RNG so that results are reproducible
        candidates = distribution.rvs(sample_size, random_state = random_state)

        # Keep only the candidates inside of the requested bounds
        if minimum is not None:
            candidates = candidates[candidates >= minimum]

        if maximum is not None:
            candidates = candidates[candidates <= maximum]

        accepted.append(candidates)
        accepted_count += len(candidates)

    if len(accepted) == 0:
        # Nothing requested (count <= 0)
        return np.array([])

    return np.concatenate(accepted)[:count]

In [111]:
# Maximum wait time: filled per profile from the enrichment definition,
# unless the demand already provides the column
if "maximum_wait_time" not in df_demand.columns:
    for profile in profiles:
        definition = enrichment["profiles"][profile]["maximum_wait_time"]
        f = profile_filters[profile]

        df_demand.loc[f, "maximum_wait_time"] = sample(np.count_nonzero(f), definition)

df_demand["maximum_wait_time"] = df_demand["maximum_wait_time"].astype(float)

In [112]:
# Interaction time: filled per profile from the enrichment definition,
# unless the demand already provides the column
if "interaction_time" not in df_demand.columns:
    for profile in profiles:
        definition = enrichment["profiles"][profile]["interaction_time"]
        f = profile_filters[profile]

        df_demand.loc[f, "interaction_time"] = sample(np.count_nonzero(f), definition)

df_demand["interaction_time"] = df_demand["interaction_time"].astype(float)

In [113]:
# Prebooking time: optional per profile, profiles without a definition get 0.0
if "prebooking_time" not in df_demand.columns:
    for profile in profiles:
        # Resolve the row filter before branching, both branches need the
        # filter of the *current* profile
        f = profile_filters[profile]
        profile_definition = enrichment["profiles"][profile]

        if "prebooking_time" in profile_definition:
            definition = profile_definition["prebooking_time"]
            df_demand.loc[f, "prebooking_time"] = sample(np.count_nonzero(f), definition)

        else:
            df_demand.loc[f, "prebooking_time"] = 0.0

df_demand["prebooking_time"] = df_demand["prebooking_time"].astype(float)

In [None]:
# Pooling: the poolable flag is copied from the profile as-is, the detour
# factor goes through the sampling procedure. Existing columns are kept.
if "is_poolable" not in df_demand.columns:
    for profile in profiles:
        f = profile_filters[profile]
        df_demand.loc[f, "is_poolable"] = enrichment["profiles"][profile]["poolable"]

if "detour_factor" not in df_demand.columns:
    for profile in profiles:
        f = profile_filters[profile]
        definition = enrichment["profiles"][profile]["detour_factor"]
        df_demand.loc[f, "detour_factor"] = sample(np.count_nonzero(f), definition)

# Normalize the column types
df_demand["is_poolable"] = df_demand["is_poolable"].astype(bool)
df_demand["detour_factor"] = df_demand["detour_factor"].astype(float)

In [116]:
# Group size: filled per profile from the enrichment definition, unless the
# demand already provides the column; stored as integer
if "group_size" not in df_demand.columns:
    for profile in profiles:
        definition = enrichment["profiles"][profile]["group_size"]
        f = profile_filters[profile]

        df_demand.loc[f, "group_size"] = sample(np.count_nonzero(f), definition)

df_demand["group_size"] = df_demand["group_size"].astype(int)

In [None]:
# Add request ID of the form "<running number>:<passenger profile>"
request_numbers = np.arange(len(df_demand)).astype(str)
df_demand["request_id"] = request_numbers
df_demand["request_id"] = df_demand["request_id"] + (":" + df_demand["passenger_profile"])

# Departure time is a plain copy of the reference time
df_demand["departure_time"] = df_demand["reference_time"]

In [118]:
# Output: write the enriched demand to output_path
df_demand.to_file(output_path)