In [308]:
import warnings

import geopandas as gpd
import numpy as np
import pandas as pd
import plotly.express as px

In [309]:
# Resolve paths and parameters. When the notebook is executed with a
# `snakemake` object in scope, everything comes from it; otherwise the
# hard-coded values below are used (e.g. for interactive runs).
if "snakemake" not in locals():
    # Stand-alone defaults
    target_path = "../../../results/brussels/population/passengers_with_departure_hour_seed2000.parquet"
    distribution_path = "../../../results/brussels/airport/daily_totals.parquet"
    output_path = "../../../results/brussels/population/passengers_with_access_seed2000.parquet"
    attributes = ["is_access"]

    weight = "weight"
    match = []
    seed = 2000

else:
    # Inputs and outputs
    target_path = snakemake.input["target"]
    distribution_path = snakemake.input["distribution"]
    output_path = snakemake.output[0]

    # Mandatory parameter: columns to copy from the distribution
    attributes = snakemake.params["attributes"]

    # Optional parameter: name of the weight column (defaults to "weight")
    if "weight" in snakemake.params.keys():
        weight = snakemake.params["weight"]
    else:
        weight = "weight"

    # Optional parameter: columns on which target and distribution are matched
    if "match" in snakemake.params.keys():
        match = snakemake.params["match"]
    else:
        match = []

    # Mandatory parameter: seed of the random number generator
    seed = snakemake.params["seed"]

In [310]:
# Load data
# - target: read through GeoPandas
# - distribution: plain table providing the attribute and weight columns
df_target = gpd.read_parquet(path = target_path)
df_distribution = pd.read_parquet(path = distribution_path)

In [311]:
# Seeded random state shared by all sampling steps below, so that results
# are reproducible for a given seed
random = np.random.RandomState(seed = seed)

In [312]:
# Unconditional sampling: without matching columns, one distribution row is
# drawn per target row (with replacement, proportional to the weight column)
# and its attributes are copied over by position.
if len(match) < 1:
    df_sample = df_distribution.loc[:, attributes + [weight]].sample(
        n = len(df_target),
        replace = True,
        weights = weight,
        random_state = random,
    )

    for column in attributes:
        # .values drops the sampled index so that assignment is positional
        df_target[column] = df_sample[column].values

In [313]:
# Conditional sampling: every distinct combination of values of the `match`
# columns in the distribution forms a slot. The target rows of a slot draw
# their attributes (with replacement, proportional to the weight column) only
# from the distribution rows belonging to the same slot.
if len(match) > 0:
    df_slots = df_distribution[match].drop_duplicates()

    # Tracks which target rows fall into any slot, to report gaps afterwards
    f_assigned = np.zeros((len(df_target),), dtype = bool)

    for _, row in df_slots.iterrows():
        f_target = np.ones((len(df_target),), dtype = bool)
        f_distribution = np.ones((len(df_distribution),), dtype = bool)

        for column in match:
            f_target &= df_target[column] == row[column]
            f_distribution &= df_distribution[column] == row[column]

        count = np.count_nonzero(f_target)

        if count == 0:
            # Nothing to assign for this slot. Skipping it also avoids a
            # sampling error when the weights of the slot sum to zero.
            continue

        df_sample = df_distribution.loc[f_distribution, attributes + [weight]].sample(
            n = count, replace = True, weights = weight, random_state = random)

        for column in attributes:
            # .values drops the sampled index so that assignment is positional
            df_target.loc[f_target, column] = df_sample[column].values

        f_assigned |= f_target

    # Guarantee that the attribute columns exist even if no slot had target rows
    for column in attributes:
        if column not in df_target:
            df_target[column] = np.nan

    # Target rows whose combination of match values does not occur in the
    # distribution cannot be sampled; make that visible instead of silently
    # writing missing values to the output.
    unassigned = np.count_nonzero(~f_assigned)

    if unassigned > 0:
        warnings.warn(
            "{} of {} target rows match no combination of {} in the distribution; "
            "their attributes {} were not sampled".format(
                unassigned, len(df_target), list(match), list(attributes)))

In [314]:
# Output: write the target with the sampled attribute columns
df_target.to_parquet(path = output_path)