In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import plotly.express as px

In [None]:
# Resolve paths and parameters. When the notebook is executed by Snakemake a
# `snakemake` object is present in the namespace; otherwise fall back to
# defaults so the notebook can be run on its own.
if "snakemake" not in locals():
    # Stand-alone execution: hard-coded defaults
    input_population_path = "../../../results/belgium/population/weighted_population.parquet"
    input_spatial_path = "../../../results/belgium/census/spatial.parquet"
    output_path = "../../../results/belgium/population/discretized_population.parquet"

    seed = 0
    only_passengers = True

else:
    # Workflow execution: everything is taken from the Snakemake rule
    input_population_path = snakemake.input["population"]
    input_spatial_path = snakemake.input["spatial"]
    output_path = snakemake.output[0]

    seed = snakemake.params["seed"]
    only_passengers = snakemake.params["only_passengers"]

In [None]:
# Initialize the random number generator from the configured seed; it is used
# further down to draw the uniform samples for rounding the weights
random = np.random.RandomState(seed)

In [None]:
# Load generated population data. Later cells read the "weight", "sex" and
# "age_class" columns, and "is_passenger" when only_passengers is set.
df_population = pd.read_parquet(input_population_path)

In [None]:
# Load spatial data
# NOTE(review): df_spatial is not referenced by any other cell visible in this
# notebook; confirm whether this load is still required
df_spatial = gpd.read_parquet(input_spatial_path)

In [None]:
# Calculate how many individuals each row represents by rounding its
# fractional weight stochastically

# Keep only rows with a strictly positive weight ...
f = df_population["weight"].gt(0.0)

# ... and, if requested, only those flagged as passengers
if only_passengers:
    f = f & df_population["is_passenger"]

df_population = df_population[f].copy()

# The integer part of the weight is always generated
weights = df_population["weight"]
individuals = np.floor(weights)

# One additional individual is generated with a probability equal to the
# fractional remainder, using one uniform draw per row
residuals = weights - individuals
sample = random.random_sample(len(df_population))
individuals = individuals + (residuals > sample)

In [None]:
# Generate individuals: repeat the positional index of every row as many
# times as its sampled count, then select those positions
counts = individuals.astype(int).to_numpy()
multiplicator = np.arange(len(df_population)).repeat(counts)

# The weight has been consumed by the replication and is removed
df_population = df_population.iloc[multiplicator].drop(columns = ["weight"])

In [None]:
# Count the generated rows per sex code and show them with readable labels
df_plot = (
    df_population
    .groupby("sex")
    .size()
    .reset_index(name = "passengers")
    .assign(sex = lambda df: df["sex"].replace({ 1: "M", 2: "F" }))
)

px.bar(df_plot, x = "sex", y = "passengers", title = "Generated passengers by sex")

In [None]:
# Count the generated rows per age class; the class is converted to a string
# before plotting
df_plot = (
    df_population
    .groupby("age_class")
    .size()
    .reset_index(name = "passengers")
    .assign(age_class = lambda df: df["age_class"].astype(str))
)

px.bar(df_plot, x = "age_class", y = "passengers", title = "Generated passengers by age")

In [None]:
# Output: write the discretized population (replicated rows, "weight" column
# dropped) to the configured output path
df_population.to_parquet(output_path)