In [1]:
import pandas as pd
import geopandas as gpd

In [2]:
# Resolve the input tables, the output location and the attribute list.
# When a `snakemake` object is present in the namespace, its values are used;
# otherwise the notebook falls back to the defaults below for manual runs.
if "snakemake" not in locals():
    # Defaults for running the notebook by hand
    input_paths = [
        "../../../results/belgium/census/sectors.parquet",
        "../../../results/belgium/census/municipalities.parquet",
    ]
    output_path = "../../../results/belgium/demand/initial_population.parquet"
    attributes = ["municipality_id", "sector_index", "sex", "age_class"]
else:
    # Values provided through the `snakemake` object
    input_paths = snakemake.input
    output_path = snakemake.output[0]
    attributes = snakemake.params["attributes"]

In [3]:
# Build the initial population table by combining the distinct attribute
# values found in each input table. The table starts as a single row that
# only carries a unit weight and grows with every table that is joined in.
df_population = pd.DataFrame({ "weight": [1.0] })

for path in input_paths:
    df_marginal = pd.read_parquet(path)

    # Keep only the requested attributes that this table provides. They are
    # picked in the order given by `attributes` (de-duplicated) instead of via
    # a set intersection, because set iteration order is not stable between
    # interpreter runs and would make the output column order irreproducible.
    columns = [name for name in dict.fromkeys(attributes) if name in df_marginal.columns]

    if len(columns) == 0:
        # This table provides none of the requested attributes. Joining its
        # column-less rows would only multiply the existing population rows,
        # so it is skipped.
        continue

    df_marginal = df_marginal[columns].drop_duplicates()

    # Attributes that are already part of the population serve as join keys
    overlap = [name for name in columns if name in df_population.columns]

    if len(overlap) > 0:
        # Inner join: keep only the combinations that exist in this table
        df_population = pd.merge(df_population, df_marginal, on = overlap, how = "inner")
    else:
        # Cross join: pair every population row with every row of this table
        df_population = pd.merge(df_population, df_marginal, how = "cross")

In [4]:
# Duplicate every population row, once for each value of the passenger flag
df_population = df_population.merge(
    pd.DataFrame({"is_passenger": [True, False]}),
    how="cross",
)

In [5]:
# Write the assembled population table to the configured parquet file
df_population.to_parquet(path=output_path)