In [None]:
import pandas as pd
import numpy as np

In [None]:
# Resolve all input and output paths. When no `snakemake` object is present in
# the namespace (e.g. the notebook is run by hand), fall back to the hard-coded
# relative paths; otherwise take every path from the snakemake object.
if "snakemake" not in locals():
    # Default locations for a manual run
    input_municipalities_path = "../../../results/madrid/census/municipalities.parquet"
    input_passenger_totals_path = "../../../results/madrid/airport/daily_totals.parquet"
    input_group_sizes_path = "../../../results/madrid/airport/group_sizes.parquet"
    input_locations_path = "../../../results/madrid/osm/locations.parquet"

    output_municipalities_path = "../../../results/madrid/marginals/municipalities.parquet"
    output_passengers_path = "../../../results/madrid/marginals/passengers.parquet"
    output_young_passengers_path = "../../../results/madrid/marginals/young_passengers.parquet"
    output_missing_locations_path = "../../../results/madrid/marginals/missing_locations.parquet"

else:
    # Paths are provided through the named inputs / outputs of the snakemake object
    input_municipalities_path = snakemake.input["municipalities"]
    input_passenger_totals_path = snakemake.input["daily"]
    input_group_sizes_path = snakemake.input["group_sizes"]
    input_locations_path = snakemake.input["locations"]

    output_municipalities_path = snakemake.output["municipalities"]
    output_passengers_path = snakemake.output["passengers"]
    output_young_passengers_path = snakemake.output["young"]
    output_missing_locations_path = snakemake.output["missing_locations"]

In [None]:
# Load the four input tables from parquet, in the order:
# municipalities, passenger totals, group sizes, locations
df_municipalities, df_totals, df_group_sizes, df_locations = (
    pd.read_parquet(source_path)
    for source_path in (
        input_municipalities_path,
        input_passenger_totals_path,
        input_group_sizes_path,
        input_locations_path,
    )
)

In [None]:
# Collapse the municipality table to one row per
# (municipality_id, age_class, sex) combination, summing the weights
marginal_keys = ["municipality_id", "age_class", "sex"]
summed_weights = df_municipalities.groupby(marginal_keys)["weight"].sum()

# Turn the grouping keys back into regular columns
df_municipalities = summed_weights.reset_index()

In [None]:
# Passenger total
# Weighted mean of the group sizes. The weighted sum is divided by the total
# weight so that the result is a proper mean even if the weights do not sum
# to exactly one; for normalized weights this equals sum(group_size * weight).
total_group_weight = np.sum(df_group_sizes["weight"])

if not total_group_weight > 0.0:
    # Covers empty input, all-zero weights and NaN totals, which would
    # otherwise silently produce an infinite or NaN passenger total
    raise ValueError("Group size weights must sum to a positive value")

mean_group_size = np.sum(
    df_group_sizes["group_size"] * df_group_sizes["weight"]
) / total_group_weight

# Largest value of the "passengers" column, scaled down by the mean group size
# NOTE(review): presumably this converts a passenger count into a number of
# travelling groups, and max() picks the busiest day - confirm against upstream
daily_total = df_totals["passengers"].max() / mean_group_size

# Single-row marginal holding the overall passenger weight
df_passenger_total = pd.DataFrame({
    "is_passenger": [True],
    "weight": [daily_total]
})

In [None]:
# Young passengers: single-row marginal that assigns a weight of zero to
# passengers in age class 0
young_passenger_record = {"is_passenger": True, "age_class": 0, "weight": 0.0}
df_young = pd.DataFrame([young_passenger_record])

In [None]:
# Municipalities that appear in the municipality table but have no entry in
# the locations table get a passenger marginal with a weight of zero
municipalities_without_locations = (
    set(df_municipalities["municipality_id"]) - set(df_locations["municipality_id"])
)

df_missing = pd.DataFrame({
    "municipality_id": sorted(municipalities_without_locations)
}).assign(is_passenger=True, weight=0.0)

In [None]:
# Write every result table to its parquet destination
for result_frame, destination_path in (
    (df_municipalities, output_municipalities_path),
    (df_passenger_total, output_passengers_path),
    (df_young, output_young_passengers_path),
    (df_missing, output_missing_locations_path),
):
    result_frame.to_parquet(destination_path)