In [None]:
import pandas as pd
import numpy as np
import plotly.express as px

In [None]:
if "snakemake" not in locals():
    # No `snakemake` object in scope (e.g. the notebook is run by hand):
    # fall back to hard-coded paths relative to the notebook's location.
    input_path = "../../../../resources/brussels/airport/luggage_distribution_per_province_cabin_class.xlsx"
    output_path = "../../../../results/brussels/airport/luggage.parquet"
else:
    # A `snakemake` object is in scope: use its first declared input and output.
    input_path = snakemake.input[0]
    output_path = snakemake.output[0]

In [None]:
# Read the Excel workbook at input_path into a DataFrame
df = pd.read_excel(io = input_path)

In [None]:
# Clean up the identifier columns.
#
# "province_Belgium" is forward-filled before mapping, so rows with an empty
# province inherit the last province seen above them (presumably the sheet only
# labels the first row of each province group -- confirm against the workbook).
# Full province names are then swapped for three-letter codes; names that are
# not listed below are left unchanged.
df = df.assign(
    province = df["province_Belgium"].ffill().replace({
        "Antwerpen": "ant",
        "Brussel": "bru",
        "Henegouwen": "hen",
        "Limburg": "lim",
        "Luik": "lui",
        "Luxemburg": "lux",
        "Namen": "nam",
        "Oost-Vlaanderen": "ovl",
        "Vlaams Brabant": "vbr",
        "Waals Brabant": "wbr",
        "West-Vlaanderen": "wvl"
    })
)

# Every row must have a province after the forward-fill
assert df["province"].notna().all()

# The raw province column is no longer needed
df = df.drop(columns = ["province_Belgium"])

# Normalise cabin class labels: lower-case, then trim surrounding whitespace
df = df.assign(cabin_class = df["cabin_class"].str.lower().str.strip())

In [None]:
# Reshape from wide to long: every column other than province / cabin_class
# becomes one row, with its header stored in "luggage_size" and its cell value
# stored in "weight".
df = pd.melt(
    df,
    id_vars = ["province", "cabin_class"],
    var_name = "luggage_size",
    value_name = "weight",
)

# The former column headers are converted to integers
df = df.astype({"luggage_size": int})

In [None]:
# Output
# Write the long-format table (province, cabin_class, luggage_size, weight)
# to output_path in Parquet format.
df.to_parquet(output_path)

In [None]:
# Visual check: weight per luggage size, coloured by province, with one facet
# column per cabin class. Kept as the cell's last expression so the figure is shown.
px.bar(
    df,
    x = "luggage_size",
    y = "weight",
    color = "province",
    facet_col = "cabin_class",
)