In [None]:
import pandas as pd
import plotly.express as px

In [None]:
# Resolve the input/output locations. When the notebook is executed outside of
# Snakemake (no `snakemake` object in the namespace), fall back to the
# repository-relative default paths; otherwise take the first declared input
# and output of the rule.
if "snakemake" not in locals():
    input_path = "../../../../resources/brussels/airport/resp_by_mun_age_sex_TML.csv"
    output_path = "../../../../results/brussels/airport/survey.parquet"
else:
    input_path = snakemake.input[0]
    output_path = snakemake.output[0]

In [None]:
# Load passenger data
# NIS5_Code is parsed as text here; it is cast to an integer in the cleaning step.
df_passengers = pd.read_csv(input_path, dtype={"NIS5_Code": str})

In [None]:
# Clean structure
#
# Builds the output table in a single chain so that df_passengers is rebound
# once instead of being modified step by step:
#   - age_class: integer parsed from the part of Age_Group before the first "-"
#     (the open-ended "65+" label is first rewritten as "65-1000", giving 65)
#   - sex: 1 for "Male", 2 for "Female"
#   - municipality_id / annual_respondents / probability: renamed source columns,
#     with municipality_id cast to an integer
df_passengers = (
    df_passengers
    .assign(
        # Vectorised string handling instead of a per-row Python lambda
        age_class=lambda df: (
            df["Age_Group"]
            .replace({"65+": "65-1000"})
            .str.split("-")
            .str[0]
            .astype(int)
        ),
        # Series.map is the recoding tool: unlike replace() it does not depend on
        # pandas silently downcasting an object column to integers. Labels missing
        # from the mapping become NaN, which makes astype(int) raise.
        sex=lambda df: df["Gender"].map({"Male": 1, "Female": 2}).astype(int),
    )
    .rename(columns={
        "NIS5_Code": "municipality_id",
        "Resp_by_year": "annual_respondents",
        "Probability": "probability",
    })
    .astype({"municipality_id": int})
    # Keep only the columns that are written to the output file
    .loc[:, ["municipality_id", "sex", "age_class", "annual_respondents", "probability"]]
)

In [None]:
# Total annual respondents per sex; the numeric codes are turned into
# "M" / "F" labels for the axis.
df_plot = (
    df_passengers
    .groupby("sex")["annual_respondents"]
    .sum()
    .reset_index()
    .assign(sex=lambda totals: totals["sex"].replace({1: "M", 2: "F"}))
)

px.bar(df_plot, x="sex", y="annual_respondents", title="Survey respondents by sex")

In [None]:
# Total annual respondents per age class; the class is cast to a string
# before plotting (presumably so the axis is treated as categorical).
df_plot = (
    df_passengers
    .groupby("age_class")["annual_respondents"]
    .sum()
    .reset_index()
    .astype({"age_class": str})
)

px.bar(df_plot, x="age_class", y="annual_respondents", title="Survey respondents by age class")

In [None]:
# Output
# Write the cleaned survey table to output_path in Parquet format.
df_passengers.to_parquet(output_path)