In [None]:
import pandas as pd
import geopandas as gpd
import plotly.express as px
import matplotlib.pyplot as plt
import numpy as np
import shapely.geometry as sgeo

## Population

In [None]:
# Census inputs (relative to the notebook location)
sectors_path = "../../../results/belgium/census/sectors.parquet"
municipalities_path = "../../../results/belgium/census/municipalities.parquet"
spatial_path = "../../../results/belgium/census/spatial.parquet"

# Airport inputs (relative to the notebook location)
daily_totals_path = "../../../results/belgium/airport/daily_totals.parquet"
survey_path = "../../../results/belgium/airport/survey.parquet"
departure_hours_path = "../../../results/belgium/airport/departure_hours.parquet"
group_sizes_path = "../../../results/belgium/airport/group_sizes.parquet"
passenger_profiles_path = "../../../results/belgium/airport/passenger_profiles.parquet"

# Directory that receives the exported figures.
# NOTE(review): absolute, machine-specific path - adapt before running elsewhere.
output_path = "/home/shoerl/temp/maia"

In [None]:
# Sector reference: keep the sector identifier and its weight, exposed as "reference"
df_sector_reference = pd.read_parquet(sectors_path)
df_sector_reference = df_sector_reference[["sector_index", "weight"]]
df_sector_reference = df_sector_reference.rename(columns = { "weight": "reference" })

# Municipality reference: total weight per municipality, age class and sex
df_municipality_reference = pd.read_parquet(municipalities_path)
df_municipality_reference = df_municipality_reference.rename(columns = { "weight": "reference" })
df_municipality_reference = df_municipality_reference.groupby(
    ["municipality_id", "age_class", "sex"]
)["reference"].sum().reset_index()

### Sector counts

In [None]:
# Count the rows of the discretized population per sector, separately for every seed
df_sample = []

for seed in np.arange(10) * 1000:
    df_partial = pd.read_parquet("../../../results/belgium/population/discretized_population_seed{}.parquet".format(seed))
    df_partial = df_partial.groupby("sector_index").size().reset_index(name = "model")
    df_partial["seed"] = seed
    df_sample.append(df_partial)

df_sample = pd.concat(df_sample)

# One column per seed. A sector that does not occur in a seed has no row in that
# seed's counts; it must enter the statistics as a zero instead of being ignored,
# otherwise the mean is biased upwards and the lower error bar is too small.
df_by_seed = df_sample.set_index(["sector_index", "seed"])["model"].unstack("seed", fill_value = 0)
model_mean = df_by_seed.mean(axis = 1)

# "min" and "max" hold the distances from the mean (plotly error bar convention)
df_sample = pd.DataFrame({
    "mean": model_mean,
    "min": model_mean - df_by_seed.min(axis = 1),
    "max": df_by_seed.max(axis = 1) - model_mean,
    "model": model_mean,
})

In [None]:
# Join the per-sector model statistics with the reference values
df_comparison = df_sample.merge(df_sector_reference, on = "sector_index")

# Model against reference; the error bars use the "min" / "max" offset columns
figure = px.scatter(
    df_comparison,
    x = "reference",
    y = "model",
    error_y = "max",
    error_y_minus = "min",
)

# Export the figure as PNG into the output directory
image_path = "{}/validation_sectors.png".format(output_path)

with open(image_path, "wb") as image_file:
    image_file.write(figure.to_image("png", width = 600, height = 400, scale = 4))

figure.show()

### Municipalities

In [None]:
# Rows of the discretized population per municipality, age class and sex,
# computed separately for every seed and stacked into one long table
df_sample = pd.concat([
    pd.read_parquet("../../../results/belgium/population/discretized_population_seed{}.parquet".format(seed))
    .groupby(["municipality_id", "age_class", "sex"])
    .size()
    .reset_index(name = "model")
    .assign(seed = seed)
    for seed in np.arange(10) * 1000
])

In [None]:
# One column per seed. A (municipality, sex, age class) combination that does not
# occur in a seed has no row in that seed's counts; it must enter the statistics
# as a zero instead of being ignored.
df_by_seed = df_sample.set_index(
    ["municipality_id", "sex", "age_class", "seed"]
)["model"].unstack("seed", fill_value = 0)
model_mean = df_by_seed.mean(axis = 1)

# "min" and "max" hold the distances from the mean (plotly error bar convention)
df_comparison = pd.DataFrame({
    "mean": model_mean,
    "min": model_mean - df_by_seed.min(axis = 1),
    "max": df_by_seed.max(axis = 1) - model_mean,
    "model": model_mean,
}).reset_index()

# Attach the reference value of every combination
df_comparison = pd.merge(df_comparison, df_municipality_reference[["municipality_id", "sex", "age_class", "reference"]], on = ["municipality_id", "age_class", "sex"])

In [None]:
# Model against reference; the error bars use the "min" / "max" offset columns
figure = px.scatter(
    df_comparison,
    x = "reference",
    y = "model",
    error_y = "max",
    error_y_minus = "min",
)

# Export the figure as PNG into the output directory
image_path = "{}/validation_municipalities.png".format(output_path)

with open(image_path, "wb") as image_file:
    image_file.write(figure.to_image("png", width = 600, height = 400, scale = 4))

figure.show()

### 

## Passengers

In [None]:
# Scaling factor derived from the largest value of the "passengers" column.
# NOTE(review): the divisor 1.72 is not explained in this notebook - presumably
# an average group size; confirm and document.
df_total = pd.read_parquet(daily_totals_path)
passenger_scale = df_total["passengers"].max() / 1.72

# Survey probabilities per municipality, sex and age class, exposed as "reference"
df_survey = pd.read_parquet(survey_path).rename(columns = { "probability": "reference" })
df_survey = df_survey[["municipality_id", "sex", "age_class", "reference"]]

# Scale the probabilities with the passenger factor
df_survey = df_survey.assign(reference = df_survey["reference"] * passenger_scale)

In [None]:
# Scaled passenger shares per municipality, age class and sex for every seed
passenger_frames = []

for seed in np.arange(10) * 1000:
    passengers = pd.read_parquet("../../../results/belgium/population/discretized_passengers_seed{}.parquet".format(seed))

    # Rows per combination, normalised to shares of this seed's total
    counts = passengers.groupby(["municipality_id", "age_class", "sex"]).size()
    shares = counts / counts.sum()

    # Scale the shares with the passenger factor and tag them with the seed
    scaled = (shares * passenger_scale).reset_index(name = "model")
    scaled["seed"] = seed
    passenger_frames.append(scaled)

df_sample = pd.concat(passenger_frames)

In [None]:
# One column per seed. A (municipality, sex, age class) combination without any
# passenger in a seed has no row in that seed's table; it must enter the
# statistics as a zero instead of being ignored, otherwise the mean is biased
# upwards and the lower error bar is too small.
df_by_seed = df_sample.set_index(
    ["municipality_id", "sex", "age_class", "seed"]
)["model"].unstack("seed", fill_value = 0.0)
model_mean = df_by_seed.mean(axis = 1)

# "min" and "max" hold the distances from the mean (plotly error bar convention)
df_comparison = pd.DataFrame({
    "mean": model_mean,
    "min": model_mean - df_by_seed.min(axis = 1),
    "max": df_by_seed.max(axis = 1) - model_mean,
    "model": model_mean,
}).reset_index()

# Attach the scaled survey reference of every combination
df_comparison = pd.merge(df_comparison, df_survey[["municipality_id", "sex", "age_class", "reference"]], on = ["municipality_id", "age_class", "sex"])

In [None]:
# Model against reference; the error bars use the "min" / "max" offset columns
figure = px.scatter(
    df_comparison,
    x = "reference",
    y = "model",
    error_y = "max",
    error_y_minus = "min",
)

# Export the figure as PNG into the output directory
image_path = "{}/validation_survey.png".format(output_path)

with open(image_path, "wb") as image_file:
    image_file.write(figure.to_image("png", width = 600, height = 400, scale = 4))

figure.show()

In [None]:
# Deviation of the model from the reference: for the mean, and for the upper and
# lower ends of the range spanned by the "max" / "min" offsets
df_comparison = df_comparison.assign(
    difference = lambda df: df["model"] - df["reference"],
    difference_max = lambda df: df["model"] + df["max"] - df["reference"],
    difference_min = lambda df: df["model"] - df["min"] - df["reference"],
)

In [None]:
# Empirical cumulative distribution of the three deviation columns
difference_columns = ["difference", "difference_max", "difference_min"]
figure = px.ecdf(df_comparison, x = difference_columns)

# Export the figure as PNG into the output directory
image_path = "{}/validation_errors_cdf.png".format(output_path)

with open(image_path, "wb") as image_file:
    image_file.write(figure.to_image("png", width = 600, height = 400, scale = 4))

figure.show()

In [None]:
# Load the spatial table and merge all geometries that share a municipality
df_municipalities = (
    gpd.read_parquet(spatial_path)
    .dissolve("municipality_id")
    .reset_index()
)

In [None]:
# Total model value per municipality and seed
seed_totals = df_sample.groupby(["municipality_id", "seed"])["model"].sum()

# Average over the seeds. A municipality without any row in a seed contributes a
# zero for that seed; ignoring it would overestimate the mean.
df_origins = seed_totals.unstack("seed", fill_value = 0.0).mean(axis = 1).reset_index(name = "model")

In [None]:
# Attach the averaged model values to the municipality geometries
df_plot = pd.merge(df_municipalities, df_origins, on = "municipality_id")

# Colour bar configuration of the map
legend_options = { "label": "Daily passengers", "orientation": "vertical" }

# Map coloured by the "model" column; the colour scale is capped at 1000
plt.figure(dpi = 300)
ax = df_plot.plot(
    "model",
    legend = True,
    vmax = 1000,
    legend_kwds = legend_options,
    ax = plt.gca(),
)
ax.set_axis_off()

### Departure times

In [None]:
# Every combination of the two passenger profiles and the 24 hours of a day
profile_hour_index = pd.MultiIndex.from_product(
    [["business", "economy"], list(range(24))],
    names = ["passenger_profile", "departure_hour"],
)

# Align the reference table to the full grid; combinations that are missing in
# the file stay NaN (they are not filled with zeros)
df_departure_hours = (
    pd.read_parquet(departure_hours_path)
    .set_index(["passenger_profile", "departure_hour"])
    .reindex(profile_hour_index)
    .reset_index()
)

In [None]:
# Trip attributes needed for the validation, read for every seed and stacked
# into one long table tagged with the seed
df_sample = pd.concat([
    gpd.read_file("../../../results/belgium/trips/passenger_trips_seed{}.gpkg".format(seed))[
        ["passenger_profile", "reference_time", "group_size"]
    ].assign(seed = seed)
    for seed in np.arange(10) * 1000
])

In [None]:
# Load the table stored at passenger_profiles_path and show it as the cell output
df_passenger_profiles = pd.read_parquet(path = passenger_profiles_path)
df_passenger_profiles

In [None]:
# Number of trips per passenger profile and seed
df_comparison = df_sample.groupby(["passenger_profile", "seed"]).size().reset_index(name = "model")

# Convert the counts into shares of the seed's total number of trips
df_total = df_comparison.groupby("seed")["model"].sum().reset_index(name = "total")
df_comparison = df_comparison.merge(df_total, on = "seed")
df_comparison = df_comparison.assign(model = df_comparison["model"] / df_comparison["total"])

# Range and mean of the shares over the seeds, shown as the cell output
df_comparison = (
    df_comparison
    .groupby("passenger_profile")["model"]
    .aggregate(["min", "max", "mean"])
    .reset_index()
)
df_comparison

In [None]:
# Number of trips per departure hour, passenger profile and seed
df_comparison = df_sample.copy()
df_comparison["departure_hour"] = df_comparison["reference_time"] // 3600
df_comparison = df_comparison.groupby(["departure_hour", "passenger_profile", "seed"]).size()

# One column per seed. An (hour, profile) combination without any trip in a seed
# has no row for that seed; it must enter the statistics as a zero instead of
# being ignored.
df_by_seed = df_comparison.unstack("seed", fill_value = 0)
model_mean = df_by_seed.mean(axis = 1)

# "min" and "max" hold the distances from the mean (plotly error bar convention)
df_comparison = pd.DataFrame({
    "min": model_mean - df_by_seed.min(axis = 1),
    "max": df_by_seed.max(axis = 1) - model_mean,
    "mean": model_mean,
    "model": model_mean,
}).reset_index()

# Align to the full grid of profiles and hours; combinations that never occur
# in any seed stay NaN
df_comparison = df_comparison.set_index(["passenger_profile", "departure_hour"])
df_comparison = df_comparison.reindex(pd.MultiIndex.from_product([[
    "business", "economy"
], list(range(24))], names = ["passenger_profile", "departure_hour"])).reset_index()

In [None]:
# Hourly model values per passenger profile with the seed range as error bars;
# the figure is the cell output
px.line(
    data_frame = df_comparison,
    x = "departure_hour",
    y = "model",
    color = "passenger_profile",
    error_y = "max",
    error_y_minus = "min",
)

In [None]:
# Number of trips per departure hour, passenger profile and seed
df_comparison = df_sample.copy()
df_comparison["departure_hour"] = df_comparison["reference_time"] // 3600
df_comparison = df_comparison.groupby(["departure_hour", "passenger_profile", "seed"]).size().reset_index(name = "weight")

# Convert the counts into shares within every (profile, seed) combination
df_total = df_comparison.groupby(["passenger_profile", "seed"])["weight"].sum().reset_index(name = "total")
df_comparison = pd.merge(df_comparison, df_total, on = ["passenger_profile", "seed"])
df_comparison["weight"] = df_comparison["weight"] / df_comparison["total"]

# One column per seed. An (hour, profile) combination without any trip in a seed
# has no row for that seed; its share is zero there and must not be ignored.
df_by_seed = df_comparison.set_index(
    ["departure_hour", "passenger_profile", "seed"]
)["weight"].unstack("seed", fill_value = 0.0)
weight_mean = df_by_seed.mean(axis = 1)

# "min" and "max" hold the distances from the mean (plotly error bar convention)
df_comparison = pd.DataFrame({
    "min": weight_mean - df_by_seed.min(axis = 1),
    "max": df_by_seed.max(axis = 1) - weight_mean,
    "mean": weight_mean,
    "weight": weight_mean,
}).reset_index()

# Align to the full grid of profiles and hours; combinations that never occur
# in any seed stay NaN
df_comparison = df_comparison.set_index(["passenger_profile", "departure_hour"])
df_comparison = df_comparison.reindex(pd.MultiIndex.from_product([[
    "business", "economy"
], list(range(24))], names = ["passenger_profile", "departure_hour"])).reset_index()

# Marks these rows as model output when they are stacked with the reference
df_comparison["slot"] = "model"

In [None]:
# Reference rows for the departure time plot: no error bars (zero offsets) and
# tagged as "reference" so they can be told apart from the model rows
df_reference = df_departure_hours.assign(**{
    "min": 0.0,
    "max": 0.0,
    "slot": "reference",
})

In [None]:
# Model and reference rows in one table; the line dash separates the two
df_lines = pd.concat([df_comparison, df_reference])

figure = px.line(
    df_lines,
    x = "departure_hour",
    y = "weight",
    color = "passenger_profile",
    line_dash = "slot",
    error_y = "max",
    error_y_minus = "min",
)

# Export the figure as PNG into the output directory
image_path = "{}/validation_departure_times.png".format(output_path)

with open(image_path, "wb") as image_file:
    image_file.write(figure.to_image("png", width = 600, height = 400, scale = 4))

figure.show()

In [None]:
# Group size reference table, tagged as "reference" for the comparison plot
df_reference = pd.read_parquet(group_sizes_path).assign(slot = "reference")

In [None]:
# Number of trips per group size and seed
df_comparison = df_sample.groupby(["group_size", "seed"]).size().reset_index(name = "weight")

# Convert the counts into shares of the seed's total number of trips
df_total = df_comparison.groupby("seed")["weight"].sum().reset_index(name = "total")
df_comparison = pd.merge(df_comparison, df_total, on = "seed")
df_comparison["weight"] = df_comparison["weight"] / df_comparison["total"]

# One column per seed. A group size that does not occur in a seed has no row for
# that seed; its share is zero there and must not be ignored, otherwise the mean
# is biased upwards and the lower error bar is too small.
df_by_seed = df_comparison.set_index(["group_size", "seed"])["weight"].unstack("seed", fill_value = 0.0)
weight_mean = df_by_seed.mean(axis = 1)

# "min" and "max" hold the distances from the mean (plotly error bar convention)
df_comparison = pd.DataFrame({
    "min": weight_mean - df_by_seed.min(axis = 1),
    "max": df_by_seed.max(axis = 1) - weight_mean,
    "mean": weight_mean,
    "weight": weight_mean,
}).reset_index()

# Marks these rows as model output when they are stacked with the reference
df_comparison["slot"] = "model"

In [None]:
# Model and reference rows in one table, drawn as grouped bars per group size
df_bars = pd.concat([df_comparison, df_reference])

figure = px.bar(
    df_bars,
    x = "group_size",
    y = "weight",
    color = "slot",
    barmode = "group",
    error_y = "max",
    error_y_minus = "min",
)

# Export the figure as PNG into the output directory
image_path = "{}/validation_group_size.png".format(output_path)

with open(image_path, "wb") as image_file:
    image_file.write(figure.to_image("png", width = 600, height = 400, scale = 4))

figure.show()

In [None]:
# Load the trips that were generated with seed 0
trips_seed0_path = "../../../results/belgium/trips/passenger_trips_seed0.gpkg"
df_trips = gpd.read_file(trips_seed0_path)

In [None]:
# Draw at most 10000 trips without replacement. The size is capped so that the
# cell also works for fewer trips, and the random state is fixed so that
# re-running the notebook exports the same subset.
sample_size = min(10000, len(df_trips))
df_sample = df_trips.sample(sample_size, replace = False, random_state = 0)

# Reduce every geometry to a point at its first coordinate
df_sample["geometry"] = df_sample["geometry"].apply(lambda x: sgeo.Point(*x.coords[0]))

# Write next to the exported figures instead of repeating the absolute path
df_sample.to_file("{}/trips.gpkg".format(output_path))

In [None]:
# TODO: Map of some city with the trip origins / destinations
# TODO: Map of the trips with transparency overlay