In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np

from tqdm.notebook import tqdm

In [None]:
# Resolve input/output paths and parameters.
# Without a `snakemake` object in the namespace, fall back to hard-coded
# defaults; otherwise read everything from the `snakemake` object.
if "snakemake" not in locals():
    # Interactive defaults
    input_passegers_path = "../../../results/madrid/population/discretized_passengers.parquet"
    input_locations_path = "../../../results/madrid/osm/locations.parquet"
    output_path = "../../../results/madrid/population/localized_population.parquet"
    zone_attribute = "municipality_id"
    seed = 0

else:
    # Values supplied through the `snakemake` object
    input_passegers_path = snakemake.input["passengers"]
    input_locations_path = snakemake.input["locations"]
    output_path = snakemake.output[0]
    zone_attribute = snakemake.params["zone_attribute"]
    seed = snakemake.params["seed"]

In [None]:
# Passenger table, read with plain pandas
df_passengers = pd.read_parquet(path = input_passegers_path)

# Candidate locations, read with geopandas (later cells use its
# "geometry" column and its CRS)
df_locations = gpd.read_parquet(path = input_locations_path)

In [None]:
# Count the passengers per zone while the zone identifier is still a column
zone_sizes = df_passengers.groupby(zone_attribute).size()
df_sectors = zone_sizes.reset_index(name = "count")

# Index both tables by the zone identifier (sorted) so that the rows of a
# zone can be selected by label with .loc
df_locations = df_locations.set_index(zone_attribute).sort_index()
df_passengers = df_passengers.set_index(zone_attribute).sort_index()

In [None]:
# Generate locations
#
# For every zone, draw as many locations (with replacement) as there are
# passengers in that zone and write the sampled geometries to the
# passengers of the zone.
#
# A single random state, seeded from the configured `seed`, is shared by all
# draws so that repeated runs with the same inputs yield the same result.
rng = np.random.RandomState(seed)

for zone_index, count in tqdm(zip(df_sectors[zone_attribute], df_sectors["count"]), total = len(df_sectors)):
    # Fail with a readable message when a zone has passengers but no
    # candidate locations (a plain .loc lookup would raise a less helpful
    # KeyError for the missing label)
    if zone_index not in df_locations.index:
        raise KeyError("No candidate locations available for zone {!r}".format(zone_index))

    df_source = df_locations.loc[[zone_index]]
    assert len(df_source) > 0

    # Sampling with replacement: a zone may hold fewer locations than passengers
    df_sample = df_source.sample(n = count, replace = True, random_state = rng)
    geometry = df_sample["geometry"].values

    # A single sampled geometry is unwrapped and assigned as a scalar
    # NOTE(review): presumably needed because .loc selects a single row for
    # this zone and would not accept a length-1 array of geometries - confirm
    if len(geometry) == 1:
        geometry = geometry[0]

    df_passengers.loc[zone_index, "geometry"] = geometry

In [None]:
# Turn the zone index back into a regular column and wrap the table as a
# GeoDataFrame using the CRS of the location data
df_passengers = gpd.GeoDataFrame(
    data = df_passengers.reset_index(),
    crs = df_locations.crs,
)

In [None]:
# Write the passengers, now carrying a geometry column, to the output Parquet file
df_passengers.to_parquet(path = output_path)