In [1]:
import geopandas as gpd
import pandas as pd
import tempfile, zipfile

In [2]:
# Resolve the input and output locations. When no `snakemake` object is in
# scope (e.g. the notebook is run by hand), fall back to the relative default
# paths below; otherwise take the paths from the `snakemake` object.
if "snakemake" not in locals():
    input_data_path = "../../../../resources/brussels/census/sh_statbel_statistical_sectors_3812_20230101.shp.zip"
    input_sectors_path = "../../../../results/brussels/census/sectors.parquet"
    output_path = "../../../../results/brussels/census/spatial.parquet"
else:
    input_data_path = snakemake.input["data"]
    input_sectors_path = snakemake.input["sectors"]
    output_path = snakemake.output[0]

In [3]:
# Load spatial data: unpack the zipped shapefile into a temporary directory
# and read it while that directory still exists (it is removed on exit)
with tempfile.TemporaryDirectory() as extract_dir, zipfile.ZipFile(input_data_path) as zipped:
    zipped.extractall(extract_dir)
    df_spatial = gpd.read_file(
        f"{extract_dir}/sh_statbel_statistical_sectors_3812_20230101.shp"
    )

In [4]:
# Translate the province codes in CNIS_PROVI to the identifiers from the parking file
province_identifiers = {
    "10000": "ant",
    "20001": "vbr",
    "20002": "wbr",
    "30000": "wvl",
    "40000": "ovl",
    "50000": "hen",
    "60000": "lui",
    "70000": "lim",
    "80000": "lux", # does not exist in parking data
    "90000": "nam",
    None: "bru" # entries without a province code are labelled "bru"
}

# Values that are not listed in the mapping are passed through unchanged
df_spatial["province"] = df_spatial["CNIS_PROVI"].replace(province_identifiers)

In [5]:
# Load sector identifiers. Only the two columns needed for the merge are
# requested, so the remaining columns of the file are never read into memory.
df_sectors = pd.read_parquet(input_sectors_path, columns = ["sector_id", "sector_index"])

In [6]:
# Clean structure: rename the source columns, then keep only the listed columns
column_names = {"CS01012023": "sector_id", "CNIS5_2023": "municipality_id"}
kept_columns = ["municipality_id", "sector_id", "province", "geometry"]

df_spatial = df_spatial.rename(columns = column_names)[kept_columns]

In [7]:
# Cast the municipality identifier column to integers
df_spatial = df_spatial.astype({"municipality_id": int})

In [8]:
# Attach the sector index by sector_id; the inner join keeps only the
# sectors that are present in df_sectors
df_spatial = df_spatial.merge(df_sectors, how = "inner", on = "sector_id")

In [9]:
# Write the resulting table to the configured output path
df_spatial.to_parquet(path = output_path)