In [12]:
import pandas as pd
import geopandas as gpd
import numpy as np
import py7zr, tempfile
import shutil

In [3]:
# Default locations, used when the notebook is executed by hand.
input_path = "../../../../resources/france/census/CONTOURS-IRIS_2-1__SHP__FRA_2021-01-01.7z"
output_path = "../../../../results/par/census/spatial.parquet"

# When a `snakemake` object is in scope, its first declared input and
# output replace the defaults above.
if "snakemake" in locals():
    input_path = snakemake.input[0]
    output_path = snakemake.output[0]

In [13]:
# Unpack the 7z archive into a scratch directory and load the IRIS contours
# shapefile from it. The scratch directory is removed when the outer `with`
# block exits, so the shapefile must be read before leaving the block.
# The `delete` keyword is intentionally not passed: it only exists since
# Python 3.12 and `True` is its default, so omitting it keeps the same
# behaviour while remaining compatible with older interpreters.
with tempfile.TemporaryDirectory() as directory:
    with py7zr.SevenZipFile(input_path) as archive:
        archive.extractall(path = directory)

    # Location of the shapefile inside the extracted archive tree
    source_path = "/".join([
        directory, 
        "CONTOURS-IRIS_2-1__SHP__FRA_2021-01-01",
        "CONTOURS-IRIS",
        "1_DONNEES_LIVRAISON_2021-06-00217",
        "CONTOURS-IRIS_2-1_SHP_LAMB93_FXX-2021",
        "CONTOURS-IRIS.shp"
    ])

    df_spatial = gpd.read_file(source_path)

In [18]:
# Keep only the rows whose municipality code starts with one of these
# two-character prefixes (Île-de-France region)
idf_prefixes = ["75", "91", "92", "93", "77", "94", "95", "78"]
in_idf = df_spatial["INSEE_COM"].str.slice(0, 2).isin(idf_prefixes)
df_spatial = df_spatial.loc[in_idf]

In [19]:
# Formatting: derive integer identifiers from the INSEE municipality and
# IRIS codes, then keep only the identifier and geometry columns.
# `assign` builds a new frame instead of writing columns into df_spatial,
# which may be a filtered slice of another frame; writing into such a slice
# triggers pandas' SettingWithCopyWarning.
df_spatial = df_spatial.assign(
    municipality_id = df_spatial["INSEE_COM"].astype(int),
    iris_id = df_spatial["CODE_IRIS"].astype(int),
)[["municipality_id", "iris_id", "geometry"]]

In [22]:
# Write the formatted zones as a Parquet file at the configured output path
df_spatial.to_parquet(path = output_path)