In [None]:
import io
import zipfile

import geopandas as gpd
import pandas as pd
import requests

In [None]:
# # Function to download and extract a zip file
# def download_and_extract(url, extract_to="."):
#     response = requests.get(url)
#     if response.status_code == 200:
#         with zipfile.ZipFile(io.BytesIO(response.content)) as z:
#             z.extractall(extract_to)
#     else:
#         print(f"Failed to download the file: {response.status_code}")

In [None]:
# # URL for Eurostat GISCO NUTS 2021 boundaries
# url = "https://gisco-services.ec.europa.eu/distribution/v2/nuts/download/ref-nuts-2021-03m.shp.zip"

In [None]:
# # Download and extract the data
# download_and_extract(url, "eurostat_nuts")

In [None]:
# Read the NUTS level-3 and level-2 boundary layers from the local
# "eurostat_nuts" directory into GeoDataFrames.
# NOTE(review): these archives must already exist on disk; the download cells
# above are commented out, so a fresh checkout presumably needs them run once.
gdf_nuts_3, gdf_nuts_2 = (
    gpd.read_file("eurostat_nuts/NUTS_RG_03M_2021_4326_LEVL_3.shp.zip"),
    gpd.read_file("eurostat_nuts/NUTS_RG_03M_2021_4326_LEVL_2.shp.zip"),
)

In [None]:
# Keep only the rows whose country code is "ES" (Spain) at both NUTS levels.
gdf_nuts_3_spain = gdf_nuts_3.loc[gdf_nuts_3["CNTR_CODE"].eq("ES")]
gdf_nuts_2_spain = gdf_nuts_2.loc[gdf_nuts_2["CNTR_CODE"].eq("ES")]

In [None]:
# Preview the first five Spanish NUTS-3 rows (all columns).
gdf_nuts_3_spain.head(n=5)

In [None]:
# Preview the first five Spanish NUTS-2 rows (all columns).
gdf_nuts_2_spain.head(n=5)

In [None]:
# Show only the identifier and Latin-script name of the first NUTS-3 rows.
gdf_nuts_3_spain.loc[:, ["NUTS_ID", "NAME_LATN"]].head(n=5)

In [None]:
# Show only the identifier and Latin-script name of the first NUTS-2 rows.
gdf_nuts_2_spain.loc[:, ["NUTS_ID", "NAME_LATN"]].head(n=5)

In [None]:
# Build the output layer: one row per NUTS-3 geometry, labelled with its own
# name ("province") and the name of the NUTS-2 region it is matched to
# ("community").
#
# The join key is derived by taking the first four characters of each NUTS-3
# ID; the code relies on that prefix being the NUTS_ID of the parent NUTS-2 row.
#
# NOTE(review): the join is "outer", so a NUTS-2 row with no matching NUTS-3
# prefix would appear with missing province/geometry, and an unmatched NUTS-3
# row would get a missing community. Confirm that this is intended.
final_map = (
    gdf_nuts_3_spain.assign(nuts_2_id=lambda regions: regions["NUTS_ID"].str[:4])[
        ["nuts_2_id", "NAME_LATN", "geometry"]
    ]
    .merge(
        gdf_nuts_2_spain.loc[:, ["NUTS_ID", "NAME_LATN"]],
        how="outer",
        left_on="nuts_2_id",
        right_on="NUTS_ID",
        # Both sides carry NAME_LATN; the suffixes tell the two levels apart.
        suffixes=("_province", "_community"),
    )
    # The join keys are only needed for matching, not in the exported layer.
    .drop(columns=["nuts_2_id", "NUTS_ID"])
    .rename(
        columns={
            "NAME_LATN_province": "province",
            "NAME_LATN_community": "community",
        }
    )
)

In [None]:
# Write the combined layer to disk as GeoJSON, without the DataFrame index.
final_map.to_file(
    filename="../data/maps/spain-eurostat.geojson",
    driver="GeoJSON",
    index=False,
)