In [None]:
import pandas as pd
import geopandas as gpd

### Preprocessing

In [None]:
# Load the raw semicolon-delimited CSV export.
df = pd.read_csv("80305ned_TypedDataSet_06112025_170745.csv", sep=";")

# Build the working table in a single constructor call: every English column
# label is paired with the source column it is copied from, in output order.
new_df = pd.DataFrame({
    "Region Code": df["RegioS"],
    "Avg. Distance to GP": df["AfstandTotHuisartsenpraktijk_1"],
    "Avg. Distance to Pharmacy": df["AfstandTotApotheek_6"],
    # Two hospital distance columns are available; keep the smaller one per row.
    "Avg. Distance to Hospital": df[["AfstandTotZiekenhuis_7", "AfstandTotZiekenhuis_11"]].min(axis=1),
    "Avg. Distance to Supermarket": df["AfstandTotGroteSupermarkt_20"],
    "Avg. Distance to Primary School": df["AfstandTotSchool_56"],
    "Avg. Distance to High School": df["AfstandTotSchool_60"],
    "Avg. Distance to Highway": df["AfstandTotOpritHoofdverkeersweg_100"],
    "Avg. Distance to Train Station": df["AfstandTotTreinstationsTotaal_101"],
    "Avg. Distance to Fire Station": df["AfstandTotBrandweerkazerne_125"],
})

# Row count before cleaning.
print(new_df.shape[0])

# Discard every row with a missing value; per the original author these are
# municipalities that no longer exist.
new_df = new_df.dropna()

# Row count after cleaning; expected to equal the number of municipalities (342).
print(new_df.shape[0])

# Order by region code and renumber the index from zero.
new_df = new_df.sort_values("Region Code")
new_df = new_df.reset_index(drop=True)
new_df.head()

In [None]:
# Read the "gemeenten" layer of the GeoPackage.
gdf = gpd.read_file("wijkenbuurten_2023_v2.gpkg", layer="gemeenten")

# Start from the geometries alone, then attach the attribute columns that are
# needed under their English labels (same column order as they are listed).
new_gdf = gpd.GeoDataFrame(geometry=gdf["geometry"]).assign(**{
    "Region Code": gdf["gemeentecode"],
    "Region Name": gdf["gemeentenaam"],
    "Population": gdf["aantal_inwoners"],
})

# Right join on the region code: every row of the distance table is kept,
# whether or not a matching geometry row exists.
new_gdf = new_gdf.merge(new_df, on="Region Code", how="right")
print(new_gdf.shape[0])

# Keep only rows with a positive population. This is intended to remove water
# areas; rows left without a population by the right join are dropped as well,
# because a missing value does not compare greater than zero.
new_gdf = new_gdf.loc[new_gdf["Population"] > 0]
print(new_gdf.shape[0])  # expected: number of municipalities = 342
new_gdf.head()

In [None]:
# Declare the source coordinate system (EPSG:28992), then reproject the
# geometries to EPSG:4326.
new_gdf_conv = (
    new_gdf
    .set_crs("EPSG:28992")
    .to_crs("EPSG:4326")
)

# Persist the merged CSV + geodata table as a single GeoJSON file.
new_gdf_conv.to_file("dataset_clean.json", driver="GeoJSON")

### Subsets

In [None]:
# Reload the cleaned, merged dataset from the GeoJSON file on disk.
gdf = gpd.read_file("dataset_clean.json")

# Names of the municipalities to keep for the Limburg subset, taken from
# https://nl.wikipedia.org/wiki/Tabel_van_gemeenten_in_Limburg_(Nederland)
limburg = [
    "Beek",
    "Beekdaelen",
    "Beesel",
    "Bergen (L.)",
    "Brunssum",
    "Echt-Susteren",
    "Eijsden-Margraten",
    "Gennep",
    "Gulpen-Wittem",
    "Heerlen",
    "Horst aan de Maas",
    "Kerkrade",
    "Landgraaf",
    "Leudal",
    "Maasgouw",
    "Maastricht",
    "Meerssen",
    "Mook en Middelaar",
    "Nederweert",
    "Peel en Maas",
    "Roerdalen",
    "Roermond",
    "Simpelveld",
    "Sittard-Geleen",
    "Stein",
    "Vaals",
    "Valkenburg aan de Geul",
    "Venlo",
    "Venray",
    "Voerendaal",
    "Weert"
]

# Bind the subset to its own name instead of rebinding `new_gdf`: the export
# cell reads `new_gdf` as the merged nationwide frame, so overwriting it here
# would make re-running that cell operate on the Limburg rows (already in a
# different CRS) rather than on the full dataset.
limburg_gdf = gdf[gdf["Region Name"].isin(limburg)]
print(len(limburg_gdf)) # should be 31 (one row per name in `limburg`)
limburg_gdf.to_file("dataset_limburg.json", driver="GeoJSON")