In [16]:
import os
import datetime
import requests
import zipfile
import pandas as pd
import geopandas as gpd
import h3
from shapely.geometry import shape
import fiona
from multiprocessing import Pool

In [17]:
def download_and_extract(url, year, download_path, extract_path):
    """
    Downloads a ZIP file from a URL and extracts its contents.

    The response body is streamed to disk in chunks so the archive is never
    held in memory as a whole, and the request uses a timeout so an
    unresponsive server cannot block the caller forever.

    Args:
        url (str): URL of the ZIP archive to download.
        year (int): The year of the data; only used in the log messages.
        download_path (str): Path to save the downloaded ZIP file.
        extract_path (str): Path to extract the contents of the ZIP file.

    Returns:
        bool: True if the file was successfully downloaded and extracted, False otherwise.
        Failures are reported with print() and never raised to the caller.
    """
    try:
        # (connect timeout, read timeout) in seconds.
        with requests.get(url, stream=True, timeout=(10, 120)) as response:
            response.raise_for_status()  # Raise an exception if the request was unsuccessful

            # Write the archive chunk by chunk instead of buffering response.content.
            with open(download_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)

        # Extract the file
        with zipfile.ZipFile(download_path, 'r') as zip_ref:
            zip_ref.extractall(extract_path)

        return True  # The file was successfully downloaded and extracted

    except requests.exceptions.HTTPError as e:
        failed_response = getattr(e, 'response', None)
        status = getattr(failed_response, 'status_code', None)
        if status == 404:
            # Only a 404 really means "nothing published for this year".
            print(f"No file available for {year}")
        else:
            # Other statuses (403, 5xx, ...) are reported as such so that a
            # blocked or failing server is not mistaken for missing data.
            print(f"HTTP error {status} while downloading file for {year}: {e}")
        return False

    except Exception as e:
        print(f"Error downloading or extracting file: {e}")
        return False  # An error occurred

In [18]:
def process_geojson(url_gpkg, resolution, output_filepath):
    """
    Reads a GeoPackage file, reduces every feature to its centroid, assigns each
    centroid to an H3 cell at the requested resolution, sums the population per
    H3 cell, and saves the result to a CSV file.

    The CSV has two columns: 'h3' (the cell index) and 'aantal_inwoners' (the
    summed population of all features whose centroid falls in that cell).

    Args:
        url_gpkg (str): Path to the GeoPackage file.
        resolution (int): H3 resolution level.
        output_filepath (str): Path to save the CSV file.

    Returns:
        None. Any error is reported with print() and not raised to the caller.
    """
    try:
        if not os.path.exists(url_gpkg):
            raise FileNotFoundError(f"Input file not found: {url_gpkg}")

        # Read GeoPackage using Fiona
        with fiona.open(url_gpkg, 'r') as src:
            crs = src.crs
            features = list(src)

        # Convert to GeoDataFrame
        population_data = gpd.GeoDataFrame.from_features(features, crs=crs)

        # Work on an explicit copy so the edits below never touch a view of
        # population_data, and drop rows that have no usable geometry.
        cells = population_data[['aantal_inwoners', 'geometry']].copy()
        cells = cells[cells.geometry.notna() & ~cells.geometry.is_empty]

        # Missing population counts contribute nothing to the sum. Only this
        # column is filled; the geometry column is left alone.
        # NOTE(review): if the source encodes suppressed counts as negative
        # sentinel values they would need masking here - confirm against the
        # dataset documentation.
        population = cells['aantal_inwoners'].fillna(0)

        # One point per feature. Multi-part geometries are deliberately NOT
        # exploded: that would repeat the population value once per part.
        centroids = cells.geometry.centroid

        # H3 needs latitude/longitude, so reproject to EPSG:4326. Relabelling
        # with set_crs would keep the source coordinates unchanged; that is
        # only acceptable when the source declares no CRS at all.
        if centroids.crs is None:
            centroids = centroids.set_crs(epsg=4326)
        else:
            centroids = centroids.to_crs(epsg=4326)

        # h3-py 4.x renamed geo_to_h3 to latlng_to_cell; both take
        # (lat, lng, resolution). A plain loop replaces the process pool, which
        # could only pass a single argument per item.
        to_cell = getattr(h3, 'latlng_to_cell', None) or h3.geo_to_h3
        h3_index = [to_cell(point.y, point.x, resolution) for point in centroids]

        # Aggregate population per H3 cell and save to CSV
        h3_population = (
            pd.DataFrame({'h3': h3_index, 'aantal_inwoners': population.to_numpy()})
            .groupby('h3', as_index=False)['aantal_inwoners']
            .sum()
        )
        h3_population.to_csv(output_filepath, index=False)

    except Exception as e:
        print(f"Error processing data: {e}")

In [19]:
def _find_geopackage(extract_path, expected_name):
    """Return expected_name inside extract_path if present, else the first .gpkg found below it."""
    expected_path = os.path.join(extract_path, expected_name)
    if os.path.exists(expected_path):
        return expected_path

    # The archive may ship the GeoPackage under another name or in a
    # sub-folder; sort so the pick is deterministic.
    candidates = sorted(
        os.path.join(root, name)
        for root, _dirs, files in os.walk(extract_path)
        for name in files
        if name.lower().endswith('.gpkg')
    )
    # With no candidate, hand back the expected path so the processing step
    # reports it as the missing input file.
    return candidates[0] if candidates else expected_path


def main(resolution=9, years_back=10):
    """
    Downloads the most recent available CBS 100m grid archive and converts it
    to a per-H3-cell CSV.

    Years are tried from last year backwards; the first year whose archive
    downloads and extracts successfully is processed and the search stops,
    whether or not processing itself succeeds.

    Args:
        resolution (int): H3 resolution passed to process_geojson. Defaults to 9.
        years_back (int): How many years to try, starting with last year. Defaults to 10.

    Returns:
        None
    """
    # Get the current year
    current_year = datetime.datetime.now().year

    # Try last year first, then older years (the current year itself is not tried).
    for year in range(current_year - 1, current_year - 1 - years_back, -1):
        url = (
            'https://www.cbs.nl/-/media/cbs/dossiers/nederland%20regionaal/'
            'wijk%20en%20buurtstatistieken/'
            '_kaart%20van%20100%20meter%20bij%20100%20meter%20met%20statistieken/'
            f'{year}/{year}-cbs-vierkanten100m-statistieken.zip'
        )
        # Construct the file paths using the year
        download_path = f'./cbs_{year}.zip'
        extract_path = f'./cbs_{year}'
        output_filepath = f'./cbs_{year}_h3.csv'

        # Download and extract the file
        if download_and_extract(url, year, download_path, extract_path):
            # Locate the GeoPackage only after extraction, then process it
            # and stop looking at older years.
            url_gpkg = _find_geopackage(extract_path, f'cbs_vk100_{year}_v1.gpkg')
            process_geojson(url_gpkg, resolution, output_filepath)
            break

In [20]:
# Run the pipeline when this code is executed as the main program; the
# recorded output below this cell shows that main() also runs when the cell
# is executed in the notebook.
if __name__ == "__main__":
    main()

No file available for 2023
No file available for 2022
No file available for 2021
No file available for 2020
No file available for 2019
No file available for 2018
No file available for 2017
No file available for 2016
No file available for 2015
No file available for 2014
