In [12]:
import requests
import pandas as pd
import cbsodata

In [8]:
def get_odata(target_url: str, max_requests: int = 100) -> pd.DataFrame:
    """Download a paginated CBS OData collection into a single DataFrame.

    The API answers with JSON pages: the records are stored under the ``value``
    key and, while more data is available, ``@odata.nextLink`` holds the URL of
    the next page. Each response is limited in size (10.000 cells according to
    the original author), so pages are requested one after another until no
    next link is returned or ``max_requests`` pages have been fetched,
    whichever comes first.

    Args:
        target_url (str): URL of the first page of the dataset.
        max_requests (int): Upper bound on the number of HTTP requests. If the
            dataset spans more pages than this, the result is truncated.

    Returns:
        pd.DataFrame: All retrieved records with a fresh 0..n-1 index, or an
        empty DataFrame when no request was made.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        KeyError: If a response carries no ``value`` entry.
    """
    # Collect the pages and concatenate once at the end; growing a DataFrame
    # with pd.concat inside the loop copies all previous rows on every page.
    pages: list[pd.DataFrame] = []
    next_url = target_url

    for request_index in range(1, max_requests + 1):
        if not next_url:
            break
        print(f"Starting request {request_index}/{max_requests}...")

        response = requests.get(next_url)
        # Surface HTTP failures as such. Without this check an error page is
        # handed to .json() and only shows up as an opaque JSONDecodeError.
        response.raise_for_status()
        payload = response.json()

        pages.append(pd.DataFrame(payload['value']))
        # A missing next link means this was the last page.
        next_url = payload.get('@odata.nextLink')

    print("Finished requests")

    # pd.concat refuses an empty list, so handle "nothing fetched" explicitly.
    if not pages:
        return pd.DataFrame()
    return pd.concat(pages, ignore_index=True)

In [9]:
# Map a descriptive dataset name to its CBS table identifier.
# The name is also used below to build the "<name>.csv" file name.
cbs_datasets: dict[str, str] = {
    "youth_safety_indicators": "20277NED"
}

# For each dataset in the dictionary, fetch the data and hand it to the CSV export helper
for name, identifier in cbs_datasets.items():
    print(f"Fetching data for the \"{name}\" dataset")

    # Observations endpoint for this table identifier.
    target_url: str = f"https://odata4.cbs.nl/CBS/{identifier}/Observations"
    # The second argument caps the download at 3 paged requests, so for a
    # table with more pages `data` holds only the first part of it.
    data: pd.DataFrame = get_odata(target_url, 3)

    # NOTE(review): `dataframe_to_csv` and `data_path` are not defined in the
    # visible cells - confirm they are defined before this cell is run.
    dataframe_to_csv(data, data_path, f"{name}.csv")


Fetching data for the "youth_safety_indicators" dataset
Starting request 1/3...


JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [None]:
# Tables to load through cbsodata, keyed by a descriptive name.
data_sets: dict[str, str] = {
    "DebtConsolidation": "84926NED",
}

# Look the identifier up in `data_sets` instead of repeating the literal, so
# the mapping above is the single place where the table id is maintained.
data = pd.DataFrame(cbsodata.get_data(data_sets["DebtConsolidation"]))
data.head()

In [10]:
# CBS third-party ("derden") data: descriptive name -> table identifier.
# These identifiers are fetched from the dataderden.cbs.nl catalog by
# get_derden_dataset. The mapping is currently empty.
derden_datasets: dict[str, str] = {

}


def get_derden_dataset(dataset: str, identifier: str) -> None:
    """Fetch one table from the ``dataderden.cbs.nl`` catalog into a DataFrame.

    The frame is built and then dropped: nothing is returned or written yet.
    The directory check and the save step described in the comments below are
    not implemented.

    Args:
        dataset (str): Descriptive name of the dataset (currently unused).
        identifier (str): Table identifier passed to ``cbsodata.get_data``.
    """
    # TODO: first check whether the target directory already exists.

    # TODO: store the downloaded data at the intended save location.
    # The catalog context makes cbsodata query dataderden.cbs.nl for this call.
    with cbsodata.catalog("dataderden.cbs.nl"):
        records = cbsodata.get_data(identifier)
        frame: pd.DataFrame = pd.DataFrame(records)


def get_all_derden_datasets(derden_datasets: dict[str, str]) -> None:
    """Fetch every dataset listed in ``derden_datasets``.

    Args:
        derden_datasets (dict[str, str]): Maps a descriptive dataset name to
            its table identifier. An empty mapping results in no calls.
    """
    # .items() must be called (the bare method object is not iterable), and
    # both values have to be forwarded because get_derden_dataset requires them.
    for dataset, identifier in derden_datasets.items():
        get_derden_dataset(dataset, identifier)


Unnamed: 0,ID,RegioS,Perioden,TotaalGeregistreerdeVerdachten_1,VerdachtenVanVermogensmisdrijven_2,VerdachtenVernielingEnOpenbareOrde_3,VerdachtenVanGeweldsmisdrijven_4,VerdachtenVanVerkeersmisdrijven_5,VerdachtenVanDrugsmisdrijven_6,VerdachtenVanVuurwapenmisdrijven_7,...,VerdachtenVanDrugsmisdrijven_13,VerdachtenVanVuurwapenmisdrijven_14,GestarteAdviezen_15,OntvangenMeldingen_16,AfgerondeDienstenOnderzoek_17,KindermishandelingBevestigd_18,GestarteAdviezen_19,OntvangenMeldingen_20,AfgerondeDienstenOnderzoek_21,KindermishandelingBevestigd_22
0,0,Nederland,2015,67090.0,28560.0,13110.0,16020.0,9160.0,5970.0,2540.0,...,19.0,9.0,,,,,,,,
1,1,Nederland,2016,61710.0,25710.0,11940.0,14760.0,8900.0,5120.0,2360.0,...,16.0,8.0,,,,,,,,
2,2,Nederland,2017,56430.0,23380.0,10100.0,13300.0,8520.0,4870.0,2040.0,...,15.0,7.0,,,,,,,,
3,3,Nederland,2018,52700.0,21650.0,9660.0,12040.0,9260.0,4620.0,2260.0,...,15.0,8.0,,,,,,,,
4,4,Nederland,2019,55800.0,23700.0,10460.0,12020.0,10480.0,5300.0,3080.0,...,17.0,10.0,52075.0,55590.0,2440.0,1840.0,301.0,322.0,14.0,11.0
