In [10]:
import time
from pathlib import Path

import pandas as pd
import requests

In [11]:
def get_pedestrian_data(params, key, timeout=30):
    """
    Fetch pedestrian records from the Amsterdam Datapunt crowd-monitor API.

    The endpoint is paginated. The first request carries the query
    parameters; every following page is requested through the
    ``_links.next.href`` URL found in the previous response, until a
    response no longer provides such a link.

    Args:
    params (dict): Query parameters used to filter the data. Sent with the
        first request only; the next-page URLs are followed as returned.
    key (dict): HTTP headers sent with every request, e.g.
        ``{"X-Api-Key": "..."}``.
    timeout (float or tuple): Seconds to wait on each HTTP request before
        giving up. Pass ``None`` to wait indefinitely.

    Returns:
    list: The record dicts collected from ``_embedded.passanten`` of every
        page, in the order the pages were received.

    Raises:
    Exception: If any page answers with a status code other than 200.
    """
    url = "https://api.data.amsterdam.nl/v1/crowdmonitor/passanten/"
    items = []
    query = params

    # One session for all pages so the underlying connection is reused
    # instead of being re-established for every page.
    with requests.Session() as session:
        while url:
            response = session.get(url, params=query, headers=key, timeout=timeout)
            # Only the first request needs the filters.
            query = None

            if response.status_code != 200:
                raise Exception(f"Failed to fetch data: {response.status_code} - {response.text}")

            data = response.json()
            items.extend(data['_embedded']['passanten'])

            # A missing or null "next" entry (or a null href) ends the loop.
            url = (data['_links'].get('next') or {}).get('href')

    return items


In [12]:
# Filters for the request: one sensor, with a lower ([gte]) and an upper
# ([lt]) bound on datumUur.
params = {
    "sensor": "CMSA-GAKH-01",
    "datumUur[gte]": "2023-01-01",
    "datumUur[lt]": "2024-01-01",
}

api_file = "apikey.txt"

# The key is read from a local file rather than hard-coded in the notebook.
# strip() removes surrounding whitespace such as the trailing newline many
# editors append; left in, it would become part of the header value.
with open(api_file, 'r') as f:
    apikey = {
        "X-Api-Key": f.read().strip()
    }

# perf_counter() is a monotonic clock, so the measured duration is not
# affected by system clock adjustments during the (long) download.
start_time = time.perf_counter()
response = get_pedestrian_data(params, apikey)
end_time = time.perf_counter()

print(f'The data has been pulled succesfully, request took: {end_time - start_time} seconds')

The data has been pulled succesfully, request took: 273.284786939621 seconds


In [13]:
# Peek at the first two records only; echoing the whole list would embed
# every fetched record in the notebook output.
response[:2]

[{'_links': {'schema': 'https://schemas.data.amsterdam.nl/datasets/crowdmonitor/dataset#passanten',
   'self': {'href': 'https://api.data.amsterdam.nl/v1/crowdmonitor/passanten/54/',
    'title': 'Kalverstraat t.h.v. 1',
    'id': 54}},
  'id': 54,
  'sensor': 'CMSA-GAKH-01',
  'periode': 'uur',
  'naamLocatie': 'Kalverstraat t.h.v. 1',
  'datumUur': '2023-07-05T12:00:00',
  'aantalPassanten': 1150,
  'gebied': 'Winkelgebied centrum',
  'geometrie': {'type': 'Point',
   'coordinates': [121281.37087987993, 487310.4878591449]}},
 {'_links': {'schema': 'https://schemas.data.amsterdam.nl/datasets/crowdmonitor/dataset#passanten',
   'self': {'href': 'https://api.data.amsterdam.nl/v1/crowdmonitor/passanten/434/',
    'title': 'Kalverstraat t.h.v. 1',
    'id': 434}},
  'id': 434,
  'sensor': 'CMSA-GAKH-01',
  'periode': 'uur',
  'naamLocatie': 'Kalverstraat t.h.v. 1',
  'datumUur': '2023-05-28T11:00:00',
  'aantalPassanten': 2699,
  'gebied': 'Winkelgebied centrum',
  'geometrie': {'type': '

In [14]:
# One row per fetched record. Nested values (`_links`, `geometrie`) are kept
# as dicts in object columns and `datumUur` stays a string column.
df = pd.DataFrame(response)

In [15]:
# Display the frame; the rendered output abbreviates the middle rows.
df

Unnamed: 0,_links,id,sensor,periode,naamLocatie,datumUur,aantalPassanten,gebied,geometrie
0,{'schema': 'https://schemas.data.amsterdam.nl/...,54,CMSA-GAKH-01,uur,Kalverstraat t.h.v. 1,2023-07-05T12:00:00,1150,Winkelgebied centrum,"{'type': 'Point', 'coordinates': [121281.37087..."
1,{'schema': 'https://schemas.data.amsterdam.nl/...,434,CMSA-GAKH-01,uur,Kalverstraat t.h.v. 1,2023-05-28T11:00:00,2699,Winkelgebied centrum,"{'type': 'Point', 'coordinates': [121281.37087..."
2,{'schema': 'https://schemas.data.amsterdam.nl/...,445,CMSA-GAKH-01,uur,Kalverstraat t.h.v. 1,2023-05-30T04:00:00,75,Winkelgebied centrum,"{'type': 'Point', 'coordinates': [121281.37087..."
3,{'schema': 'https://schemas.data.amsterdam.nl/...,953,CMSA-GAKH-01,uur,Kalverstraat t.h.v. 1,2023-10-29T20:00:00,722,Winkelgebied centrum,"{'type': 'Point', 'coordinates': [121281.37087..."
4,{'schema': 'https://schemas.data.amsterdam.nl/...,1302,CMSA-GAKH-01,uur,Kalverstraat t.h.v. 1,2023-02-01T18:00:00,2188,Winkelgebied centrum,"{'type': 'Point', 'coordinates': [121281.37087..."
...,...,...,...,...,...,...,...,...,...
12765,{'schema': 'https://schemas.data.amsterdam.nl/...,1454562,CMSA-GAKH-01,week,Kalverstraat t.h.v. 1,2023-12-04T00:00:00,388658,,
12766,{'schema': 'https://schemas.data.amsterdam.nl/...,1454638,CMSA-GAKH-01,week,Kalverstraat t.h.v. 1,2023-12-18T00:00:00,431362,,
12767,{'schema': 'https://schemas.data.amsterdam.nl/...,1454653,CMSA-GAKH-01,week,Kalverstraat t.h.v. 1,2023-01-02T00:00:00,435843,,
12768,{'schema': 'https://schemas.data.amsterdam.nl/...,1454686,CMSA-GAKH-01,week,Kalverstraat t.h.v. 1,2023-05-01T00:00:00,422919,,


In [16]:
# Column dtypes and non-null counts, to spot columns with missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12770 entries, 0 to 12769
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   _links           12770 non-null  object
 1   id               12770 non-null  int64 
 2   sensor           12770 non-null  object
 3   periode          12770 non-null  object
 4   naamLocatie      12770 non-null  object
 5   datumUur         12770 non-null  object
 6   aantalPassanten  12770 non-null  int64 
 7   gebied           12353 non-null  object
 8   geometrie        12353 non-null  object
dtypes: int64(2), object(7)
memory usage: 898.0+ KB


In [17]:
# Summary statistics for the numeric columns (`id`, `aantalPassanten`).
# NOTE(review): the frame contains rows with different `periode` values
# ('uur' and 'week' are both visible), so the `aantalPassanten` statistics
# mix counts over different period lengths - consider filtering on `periode`
# before interpreting them.
df.describe()

Unnamed: 0,id,aantalPassanten
count,12770.0,12770.0
mean,728678.5,5038.958731
std,418915.2,28219.62429
min,54.0,52.0
25%,364768.8,149.0
50%,729705.5,719.0
75%,1092756.0,3455.75
max,1454791.0,567731.0


In [18]:
# to_csv() does not create missing folders, so make sure ./data exists
# before writing; the CSV layout itself (index column included) is unchanged.
output_path = Path('./data/kalverstraat2023.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path)