In [3]:
import requests
import pandas as pd
from time import sleep
from random import randint
import datetime

# Function to fetch data from a URL and handle HTTP errors
def fetch_data(url):
    response = requests.get(url)
    response.raise_for_status()
    return response.json()

# Base URL to fetch things with correct layerName
base_url = (
    "https://iot.hamburg.de/v1.0/Things?"
    "$filter=Datastreams/properties/serviceName eq 'HH_STA_HamburgerRadzaehlnetz' "
    "and Datastreams/properties/layerName eq 'Anzahl_Fahrraeder_Zaehlstelle_1-Stunde'&"
    "$expand=Datastreams($filter=properties/layerName eq 'Anzahl_Fahrraeder_Zaehlstelle_1-Stunde')"
)

# Fetch things
things_data = fetch_data(base_url)
things = things_data['value']

# Collect Datastreams
datastreams = []
for thing in things:
    for datastream in thing['Datastreams']:
        datastreams.append(datastream)

# Extract Datastream IDs for hourly data
hourly_datastream_ids = [ds['@iot.id'] for ds in datastreams]

print(f"Hourly Datastream IDs: {hourly_datastream_ids}")


Hourly Datastream IDs: [11797, 11801, 11805, 11821, 11825, 11829, 11833, 11837, 11841, 11845, 11849, 11857, 11873, 11877, 11881, 11885, 11897, 11901, 11905, 11909, 11913, 11917, 11921, 11925, 11929, 11933, 11941, 11957, 11965, 11977, 11993, 12005, 12009, 12013, 12017, 12021, 12025, 12029, 12033, 12037, 12041, 12045, 12049, 12053, 12057, 12061, 12065, 12073, 12085, 12097, 12109, 12117, 12121, 12125, 12129, 12133, 12137, 12141, 12543, 12547, 12551, 12555, 12559, 12563, 12567, 12571, 12575, 12579, 12583, 12587, 12591, 12595, 12599, 12607, 12611, 12615, 12619, 12720, 12724, 12728, 12732, 12736, 12740, 12744, 12748, 12752, 12756, 12764, 12768, 12772, 12776, 12780, 12784, 12788, 12792, 12796, 12818, 12822, 12826, 12830]


In [4]:

# Current date (May 25, 2024)
end_date = datetime.datetime(2024, 5, 26)
start_date = datetime.datetime(2023, 5, 25)

# List of correct Datastream IDs for hourly data
datastream_ids = [11797, 11801, 11805, 11821, 11825, 11829, 11833, 11837, 11841, 11845, 11849, 11857, 11873, 11877, 11881, 11885, 11897, 11901, 11905, 11909, 11913, 11917, 11921, 11925, 11929, 11933, 11941, 11957, 11965, 11977, 11993, 12005, 12009, 12013, 12017, 12021, 12025, 12029, 12033, 12037, 12041, 12045, 12049, 12053, 12057, 12061, 12065, 12073, 12085, 12097, 12109, 12117, 12121, 12125, 12129, 12133, 12137, 12141, 12543, 12547, 12551, 12555, 12559, 12563, 12567, 12571, 12575, 12579, 12583, 12587, 12591, 12595, 12599, 12607, 12611, 12615, 12619, 12720, 12724, 12728, 12732, 12736, 12740, 12744, 12748, 12752, 12756, 12764, 12768, 12772, 12776, 12780, 12784, 12788, 12792, 12796, 12818, 12822, 12826, 12830]

# Fetch observations for each Datastream ID
all_data = []

for datastream_id in datastream_ids:
    observations_url = (
        f"https://iot.hamburg.de/v1.0/Datastreams({datastream_id})/Observations?"
        f"$filter=phenomenonTime ge {start_date.isoformat()}Z and phenomenonTime le {end_date.isoformat()}Z&"
        "$orderby=phenomenonTime desc"
    )
    
    observations_data = fetch_data(observations_url)
    observations = observations_data['value']
    
    while '@iot.nextLink' in observations_data:
        next_url = observations_data['@iot.nextLink']
        sleep(randint(1,3000)/1000)  # Respectful pause between requests
        observations_data = fetch_data(next_url)
        observations.extend(observations_data['value'])
    
    # Extract and store relevant data
    for observation in observations:
        row = {
            'DatastreamID': datastream_id,
            'PhenomenonTime': observation['phenomenonTime'],
            'ResultTime': observation['resultTime'],
            'Result': observation['result']
        }
        all_data.append(row)

# Convert data to a DataFrame
data = pd.DataFrame(all_data)

data


Unnamed: 0,DatastreamID,PhenomenonTime,ResultTime,Result
0,11797,2024-05-25T23:00:00Z/2024-05-25T23:59:59Z,2024-05-27T00:54:25.823Z,26
1,11797,2024-05-25T22:00:00Z/2024-05-25T22:59:59Z,2024-05-27T00:54:25.814Z,40
2,11797,2024-05-25T21:00:00Z/2024-05-25T21:59:59Z,2024-05-26T00:54:27.521Z,84
3,11797,2024-05-25T20:00:00Z/2024-05-25T20:59:59Z,2024-05-26T00:54:27.511Z,132
4,11797,2024-05-25T19:00:00Z/2024-05-25T19:59:59Z,2024-05-26T00:54:27.503Z,158
...,...,...,...,...
146009,12830,2024-03-26T07:00:00Z/2024-03-26T07:59:59Z,2024-03-27T00:54:47.03Z,783
146010,12830,2024-03-26T06:00:00Z/2024-03-26T06:59:59Z,2024-03-27T00:54:47.022Z,339
146011,12830,2024-03-26T05:00:00Z/2024-03-26T05:59:59Z,2024-03-27T00:54:47.015Z,99
146012,12830,2024-03-26T04:00:00Z/2024-03-26T04:59:59Z,2024-03-27T00:54:47.006Z,22


In [5]:
data.to_csv('bike_data_hourly.csv', index=False)