## CREATORS OF THIS FILE
Merlijn Sevenhuijsen
Alessandro Meroli


In [81]:
# Import libraries
import os
import requests
import json
import pandas as pd
import numpy as np
from dotenv import load_dotenv
import openmeteo_requests
import requests_cache
from retry_requests import retry

In [82]:
# Pull variables from the .env file into the process environment, then
# look up the TomTom key (API_KEY is None when the variable is not set).
load_dotenv()
API_KEY = os.environ.get("API-KEY-TOMTOM")

In [83]:
# Settings used to build the TomTom incident-details request URL.
# 'category_filter' and 'fields' are kept percent-encoded (%2C is ',',
# %7B / %7D are '{' / '}') because they are pasted into the URL verbatim.
api_params_incidents = dict(
    base_url='api.tomtom.com',
    API_KEY=API_KEY,
    # Bounding box, sent as bbox=min_lon,min_lat,max_lon,max_lat
    min_lon=18.00,
    max_lon=18.16,
    min_lat=59.25,
    max_lat=59.40,
    version_number=5,
    time_validity_filter='present',
    category_filter='0%2C1%2C2%2C3%2C4%2C5%2C6%2C7%2C8%2C9%2C10%2C11%2C14',
    language='en-GB',
    fields='%7Bincidents%7Btype%2Cgeometry%7Bcoordinates%7D%2Cproperties%7Bid%2CmagnitudeOfDelay%2Cevents%7Bdescription%2Ccode%2CiconCategory%7D%2CstartTime%2CendTime%7D%7D%7D',
)

# Open-Meteo client stack: a caching HTTP session (cache named '.cache',
# expire_after=-1) wrapped in a retrying session (5 retries, 0.2 backoff
# factor), which is then handed to the Open-Meteo client.
cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
retry_session = retry(
    cache_session,
    retries=5,
    backoff_factor=0.2,
)
openmeteo = openmeteo_requests.Client(session=retry_session)

def get_weather_data(lon, lat, date):
    """Request one day of hourly weather from the Open-Meteo archive API.

    Args:
        lon: Longitude sent as the ``longitude`` parameter.
        lat: Latitude sent as the ``latitude`` parameter.
        date: Day to fetch; used as both ``start_date`` and ``end_date``.

    Returns:
        The first element of the result of ``openmeteo.weather_api``.
    """
    # The order of this list is the order in which the variables are
    # requested from the API.
    hourly_variables = [
        "temperature_2m",
        "relative_humidity_2m",
        "dew_point_2m",
        "apparent_temperature",
        "precipitation",
        "rain",
        "snowfall",
        "snow_depth",
        "weather_code",
        "surface_pressure",
        "et0_fao_evapotranspiration",
        "vapour_pressure_deficit",
        "wind_speed_10m",
        "soil_temperature_0_to_7cm",
        "is_day",
        "sunshine_duration",
    ]

    query = {
        "latitude": lat,
        "longitude": lon,
        "start_date": date,
        "end_date": date,
        "hourly": hourly_variables,
    }

    responses = openmeteo.weather_api(
        "https://archive-api.open-meteo.com/v1/archive",
        params=query,
    )
    return responses[0]

def get_incident_details(params, t, timeout=30):
    """Request incident details from the TomTom traffic API.

    Args:
        params: Dict with the keys 'base_url', 'version_number', 'min_lon',
            'min_lat', 'max_lon', 'max_lat', 'fields', 'language',
            'category_filter', 'time_validity_filter' and 'API_KEY'. The
            values are inserted into the URL as-is, so they must already be
            URL-safe.
        t: Value sent as the ``t`` query parameter. ``0`` is replaced by
            ``'present'``.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without it the call could block indefinitely.

    Returns:
        The response body parsed as JSON.
    """
    # If t is 0 then we ask for the recent events
    if t == 0:
        t = 'present'

    # `t` is always appended: after the substitution above it can never be 0,
    # so the conditional the URL used to end with could not take its empty
    # branch.
    url = (
        f"https://{params['base_url']}/traffic/services/{params['version_number']}/incidentDetails"
        f"?bbox={params['min_lon']}%2C{params['min_lat']}%2C{params['max_lon']}%2C{params['max_lat']}"
        f"&fields={params['fields']}"
        f"&language={params['language']}"
        f"&categoryFilter={params['category_filter']}"
        f"&timeValidityFilter={params['time_validity_filter']}"
        f"&key={params['API_KEY']}"
        f"&t={t}"
    )
    response = requests.get(url, timeout=timeout)
    return json.loads(response.text)

In [84]:
def load_incidents(path='incidents.csv'):
    """Load the stored incidents table, indexed by incident id.

    The table is written to disk with the incident id as its index column, so
    it has to be re-indexed on 'id' when read back. Otherwise the ids end up
    in an ordinary column and membership checks against the index never find
    an already stored incident.

    Args:
        path: Location of the CSV file.

    Returns:
        The stored table indexed by 'id', or an empty table with the expected
        columns (also indexed by 'id') when the file does not exist.
    """
    if os.path.isfile(path):
        # Read ids as text so they compare equal to the ids in API responses.
        frame = pd.read_csv(path, dtype={'id': str})
        # Files without an 'id' header are returned as read rather than
        # raising.
        if 'id' in frame.columns:
            frame = frame.set_index('id')
        return frame

    columns = [
        'id', 'type', 'magnitudeOfDelay', 'startTime', 'endTime',
        'description', 'code', 'iconCategory', 'longitude', 'latitude',
        'temperature_2m', 'relative_humidity_2m', 'dew_point_2m',
        'apparent_temperature', 'precipitation', 'rain', 'snowfall',
        'snow_depth', 'weather_code', 'surface_pressure',
        'et0_fao_evapotranspiration', 'vapour_pressure_deficit',
        'wind_speed_10m', 'soil_temperature_0_to_7cm', 'is_day',
        'sunshine_duration',
    ]
    return pd.DataFrame(columns=columns).set_index('id')


# Load the incidents csv if it exists, otherwise start with an empty table
df_incidents = load_incidents()

In [85]:
def handle_incident(incident, hourly_weather_datas):
    """Flatten one incident and the weather at its start hour into a dict.

    Args:
        incident: Incident dict with 'type', 'properties' (containing 'id',
            'magnitudeOfDelay', 'startTime', 'endTime' and a non-empty
            'events' list) and 'geometry' -> 'coordinates'.
        hourly_weather_datas: DataFrame of hourly weather for the incident's
            start date. The row is selected with ``.loc[hour]``, so the index
            labels must match the hour of day (presumably the default
            0..23 index of a single-day frame).

    Returns:
        A dict with the incident fields, 'month' and 'hour' (two-character
        strings sliced from 'startTime'), 'longitude' / 'latitude' of the
        first coordinate, and every weather column except 'date'.
    """
    properties = incident['properties']
    start_time = properties['startTime']
    first_event = properties['events'][0]
    hour_value = start_time[11:13]

    # Create a dictionary with the incident details
    dict_incident = {
        'id': properties['id'],
        'magnitudeOfDelay': properties['magnitudeOfDelay'],
        'startTime': start_time,
        'endTime': properties['endTime'],
        'type': incident['type'],
        'code': first_event['code'],
        'iconCategory': first_event['iconCategory'],
        'description': first_event['description'],
        'month': start_time[5:7],
        'hour': hour_value,
    }

    # The coordinates are either a list of [lon, lat] points or, when the
    # incident only has one coordinate, a single [lon, lat] pair.
    coordinates = incident['geometry']['coordinates']
    if any(isinstance(point, list) for point in coordinates):
        first_point = coordinates[0]
    else:
        first_point = coordinates
    dict_incident['longitude'] = first_point[0]
    dict_incident['latitude'] = first_point[1]

    # Get the row of weather data that matches the hour
    weather_row = hourly_weather_datas.loc[int(hour_value)]

    # Copy the weather values, leaving out the 'date' timestamp. A duplicated
    # unconditional assignment used to copy it despite the filter.
    for key, value in weather_row.items():
        if key == 'date':
            continue
        # A missing snow depth is stored as 0
        if key == 'snow_depth' and np.isnan(value):
            value = 0
        dict_incident[key] = value

    return dict_incident

In [86]:
# Convert the response to a dataframe: each incident becomes one row

def handle_incident_call(incidents, df_incidents, weather_data):
    """Append the new, finished incidents of one API response to the table.

    Args:
        incidents: List of incident dicts; each needs 'properties' with
            'id', 'startTime' and 'endTime'.
        df_incidents: Table of already stored incidents, indexed by
            incident id.
        weather_data: Dict mapping a 'YYYY-MM-DD' date string to the hourly
            weather DataFrame of that day. It must contain the start date of
            every incident that is not skipped.

    Returns:
        A new DataFrame holding the rows of ``df_incidents`` followed by one
        row per added incident. Incidents whose id is already stored (or
        appeared earlier in the same call) and incidents without an end time
        are skipped with a printed message.
    """
    # Build the id lookup once instead of scanning the index per incident
    known_ids = set(df_incidents.index)

    # Create a list that will contain all the new incidents
    list_incidents = []

    for incident in incidents:
        properties = incident['properties']

        # Check if the incident is already in the dataframe
        if properties['id'] in known_ids:
            print('Incident already in dataframe')
            continue

        # Check if the incident is still active
        if properties['endTime'] is None:
            print('Incident has no ending time')
            continue

        # The weather frames are keyed by the start date
        date = properties['startTime'][:10]
        list_incidents.append(handle_incident(incident, weather_data[date]))

        # Remember the id so a repeat within this batch is not added twice
        known_ids.add(properties['id'])

    # Create a second dataframe with the new values
    df_incidents_2 = pd.DataFrame(list_incidents)
    if len(df_incidents_2) > 0:
        df_incidents_2.set_index('id', inplace=True)

    # Concatenate the new incidents to the old dataframe
    combined = pd.concat([df_incidents, df_incidents_2], sort=True)

    # concat keeps an index name only when all inputs agree on it. Depending
    # on the pandas version, an empty batch may therefore drop it, so restore
    # the name of the incoming table.
    combined.index.name = df_incidents.index.name
    return combined

In [87]:
def get_dates(incidents):
    """Return the distinct start dates of the incidents, in first-seen order.

    Each date is the first ten characters of the incident's
    ``properties['startTime']`` string.
    """
    start_days = (
        incident['properties']['startTime'][:10] for incident in incidents
    )
    # dict keys are unique and keep insertion order
    return list(dict.fromkeys(start_days))

In [88]:
def handle_weather_data(response):
    """Turn an hourly weather response into a DataFrame.

    Args:
        response: Object whose ``Hourly()`` result provides
            ``Variables(i).ValuesAsNumpy()``, ``Time()``, ``TimeEnd()`` and
            ``Interval()``.

    Returns:
        A DataFrame with a 'date' column followed by one column per weather
        variable.
    """
    # Variables are read by position, so this order needs to be the same as
    # the order in which they were requested.
    variable_names = (
        "temperature_2m",
        "relative_humidity_2m",
        "dew_point_2m",
        "apparent_temperature",
        "precipitation",
        "rain",
        "snowfall",
        "snow_depth",
        "weather_code",
        "surface_pressure",
        "et0_fao_evapotranspiration",
        "vapour_pressure_deficit",
        "wind_speed_10m",
        "soil_temperature_0_to_7cm",
        "is_day",
        "sunshine_duration",
    )

    hourly = response.Hourly()
    value_arrays = [
        hourly.Variables(position).ValuesAsNumpy()
        for position in range(len(variable_names))
    ]

    # One timestamp per interval from Time() up to, but excluding, TimeEnd();
    # both are given in seconds.
    timestamps = pd.date_range(
        start=pd.to_datetime(hourly.Time(), unit="s"),
        end=pd.to_datetime(hourly.TimeEnd(), unit="s"),
        freq=pd.Timedelta(seconds=hourly.Interval()),
        inclusive="left",
    )

    hourly_data = {"date": timestamps}
    hourly_data.update(zip(variable_names, value_arrays))

    return pd.DataFrame(data=hourly_data)


In [94]:
# The position of Stockholm, used for every weather request
lon = 18.0687
lat = 59.3294

# Range of values sent as the `t` parameter of the incident request
tmin = 1000
tmax = 1703251660

# Add incidents to the dataframe based on t, divided over 5 steps
for t in range(tmin, tmax, int((tmax - tmin) / 5)):
    print(t)
    all_incidents = get_incident_details(api_params_incidents, t)
    print(all_incidents)
    all_incidents = all_incidents['incidents']

    # Keep only the incidents that have an end time and are not already in
    # the dataframe. The id lookup is built once per response.
    known_ids = set(df_incidents.index)
    all_incidents = [
        incident for incident in all_incidents
        if incident['properties']['endTime'] is not None
        and incident['properties']['id'] not in known_ids
    ]

    # Get all dates
    dates = get_dates(all_incidents)

    # A dictionary that for each day contains the weather data
    weather_data = {}
    for date in dates:
        # Weather data for the current date
        hourly_weather = handle_weather_data(get_weather_data(lon, lat, date))

        # Only add it if the first hour has an apparent temperature
        if not np.isnan(hourly_weather['apparent_temperature'][0]):
            weather_data[date] = hourly_weather

    # Drop the incidents whose start date has no usable weather data
    all_incidents = [
        incident for incident in all_incidents
        if incident['properties']['startTime'][:10] in weather_data
    ]

    # Handle the incidents
    df_incidents = handle_incident_call(all_incidents, df_incidents, weather_data)

# Save the dataframe to a csv file. The index column is labelled 'id'
# explicitly so the ids can be found again when the file is read back, even if
# the index lost its name along the way. If the frame already carries an 'id'
# column, the label is left to pandas to avoid writing two 'id' headers.
index_label = None if 'id' in df_incidents.columns else 'id'
df_incidents.to_csv('incidents.csv', index=True, index_label=index_label)


1000
{'incidents': [{'type': 'Feature', 'properties': {'id': '0a29b3f434e13f351a9f390ce2022518', 'magnitudeOfDelay': 2, 'startTime': '2023-12-22T14:27:30Z', 'endTime': '2023-12-22T14:50:30Z', 'events': [{'code': 108, 'description': 'Queuing traffic', 'iconCategory': 6}]}, 'geometry': {'coordinates': [[17.9937238614, 59.3606154956], [17.9943166296, 59.3607254488], [17.99445074, 59.3607495838], [17.9945848505, 59.3607710704], [17.9949040334, 59.3608354444], [17.995052896, 59.3608676101], [17.9952352862, 59.3609078491], [17.9955464224, 59.3609762383], [17.996117733, 59.3611076766], [17.9963899772, 59.3611666678], [17.9966742913, 59.3612270258], [17.9972563307, 59.3613490657], [17.9975983123, 59.3614107476], [17.9981897394, 59.3614778116], [17.9984391849, 59.361501946], [17.9988173763, 59.3615448754], [17.9988468806, 59.3615488907], [17.999172769, 59.3615810983], [17.9993632059, 59.3615998505], [17.999776266, 59.3616400887], [17.9999157409, 59.3616655472], [18.0000418047, 59.3616897243]]}}