In [14]:
import pandas as pd
import requests
import json
import time
import sys
import os
import datetime
from dotenv import load_dotenv
import openmeteo_requests
import requests_cache
from retry_requests import retry

In [2]:
# Import the data
df = pd.read_csv('incidents.csv')

In [6]:
# Get only the incidents without weather data
df_no_weather = df[df['weather_code'].isnull()]

print('Number of incidents without weather data: {}'.format(len(df_no_weather)))

Number of incidents without weather data: 1100


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1138 entries, 0 to 1137
Data columns (total 34 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Unnamed: 0.4                1138 non-null   object 
 1   Unnamed: 0.3                511 non-null    object 
 2   Unnamed: 0.2                314 non-null    float64
 3   Unnamed: 0.1                314 non-null    object 
 4   Unnamed: 0                  286 non-null    object 
 5   id                          1138 non-null   object 
 6   apparent_temperature        38 non-null     float64
 7   code                        1138 non-null   int64  
 8   date                        61 non-null     object 
 9   description                 1138 non-null   object 
 10  dew_point_2m                38 non-null     float64
 11  endTime                     1110 non-null   object 
 12  et0_fao_evapotranspiration  38 non-null     float64
 13  hour                        1138 

In [11]:
# Get the API key
load_dotenv()
API_KEY = os.getenv("API-KEY-TOMTOM")

['2023-12-20 13:00:00' '2023-12-22 11:00:00' '2023-12-22 15:00:00'
 '2023-12-22 14:00:00' '2023-12-22 16:00:00' '2023-12-21 13:00:00'
 '2023-12-22 07:00:00' '2023-12-22 12:00:00' '2023-12-22 10:00:00' nan]


In [59]:

# api_params_incidents = {
#     'base_url': 'api.tomtom.com',
#     'API_KEY': API_KEY,
#     'min_lon': 18.00,
#     'max_lon': 18.16,
#     'min_lat': 59.25,
#     'max_lat': 59.40,
#     'version_number': 5,
#     'time_validity_filter': 'present',
#     'category_filter': '0%2C1%2C2%2C3%2C4%2C5%2C6%2C7%2C8%2C9%2C10%2C11%2C14',
#     'language': 'en-GB',
#     'fields': '%7Bincidents%7Btype%2Cgeometry%7Bcoordinates%7D%2Cproperties%7Bid%2CmagnitudeOfDelay%2Cevents%7Bdescription%2Ccode%2CiconCategory%7D%2CstartTime%2CendTime%7D%7D%7D'
# }

# # A function that calls the tomtom API and tries to obtain the end date
# def get_end_date(id, pars):

#     # Get the incident
#     url = f"https://{pars['base_url']}/traffic/services/{pars['version_number']}/incidentDetails?key={pars['API_KEY']}&ids={id}"
#     return json.loads(requests.get(url).text)

In [16]:
cache_session = requests_cache.CachedSession('.cache', expire_after = -1)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

# Function to make a request getting weather data
def get_weather_data(lon, lat, date):
    hourly_params = {
        "hourly": ["temperature_2m", "relative_humidity_2m", "dew_point_2m", "apparent_temperature", "precipitation", "rain", "snowfall", "snow_depth", "weather_code", "surface_pressure", "et0_fao_evapotranspiration", "vapour_pressure_deficit", "wind_speed_10m", "soil_temperature_0_to_7cm", "is_day", "sunshine_duration"]

    }

    # Create one dictionary with all parameters
    api_params_weather = {
        "latitude": lat,
        "longitude": lon,
        "start_date": date,
        "end_date": date,
        "hourly": hourly_params['hourly']
    }

    # The base url
    url = "https://archive-api.open-meteo.com/v1/archive"

    # Make the request
    return openmeteo.weather_api(url, params=api_params_weather)[0]

# Function that handles the weather data
def handle_weather_data(response):
    
    # Process hourly data. The order of variables needs to be the same as requested.
    hourly = response.Hourly()
    print(hourly)
    hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
    hourly_relative_humidity_2m = hourly.Variables(1).ValuesAsNumpy()
    hourly_dew_point_2m = hourly.Variables(2).ValuesAsNumpy()
    hourly_apparent_temperature = hourly.Variables(3).ValuesAsNumpy()
    hourly_precipitation = hourly.Variables(4).ValuesAsNumpy()
    hourly_rain = hourly.Variables(5).ValuesAsNumpy()
    hourly_snowfall = hourly.Variables(6).ValuesAsNumpy()
    hourly_snow_depth = hourly.Variables(7).ValuesAsNumpy()
    hourly_weather_code = hourly.Variables(8).ValuesAsNumpy()
    hourly_surface_pressure = hourly.Variables(9).ValuesAsNumpy()
    hourly_et0_fao_evapotranspiration = hourly.Variables(10).ValuesAsNumpy()
    hourly_vapour_pressure_deficit = hourly.Variables(11).ValuesAsNumpy()
    hourly_wind_speed_10m = hourly.Variables(12).ValuesAsNumpy()
    hourly_soil_temperature_0_to_7cm = hourly.Variables(13).ValuesAsNumpy()
    hourly_is_day = hourly.Variables(14).ValuesAsNumpy()
    hourly_sunshine_duration = hourly.Variables(15).ValuesAsNumpy()

    hourly_data = {"date": pd.date_range(
        start = pd.to_datetime(hourly.Time(), unit = "s"),
        end = pd.to_datetime(hourly.TimeEnd(), unit = "s"),
        freq = pd.Timedelta(seconds = hourly.Interval()),
        inclusive = "left"
    )}
    hourly_data["temperature_2m"] = hourly_temperature_2m
    hourly_data["relative_humidity_2m"] = hourly_relative_humidity_2m
    hourly_data["dew_point_2m"] = hourly_dew_point_2m
    hourly_data["apparent_temperature"] = hourly_apparent_temperature
    hourly_data["precipitation"] = hourly_precipitation
    hourly_data["rain"] = hourly_rain
    hourly_data["snowfall"] = hourly_snowfall
    hourly_data["snow_depth"] = hourly_snow_depth
    hourly_data["weather_code"] = hourly_weather_code
    hourly_data["surface_pressure"] = hourly_surface_pressure
    hourly_data["et0_fao_evapotranspiration"] = hourly_et0_fao_evapotranspiration
    hourly_data["vapour_pressure_deficit"] = hourly_vapour_pressure_deficit
    hourly_data["wind_speed_10m"] = hourly_wind_speed_10m
    hourly_data["soil_temperature_0_to_7cm"] = hourly_soil_temperature_0_to_7cm
    hourly_data["is_day"] = hourly_is_day
    hourly_data["sunshine_duration"] = hourly_sunshine_duration

    hourly_dataframe = pd.DataFrame(data = hourly_data)

    return hourly_dataframe


In [41]:
# The position of Stockholm
lon = 18.0687
lat = 59.3294

# Get the different dates in the dataset
dates = df_no_weather['date'].unique()

# Remove the nan values
dates = [date for date in dates if date == date]
dates = [date[0:10] for date in dates]

# Interchange the month and day
#dates = [date[0:4] + '-' + date[8:10] + '-' + date[5:7] for date in dates]

# remove duplicates
dates = list(set(dates))
print(dates)

# A dictionary that for each day contains the weather data
weather_data = {}
for date in dates:
    # Weather data for the current date
    hourly_weather = handle_weather_data(get_weather_data(lon, lat, date))
    
    # Add the weather data to the dictionary
    weather_data[date] = hourly_weather

# For each incident apply the weather data
for index, row in df_no_weather.iterrows():   
    # Get the weather data for the current date
    hourly_weather = weather_data[row['date'][0:10]]
    
    # Get the weather data for the current time
    weather_data_current_time = hourly_weather[hourly_weather['hour'] == row['time']]
    
    # Get the weather code
    weather_code = weather_data_current_time['weather_code'].values[0]
    
    # Update the dataframe
    df_no_weather.at[index, 'weather_code'] = weather_code
    



['2023-12-20', '2023-12-22', '2023-12-21']


KeyError: 'time'

In [52]:
get_weather_data(lon, lat, dates[0])

<openmeteo_sdk.WeatherApiResponse.WeatherApiResponse at 0x14e7fdcc3a0>

In [47]:
hourly_weather.keys

<bound method NDFrame.keys of                   date  temperature_2m  relative_humidity_2m  dew_point_2m  \
0  2023-12-21 00:00:00             NaN                   NaN           NaN   
1  2023-12-21 01:00:00             NaN                   NaN           NaN   
2  2023-12-21 02:00:00             NaN                   NaN           NaN   
3  2023-12-21 03:00:00             NaN                   NaN           NaN   
4  2023-12-21 04:00:00             NaN                   NaN           NaN   
5  2023-12-21 05:00:00             NaN                   NaN           NaN   
6  2023-12-21 06:00:00             NaN                   NaN           NaN   
7  2023-12-21 07:00:00             NaN                   NaN           NaN   
8  2023-12-21 08:00:00             NaN                   NaN           NaN   
9  2023-12-21 09:00:00             NaN                   NaN           NaN   
10 2023-12-21 10:00:00             NaN                   NaN           NaN   
11 2023-12-21 11:00:00            