In [1]:
import requests
import time
import numpy as np
import pandas as pd
from tqdm import tqdm

In [2]:
location_df = pd.read_csv(r"../data_f1db/f1db-circuits.csv")
location_df = location_df[['id', 'longitude', 'latitude']].rename(columns={'id': 'circuitId'})

race_df = pd.read_csv(r"../data_f1db/f1db-races.csv")
race_df = race_df[['id', 'year', 'date', 'circuitId']]
race_df = race_df[(race_df['year'] > 1985) & (race_df['year'] < 2025)]

weather_df = race_df.merge(location_df[['circuitId', 'latitude', 'longitude']], 
                            on='circuitId', 
                            how='left')

In [43]:
def fetch_daily_weather(df):
    weather_data = {
        'temperature_max': [],
        'temperature_min': [],
        'precipitation_mm': []
    }

    for _, row in tqdm(df.iterrows(), total=len(df)):
        lat = row['latitude']
        lon = row['longitude']
        date_str = pd.to_datetime(row['date']).strftime('%Y-%m-%d')

        url = (
            "https://archive-api.open-meteo.com/v1/archive"
            f"?latitude={lat}&longitude={lon}"
            f"&start_date={date_str}&end_date={date_str}"
            "&daily=temperature_2m_max,temperature_2m_min,precipitation_sum"
            "&timezone=UTC"
        )

        try:
            resp = requests.get(url, timeout=10)
            resp.raise_for_status()
            data = resp.json().get('daily', {})

            # Extract weather data for the one day
            weather_data['temperature_max'].append(data.get('temperature_2m_max', [None])[0])
            weather_data['temperature_min'].append(data.get('temperature_2m_min', [None])[0])
            weather_data['precipitation_mm'].append(data.get('precipitation_sum', [None])[0])

        except Exception as e:
            print(f"Failed to fetch for {date_str} at ({lat}, {lon}): {e}")
            weather_data['temperature_max'].append(None)
            weather_data['temperature_min'].append(None)
            weather_data['precipitation_mm'].append(None)

        time.sleep(0.5)  # Be nice to the API and avoid rate limits

    # Add the data as new columns
    df['temperature_max'] = weather_data['temperature_max']
    df['temperature_min'] = weather_data['temperature_min']
    df['precipitation_mm'] = weather_data['precipitation_mm']

    return df

weather_df = fetch_daily_weather(weather_df)
weather_df.to_csv("race-weather-data.csv", index=False)

 17%|█▋        | 120/705 [02:32<12:37,  1.30s/it]

Failed to fetch for 1993-07-11 at (52.078611, -1.016944): HTTPSConnectionPool(host='archive-api.open-meteo.com', port=443): Read timed out. (read timeout=10)


 33%|███▎      | 234/705 [05:06<10:20,  1.32s/it]

Failed to fetch for 2000-07-02 at (46.863242, 3.164228): HTTPSConnectionPool(host='archive-api.open-meteo.com', port=443): Read timed out. (read timeout=10)


 35%|███▌      | 248/705 [05:34<10:13,  1.34s/it]

Failed to fetch for 2001-05-13 at (47.219722, 14.764722): HTTPSConnectionPool(host='archive-api.open-meteo.com', port=443): Read timed out. (read timeout=10)


 42%|████▏     | 298/705 [06:48<08:34,  1.26s/it]

Failed to fetch for 2004-05-23 at (43.734722, 7.420556): HTTPSConnectionPool(host='archive-api.open-meteo.com', port=443): Read timed out. (read timeout=10)


 56%|█████▋    | 397/705 [08:59<07:05,  1.38s/it]

Failed to fetch for 2009-10-04 at (34.843056, 136.540556): HTTPSConnectionPool(host='archive-api.open-meteo.com', port=443): Read timed out. (read timeout=10)


 84%|████████▍ | 591/705 [13:14<02:28,  1.30s/it]

Failed to fetch for 2019-09-22 at (1.291531, 103.86385): HTTPSConnectionPool(host='archive-api.open-meteo.com', port=443): Read timed out. (read timeout=10)


 86%|████████▌ | 607/705 [13:42<01:52,  1.15s/it]

Failed to fetch for 2020-09-27 at (43.410278, 39.968271): HTTPSConnectionPool(host='archive-api.open-meteo.com', port=443): Read timed out. (read timeout=10)


100%|██████████| 705/705 [15:50<00:00,  1.35s/it]


In [44]:
def retry_specific_failures(weather_df, retry_rows):
    for entry in retry_rows:
        date_str = entry["date"]
        lat = entry["latitude"]
        lon = entry["longitude"]

        url = (
            "https://archive-api.open-meteo.com/v1/archive"
            f"?latitude={lat}&longitude={lon}"
            f"&start_date={date_str}&end_date={date_str}"
            "&daily=temperature_2m_max,temperature_2m_min,precipitation_sum"
            "&timezone=UTC"
        )

        try:
            resp = requests.get(url, timeout=10)
            resp.raise_for_status()
            data = resp.json().get('daily', {})

            if data and all(data.values()):
                tmax = data.get('temperature_2m_max', [None])[0]
                tmin = data.get('temperature_2m_min', [None])[0]
                precip = data.get('precipitation_sum', [None])[0]

                # Match the row in your dataframe (based on date and approximate lat/lon)
                mask = (
                    (weather_df['date'] == date_str) &
                    (weather_df['latitude'].round(5) == round(lat, 5)) &
                    (weather_df['longitude'].round(5) == round(lon, 5))
                )

                weather_df.loc[mask, 'temperature_max'] = tmax
                weather_df.loc[mask, 'temperature_min'] = tmin
                weather_df.loc[mask, 'precipitation_mm'] = precip

                print(f"✔ Updated weather for {date_str} at ({lat}, {lon})")

            else:
                print(f"⚠ No data available for {date_str} at ({lat}, {lon})")

        except Exception as e:
            print(f"❌ Retry failed for {date_str} at ({lat}, {lon}): {e}")

        time.sleep(0.5)

    return weather_df

In [None]:
retry_rows = [
    {"date": "2004-05-23", "latitude": 43.734722, "longitude": 7.420556},
    {"date": "2009-10-04", "latitude": 34.843056, "longitude": 136.540556},
    {"date": "2019-09-22", "latitude": 1.291531, "longitude": 103.86385},
    {"date": "2020-09-27", "latitude": 43.410278, "longitude": 39.968271},
]

weather_df = retry_specific_failures(weather_df, retry_rows)
weather_df = weather_df.rename(columns={'id': 'raceId'})

✔ Updated weather for 2004-05-23 at (43.734722, 7.420556)
✔ Updated weather for 2009-10-04 at (34.843056, 136.540556)
✔ Updated weather for 2019-09-22 at (1.291531, 103.86385)
✔ Updated weather for 2020-09-27 at (43.410278, 39.968271)


In [4]:
weather_df = pd.read_csv(r"../Patrick/race-weather-data.csv")

weather_df['rain'] = np.where(weather_df['precipitation_mm']  == 0, 'none', 
                              np.where(weather_df['precipitation_mm'] <= 2.4, 'light',
                                       np.where(weather_df['precipitation_mm'] <= 7.5, 'moderate', 'heavy')
                                       )
                              )

weather_df.to_csv("race-weather-data.csv", index=False)