Add Game Weather Data to College Football Data

In [21]:
import pandas as pd
import requests
from datetime import datetime
from time import sleep
import os

INPUT_CSV = '../../intermediate_files/cfb_with_locations.csv'
OUTPUT_CSV = '../../intermediate_files/cfb_with_weather.csv'
WEATHER_API_URL = 'https://archive-api.open-meteo.com/v1/archive'

In [22]:
def fetch_weather_data(date, latitude, longitude):
    params = {
        'latitude': latitude,
        'longitude': longitude,
        'start_date': date,
        'end_date': date,
        'hourly': [
            'temperature_2m',
            'relative_humidity_2m',
            'wind_speed_10m',
            'wind_direction_10m',
            'precipitation',
            'weather_code'
        ],
        'timezone': 'America/New_York'
    }

    response = requests.get(WEATHER_API_URL, params=params)
    response.raise_for_status()
    data = response.json()

    if 'hourly' in data and data['hourly']['time']:
        times = data['hourly']['time']
        noon_index = None

        for i, time_str in enumerate(times):
            hour = datetime.fromisoformat(time_str.replace('Z', '+00:00')).hour
            if hour == 12:
                noon_index = i
                break

        if noon_index is not None:
            weather_data = {
                'weather_temperature_2m': data['hourly']['temperature_2m'][noon_index],
                'weather_humidity_2m': data['hourly']['relative_humidity_2m'][noon_index],
                'weather_wind_speed_10m': data['hourly']['wind_speed_10m'][noon_index],
                'weather_wind_direction_10m': data['hourly']['wind_direction_10m'][noon_index],
                'weather_precipitation': data['hourly']['precipitation'][noon_index],
                'weather_code': data['hourly']['weather_code'][noon_index]
            }
            return weather_data

In [23]:
if os.path.exists(OUTPUT_CSV):
    print(f"Output file exists, loading to resume processing: {OUTPUT_CSV}")
    df = pd.read_csv(OUTPUT_CSV)
    print(f"Loaded {len(df)} rows from existing output")
else:
    print(f"Output file doesn't exist, starting fresh from: {INPUT_CSV}")
    df = pd.read_csv(INPUT_CSV)

weather_columns = [
    'weather_temperature_2m',
    'weather_humidity_2m',
    'weather_wind_speed_10m',
    'weather_wind_direction_10m',
    'weather_precipitation',
    'weather_code'
]

for col in weather_columns:
    if col not in df.columns:
        df[col] = None

successful_requests = 0
failed_requests = 0

start_idx = 0
for idx, row in df.iterrows():
    if all(pd.isna(row[col]) for col in weather_columns):
        start_idx = idx
        break

print("start idx", start_idx)

for idx in range(start_idx, len(df)):
    row = df.iloc[idx]
    date = row['date']
    lat = row['game_lat']
    lon = row['game_lon']

    if pd.isna(lat) or pd.isna(lon):
        failed_requests += 1
        continue

    print("Fetching weather for game", idx+1, "/", len(df))
    weather_data = fetch_weather_data(date, lat, lon)

    if weather_data:
        for col, value in weather_data.items():
            df.at[idx, col] = value
        successful_requests += 1
    else:
        failed_requests += 1

    sleep(2)

    # Save every 500 games
    if (idx + 1) % 500 == 0:
        print("RESULTS: ", successful_requests, failed_requests)

        df.to_csv(OUTPUT_CSV, index=False)

df.to_csv(OUTPUT_CSV, index=False)

Output file exists, loading to resume processing: ../../intermediate_files/cfb_with_weather.csv
Loaded 18636 rows from existing output
start idx 9312
Fetching weather for game 9314 / 18636
Fetching weather for game 9315 / 18636
Fetching weather for game 9316 / 18636
Fetching weather for game 9317 / 18636
Fetching weather for game 9318 / 18636
Fetching weather for game 9319 / 18636
Fetching weather for game 9320 / 18636
Fetching weather for game 9321 / 18636
Fetching weather for game 9322 / 18636
Fetching weather for game 9323 / 18636
Fetching weather for game 9324 / 18636
Fetching weather for game 9325 / 18636
Fetching weather for game 9326 / 18636
Fetching weather for game 9327 / 18636
Fetching weather for game 9328 / 18636
Fetching weather for game 9329 / 18636
Fetching weather for game 9330 / 18636
Fetching weather for game 9331 / 18636
Fetching weather for game 9332 / 18636
Fetching weather for game 9333 / 18636
Fetching weather for game 9334 / 18636
Fetching weather for game 9335 