In [None]:
!pip install pandas
!pip install numpy
!pip install requests

In [None]:
import pandas as pd

# Load the full accidents dataset (expects the CSV in the working directory)
df = pd.read_csv("US_Accidents_March23.csv")

# Keep only rows that have the coordinates and start time needed for the
# weather lookup performed later in the notebook
df = df.dropna(subset=["Start_Lat", "Start_Lng", "Start_Time"])

# Prune to a seeded random sample of at most 40,000 rows. Capping at len(df)
# avoids the ValueError that DataFrame.sample raises when asked for more rows
# than are available (e.g. when running against a smaller extract of the data).
df = df.sample(min(40000, len(df)), random_state=42).reset_index(drop=True)

In [None]:
# Parse Start_Time into datetimes; values that cannot be parsed become NaT
parsed_start = pd.to_datetime(df['Start_Time'], format='mixed', errors='coerce')

# Replace the raw column with the parsed one and discard the unparseable rows
df = df.assign(Start_Time=parsed_start).dropna(subset=["Start_Time"])

# Calendar date (no time component) of each accident
df = df.assign(Date=df['Start_Time'].dt.date)

# Independent copy holding only the columns the weather lookup needs
df_small = df.loc[:, ['ID', 'Start_Lat', 'Start_Lng', 'Date']].copy()

In [None]:
import requests
import time

def get_weather(lat, lon, date, timeout=30):
    """Fetch one day of weather for a location from the Open-Meteo archive API.

    Args:
        lat: Latitude, interpolated as-is into the query string.
        lon: Longitude, interpolated as-is into the query string.
        date: Day to query; its string form is used for both ``start_date``
            and ``end_date`` (presumably must render as YYYY-MM-DD — confirm
            against the API documentation).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A tuple ``(avg_temp, rain, cloud_cover)``. ``avg_temp`` is the mean of
        the daily maximum and minimum temperature, or None if either value is
        null in the response; ``rain`` and ``cloud_cover`` are passed through
        from the response unchanged. ``(None, None, None)`` is returned when
        the request fails or the response lacks the expected fields.
    """
    url = (
        f"https://archive-api.open-meteo.com/v1/archive?"
        f"latitude={lat}&longitude={lon}"
        f"&start_date={date}&end_date={date}"
        f"&daily=temperature_2m_max,temperature_2m_min,precipitation_sum,cloud_cover_mean"
        f"&timezone=UTC"
    )

    try:
        # Without a timeout a stalled connection would block the caller forever.
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()
        daily = r.json()['daily']
        temp_max = daily['temperature_2m_max'][0]
        temp_min = daily['temperature_2m_min'][0]
        rain = daily['precipitation_sum'][0]
        cloud_cover = daily['cloud_cover_mean'][0]
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError):
        # Best-effort lookup: network/HTTP errors, undecodable JSON, and
        # unexpected payload shapes all yield "no data". Only these failure
        # modes are caught, so KeyboardInterrupt/SystemExit still propagate
        # and a long-running caller can be interrupted.
        return None, None, None

    # A null temperature must not discard the other measurements.
    if temp_max is None or temp_min is None:
        avg_temp = None
    else:
        avg_temp = (temp_max + temp_min) / 2.0
    return avg_temp, rain, cloud_cover

In [None]:
from collections import defaultdict

# (rounded lat, rounded lon, date) -> (temperature, rain, cloud cover)
weather_cache = {}
# One weather tuple per df_small row, in row order
weather_data = []

for raw_lat, raw_lon, day in zip(df_small['Start_Lat'], df_small['Start_Lng'], df_small['Date']):
    # Coordinates are rounded to 2 decimals so rows with the same rounded
    # position and date share one lookup; the rounded values are also what
    # gets sent to get_weather.
    lookup_key = (round(raw_lat, 2), round(raw_lon, 2), day)

    if lookup_key not in weather_cache:
        weather_cache[lookup_key] = get_weather(*lookup_key)
        # Pause after every real request (presumably to respect API rate
        # limits — confirm); cache hits skip the wait.
        time.sleep(1)

    weather_data.append(weather_cache[lookup_key])


In [None]:
# Attach the looked-up weather values to df_small, aligned on its index
weather_cols = ['Temperature', 'Rain', 'Cloud_Cover']
weather_frame = pd.DataFrame(weather_data, index=df_small.index)
df_small[weather_cols] = weather_frame

# Bring the weather columns onto the full sampled dataset, matching rows by ID
weather_by_id = df_small[['ID'] + weather_cols]
df_final = df.merge(weather_by_id, on='ID')

# Write the enriched dataset to disk without the index column
df_final.to_csv("us_accidents_40k_with_weather.csv", index=False)