# In [None]:
import os
from datetime import datetime
from time import sleep

import pandas as pd
import requests

# Load the crash records; later steps read the collision date, time,
# latitude and longitude columns from this frame.
df = pd.read_csv("datasets/Serious Injuries and Fatalities Data for I-40 Tennessee.csv")

# Visual Crossing API key. The VISUAL_CROSSING_API_KEY environment variable is
# preferred so the secret stays out of the source file; the placeholder string
# is only a fallback to be replaced by hand.
API_KEY = os.environ.get("VISUAL_CROSSING_API_KEY", "Paste API quey here")

# Cache of weather results already fetched, keyed by the tuple built in the
# main loop, so repeated lookups do not trigger another API request.
weather_cache = {}

def extract_datetime(row):
    """Split a crash record into an ISO date string and a time string.

    Args:
        row: Mapping-like record (for example a DataFrame row) providing
            'Collision_Date_Tooltip', whose text before any '@' is a date
            such as "Monday, January 1, 2024", and 'Collision_Time_adj',
            a time string or the word "unknown".

    Returns:
        A ``(date, time)`` tuple. ``date`` is formatted "YYYY-MM-DD" and
        ``time`` is the stripped time string, or ``None`` when the time is
        "unknown" (case-insensitive). ``(None, None)`` is returned when a
        field is missing, is not a string, or the date cannot be parsed.
    """
    try:
        date_str = row['Collision_Date_Tooltip'].split('@')[0].strip()
        date_obj = datetime.strptime(date_str, "%A, %B %d, %Y")
        date_fmt = date_obj.strftime("%Y-%m-%d")
        time_str = row['Collision_Time_adj'].strip()
    except (KeyError, AttributeError, TypeError, ValueError):
        # Only data problems are treated as "no usable date/time"; a bare
        # except would also swallow KeyboardInterrupt and SystemExit.
        return None, None

    if time_str.lower() == "unknown":
        return date_fmt, None
    return date_fmt, time_str

def _nearest_hour(time_str):
    """Return the hour of day (0-23) nearest to a 24-hour ``HH:MM`` string."""
    crash_time = datetime.strptime(time_str, "%H:%M")
    # Round half up explicitly: the built-in round() uses banker's rounding,
    # which sends 00:30 down to hour 0 but 01:30 up to hour 2.
    hour = crash_time.hour + (1 if crash_time.minute >= 30 else 0)
    # 23:30 and later would round to hour 24, which is not part of the
    # requested day's data, so use the last hour of that day instead.
    return min(hour, 23)


def get_weather(lat, lon, date_str, time_str, timeout=30):
    """Look up the weather conditions for the hour nearest to a crash.

    Requests hourly data for one day from the Visual Crossing timeline
    endpoint and returns the 'conditions' text of the hour closest to
    ``time_str``.

    Args:
        lat: Latitude placed in the request's location segment.
        lon: Longitude placed in the request's location segment.
        date_str: Date segment of the request URL (the caller passes
            "YYYY-MM-DD").
        time_str: Crash time as a 24-hour "HH:MM" string.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The conditions string for the matching hour, or a diagnostic string:
        "HTTP <code>" for a non-200/429 response, "Exception: <details>" when
        the request or the parsing raises, "No hourly data" when the day has
        no hours, "No condition info" when the hour lacks a 'conditions'
        entry, and "No matching hour" when no hour entry matches.
    """
    base_url = "https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/"
    url = f"{base_url}{lat},{lon}/{date_str}"
    # Passing the query through `params` lets requests handle URL encoding.
    params = {"key": API_KEY, "unitGroup": "us", "include": "hours"}

    # Retry for as long as the service answers 429 (rate limited).
    while True:
        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(url, params=params, timeout=timeout)
        except Exception as e:
            return f"Exception: {e}"

        if response.status_code == 200:
            break
        if response.status_code == 429:
            print(" → Hit rate limit (429). Sleeping for 60 seconds...")
            sleep(60)
            continue
        return f"HTTP {response.status_code}"

    try:
        data = response.json()
        hours = data['days'][0].get('hours', [])
        if not hours:
            return "No hourly data"

        crash_hour = _nearest_hour(time_str)

        for hour_data in hours:
            # The leading field of each entry's 'datetime' is its hour.
            api_hour = int(hour_data['datetime'].split(":")[0])
            if api_hour == crash_hour:
                return hour_data.get('conditions', 'No condition info')

        return "No matching hour"
    except Exception as e:
        # Best effort: report malformed payloads or times as a result string
        # so one bad row does not abort the whole run.
        return f"Exception: {e}"

# Collect weather conditions: one entry per row of df, in row order.
weather_conditions = []
total_rows = len(df)

# Result prefixes that describe a failed lookup rather than real weather.
# Such results are not cached, so a later row with the same key can retry.
error_prefixes = ("HTTP ", "Exception:")

# Enumerate for a true row position; the index label from iterrows() is only
# equal to the position when the frame has a default integer index.
for pos, (_, row) in enumerate(df.iterrows()):
    lat, lon = row['Latitude_adj'], row['Longitude_adj']
    date_str, time_str = extract_datetime(row)
    called_api = False

    print(f"[{pos+1}/{total_rows}] Getting weather for {date_str} {time_str or '[unknown]'} at ({lat},{lon})")

    if date_str and time_str:
        # The fetched condition is specific to the crash hour, so the time
        # must be part of the key; otherwise two crashes on the same day and
        # place would share one hour's weather.
        cache_key = (date_str, time_str, round(lat, 2), round(lon, 2))
        if cache_key in weather_cache:
            condition = weather_cache[cache_key]
            print(" → (cached)")
        else:
            condition = get_weather(lat, lon, date_str, time_str)
            called_api = True
            failed = isinstance(condition, str) and condition.startswith(error_prefixes)
            if not failed:
                weather_cache[cache_key] = condition
    else:
        condition = "Missing or invalid time/date"

    print(f" → Weather: {condition}")
    weather_conditions.append(condition)

    # Save partial results every 50 rows
    done = len(weather_conditions)
    if done % 50 == 0:
        temp_df = df.iloc[:done].copy()
        temp_df["Weather_Fetched"] = weather_conditions
        temp_df.to_csv("Map_Overview_with_Weather_partial.csv", index=False)
        print("Saved checkpoint: Map_Overview_with_Weather_partial.csv")

    # Throttle only after a real API request; cached or skipped rows cost
    # nothing against the rate limit.
    if called_api:
        sleep(2)  # More conservative delay

# Final save
df["Weather_Fetched"] = weather_conditions
df.to_csv("datasets/Serious Injuries and Fatalities Data for I-40 Tennessee *with weather*.csv", index=False)
print("File saved as Serious Injuries and Fatalities Data for I-40 Tennessee *with weather*.csv")