In [1]:
import pandas as pd

In [4]:
# Load the US Accidents (March 2023) CSV into `df`.
# The default path is specific to the original author's machine; set the
# US_ACCIDENTS_CSV environment variable to read the file from another location
# without editing the notebook.
import os

ACCIDENTS_CSV = os.environ.get(
    "US_ACCIDENTS_CSV",
    "D:/Programing/Weather2Go2/US_Accidents_March23.csv/US_Accidents_March23.csv",
)
df = pd.read_csv(ACCIDENTS_CSV)

In [5]:
# Display the column names of the accidents DataFrame.
df.columns

Index(['ID', 'Source', 'Severity', 'Start_Time', 'End_Time', 'Start_Lat',
       'Start_Lng', 'End_Lat', 'End_Lng', 'Distance(mi)', 'Description',
       'Street', 'City', 'County', 'State', 'Zipcode', 'Country', 'Timezone',
       'Airport_Code', 'Weather_Timestamp', 'Temperature(F)', 'Wind_Chill(F)',
       'Humidity(%)', 'Pressure(in)', 'Visibility(mi)', 'Wind_Direction',
       'Wind_Speed(mph)', 'Precipitation(in)', 'Weather_Condition', 'Amenity',
       'Bump', 'Crossing', 'Give_Way', 'Junction', 'No_Exit', 'Railway',
       'Roundabout', 'Station', 'Stop', 'Traffic_Calming', 'Traffic_Signal',
       'Turning_Loop', 'Sunrise_Sunset', 'Civil_Twilight', 'Nautical_Twilight',
       'Astronomical_Twilight'],
      dtype='object')

In [6]:
# List the distinct values of the Weather_Condition column
# (the output includes NaN for rows with no recorded condition).
df['Weather_Condition'].unique()

array(['Light Rain', 'Overcast', 'Mostly Cloudy', 'Rain', 'Light Snow',
       'Haze', 'Scattered Clouds', 'Partly Cloudy', 'Clear', 'Snow',
       'Light Freezing Drizzle', 'Light Drizzle', 'Fog', 'Shallow Fog',
       'Heavy Rain', 'Light Freezing Rain', 'Cloudy', 'Drizzle', nan,
       'Light Rain Showers', 'Mist', 'Smoke', 'Patches of Fog',
       'Light Freezing Fog', 'Light Haze', 'Light Thunderstorms and Rain',
       'Thunderstorms and Rain', 'Fair', 'Volcanic Ash', 'Blowing Sand',
       'Blowing Dust / Windy', 'Widespread Dust', 'Fair / Windy',
       'Rain Showers', 'Mostly Cloudy / Windy', 'Light Rain / Windy',
       'Hail', 'Heavy Drizzle', 'Showers in the Vicinity', 'Thunderstorm',
       'Light Rain Shower', 'Light Rain with Thunder',
       'Partly Cloudy / Windy', 'Thunder in the Vicinity', 'T-Storm',
       'Heavy Thunderstorms and Rain', 'Thunder', 'Heavy T-Storm',
       'Funnel Cloud', 'Heavy T-Storm / Windy', 'Blowing Snow',
       'Light Thunderstorms and Snow',

In [None]:
# Coarse weather categories; every label returned by the mapping functions
# in this notebook is one of these strings.
CATS = [
    # sky state
    "clear",
    "cloudy",
    "fog",
    # liquid precipitation
    "rain_light",
    "rain_heavy",
    # frozen / mixed precipitation
    "snow_light",
    "snow_heavy",
    "freezing_rain",
    "sleet_mix",
    # convective and hazardous phenomena
    "thunder",
    "hail",
    "smoke_dust",
    "severe",
    # fallbacks
    "other",
    "unknown",
]

def kaggle_weather_to_cat(x):
    """Map a raw ``Weather_Condition`` label to one coarse weather category.

    Matching is case-insensitive and substring based. Rules are checked in
    priority order, so a label that mentions several phenomena gets the most
    hazardous one (e.g. "Heavy T-Storm" -> "thunder", not "rain_heavy").

    Parameters
    ----------
    x : str or missing marker
        Raw label such as "Light Rain" or "Fair / Windy". ``None``, NaN of any
        float type, ``pd.NA`` / ``pd.NaT`` and blank strings count as missing.

    Returns
    -------
    str
        "unknown" for missing input, "other" when no rule matches (this
        includes "N/A Precipitation"), otherwise one of: "severe", "hail",
        "thunder", "freezing_rain", "sleet_mix", "snow_heavy", "snow_light",
        "rain_heavy", "rain_light", "fog", "smoke_dust", "cloudy", "clear".
    """
    # --- missing values ---
    if x is None:
        return "unknown"
    if isinstance(x, str):
        if not x.strip():
            return "unknown"
    else:
        try:
            # Catches float NaN of any width, pd.NA and pd.NaT.
            if bool(pd.isna(x)):
                return "unknown"
        except (TypeError, ValueError):
            # Not a scalar missing marker (e.g. a list-like); classify its text.
            pass

    s = str(x).strip().lower()

    # "Windy" is dropped rather than given its own category, so that e.g.
    # "Fair / Windy" is classified exactly like "Fair".
    s = s.replace("/ windy", "").replace("windy", "").strip()

    def mentions(*keywords):
        """Return True if the normalized label contains any of the keywords."""
        return any(keyword in s for keyword in keywords)

    # --- highest priority: truly severe phenomena ---
    if mentions("tornado", "funnel cloud", "squall", "duststorm"):
        return "severe"

    # --- hail (including small/light/heavy) ---
    if mentions("hail"):
        return "hail"

    # --- thunder / t-storm ("thunderstorm" already contains "thunder") ---
    if mentions("thunder", "t-storm", "tstorm"):
        return "thunder"

    # --- freezing drizzle/rain/fog all go into the freezing-risk bucket ---
    if mentions("freezing"):
        return "freezing_rain"

    # --- sleet / ice pellets / wintry mix ---
    if mentions("sleet", "ice pellet", "wintry mix"):
        return "sleet_mix"

    # --- snow (heavy vs light); checked before rain so mixed labels count as snow ---
    if mentions("snow"):
        return "snow_heavy" if mentions("heavy") else "snow_light"

    # --- rain/drizzle/showers (heavy vs light) ---
    if mentions("rain", "drizzle", "shower"):
        return "rain_heavy" if mentions("heavy") else "rain_light"

    # --- fog/mist/haze ---
    if mentions("fog", "mist", "haze"):
        return "fog"

    # --- smoke/dust/sand/ash ---
    if mentions("smoke", "dust", "sand", "ash", "volcanic"):
        return "smoke_dust"

    # --- clouds ("partly cloudy" / "mostly cloudy" already contain "cloudy") ---
    if mentions("overcast", "cloudy", "scattered clouds"):
        return "cloudy"

    # --- clear/fair ---
    if mentions("clear") or s == "fair" or s.startswith("fair "):
        return "clear"

    return "other"


In [None]:
import openmeteo_requests
import pandas as pd
import requests_cache
import time
import os
from retry_requests import retry

# -----------------------------
# Setup Open-Meteo client
# -----------------------------
# HTTP session backed by a requests_cache cache named ".cache"; entries are
# created with expire_after=3600 (presumably seconds, i.e. one hour — confirm
# against the requests_cache docs).
cache_session = requests_cache.CachedSession(".cache", expire_after=3600)
# Wrap the cached session with retry handling: up to 5 retries, backoff factor 0.5.
retry_session = retry(cache_session, retries=5, backoff_factor=0.5)
# Open-Meteo client that performs its requests through the session built above.
openmeteo = openmeteo_requests.Client(session=retry_session)

# -----------------------------
# Request parameters
# -----------------------------
url = "https://historical-forecast-api.open-meteo.com/v1/forecast"

# One (latitude, longitude) pair per location. Keeping each pair together makes
# it impossible for the two coordinate lists below to drift out of alignment.
location_coords = [
    (42.3314, -83.0458),
    (42.2808, -83.7430),
    (42.7325, -84.5555),
    (42.9634, -85.6681),
    (43.0125, -83.6875),
    (42.2917, -85.5872),
    (44.7631, -85.6206),
    (46.5436, -87.3954),
    (46.4953, -84.3453),
]
latitudes = [pair[0] for pair in location_coords]
longitudes = [pair[1] for pair in location_coords]

# Hourly variables to download. Their order matters: the response exposes the
# variables by position, in the order requested here.
hourly_variables = [
    "temperature_2m",
    "weather_code",
    "relative_humidity_2m",
    "surface_pressure",
    "visibility",
    "wind_speed_10m",
    "precipitation",
]

params = {
    "latitude": latitudes,
    "longitude": longitudes,
    "start_date": "2016-01-01",
    "end_date": "2023-03-31",
    "hourly": hourly_variables,
    # Units requested for wind speed, temperature and precipitation.
    "wind_speed_unit": "mph",
    "temperature_unit": "fahrenheit",
    "precipitation_unit": "inch",
}

# A single call covers every location; it yields one response per coordinate pair.
responses = openmeteo.weather_api(url, params=params)

# -----------------------------
# Output directory
# -----------------------------
os.makedirs("weather_outputs", exist_ok=True)

# -----------------------------
# Process each location safely
# -----------------------------
# The loop only reshapes the already-fetched `responses` and writes files; it
# issues no further API requests, so no delay between iterations is needed.
for response in responses:

    lat = round(response.Latitude(), 4)
    lon = round(response.Longitude(), 4)

    print(f"Processing {lat}, {lon}")

    # Process hourly data. The order of variables needs to be the same as requested.
    hourly = response.Hourly()

    hourly_data = {
        # One UTC timestamp per interval, covering [Time(), TimeEnd()).
        "time": pd.date_range(
            start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
            end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=hourly.Interval()),
            inclusive="left",
        ),
    }
    # Variables are exposed by position, so take the column names straight from
    # the request; the two orders then cannot get out of sync.
    for position, variable_name in enumerate(params["hourly"]):
        hourly_data[variable_name] = hourly.Variables(position).ValuesAsNumpy()
    hourly_data["latitude"] = lat
    hourly_data["longitude"] = lon

    # Deliberately not named `df`: that name holds the accidents DataFrame
    # loaded at the top of the notebook and must not be overwritten here.
    weather_df = pd.DataFrame(hourly_data)

    # File per city (safe + restartable)
    outfile = f"weather_outputs/weather_{lat}_{lon}.csv"
    file_exists = os.path.exists(outfile)

    if file_exists:
        # read_csv returns timestamps as plain strings; convert them back to
        # tz-aware datetimes so they compare equal to the freshly built ones.
        # The file holds a single location, so the timestamp alone identifies a row.
        saved_times = pd.to_datetime(
            pd.read_csv(outfile, usecols=["time"])["time"], utc=True
        )
        weather_df = weather_df[~weather_df["time"].isin(saved_times)]

    if not weather_df.empty:
        # Append; write the header only when the file is being created.
        weather_df.to_csv(outfile, mode="a", header=not file_exists, index=False)
        print(f"Saved {len(weather_df)} rows → {outfile}")
    else:
        print("No new data to save")

print("All locations processed.")


In [None]:
# Lookup table: WMO weather code -> coarse category label.
_WMO_CODE_TO_CATEGORY = {
    0: "clear",
    1: "cloudy", 2: "cloudy", 3: "cloudy",
    45: "fog", 48: "fog",
    # drizzle
    51: "rain_light", 53: "rain_light", 55: "rain_light",
    56: "freezing_rain", 57: "freezing_rain",
    # rain
    61: "rain_light", 63: "rain_light",
    65: "rain_heavy",
    66: "freezing_rain", 67: "freezing_rain",
    # snow
    71: "snow_light", 73: "snow_light", 77: "snow_light",
    75: "snow_heavy",
    # showers
    80: "rain_light", 81: "rain_light",
    82: "rain_heavy",
    85: "snow_light",
    86: "snow_heavy",
    # thunderstorms
    95: "thunder",
    96: "hail", 99: "hail",
}


def wmo_to_category(code: int) -> str:
    """Map a WMO weather code to a coarse weather category.

    Parameters
    ----------
    code : int
        WMO weather code. Floats holding a whole number (e.g. ``61.0``, as
        found in float arrays) are matched like the equivalent integer.

    Returns
    -------
    str
        "unknown" when ``code`` is missing (``None``, NaN, ``pd.NA``), the
        mapped category for a known code, and "other" for any other value.
    """
    # Missing codes get "unknown", the same label the text-label mapper uses
    # for missing input, rather than being lumped in with "other".
    if code is None:
        return "unknown"
    try:
        if bool(pd.isna(code)):
            return "unknown"
    except (TypeError, ValueError):
        # Not a scalar missing marker; fall through to the lookup.
        pass

    return _WMO_CODE_TO_CATEGORY.get(code, "other")
