## Weather API

In [None]:
# Install the Open-Meteo client plus the request caching and retry helpers
!pip install openmeteo-requests
!pip install requests-cache retry-requests

In [None]:
import os
from datetime import datetime, timedelta

import pandas as pd
import requests_cache
from retry_requests import retry
import openmeteo_requests


# Open-Meteo API client.
# Requests go through a cached session (cache name ".cache", entries expire
# after 3600 seconds) wrapped with a retry policy (5 retries, backoff 0.2).
cache_session = requests_cache.CachedSession(
    ".cache",
    expire_after=3600,
)
retry_session = retry(
    cache_session,
    retries=5,
    backoff_factor=0.2,
)
openmeteo = openmeteo_requests.Client(session=retry_session)


# Date labels for the last-7-days window (used in the output file name).
# The clock is read exactly once so both labels come from the same instant,
# and the date is taken in America/New_York — the timezone sent to the API —
# rather than in whatever timezone the machine running the notebook uses.
today_nyc = pd.Timestamp.now(tz="America/New_York").date()
end = today_nyc.strftime("%Y-%m-%d")
start = (today_nyc - timedelta(days=7)).strftime("%Y-%m-%d")


# One representative point per NYC borough: (name, latitude, longitude).
borough_points = [
    ("MANHATTAN", 40.7834, -73.9663),
    ("BROOKLYN", 40.6501, -73.9496),
    ("QUEENS", 40.6815, -73.8365),
    ("BRONX", 40.8499, -73.8664),
    ("STATEN ISLAND", 40.5623, -74.1399),
]

# Parallel lists: index i in each list refers to the same borough.
boroughs = [name for name, _lat, _lon in borough_points]
latitudes = [lat for _name, lat, _lon in borough_points]
longitudes = [lon for _name, _lat, lon in borough_points]

# Open-Meteo request: a single call covering every borough coordinate.
url = "https://api.open-meteo.com/v1/forecast"

# Hourly variables to request. Downstream code reads the values back by
# position in this list, so the order here is significant.
hourly_variables = [
    "temperature_2m",
    "precipitation",
    "visibility",
    "rain",
    "showers",
    "snowfall",
    "wind_speed_10m",
]

params = {
    "latitude": latitudes,
    "longitude": longitudes,
    "hourly": hourly_variables,
    # Only the previous 7 days of hourly data, no forecast days
    "past_days": 7,
    "forecast_days": 0,
    # Timezone passed to the API
    "timezone": "America/New_York",
}

# One response object per requested coordinate pair.
responses = openmeteo.weather_api(url, params=params)


# Build one combined hourly dataframe for all boroughs.
#
# Responses are matched to boroughs by position, so the two sequences must
# have the same length; fail loudly instead of mislabelling or dropping rows.
if len(responses) != len(boroughs):
    raise ValueError(
        f"Expected {len(boroughs)} responses (one per borough), got {len(responses)}"
    )

dfs = []

for borough, response in zip(boroughs, responses):
    # Hourly block of this location's response
    hourly = response.Hourly()

    # Timestamps: Time()/TimeEnd() are Unix seconds and Interval() is the step
    # in seconds; the end bound is exclusive. These are kept in UTC.
    datetimes = pd.date_range(
        start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
        end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
        freq=pd.Timedelta(seconds=hourly.Interval()),
        inclusive="left",
    )

    # Variables come back in the order they were requested, so the column
    # names are taken from the request itself rather than hard-coded indices.
    columns = {"borough": borough, "datetime": datetimes}
    for position, variable in enumerate(params["hourly"]):
        columns[variable] = hourly.Variables(position).ValuesAsNumpy()

    df_b = pd.DataFrame(columns)

    # Calendar date in the requested timezone. Taking the date straight from
    # the UTC timestamps would push local evening hours onto the next day.
    df_b["date"] = df_b["datetime"].dt.tz_convert(params["timezone"]).dt.date

    dfs.append(df_b)

weather_hourly = pd.concat(dfs, ignore_index=True)


# Write the combined hourly table to CSV under ./data and print a summary.
output_dir = "data"
os.makedirs(output_dir, exist_ok=True)

# File name carries the start/end date labels of the requested window.
hourly_file = f"data/nyc_borough_weather_hourly_{start}_to_{end}.csv"
weather_hourly.to_csv(hourly_file, index=False)

row_total = len(weather_hourly)
rows_by_borough = weather_hourly["borough"].value_counts()

print(f"{row_total} hourly rows saved to {hourly_file}")
print("\nHourly rows per borough:")
print(rows_by_borough)
