In [2]:
# Install dependencies
!pip install pandas requests --quiet

import requests
import pandas as pd
from datetime import date, timedelta

# Cities to compare
CITIES = ["Miami", "Athens", "Copenhagen", "Oslo", "San Antonio", "Fort Lauderdale"]

# Reporting window: WINDOW_DAYS consecutive days ending yesterday (today is
# excluded). Both START and END are inclusive, hence the "- 1" below.
WINDOW_DAYS = 30
END = date.today() - timedelta(days=1)
START = END - timedelta(days=WINDOW_DAYS - 1)

def geocode_city(name, timeout=10):
    """Look up a city's coordinates via the Open-Meteo geocoding search API.

    Only the first search result is requested and used. If the response has
    no usable ``results`` entry, ``lat`` and ``lon`` are ``None`` and
    ``country`` is an empty string rather than an exception being raised.

    Args:
        name: City name to search for.
        timeout: Seconds to wait on the HTTP request before giving up.
            Without a timeout ``requests`` can block indefinitely.

    Returns:
        dict with keys ``city`` (the name exactly as passed in), ``lat``,
        ``lon`` and ``country``.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = requests.get(
        "https://geocoding-api.open-meteo.com/v1/search",
        params={"name": name, "count": 1},
        timeout=timeout,
    )
    response.raise_for_status()

    # A missing, null or empty "results" value falls back to one empty hit so
    # the .get() lookups below yield None instead of raising.
    matches = response.json().get("results") or [{}]
    hit = matches[0]
    return {
        "city": name,
        "lat": hit.get("latitude"),
        "lon": hit.get("longitude"),
        "country": hit.get("country") or "",
    }

# Geocode every configured city, then keep only those that resolved to
# both a latitude and a longitude.
geo = list(map(geocode_city, CITIES))
geo_df = pd.DataFrame(geo)
geo_df = geo_df.dropna(subset=["lat", "lon"])

def fetch_daily_weather(lat, lon, start, end, timeout=10):
    """Fetch daily max/min 2 m temperatures for one location from Open-Meteo.

    Args:
        lat: Latitude of the location.
        lon: Longitude of the location.
        start: First day of the range; must provide ``isoformat()``
            (e.g. a ``datetime.date``).
        end: Last day of the range; must provide ``isoformat()``.
        timeout: Seconds to wait on the HTTP request before giving up.
            Without a timeout ``requests`` can block indefinitely.

    Returns:
        The decoded JSON response body.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    query = {
        "latitude": lat,
        "longitude": lon,
        "daily": "temperature_2m_max,temperature_2m_min",
        "timezone": "auto",
        "start_date": start.isoformat(),
        "end_date": end.isoformat(),
    }
    response = requests.get(
        "https://api.open-meteo.com/v1/forecast",
        params=query,
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()

# Collect one record per (city, day) from each city's daily temperature series.
rows = []
for place in geo_df.itertuples(index=False):
    daily = fetch_daily_weather(place.lat, place.lon, START, END)["daily"]
    # The three series are walked in lockstep, one entry per day.
    per_day = zip(
        daily["time"],
        daily["temperature_2m_max"],
        daily["temperature_2m_min"],
    )
    rows.extend(
        {
            "city": place.city,
            "country": place.country,
            "date": day,
            "tmax_c": high,
            "tmin_c": low,
        }
        for day, high, low in per_day
    )

# Build the per-day table. Columns are listed explicitly so the frame keeps its
# schema (and the steps below keep working) even when no rows were collected.
df = pd.DataFrame(rows, columns=["city", "country", "date", "tmax_c", "tmin_c"])
df["date"] = pd.to_datetime(df["date"])

# Coerce the temperature columns to numeric so any None readings become NaN;
# otherwise a column holding only None would stay object-typed and break the
# arithmetic below.
for temp_col in ("tmax_c", "tmin_c"):
    df[temp_col] = pd.to_numeric(df[temp_col])

df["tavg_c"] = (df["tmax_c"] + df["tmin_c"]) / 2
df["range_c"] = df["tmax_c"] - df["tmin_c"]

# A day counts as "hot" when its maximum reaches this value (tmax_c units).
HOT_DAY_THRESHOLD_C = 30
df["hot_day"] = df["tmax_c"] >= HOT_DAY_THRESHOLD_C

# Summary: one row per city. The mean of the boolean hot_day flag is the
# fraction of hot days, converted to a percentage afterwards.
summary = (df.groupby("city", as_index=False)
             .agg(days=("date", "nunique"),
                  avg_tmax=("tmax_c", "mean"),
                  avg_tmin=("tmin_c", "mean"),
                  avg_tavg=("tavg_c", "mean"),
                  pct_hot_days=("hot_day", "mean")))
summary["pct_hot_days"] = (summary["pct_hot_days"] * 100).round(1)

# Save to files
df.to_csv("city_weather_cleaned.csv", index=False)
summary.to_csv("city_weather_summary.csv", index=False)

# Bare expression as the last statement: the notebook renders it as cell output.
summary


Unnamed: 0,city,days,avg_tmax,avg_tmin,avg_tavg,pct_hot_days
0,Athens,30,35.626667,25.03,30.328333,100.0
1,Copenhagen,30,23.646667,17.12,20.383333,0.0
2,Fort Lauderdale,30,33.386667,25.573333,29.48,93.3
3,Miami,30,32.373333,25.536667,28.955,93.3
4,Oslo,30,26.57,18.843333,22.706667,26.7
5,San Antonio,30,36.926667,24.776667,30.851667,100.0
