In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

from datetime import datetime
from meteostat import Point, Hourly, Stations
import pandas as pd
from tqdm import tqdm



# Query window shared by the station-coverage check and the hourly download.
start = datetime(year=2006, month=1, day=1)
end = datetime(year=2025, month=1, day=1)

# Regions to sample, as (label, latitude, longitude) tuples.
# Each label later becomes a "<label>_temp" column, so labels must be unique.
regions = [
    # --- central and river plains ---
    ("Tennessee", 35.6, -88.8),
    ("Kentucky", 37.8393, -84.27),
    ("Mississippi", 33.1238, -89.0556),
    ("Louisiana", 31.2, -92.4),
    ("New Mexico", 34.4, -103.2),
    ("Iowa", 41.6, -93.6),
    ("Missouri", 38.5739, -92.6038),
    ("Illinois", 39.8, -89.6),
    ("Indiana", 39.8, -86.1),
    ("Ohio", 40.0, -83.0),
    ("Arkansas", 34.7465, -92.2896),

    # (second, unlabeled group in the original list)
    ("North Dakota", 46.8, -100.8),
    ("South Dakota", 44.4, -100.3),
    ("Nebraska", 41.5, -99.7),
    ("Kansas", 38.5, -98.0),
    ("Oklahoma", 35.5, -97.5),
    ("Texas", 35.2, -101.8),
    ("Montana", 47.1, -104.7),
    ("Wyoming", 42.1, -104.2),
    ("Colorado", 39.5501, -105.7821),

    # --- east coast ---
    ("Maine", 44.3106, -69.7795),
    ("New Hampshire", 43.2081, -71.5376),
    ("Massachusetts", 42.3601, -71.0589),
    ("Rhode Island", 41.824, -71.4128),
    ("Connecticut", 41.7658, -72.6734),
    ("New York", 42.6526, -73.7562),
    ("New Jersey", 40.2206, -74.7699),
    ("Maryland", 38.9784, -76.4922),
    ("North Carolina", 35.7796, -78.6382),
    ("South Carolina", 34.0007, -81.0348),
    ("Georgia", 33.749, -84.388),
    ("Florida", 30.4383, -84.2807),
    ("Delaware", 39.0, -75.5),
    ("Virginia", 37.5, -78.75),
]

In [3]:
# Collect data
# For every region, take the first of up to 10 nearby stations whose hourly
# inventory spans the whole [start, end] window and download its hourly series.
all_data = []
skipped_regions = []  # regions for which no candidate station had full coverage
for name, lat, lon in tqdm(regions, desc="Fetching weather"):
    stations = Stations().nearby(lat, lon).fetch(10)
    for station_id, row in stations.iterrows():
        # Comparisons against a missing (NaT) inventory date evaluate to False,
        # so stations without hourly inventory are passed over.
        if row['hourly_start'] <= start and row['hourly_end'] >= end:
            df = Hourly(station_id, start, end).fetch()
            break
    else:
        # No station qualified. Without this branch `df` would either be
        # undefined (first region) or still hold the PREVIOUS region's data,
        # which would then be stored under this region's name.
        skipped_regions.append(name)
        warnings.warn(
            f"No station near {name} ({lat}, {lon}) has hourly data covering "
            f"{start:%Y-%m-%d} to {end:%Y-%m-%d}; region skipped."
        )
        continue

    # Move the time index into a regular column and tag rows with the region.
    df = df.reset_index()
    df['region'] = name
    all_data.append(df[['time', 'region', 'temp']])

if not all_data:
    raise RuntimeError("No region had a station with full hourly coverage; nothing to combine.")

# Combine the per-region frames into one long table (time, region, temp)
df_all = pd.concat(all_data, ignore_index=True)


Fetching weather: 100%|██████████| 34/34 [00:08<00:00,  3.92it/s]


In [4]:
# Reshape long -> wide: one row per timestamp, one temperature column per region.
wide_temps = df_all.pivot(index='time', columns='region', values='temp')

# "North Dakota" -> "north_dakota_temp"
temp_columns = [f"{region.lower().replace(' ', '_')}_temp" for region in wide_temps.columns]

# Relabel the columns with a plain list, then turn the time index back into a column.
df_pivot = wide_temps.set_axis(temp_columns, axis=1).reset_index()

# NOTE(review): the file name says "eastern" although the region list also holds
# central/plains entries — confirm with consumers before renaming.
df_pivot.to_csv("us_eastern_regions_hourly_temperature_2006_to_2025.csv", index=False)

In [13]:
# Preview the wide table: a 'time' column plus one "<region>_temp" column per region.
df_pivot

Unnamed: 0,time,arkansas_temp,colorado_temp,connecticut_temp,delaware_temp,florida_temp,georgia_temp,illinois_temp,indiana_temp,iowa_temp,...,north_dakota_temp,ohio_temp,oklahoma_temp,rhode_island_temp,south_carolina_temp,south_dakota_temp,tennessee_temp,texas_temp,virginia_temp,wyoming_temp
0,2006-01-01 00:00:00,,,,,19.4,,1.1,1.7,1.1,...,-2.8,,,-0.6,,,,9.4,6.0,
1,2006-01-01 01:00:00,,-1.0,-2.8,6.1,19.4,6.7,-0.6,1.7,1.1,...,-2.8,1.7,11.7,-0.6,14.4,-1.7,8.9,10.0,7.0,11.7
2,2006-01-01 02:00:00,,-3.0,-2.2,5.6,17.8,3.9,-0.6,1.7,0.6,...,-2.8,1.7,13.3,0.0,12.8,-2.2,8.3,8.3,6.0,8.3
3,2006-01-01 03:00:00,,-4.0,-2.2,5.0,16.1,3.3,0.6,1.7,0.0,...,-3.3,1.7,13.3,0.0,11.1,-1.1,3.3,8.3,3.0,8.3
4,2006-01-01 04:00:00,,-4.0,-1.7,3.9,15.0,2.8,0.0,0.6,0.0,...,-3.3,1.7,13.9,0.0,8.9,-1.0,5.6,7.2,3.0,7.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
166556,2024-12-31 20:00:00,11.5,-10.0,10.0,13.9,26.1,21.7,3.3,3.9,-1.1,...,-6.1,10.0,7.2,8.3,22.2,-1.7,9.4,7.8,16.3,1.1
166557,2024-12-31 21:00:00,11.5,-10.0,10.0,13.9,25.0,21.7,3.3,3.9,-1.1,...,-6.7,10.0,7.2,6.1,21.1,-1.7,9.4,8.3,15.8,0.6
166558,2024-12-31 22:00:00,10.5,-11.0,7.8,12.2,22.8,15.6,3.3,3.9,-1.1,...,-6.7,8.9,7.2,5.0,18.9,-2.2,9.4,7.2,13.5,0.6
166559,2024-12-31 23:00:00,9.7,-13.0,7.8,11.1,20.6,15.6,3.3,3.9,-1.1,...,-6.7,8.9,5.0,5.0,18.9,-2.2,8.9,7.2,10.9,-2.2
