In [1]:
import os
import pandas as pd
from os.path import dirname

# Switch the working directory to the parent of the directory the kernel
# started in, so the relative `data/...` paths used further down resolve
# (presumably the parent is the project root - confirm against repo layout).
#
# The parent is computed only once per kernel: without the guard, every
# re-run of this cell would climb one more level up the directory tree.
if 'root_dir' not in globals():
    root_dir = dirname(os.getcwd())
os.chdir(root_dir)

In [2]:
# Run configuration. Both values are interpolated into the output paths
# (data/<data_split>/<city>/...) when the CSV files are written.
city, data_split = 'bello', 'calibration'

In [3]:
import requests
from datetime import datetime, timedelta

# Helper that slices an inclusive date range into consecutive 7-day spans
def get_week_ranges(start_date, end_date):
    """Split ``[start_date, end_date]`` into back-to-back spans of up to 7 days.

    Args:
        start_date: First day of the range (inclusive).
        end_date: Last day of the range (inclusive).

    Returns:
        A list of ``(span_start, span_end)`` tuples in chronological order.
        Every span covers 7 days except possibly the last one, which is
        clipped to ``end_date``. The list is empty when ``start_date`` is
        later than ``end_date``.
    """
    one_day = timedelta(days=1)
    spans = []
    cursor = start_date
    while cursor <= end_date:
        # A full span ends 6 days after it starts; never run past end_date.
        span_end = min(cursor + 6 * one_day, end_date)
        spans.append((cursor, span_end))
        cursor = span_end + one_day
    return spans

# Study period to download. Exactly one (start_date, end_date) pair is
# active; swap in one of the alternatives below to fetch another period.
#
#   calibration or temporary projection: 2014-02-17 .. 2015-01-11
#   projection:                          2006-08-07 .. 2007-07-08
#
# Active: old calibration
start_date, end_date = (
    datetime.strptime(iso_day, '%Y-%m-%d')
    for iso_day in ('2009-11-30', '2011-02-20')
)

# Break the period into spans of at most 7 days, one API request (or two,
# when a span crosses a month boundary) per span.
week_ranges = get_week_ranges(start_date, end_date)
print(len(week_ranges), "weeks to fetch")

# --- Download daily weather for every span in `week_ranges` -----------------
# NOTE(review): this key is committed with the notebook. Rotate it, supply the
# new one through the WWO_API_KEY environment variable, and then delete the
# literal fallback below.
api_key = os.environ.get("WWO_API_KEY", "28b65fe3f0b8422683063940250605")
base_url = "https://api.worldweatheronline.com/premium/v1/past-weather.ashx"

results = []
failed_ranges = []  # (start, end) pairs for which no usable data came back
for week_start, week_end in week_ranges:
    # API requires enddate to be in the same month and year as date
    if week_start.month != week_end.month or week_start.year != week_end.year:
        # Split the week if it crosses month boundary.
        # First-of-month + 32 days always lands in the next month; snapping
        # that to day 1 and stepping back one day gives the month's last day.
        split_end = datetime(week_start.year, week_start.month, 1) + timedelta(days=32)
        split_end = split_end.replace(day=1) - timedelta(days=1)
        week_ends = [split_end, week_end]
        week_starts = [week_start, split_end + timedelta(days=1)]
    else:
        week_starts = [week_start]
        week_ends = [week_end]
    for ws, we in zip(week_starts, week_ends):
        params = {
            'q': 'Bello',
            'date': ws.strftime('%Y-%m-%d'),
            'enddate': we.strftime('%Y-%m-%d'),
            'tp': '24',  # daily data
            'format': 'json',
            'key': api_key
        }
        failure = None
        weather_days = []
        try:
            # Without a timeout a stalled connection would block the cell forever.
            response = requests.get(base_url, params=params, timeout=30)
            if response.status_code == 200:
                data = response.json()
                # A 200 reply may still lack the 'weather' list (e.g. an
                # error payload); treat that as a failed range, not a crash.
                weather_days = (data.get('data') or {}).get('weather') or []
                if not weather_days:
                    failure = "no weather data in response"
            else:
                failure = f"HTTP {response.status_code}"
        except (requests.RequestException, ValueError) as exc:
            # Network problems and undecodable bodies are handled like HTTP
            # errors: report the range and keep downloading the rest.
            failure = repr(exc)

        if failure is not None:
            print(f"Failed for week {ws} to {we}", f"({failure})")
            failed_ranges.append((ws, we))
            continue

        for day in weather_days:
            # With tp=24 each day carries a single 'hourly' entry, which
            # holds the day-level precipitation, humidity and temperature.
            daily = day['hourly'][0]
            results.append({
                'date': day['date'],
                'maxtempC': day['maxtempC'],
                'mintempC': day['mintempC'],
                'totalSnow_cm': day.get('totalSnow_cm'),
                'sunHour': day.get('sunHour'),
                'uvIndex': day.get('uvIndex'),
                'precipMM': daily['precipMM'],
                'humidity': daily['humidity'],
                'tempC': daily['tempC'],
            })

if failed_ranges:
    print(len(failed_ranges), "date ranges could not be fetched; weather.csv will have gaps")
if not results:
    # An empty frame has no 'date' column; fail with an explicit message
    # instead of an opaque KeyError, and avoid writing an empty CSV.
    raise RuntimeError("No weather data was downloaded; nothing to write")

weather_df = pd.DataFrame(results)
weather_df['date'] = pd.to_datetime(weather_df['date'])
# Make sure the target folder exists before writing into it.
os.makedirs(f'data/{data_split}/{city}', exist_ok=True)
weather_df.to_csv(f'data/{data_split}/{city}/weather.csv', index=False)

64 weeks to fetch


In [4]:
# Weekly aggregation of the daily download: mean precipitation and mean
# temperature per week, written next to the daily CSV.
#
# Select the needed columns and copy in one step, so the assignments below
# act on an independent frame. Assigning into a bare column subset can raise
# SettingWithCopyWarning on pandas versions without copy-on-write.
temp_df = weather_df[['date', 'precipMM', 'tempC']].copy()
temp_df['date'] = pd.to_datetime(temp_df['date'])

# Coerce the measurements to numbers; entries that cannot be parsed become
# NaN and are skipped by the mean below.
for col in ['precipMM', 'tempC']:
    temp_df[col] = pd.to_numeric(temp_df[col], errors='coerce')

# freq='W' groups by calendar week (pandas' 'W' alias anchors weeks on Sunday).
weekly_df = temp_df.groupby(pd.Grouper(key='date', freq='W')).mean().reset_index()
weekly_df.to_csv(f'data/{data_split}/{city}/weather_weekly.csv', index=False)