In [1]:
import os
from os.path import dirname

# Remember the directory the kernel started in the first time this cell runs.
# Deriving root_dir from os.getcwd() on every execution would climb one level
# higher each time the cell is re-run, silently breaking relative paths.
notebook_dir = globals().get('notebook_dir') or os.getcwd()

# Use the parent of the notebook's directory as the working directory so that
# relative paths used later (e.g. 'data/weather_data.csv') resolve from there.
root_dir = dirname(notebook_dir)
os.chdir(root_dir)

In [2]:
import pandas as pd

# Load the raw weather table; the path is relative to the current working
# directory. Later cells read its 'date', 'precipMM' and 'tempC' columns.
weather_csv_path = 'data/weather_data.csv'
weather_df = pd.read_csv(weather_csv_path)

In [3]:
# Build weekly means of precipMM and tempC from the daily weather table, keyed
# by ISO year and ISO week, discard week 53 of 2009, and save the result.

# Copy only the needed columns so weather_df itself is left untouched.
df = weather_df[['date', 'precipMM', 'tempC']].copy()

# Ensure date is datetime
df['date'] = pd.to_datetime(df['date'])

# Convert the measurements to numeric; unparseable values become NaN and are
# skipped by the mean below.
for col in ['precipMM', 'tempC']:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Take year AND week from the same ISO calendar. Pairing the ISO week with the
# calendar year (dt.year) mislabels days around New Year: 2010-01-01 belongs to
# ISO week 53 of 2009 but would be filed under (2010, 53), and 2011-01-01
# belongs to ISO week 52 of 2010 but would be filed under (2011, 52).
iso_calendar = df['date'].dt.isocalendar()
df['year'] = iso_calendar['year']
df['week'] = iso_calendar['week']
df = df[['date', 'year', 'week', 'precipMM', 'tempC']]

# Group by ISO year and week; aggregate: mean precipMM, mean tempC
weekly = (
    df.groupby(['year', 'week'])[['precipMM', 'tempC']]
    .mean()
    .reset_index()
)

# Discard week 53 of 2009 by key rather than by row position, so the intended
# row is removed wherever it sorts (and nothing is removed if it is absent).
is_2009_week_53 = (weekly['year'] == 2009) & (weekly['week'] == 53)
weekly = weekly[~is_2009_week_53].reset_index(drop=True)

weekly.to_csv('data/weather_weekly.csv', index=False)

In [4]:
# Display the weekly aggregate (year, week, mean precipMM, mean tempC) for inspection.
weekly

Unnamed: 0,year,week,precipMM,tempC
0,2009,49,1.700000,25.571429
1,2009,50,4.385714,23.714286
2,2009,51,14.485714,24.285714
3,2009,52,9.957143,22.857143
4,2010,1,1.671429,26.000000
...,...,...,...,...
58,2011,3,3.214286,23.571429
59,2011,4,1.928571,23.428571
60,2011,5,1.585714,23.714286
61,2011,6,13.000000,21.857143


In [5]:
# api_key = os.environ["WWO_API_KEY"]  # read the key from the environment; never commit the real value

In [6]:
# import requests
# from datetime import datetime, timedelta

# # Helper to get start and end dates for each week in the range
# def get_week_ranges(start_date, end_date):
#     week_ranges = []
#     current = start_date
#     while current <= end_date:
#         week_start = current
#         week_end = week_start + timedelta(days=6)
#         if week_end > end_date:
#             week_end = end_date
#         week_ranges.append((week_start, week_end))
#         current = week_end + timedelta(days=1)
#     return week_ranges

# # Set up date range for week 49 of 2009 to week 7 of 2011
# start_date = datetime.strptime('2009-11-30', '%Y-%m-%d')  # Monday of ISO week 49, 2009
# end_date = datetime.strptime('2011-02-20', '%Y-%m-%d')    # Sunday of ISO week 7, 2011
# week_ranges = get_week_ranges(start_date, end_date)

# api_key = os.environ["WWO_API_KEY"]  # read the key from the environment; never commit the real value
# base_url = "https://api.worldweatheronline.com/premium/v1/past-weather.ashx"

# results = []
# for week_start, week_end in week_ranges:
#     # API requires enddate to be in the same month and year as date
#     if week_start.month != week_end.month or week_start.year != week_end.year:
#         # Split the week if it crosses month boundary
#         split_end = datetime(week_start.year, week_start.month, 1) + timedelta(days=32)
#         split_end = split_end.replace(day=1) - timedelta(days=1)
#         week_ends = [split_end, week_end]
#         week_starts = [week_start, split_end + timedelta(days=1)]
#     else:
#         week_starts = [week_start]
#         week_ends = [week_end]
#     for ws, we in zip(week_starts, week_ends):
#         params = {
#             'q': 'Bello',
#             'date': ws.strftime('%Y-%m-%d'),
#             'enddate': we.strftime('%Y-%m-%d'),
#             'tp': '24',  # daily data
#             'format': 'json',
#             'key': api_key
#         }
#         response = requests.get(base_url, params=params)
#         if response.status_code == 200:
#             data = response.json()
#             for day in data['data']['weather']:
#                 results.append({
#                     'date': day['date'],
#                     'maxtempC': day['maxtempC'],
#                     'mintempC': day['mintempC'],
#                     'totalSnow_cm': day.get('totalSnow_cm'),
#                     'sunHour': day.get('sunHour'),
#                     'uvIndex': day.get('uvIndex'),
#                     'precipMM': day['hourly'][0]['precipMM'],
#                     'humidity': day['hourly'][0]['humidity'],
#                     'tempC': day['hourly'][0]['tempC'],
#                 })
#         else:
#             print(f"Failed for week {ws} to {we}")

# weather_df = pd.DataFrame(results)
# weather_df['date'] = pd.to_datetime(weather_df['date'])
# weather_df['week'] = weather_df['date'].dt.isocalendar().week
# weather_df['year'] = weather_df['date'].dt.year
# weather_df.to_csv('data/weather_data.csv', index=False)