In [1]:
import os
import pandas as pd
from os.path import dirname

# Switch the working directory to the parent of the directory the kernel
# started in, so that relative paths used later in the notebook (such as
# `data/...`) resolve from there.
#
# The starting directory is captured only once per kernel session. Without
# this guard, every re-run of this cell would take the parent of the
# *already changed* working directory and climb one more level up.
if '_notebook_start_dir' not in globals():
    _notebook_start_dir = os.getcwd()

root_dir = dirname(_notebook_start_dir)
os.chdir(root_dir)

In [2]:
# Run configuration. Both values are interpolated into the output path
# `data/{data_split}/{city}/weather_weekly.csv` when the weekly CSV is written.
city, data_split = 'bello', 'projection'

In [3]:
import openmeteo_requests

import pandas as pd
import requests_cache
from retry_requests import retry

# Set up the Open-Meteo API client. HTTP responses are cached on disk in
# `.cache` (expire_after = -1 keeps cached entries indefinitely) and failed
# requests are retried up to 5 times with a backoff factor of 0.2.
cache_session = requests_cache.CachedSession('.cache', expire_after = -1)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

# Request definition for the historical archive endpoint.
# The order of the names in "daily" matters: the values are read back below
# by position (index 0, index 1), so both places must be kept in sync.
url = "https://archive-api.open-meteo.com/v1/archive"
params = {
    "latitude": 6.3373,
    "longitude": -75.558,
    "start_date": "2006-08-07",
    "end_date": "2007-07-08",
    "daily": ["temperature_2m_mean", "rain_sum"],
    "timezone": "auto",
}
responses = openmeteo.weather_api(url, params=params)

# Only one location is requested, so only the first response is processed.
# Add a for-loop here for multiple locations or weather models.
response = responses[0]

# The client returns the timezone name and abbreviation as bytes; decode them
# so the summary does not print raw reprs such as b'America/Bogota'b'GMT-5'.
timezone_name, timezone_abbreviation = (
    value.decode("utf-8") if isinstance(value, bytes) else value
    for value in (response.Timezone(), response.TimezoneAbbreviation())
)

print(f"Coordinates: {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation: {response.Elevation()} m asl")
print(f"Timezone: {timezone_name} ({timezone_abbreviation})")
print(f"Timezone difference to GMT+0: {response.UtcOffsetSeconds()}s")

# Daily variables come back in the same order as requested in params["daily"].
daily = response.Daily()
daily_temperature_2m_mean = daily.Variables(0).ValuesAsNumpy()
daily_rain_sum = daily.Variables(1).ValuesAsNumpy()

# Rebuild the date axis from the response's start/end epoch seconds and step.
# The end bound is excluded (inclusive = "left"). Timestamps are expressed in
# UTC, so a local-midnight day start shows up shifted by the UTC offset.
daily_data = {"date": pd.date_range(
    start = pd.to_datetime(daily.Time(), unit = "s", utc = True),
    end = pd.to_datetime(daily.TimeEnd(), unit = "s", utc = True),
    freq = pd.Timedelta(seconds = daily.Interval()),
    inclusive = "left"
)}

daily_data["temperature_2m_mean"] = daily_temperature_2m_mean
daily_data["rain_sum"] = daily_rain_sum

daily_dataframe = pd.DataFrame(data = daily_data)

Coordinates: 6.362038612365723°N -75.4522705078125°E
Elevation: 1455.0 m asl
Timezone: b'America/Bogota'b'GMT-5'
Timezone difference to GMT+0: -18000s


In [4]:
# Display the daily frame built from the API response
# (columns: date, temperature_2m_mean, rain_sum).
daily_dataframe

Unnamed: 0,date,temperature_2m_mean,rain_sum
0,2006-08-07 05:00:00+00:00,20.772581,1.300000
1,2006-08-08 05:00:00+00:00,19.805918,3.700000
2,2006-08-09 05:00:00+00:00,19.793417,0.200000
3,2006-08-10 05:00:00+00:00,21.137165,0.500000
4,2006-08-11 05:00:00+00:00,19.603834,12.200001
...,...,...,...
331,2007-07-04 05:00:00+00:00,20.585081,0.000000
332,2007-07-05 05:00:00+00:00,20.308001,2.100000
333,2007-07-06 05:00:00+00:00,20.501749,1.400000
334,2007-07-07 05:00:00+00:00,19.503832,5.800000


In [5]:
# Aggregate the daily weather frame to weekly means and save it as CSV.
weather_columns = ['temperature_2m_mean', 'rain_sum']

# Work on an explicit copy so the frame built from the API response
# (`daily_dataframe`) is never modified by this cell.
temp_df = daily_dataframe[['date', *weather_columns]].copy()
temp_df['date'] = pd.to_datetime(temp_df['date'])

# errors='coerce' turns any value that cannot be parsed as a number into NaN
# instead of raising.
temp_df[weather_columns] = temp_df[weather_columns].apply(pd.to_numeric, errors='coerce')

# Weekly bins (freq='W') averaged per column. Note that `rain_sum` therefore
# holds the mean of the daily rain sums within each week, not a weekly total.
weekly_df = temp_df.groupby(pd.Grouper(key='date', freq='W')).mean().reset_index()

# Create the target directory first: to_csv does not create missing parent
# directories and would otherwise fail for a new city / data split.
weekly_csv_path = f'data/{data_split}/{city}/weather_weekly.csv'
os.makedirs(os.path.dirname(weekly_csv_path), exist_ok=True)
weekly_df.to_csv(weekly_csv_path, index=False)

In [6]:
# Display the weekly-averaged frame that was written to weather_weekly.csv.
weekly_df

Unnamed: 0,date,temperature_2m_mean,rain_sum
0,2006-08-13 00:00:00+00:00,20.466034,2.557143
1,2006-08-20 00:00:00+00:00,19.756809,14.228572
2,2006-08-27 00:00:00+00:00,19.222286,10.242858
3,2006-09-03 00:00:00+00:00,19.199369,4.771429
4,2006-09-10 00:00:00+00:00,19.310678,8.3
5,2006-09-17 00:00:00+00:00,19.10919,8.028571
6,2006-09-24 00:00:00+00:00,19.444607,6.928572
7,2006-10-01 00:00:00+00:00,20.558889,2.257143
8,2006-10-08 00:00:00+00:00,20.206511,5.642857
9,2006-10-15 00:00:00+00:00,18.833593,17.52857
