In [1]:
import os
from os.path import dirname

# Switch the working directory to the parent of the directory the kernel
# started in (presumably the project root, so that relative 'data/...' paths
# resolve -- confirm against the repository layout).
# The parent is computed only the first time this cell runs in a kernel
# session: recomputing it from the already-changed working directory would
# climb one more level on every re-run of the cell.
if 'root_dir' not in globals():
    root_dir = dirname(os.getcwd())
os.chdir(root_dir)

In [2]:
import yaml
import copy
import requests_cache
import openmeteo_requests
import pandas as pd
from retry_requests import retry

In [3]:
# Load the download configuration. The code below reads two top-level keys
# from it: 'location' and 'data_split'.
# The encoding is given explicitly so the result does not depend on the
# platform's locale default.
with open('data/configs/weather_data_params.yaml', 'r', encoding='utf-8') as file:
    location_params = yaml.safe_load(file)

In [4]:
# Names of the configured locations and data splits, in the order in which
# they appear in the loaded configuration mapping.
location_list = [*location_params['location']]
data_split = [*location_params['data_split']]

In [5]:
def _as_text(value):
    """Return ``value`` decoded as UTF-8 when it is ``bytes``, else unchanged."""
    return value.decode('utf-8') if isinstance(value, bytes) else value


# The HTTP session, the API client and the endpoint do not depend on the
# location or the split, so they are created once instead of on every
# iteration. expire_after=-1 is passed through to requests-cache
# (documented there as "never expire").
cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)
url = 'https://archive-api.open-meteo.com/v1/archive'

# Daily variables requested from the API. The position in this list is also
# the index used with daily.Variables(i) below, so the order matters.
weather_columns = ['temperature_2m_mean', 'rain_sum']

for location in location_list:
    print('-' * 80)
    latitude = location_params['location'][location]['latitude']
    longitude = location_params['location'][location]['longitude']

    for split in data_split:
        print('-' * 80)
        # Date ranges are configured per split and per location.
        start_date = location_params['data_split'][split]['start_date'][location]
        end_date = location_params['data_split'][split]['end_date'][location]

        params = {
            'latitude': latitude,
            'longitude': longitude,
            'start_date': start_date,
            'end_date': end_date,
            'daily': weather_columns,
            'timezone': 'auto',
        }
        responses = openmeteo.weather_api(url, params=params)
        # A single coordinate pair is requested, so only the first response
        # is used.
        response = responses[0]
        print(f'Coordinates: {response.Latitude()}°N {response.Longitude()}°E')
        print(f'Elevation: {response.Elevation()} m asl')
        # The client returns the timezone fields as bytes; decode them so the
        # log shows plain text rather than b'...' reprs run together.
        timezone_name = _as_text(response.Timezone())
        timezone_abbreviation = _as_text(response.TimezoneAbbreviation())
        print(f'Timezone: {timezone_name} {timezone_abbreviation}')
        print(f'Timezone difference to GMT+0: {response.UtcOffsetSeconds()}s')

        daily = response.Daily()
        daily_temperature_2m_mean = daily.Variables(0).ValuesAsNumpy()
        daily_rain_sum = daily.Variables(1).ValuesAsNumpy()

        # One timestamp per interval step, from Time() (included) up to
        # TimeEnd() (excluded), interpreted as seconds since the epoch in UTC.
        daily_data = {'date': pd.date_range(
            start=pd.to_datetime(daily.Time(), unit='s', utc=True),
            end=pd.to_datetime(daily.TimeEnd(), unit='s', utc=True),
            freq=pd.Timedelta(seconds=daily.Interval()),
            inclusive='left'
        )}

        daily_data['temperature_2m_mean'] = daily_temperature_2m_mean
        daily_data['rain_sum'] = daily_rain_sum
        daily_dataframe = pd.DataFrame(data=daily_data)
        print(f'Number of days retrieved: {len(daily_dataframe)}')

        # Make sure the target directory exists before writing; to_csv does
        # not create missing parent directories.
        output_dir = f'data/{split}/{location}'
        os.makedirs(output_dir, exist_ok=True)
        daily_dataframe.to_csv(
            f'{output_dir}/weather_daily.csv',
            index=False,
        )

        # Work on an independent copy of the two value columns so the
        # assignments below never touch (or warn about) daily_dataframe.
        numeric_df = daily_dataframe[weather_columns].copy()
        for col in weather_columns:
            numeric_df[col] = pd.to_numeric(numeric_df[col], errors='coerce')

        # Average consecutive 7-row chunks, using the default 0..n-1 index.
        # Chunks start at the first retrieved day (they are not aligned to
        # calendar weeks) and a trailing chunk with fewer than 7 days is
        # averaged over the days it has. Both columns are averaged, so the
        # weekly 'rain_sum' value is the mean of the daily sums.
        weekly_df = numeric_df.groupby(
            numeric_df.index // 7).mean().reset_index(drop=True)
        print(f'Number of weeks aggregated: {len(weekly_df)}')
        weekly_df.to_csv(
            f'{output_dir}/weather_weekly.csv',
            index=False,
        )

--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Coordinates: 6.362038612365723°N -75.67840576171875°E
Elevation: 1518.0 m asl
Timezone: b'America/Bogota'b'GMT-5'
Timezone difference to GMT+0: -18000s
Number of days retrieved: 336
Number of weeks aggregated: 48
--------------------------------------------------------------------------------
Coordinates: 6.362038612365723°N -75.67840576171875°E
Elevation: 1518.0 m asl
Timezone: b'America/Bogota'b'GMT-5'
Timezone difference to GMT+0: -18000s
Number of days retrieved: 336
Number of weeks aggregated: 48
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Coordinates: -3.760984182357788°N -73.25753784179688°E
Elevation: 94.0 m asl
Timezone: b'America/Lima'b'GMT-5'
Timezone difference to GMT+0: -18000s
Number of days retri