Data source: the [Open-Meteo](https://open-meteo.com/) API.

In [1]:
import os

import matplotlib.pyplot as plt
import openmeteo_requests
import pandas as pd
import requests as req
import requests_cache
from retry_requests import retry

In [2]:
# Build the Open-Meteo client on top of a cached HTTP session ('.cache' backend,
# expire_after=-1) that is wrapped with automatic retries (5 retries, backoff factor 0.2).
cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

In [3]:
# Folder (relative path) that the final merged CSV is written into.
weather_folder_path = './data2/weather'

In [4]:
# Request windows as (start_date, end_date) string pairs, one per calendar year.
# 2019-2023 are full years; the 2024 window stops at the end of September.
years = [(f'{year}-01-01', f'{year}-12-31') for year in range(2019, 2024)]
years.append(('2024-01-01', '2024-09-30'))

In [6]:
# Hourly variable names requested from the API; the order here is the order
# in which the columns are read back from the response.
features = [
    # temperature / humidity
    'temperature_2m',
    'relative_humidity_2m',
    # pressure
    'pressure_msl',
    'surface_pressure',
    # precipitation
    'precipitation',
    'rain',
    # cloud cover
    'cloud_cover',
    'cloud_cover_low',
    'cloud_cover_mid',
    'cloud_cover_high',
    # radiation / sunshine
    'shortwave_radiation',
    'direct_radiation',
    'diffuse_radiation',
    'sunshine_duration',
    # wind
    'wind_speed_10m',
    'wind_speed_100m',
    'wind_direction_10m',
    'wind_direction_100m',
    'wind_gusts_10m',
]

In [7]:
# Query points: 'lat' / 'long' are sent as the request coordinates and 'name'
# is appended to every value column built for that location.
bp = dict(lat=47.4925, long=19.051389, name='BP')
konst = dict(lat=44.1811, long=28.5476, name='Konstanca')

In [8]:
# Endpoint that get_data passes to openmeteo.weather_api.
url = "https://archive-api.open-meteo.com/v1/archive"

In [9]:
def get_data(location, start_date, end_date, features):
    """Download hourly weather variables for one location and date range.

    The request is sent through the module-level ``openmeteo`` client to the
    module-level ``url``; only the first response is used.

    Parameters
    ----------
    location : dict
        Mapping with ``'lat'``, ``'long'`` and ``'name'`` keys. ``'name'`` is
        appended to every value column as a suffix.
    start_date, end_date : str
        Forwarded unchanged as the ``start_date`` / ``end_date`` request
        parameters (callers in this notebook pass ``'YYYY-MM-DD'`` strings).
    features : iterable of str
        Hourly variable names to request. The response is assumed to expose
        the variables in the same order they were requested, so each feature
        is read back by its position in this sequence.

    Returns
    -------
    pandas.DataFrame
        A ``date`` column of UTC timestamps spanning the response's
        ``[Time, TimeEnd)`` range at the response's interval, plus one
        ``<feature>_<name>`` column per requested feature.
    """
    # Materialise once so any iterable works and the order can be reused below.
    features = list(features)

    params = {
        "latitude": location.get('lat'),
        "longitude": location.get('long'),
        "start_date": start_date,
        "end_date": end_date,
        "hourly": ','.join(features),
    }
    responses = openmeteo.weather_api(url, params=params)
    hourly = responses[0].Hourly()

    # Rebuild the timestamp axis from the response metadata; the end bound is
    # excluded (inclusive="left").
    hourly_data = {"date": pd.date_range(
        start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
        end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
        freq=pd.Timedelta(seconds=hourly.Interval()),
        inclusive="left",
    )}

    # Read each variable by its request position instead of searching the
    # list again for every feature.
    name = location.get('name')
    for position, feature in enumerate(features):
        hourly_data[f"{feature}_{name}"] = hourly.Variables(position).ValuesAsNumpy()

    return pd.DataFrame(hourly_data)


In [10]:
# Locations to download; each one contributes its own name-suffixed columns.
locations = [bp, konst]

In [11]:
# Download every yearly window for every location, then join the locations
# side by side on the shared 'date' column.
df_final = None
for loc in locations:
    # Fetch all windows first and concatenate once, instead of growing a
    # DataFrame inside the loop; ignore_index gives the stacked years one
    # continuous row index.
    yearly_frames = [get_data(loc, start, end, features) for start, end in years]
    df_loc = pd.concat(yearly_frames, ignore_index=True)

    if df_final is None:
        # First location seeds the result.
        df_final = df_loc
    else:
        # Outer join keeps timestamps that are present for only one location.
        df_final = pd.merge(df_final, df_loc, on='date', how='outer')

In [12]:
# Expose the timestamp column under the name 'Datetime' instead of 'date'.
df_final = df_final.rename(columns={'date': 'Datetime'})

In [13]:
# Show the merged frame as the cell output.
df_final

Unnamed: 0,Datetime,temperature_2m_BP,relative_humidity_2m_BP,pressure_msl_BP,surface_pressure_BP,precipitation_BP,rain_BP,cloud_cover_BP,cloud_cover_low_BP,cloud_cover_mid_BP,...,cloud_cover_high_Konstanca,shortwave_radiation_Konstanca,direct_radiation_Konstanca,diffuse_radiation_Konstanca,sunshine_duration_Konstanca,wind_speed_10m_Konstanca,wind_speed_100m_Konstanca,wind_direction_10m_Konstanca,wind_direction_100m_Konstanca,wind_gusts_10m_Konstanca
0,2019-01-01 00:00:00+00:00,-1.579,89.836761,1031.000000,1016.212463,0.0,0.0,17.100000,5.0,1.0,...,0.0,0.0,0.0,0.0,0.0,10.105681,20.326454,355.914459,22.932131,16.199999
1,2019-01-01 01:00:00+00:00,-1.579,89.836761,1031.099976,1016.310974,0.0,0.0,5.400000,5.0,0.0,...,0.0,0.0,0.0,0.0,0.0,11.212135,22.862123,5.527458,26.161512,19.440001
2,2019-01-01 02:00:00+00:00,-1.879,90.484474,1031.199951,1016.393494,0.0,0.0,7.800000,8.0,1.0,...,0.0,0.0,0.0,0.0,0.0,10.041354,21.370783,14.534496,32.619240,19.080000
3,2019-01-01 03:00:00+00:00,-1.779,89.820747,1030.699951,1015.906067,0.0,0.0,5.400000,6.0,0.0,...,0.0,0.0,0.0,0.0,0.0,7.342588,18.218275,11.309896,37.775745,16.919998
4,2019-01-01 04:00:00+00:00,-2.279,90.116875,1030.099976,1015.287659,0.0,0.0,29.700001,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,7.421590,15.021105,14.036275,45.970932,11.879999
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50395,2024-09-30 19:00:00+00:00,9.521,66.184006,1020.799988,1006.729004,0.0,0.0,0.000000,0.0,0.0,...,88.0,0.0,0.0,0.0,0.0,28.771297,46.328239,305.579437,309.007477,50.039997
50396,2024-09-30 20:00:00+00:00,8.521,72.820656,1020.599976,1006.482178,0.0,0.0,0.000000,0.0,0.0,...,100.0,0.0,0.0,0.0,0.0,30.996084,49.531731,306.736908,310.578613,53.639996
50397,2024-09-30 21:00:00+00:00,7.871,78.544052,1020.200012,1006.055481,0.0,0.0,0.000000,0.0,0.0,...,100.0,0.0,0.0,0.0,0.0,32.497849,51.738022,309.155426,312.179749,56.160000
50398,2024-09-30 22:00:00+00:00,6.721,87.986679,1020.000000,1005.800720,0.0,0.0,0.000000,0.0,0.0,...,100.0,0.0,0.0,0.0,0.0,31.826931,50.800835,307.647675,310.689270,56.160000


In [14]:
# Create the output folder first: to_csv fails when the target directory is missing.
os.makedirs(weather_folder_path, exist_ok=True)

# NOTE(review): the file name embeds the Python repr of the location-name list and of the
# (first start date, last end date) tuple, so it contains brackets, quotes, commas and spaces.
# It is kept unchanged here so anything that already reads this path keeps working.
df_final.to_csv(f'{weather_folder_path}/weather_{[loc.get("name") for loc in locations]}_{years[0][0],years[-1][-1]}.csv', index = False)