In [None]:
#!pip install -q torch
#!pip install -q darts
#!pip install -q torchmetrics

In [None]:
from torchmetrics.regression import SymmetricMeanAbsolutePercentageError
import torch
import numpy as np
import pandas as pd
from darts import TimeSeries

In [None]:
!pip install openmeteo-requests
!pip install requests-cache retry-requests numpy pandas


import openmeteo_requests

import requests_cache

from retry_requests import retry
# Set up the Open-Meteo API client on top of an HTTP session that caches
# responses in a local '.cache' store and retries failed requests.
# NOTE(review): expire_after = -1 is presumably requests-cache's "never expire"
# setting, so cached responses are reused indefinitely - confirm this is wanted.
cache_session = requests_cache.CachedSession('.cache', expire_after = -1)
# Up to 5 retries with a backoff factor of 0.2 between attempts.
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

In [None]:

def make_dataframe(
    openmeteo_response,
    variables=(
        "temperature_2m",
        "surface_pressure",
        "wind_speed_10m",
        "wind_direction_10m",
        "wind_gusts_10m",
    ),
):
    """Convert the hourly section of one Open-Meteo response into a DataFrame.

    Parameters
    ----------
    openmeteo_response
        Response object exposing ``Hourly()``; the hourly block must provide
        ``Time()``, ``TimeEnd()``, ``Interval()`` (all in seconds) and
        ``Variables(i).ValuesAsNumpy()``.
    variables
        Column names for the hourly variables, in the same order in which they
        were requested from the API (variable ``i`` of the response becomes
        column ``variables[i]``). Defaults to the five variables used by this
        notebook.

    Returns
    -------
    pandas.DataFrame
        One row per time step, indexed by a UTC ``date`` index, one column per
        entry of ``variables``.
    """
    hourly = openmeteo_response.Hourly()
    start = pd.to_datetime(hourly.Time(), unit="s", utc=True)
    end = pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True)
    step = pd.Timedelta(seconds=hourly.Interval())

    # The end timestamp is exclusive, hence inclusive="left".
    columns = {"date": pd.date_range(start, end, freq=step, inclusive="left")}
    # Variables are addressed by position, so the order of `variables` must
    # match the order of the request.
    for position, name in enumerate(variables):
        columns[name] = hourly.Variables(position).ValuesAsNumpy()

    return pd.DataFrame(columns).set_index("date")

In [None]:
# Every weather variable needed downstream must be listed under "hourly".
# Their order matters: make_dataframe assigns columns by position.
url = "https://archive-api.open-meteo.com/v1/archive"
params = {
    # One (latitude, longitude) pair per city, in the order of `city_codes` below.
    "latitude": [47.36, 46, 47.26, 46.5],
    "longitude": [8.55, 8.95, 11.39, 11.35],
    "start_date": "2024-02-10",
    "end_date": "2024-03-15",
    "hourly": [
        "temperature_2m",
        "surface_pressure",
        "wind_speed_10m",
        "wind_direction_10m",
        "wind_gusts_10m",
    ],
}
original_data = openmeteo.weather_api(url, params=params)

# Integer code per city; the key order also pairs each city with its response.
city_codes = {"Zurich": 1, "Lugano": 2, "Innsbruck": 3, "Bolzano": 4}

# Stack the per-city frames under an outer index level holding the city name.
data = pd.concat(
    {city: make_dataframe(response) for city, response in zip(city_codes, original_data)}
)
data['city_code'] = data.index.get_level_values(0).map(city_codes)

In [None]:
# Long-format (city, date) frame of the raw weather variables.
# It is derived from `data` (built in the previous cell from the same API
# responses) instead of re-zipping `original_data` onto itself: rebinding
# `original_data` from a list of responses to a DataFrame made this cell fail
# when executed a second time.
original_data = data.drop(columns=["city_code"])

In [None]:
# Preview the Zurich rows; the outer index level of original_data is the city name.
original_data.loc[('Zurich')]

In [None]:
# Empty frame that the following cells fill with one column per (variable, city) pair.
correct = pd.DataFrame()

In [None]:
# Use Zurich's 'date' index as the row index of the wide frame.
correct.index = original_data.loc[('Zurich')].index

In [None]:
# Pivot the long (city, date) frame into wide form: one column per weather
# variable and city, named "<variable>_<city suffix>", grouped city by city.
_city_suffixes = {"Zurich": "zh", "Lugano": "lu", "Innsbruck": "in", "Bolzano": "bo"}
_weather_variables = (
    "temperature_2m",
    "surface_pressure",
    "wind_speed_10m",
    "wind_direction_10m",
    "wind_gusts_10m",
)

for _city, _suffix in _city_suffixes.items():
    _city_frame = original_data.loc[_city]
    for _variable in _weather_variables:
        # Assignment aligns on the 'date' index of `correct`.
        correct[f"{_variable}_{_suffix}"] = _city_frame[_variable]

In [None]:
# Inspect the assembled wide frame.
correct

In [None]:
# Calendar features used as future covariates: the day of the year, read from
# the 'date' index, plus its sine/cosine encoding over a 365-day cycle.
correct['dayofyear'] = correct.index.get_level_values('date').dayofyear

_year_angle = 2 * np.pi * correct['dayofyear'] / 365.0
correct['day_sin'] = np.sin(_year_angle)
correct['day_cos'] = np.cos(_year_angle)

In [None]:
# Keep the datetime index so it can be re-attached after scaling
# (the scaler returns a plain array).
originalindex = correct.index

In [None]:
from darts.dataprocessing.transformers.scaler import Scaler
from sklearn.preprocessing import MinMaxScaler
# Dedicated scaler for the target column only, so forecasts can later be mapped
# back to the original scale independently of the other features.
target_scaler = MinMaxScaler().fit(correct[['surface_pressure_zh']])

# Min-max scale every column of the wide frame and restore the labels.
# NOTE(review): both scalers are fitted on the full date range, including the
# validation and test periods - consider fitting on the training window only.
scaler = MinMaxScaler()
_scaled_values = scaler.fit_transform(correct)
scaled_data = pd.DataFrame(
    _scaled_values,
    index=originalindex,
    columns=scaler.get_feature_names_out(),
)

In [None]:
# Inspect the scaled frame.
scaled_data

In [None]:
# Calendar columns act as future covariates; every other column (including the
# target itself) is treated as a past covariate.
_calendar_columns = ['dayofyear', 'day_sin', 'day_cos']

target = scaled_data[['surface_pressure_zh']]
future_cov = scaled_data[_calendar_columns]
past_cov = scaled_data.drop(columns=_calendar_columns)

In [None]:
from darts import TimeSeries
from darts.models.forecasting.tft_model import TFTModel
from darts.metrics import mse
from darts.metrics import smape, mae
from torchmetrics.regression import MeanAbsoluteError

In [None]:
# Split boundaries (UTC). Each *_future_end lies three hours after the matching
# *_end so that the future covariates also cover the forecast steps.
train_start, train_end, train_future_end = (
    '2024-02-10 00:00:00+00:00',
    '2024-03-03 23:00:00+00:00',  # Feb 10 - Mar 3: 23 of the 35 requested days
    '2024-03-04 02:00:00+00:00',
)
val_start, val_end, val_future_end = (
    '2024-03-04 00:00:00+00:00',
    '2024-03-12 20:00:00+00:00',
    '2024-03-12 23:00:00+00:00',
)
test_start, test_end = (
    '2024-03-13 00:00:00+00:00',
    '2024-03-15 23:00:00+00:00',
)

# Label-based windows; both endpoints are inclusive with .loc.
_train_window = slice(train_start, train_end)
_val_window = slice(val_start, val_end)
_backtest_window = slice(train_start, val_end)

# Training split
y_train = target.loc[_train_window]
past_cov_train = past_cov.loc[_train_window]
future_cov_train = future_cov.loc[slice(train_start, train_future_end)]

# Validation split
y_val = target.loc[_val_window]
past_cov_val = past_cov.loc[_val_window]
future_cov_val = future_cov.loc[slice(val_start, val_future_end)]

# Test split (target only)
y_test = target.loc[slice(test_start, test_end)]

# Train + validation combined, used for backtesting
y_train_backtest = target.loc[_backtest_window]
past_cov_train_backtest = past_cov.loc[_backtest_window]
future_cov_train_backtest = future_cov.loc[slice(train_start, val_future_end)]


In [None]:
# Wrap every split in a darts TimeSeries (the frame's index becomes the time axis).
_to_series = TimeSeries.from_dataframe

y_train_series, past_cov_train_series, future_cov_train_series = map(
    _to_series, (y_train, past_cov_train, future_cov_train)
)

y_val_series, past_cov_val_series, future_cov_val_series = map(
    _to_series, (y_val, past_cov_val, future_cov_val)
)

y_test_series = _to_series(y_test)

(
    y_train_backtest_series,
    past_cov_train_backtest_series,
    future_cov_train_backtest_series,
) = map(
    _to_series,
    (y_train_backtest, past_cov_train_backtest, future_cov_train_backtest),
)

In [None]:
# Number of time steps in the training target series.
len(y_train_series)

In [None]:
# TFTModel window sizes, in time steps of the series (hourly data was requested
# from the API above): the model looks back 24 steps and emits 3 steps per
# forecast.
input_chunk_length = 24
output_chunk_length = 3

In [None]:
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

# Stop training once the validation loss has failed to improve by at least
# `min_delta` (0.0001) for `patience` (100) consecutive validation checks.
my_stopper = EarlyStopping(
    monitor="val_loss",
    patience=100,
    min_delta=0.0001,
    mode='min',
)

# Train on the first GPU when CUDA is available and fall back to the CPU
# otherwise, so the notebook also runs on machines without a GPU.
if torch.cuda.is_available():
    pl_trainer_kwargs = {"callbacks": [my_stopper],
                         "accelerator": "gpu",
                         "devices": [0]}
else:
    pl_trainer_kwargs = {"callbacks": [my_stopper],
                         "accelerator": "cpu"}

# Advanced tuning
tft = TFTModel(input_chunk_length=input_chunk_length,
               output_chunk_length=output_chunk_length,
               pl_trainer_kwargs=pl_trainer_kwargs,
               lstm_layers=2,
               num_attention_heads=8,
               dropout=0.2,
               batch_size=16,
               hidden_size=64,
               torch_metrics=MeanAbsoluteError(),
               n_epochs=200,
               # add_encoders=add_encoders
               )

# Fit on the training split; the validation split supplies the `val_loss`
# monitored by the early-stopping callback.
tft.fit(series=y_train_series,
        past_covariates=past_cov_train_series,
        future_covariates=future_cov_train_series,
        val_series=y_val_series,
        val_past_covariates=past_cov_val_series,
        val_future_covariates=future_cov_val_series)

In [None]:
# Advanced tuning
# tft = TFTModel(input_chunk_length =input_chunk_length ,
#                output_chunk_length = output_chunk_length,
#                pl_trainer_kwargs = pl_trainer_kwargs,
#                lstm_layers=2,
#                num_attention_heads=8,
#                dropout=0.2,
#                batch_size=16,
#                hidden_size=64,
#                torch_metrics=MeanAbsoluteError(),
#                n_epochs=1000,
#                # add_encoders=add_encoders
#                )

##tft = TFTModel(input_chunk_length =input_chunk_length ,
#               output_chunk_length = output_chunk_length,
#               pl_trainer_kwargs = pl_trainer_kwargs,
#               torch_metrics=MeanAbsoluteError(),
#               n_epochs=50
#               )

In [None]:
# Forecast output_chunk_length steps following the end of y_val_series (val_end).
# The future covariates extend to val_future_end, three hours past val_end, so
# they cover the forecast steps.
preds = tft.predict(n=output_chunk_length,
                   series=y_val_series,
                   past_covariates = past_cov_val_series,
                   future_covariates = future_cov_val_series)

In [None]:
# Compare the forecast with the actual (scaled) pressure at the SAME timestamps.
# `preds` covers the steps right after the end of y_val_series (val_end), which
# lie before test_start, so the first rows of y_test_series are a different
# window. The actuals are therefore taken from the full target series,
# restricted to the forecast's time index.
actual_series = TimeSeries.from_dataframe(target).slice_intersect(preds)

preds.plot(label='prediction_pressure')
actual_series.plot(label='actual_pressure')

In [None]:
# Backtest the already-fitted model (retrain=False) over the combined
# train + validation span.
# NOTE(review): forecast_horizon=7 is larger than output_chunk_length (3), while
# the predict cell and the saved model name use 3 steps - confirm that a 7-step
# horizon is intended here.
# NOTE(review): start=0 precedes the first point with a full input window;
# verify how the installed darts version treats an out-of-range start.
historical_fcast_tft = tft.historical_forecasts(
        series=y_train_backtest_series,
        past_covariates=past_cov_train_backtest_series,
        future_covariates=future_cov_train_backtest_series,
        start=0,
        forecast_horizon=7,
        verbose=False,
        retrain=False
)

In [None]:
# Time span covered by the backtest forecasts.
historical_fcast_tft.duration,historical_fcast_tft.start_time(), historical_fcast_tft.end_time()

In [None]:
# Time span of the series the backtest was run on, for comparison with the forecast span.
y_train_backtest_series.duration, y_train_backtest_series.start_time(), y_train_backtest_series.end_time()

In [None]:
def _to_original_scale(series):
    """Return `series` with its values mapped back through `target_scaler`.

    `target_scaler` is a scikit-learn MinMaxScaler, which works on 2-D arrays
    rather than darts TimeSeries, and the darts metrics expect TimeSeries
    inputs. The values are therefore extracted, inverse-transformed and wrapped
    again on the same time index and component names.
    """
    unscaled = target_scaler.inverse_transform(series.values().reshape(-1, 1))
    return TimeSeries.from_times_and_values(
        series.time_index, unscaled, columns=series.components
    )


y_train_backtest_series.plot(label="data")
historical_fcast_tft.plot(low_quantile=0.01, high_quantile=0.99,label="backtest ahead forecast (TFTModel)")

# SMAPE is reported on the original (unscaled) pressure values, MAE on the
# min-max scaled values. Arguments are passed as (actual, forecast); both
# metrics are symmetric in their arguments.
print("SMAPE = {:.2f}%".format(smape(_to_original_scale(y_train_backtest_series), _to_original_scale(historical_fcast_tft))))
print("MAE = {:.2f}".format(mae(y_train_backtest_series, historical_fcast_tft)))

In [None]:
# Persist the trained model under this file name in the current working directory.
tft.save('tft_v1_1monthdata_3h')