# Machine Learning - Pytorch

# Imports


In [1]:
!pip uninstall matplotlib --yes
!pip install matplotlib==3.1.3
!pip install darts
!pip install pyyaml==5.4.0
!pip install xlrd==1.2.0

Found existing installation: matplotlib 3.2.2
Uninstalling matplotlib-3.2.2:
  Successfully uninstalled matplotlib-3.2.2
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting matplotlib==3.1.3
  Using cached matplotlib-3.1.3-cp37-cp37m-manylinux1_x86_64.whl (13.1 MB)
Installing collected packages: matplotlib
Successfully installed matplotlib-3.1.3


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting darts
  Using cached darts-0.21.0-py3-none-any.whl (424 kB)
Collecting catboost>=1.0.6
  Downloading catboost-1.0.6-cp37-none-manylinux1_x86_64.whl (76.6 MB)
[K     |████████████████████████████████| 76.6 MB 84 kB/s 
[?25hCollecting matplotlib>=3.3.0
  Using cached matplotlib-3.5.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl (11.2 MB)
Collecting pytorch-lightning>=1.5.0
  Downloading pytorch_lightning-1.7.6-py3-none-any.whl (707 kB)
[K     |████████████████████████████████| 707 kB 50.8 MB/s 
Collecting statsmodels>=0.13.0
  Using cached statsmodels-0.13.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.8 MB)
Collecting nfoursid>=1.0.0
  Downloading nfoursid-1.0.1-py3-none-any.whl (16 kB)
Collecting statsforecast==0.6.0
  Using cached statsforecast-0.6.0-py3-none-any.whl (44 kB)
Collecting tbats>=1.1.0
  Using cached tbats-1.1.0-py3-none-any.whl (43 kB)

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pyyaml==5.4.0
  Using cached PyYAML-5.4-cp37-cp37m-manylinux1_x86_64.whl (636 kB)
Installing collected packages: pyyaml
  Attempting uninstall: pyyaml
    Found existing installation: PyYAML 6.0
    Uninstalling PyYAML-6.0:
      Successfully uninstalled PyYAML-6.0
Successfully installed pyyaml-5.4
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
import pandas as pd
import os, sys
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import shutil
from sklearn.preprocessing import MinMaxScaler

import matplotlib.pyplot as plt
import plotly.express as px


from darts import TimeSeries
from darts.dataprocessing.transformers import Scaler 
from sklearn.preprocessing import MaxAbsScaler
from darts.models import NBEATSModel, RNNModel, TFTModel, BlockRNNModel
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from darts.metrics import mape, rmse, mae
from darts.utils.statistics import check_seasonality, plot_acf
from darts.datasets import AirPassengersDataset, SunspotsDataset

import warnings

# Ignore every Python warning raised from here on.
warnings.filterwarnings("ignore")
import logging

# Disable all logging calls of severity CRITICAL and below.
logging.disable(logging.CRITICAL)


# Functions

## Features

In [3]:
def cumsum_per_day(df, col_name):
    """Running total of ``col_name`` that restarts at every calendar day.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index is a ``DatetimeIndex``; the calendar date of each
        index entry (``df.index.date``) defines the day groups.
        Assumes the index contains no ``NaT`` entries.
    col_name : str
        Name of the column to accumulate.

    Returns
    -------
    pandas.DataFrame
        Frame with the single column ``"cumsum"``. Rows are ordered by day
        (ascending) and keep their input order within a day, so for a
        chronologically sorted index the row order equals the input order.
        An empty input yields an empty frame with the ``"cumsum"`` column.
    """
    # An empty frame has nothing to accumulate; return the right shape
    # instead of failing on an empty concatenation.
    if len(df) == 0:
        return df[[col_name]].rename(columns={col_name: "cumsum"})

    day_keys = df.index.date

    # Vectorised grouped cumsum: no writes into the per-day slices produced
    # by iterating over the groupby, and no list of frames to concatenate.
    per_day_total = df.groupby(day_keys)[col_name].cumsum().to_frame("cumsum")

    # A stable sort on the day keys keeps rows grouped by day while
    # preserving their relative order inside each day.
    day_order = np.argsort(day_keys, kind="stable")
    return per_day_total.iloc[day_order]








## Evaluation 

In [4]:
def eval_backtest(backtest_series, actual_series, horizon, transformer):
    """Plot a backtest against the ground truth and print MAPE and RMSE.

    Both series are passed through ``transformer.inverse_transform`` first,
    so the plot and the metrics use the inverse-transformed values.

    Parameters
    ----------
    backtest_series
        Scaled forecast series.
    actual_series
        Scaled ground-truth series.
    horizon
        Value interpolated into the figure title only.
    transformer
        Object providing ``inverse_transform`` for both series.

    Returns
    -------
    None
        The function draws a figure and prints the two metrics.
    """
    # Undo the scaling: prediction first, then ground truth.
    prediction, ground_truth = (
        transformer.inverse_transform(scaled)
        for scaled in (backtest_series, actual_series)
    )

    plt.figure(figsize=(40, 6))
    prediction.plot(label="pred")
    ground_truth.plot(label="ground truth")
    plt.legend()
    plt.title(f"Backtest, starting: {horizon}-hour horizon")

    # Metrics take (actual, predicted) in that order.
    mape_value = mape(ground_truth, prediction)
    print("MAPE: {:.2f}%".format(mape_value))

    rmse_value = rmse(ground_truth, prediction)
    print("RMSE: {:.2f}".format(rmse_value))

#  Data

In [1]:
# Mount Google Drive at /content/drive so the data file can be read from it.
# Requires the Google Colab runtime (google.colab is only available there).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [9]:
# Read the load table from Drive (first column becomes a parsed datetime
# index) and keep only the SFH42 column.
load_csv_path = "/content/drive/MyDrive/PhD_GDrive/household_load_forecasting/data/load_data_15min_watts.csv"
df = pd.read_csv(load_csv_path, index_col=0, parse_dates=True)
df = df[["SFH42"]]

In [10]:
# Display the loaded frame; the notebook shows only the first and last rows.
df

Unnamed: 0_level_0,SFH42
index,Unnamed: 1_level_1
2019-01-24 23:15:00,149.466667
2019-01-24 23:30:00,149.466667
2019-01-24 23:45:00,149.466667
2019-01-25 00:00:00,149.466667
2019-01-25 00:15:00,149.466667
...,...
2019-11-13 11:00:00,117.933333
2019-11-13 11:15:00,179.666667
2019-11-13 11:30:00,437.400000
2019-11-13 11:45:00,244.400000


In [11]:
# Wrap the frame in a darts TimeSeries, then cast its values to float32.
series = TimeSeries.from_dataframe(df)
series = series.astype("float32")


### Train-Val-Test Split



In [12]:
# Chronological cut points: data up to train_end is used for training,
# data between train_end and val_end for validation, the rest for testing.
train_end = pd.Timestamp('2019-09-01 00:00:00')
val_end = pd.Timestamp('2019-10-01 00:00:00')

# First cut off the test part, then divide the remainder into train and val.
train_and_dev_series, test = series.split_after(val_end)
train, val = train_and_dev_series.split_after(train_end)

### Scaling the Timeseries

In [13]:
# Scale the target to the range [0, 1]. The scaler is fitted on the training
# part only and then applied unchanged to the validation and test parts.
min_max_scaler = MinMaxScaler(feature_range=(0, 1))
scaler = Scaler(min_max_scaler)

train_transformed = scaler.fit_transform(train)
val_transformed, test_transformed = (
    scaler.transform(part) for part in (val, test)
)


In [None]:
# Window sizes, counted in time steps of the series.
# NOTE(review): the data file is at 15-minute resolution, so 24 steps
# presumably span 6 hours rather than 24 - TODO confirm the intended horizon.
n_lags = 48   # used as the model's input chunk length
n_ahead = 24  # used as the output chunk length and forecast horizon

# Cyclic encodings of weekday and hour, supplied as future covariates.
encoders = {"cyclic": {"future": ["weekday", "hour"]}}




## Training-Parameters

In [14]:
# Train on the GPU when CUDA is available, otherwise fall back to the CPU.
device = 'gpu' if torch.cuda.is_available() else 'cpu'

print(device)

# Keyword arguments for the PyTorch Lightning trainer. A device index is
# only requested on GPU machines; asking for GPU 0 together with the "cpu"
# accelerator is an invalid trainer configuration.
pl_trainer = {"accelerator": device}
if device == 'gpu':
    pl_trainer["devices"] = [0]


n_epochs = 100  # number of epochs (passes over the training set)

lr = {'lr': 1e-3}  # optimizer learning rate; a scheduler could be used as well

gpu


## Modelling : Temporal Fusion Transformer

The following configurations will be tried:


### Loss Function:

* MSE
* DTW
* Le Guen (DILATE)

### Features:

* Energy cumsum
* Datetime attributes
* (Seasonal lags) - to be implemented manually!


Also use a grid search for: (TODO: list the hyperparameters to tune)






In [None]:
# Past covariate: per-day running total of the target load.
# The frame loaded in this notebook is `df` with the column "SFH42"; the
# previously referenced `df_1` / "y_1" are not defined anywhere in it.
past_cov_series = TimeSeries.from_dataframe(cumsum_per_day(df, "SFH42")).astype("float32")

# Split the covariate at the same cut points as the target series.
train_and_dev_cov_series, test_cov = past_cov_series.split_after(val_end)
train_cov, val_cov = train_and_dev_cov_series.split_after(train_end)


# Separate scaler for the covariate, fitted on its training part only.
min_max_scaler_cov = MinMaxScaler(feature_range=(0, 1))
scaler_cov = Scaler(min_max_scaler_cov)

cov_train_transformed = scaler_cov.fit_transform(train_cov)
cov_val_transformed = scaler_cov.transform(val_cov)
cov_test_transformed = scaler_cov.transform(test_cov)


In [None]:
# Temporal Fusion Transformer using the window sizes and encoders set earlier.
my_model_3 = TFTModel(
    input_chunk_length=n_lags,
    output_chunk_length=n_ahead,
    hidden_size=16,
    lstm_layers=1,
    num_attention_heads=6,
    dropout=0.1,
    batch_size=1024,
    add_relative_index=False,
    add_encoders=encoders,
    likelihood=None,
    random_state=42,
    # Reuse the notebook's shared training configuration instead of repeating
    # the literals (which also hard-coded the GPU). Copies are passed so the
    # shared dicts stay untouched.
    optimizer_kwargs=dict(lr),
    pl_trainer_kwargs=dict(pl_trainer),
)


In [None]:
# Train on the scaled training data and monitor the scaled validation data;
# the cumsum covariate is supplied as a past covariate for both.
my_model_3.fit(
    series=train_transformed,
    past_covariates=cov_train_transformed,
    val_series=val_transformed,
    val_past_covariates=cov_val_transformed,
    num_loader_workers=4,
    epochs=n_epochs,
)

In [None]:
# Backtest on the first 24*30 + 1 points of the scaled test series, using the
# already trained model (no retraining between forecasts).
backtest_window = test_transformed[:(24*30)+1]
backtest_series_3 = my_model_3.historical_forecasts(
    backtest_window,
    past_covariates=cov_test_transformed,
    forecast_horizon=n_ahead,
    retrain=False,
    verbose=False,
)

In [None]:
# Evaluate on exactly the span the backtest covers. The previous slice ran
# from the validation start to 2021-11-01, neither of which lies inside the
# test series (the data ends in November 2019).
eval_backtest(
    backtest_series_3,
    test_transformed.slice_intersect(backtest_series_3),
    n_ahead,
    scaler,
)