In [1]:
import json
import os
import sys
import time

import numpy as np
import pandas as pd
import torch
from sklearn.metrics import mean_absolute_error, median_absolute_error, mean_squared_error

from gluonts.dataset.common import ListDataset
from gluonts.dataset.pandas import PandasDataset
from gluonts.dataset.split import split
from gluonts.evaluation.backtest import make_evaluation_predictions
from gluonts.torch import DeepAREstimator
from gluonts.torch.distributions import NormalOutput
from gluonts.torch.distributions import StudentTOutput


In [2]:
freq = "D"
prediction_length = 30
num_windows = 14  # number of rolling evaluation windows built for validation and test

# Import dataset
train_data = pd.read_csv("../datasets/exchange/train.csv")
val_data = pd.read_csv("../datasets/exchange/val.csv")
test_data = pd.read_csv("../datasets/exchange/test.csv")

# Attach a synthetic, contiguous daily index so the three splits follow each other in time.
dates = pd.date_range(start="1970-01-01", periods=len(train_data) + len(val_data) + len(test_data), freq=freq)

train_end = len(train_data)
val_end = train_end + len(val_data)
train_data.index = dates[:train_end]
val_data.index = dates[train_end:val_end]
test_data.index = dates[val_end:]

# Normalize the signal power of each column (statistics come from the training split only)
stds = train_data.std()
train_data /= stds
val_data /= stds
test_data /= stds

n_series = train_data.shape[1]


def _to_long(wide: pd.DataFrame) -> pd.DataFrame:
    """Stack a wide (date x series) frame into long format with columns date/series/value."""
    flat = wide.stack().reset_index()
    flat.columns = ["date", "series", "value"]
    return flat


def _to_dataset(flat: pd.DataFrame) -> PandasDataset:
    """Wrap a long-format frame in a GluonTS PandasDataset (one item per `series`)."""
    return PandasDataset.from_long_dataframe(flat, target="value", item_id="series", timestamp="date", freq=freq)


def _rolling_datasets(flat: pd.DataFrame, n_windows: int = num_windows) -> list:
    """Build `n_windows` datasets; window i has its last `i * prediction_length` dates removed.

    The long frame is ordered date-major, so dropping one date means dropping
    `n_series` rows.
    NOTE(review): this assumes every date has exactly `n_series` rows, i.e. that
    `stack()` dropped no NaN values - confirm for these CSVs.
    """
    return [
        _to_dataset(flat if i == 0 else flat.iloc[: -prediction_length * i * n_series])
        for i in range(n_windows)
    ]


# Get training, validation and test dataset
train_flat = _to_long(train_data)
train_dataset = _to_dataset(train_flat)

val_flat = _to_long(val_data)
val_dataset = _to_dataset(val_flat)
val_dataset_14 = _rolling_datasets(val_flat)

# Fixed: the test windows were previously built from the validation frame
# (`val_data` / `val_flat`), so no test data was ever evaluated.
test_flat = _to_long(test_data)
test_dataset_14 = _rolling_datasets(test_flat)

In [3]:
# Configure the DeepAR estimator; nothing is fitted until `model.train(...)` is called.
model = DeepAREstimator(
    freq=freq,
    prediction_length=prediction_length,
    # Condition on a history window as long as the forecast horizon.
    context_length=prediction_length,
    # Network size and regularisation.
    num_layers=1,
    hidden_size=30,
    dropout_rate=0.01,
    # Gaussian predictive distribution.
    distr_output=NormalOutput(),
    # Optimisation settings.
    lr=1e-4,
    trainer_kwargs={"max_epochs": 1},
)

In [4]:
# Fit the estimator; the object returned by `train` is what the evaluation cells use to forecast.
predictor = model.train(
    training_data=train_dataset,
    validation_data=val_dataset,
    num_workers=3,
)

GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

  | Name  | Type        | Params | In sizes | Out sizes   
----------------------------------------------------------------
0 | model | DeepARModel | 8.3 K  | ?        | [1, 100, 30]
----------------------------------------------------------------
8.3 K     Trainable params
0         Non-trainable params
8.3 K     Total params
0.033     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Epoch 0, global step 50: 'val_loss' reached 2.54597 (best 2.54597), saving model to '/rds/general/user/ejh19/home/Final-Year-Project/deepar/lightning_logs/version_11/checkpoints/epoch=0-step=50.ckpt' as top 1
`Trainer.fit` stopped: `max_epochs=1` reached.


In [9]:
# Rolling evaluation over the datasets in `test_dataset_14`:
# for each one, draw 128 sample paths and keep the matching ground-truth series.
samples, realisations = [], []

start = time.time()
for dataset in test_dataset_14:
    forecast_it, ts_it = make_evaluation_predictions(dataset=dataset, predictor=predictor, num_samples=128)
    # Both return values are iterators; materialise them (forecasts first) before moving on.
    samples.append([*forecast_it])
    realisations.append([*ts_it])
# Elapsed wall-clock seconds for the whole loop.
print(time.time() - start)

0.7281005382537842


In [5]:
# Forecast with the trained predictor. `model` is the DeepAREstimator (it only
# exposes training), so calling `.predict` on it fails on a fresh kernel.
# NOTE(review): `new_dataset` is not defined anywhere in the visible notebook -
# confirm which dataset this cell is meant to forecast.
pred = list(predictor.predict(new_dataset))


In [6]:
# Shape of the sample array held by the second forecast in `pred`.
pred[1].samples.shape

(100, 12)

In [7]:
# Display the second forecast object; its repr includes the sample array, item id and start date.
pred[1]

gluonts.model.forecast.SampleForecast(info=None, item_id='1', samples=array([[3.9303932, 3.2164402, 3.6159856, ..., 4.2684064, 5.360932 ,
        5.1077003],
       [3.277662 , 3.2323525, 4.0443892, ..., 3.632226 , 5.9036365,
        5.899686 ],
       [3.9359155, 3.381524 , 3.0987148, ..., 4.453673 , 4.8194013,
        4.958897 ],
       ...,
       [3.78797  , 3.481149 , 3.2541785, ..., 5.219766 , 6.309618 ,
        4.4366674],
       [3.6072724, 3.0258396, 3.3209856, ..., 4.6233687, 5.066903 ,
        5.343836 ],
       [3.0499535, 2.822828 , 2.7089305, ..., 4.4145417, 4.441472 ,
        5.085745 ]], dtype=float32), start_date=Period('2014-05-02 00:00', 'H'))

In [24]:
# Build lazy forecast / ground-truth iterators with 128 sample paths per series.
# NOTE(review): `new_dataset` is not defined anywhere in the visible notebook -
# confirm which dataset this cell is meant to evaluate.
forecast_it, ts_it = make_evaluation_predictions(
    dataset=new_dataset,
    predictor=predictor,  # the trained predictor from `model.train`, not the estimator `model`
    num_samples=128
)


In [25]:
# Consume both iterators (forecasts first) into plain lists.
samples = [*forecast_it]
realisations = [*ts_it]

In [26]:
# Display the first ground-truth series collected from `ts_it`.
realisations[0]

Unnamed: 0,0
2012-01-01 01:00,0.597613
2012-01-01 02:00,0.996021
2012-01-01 03:00,0.597613
2012-01-01 04:00,0.796817
2012-01-01 05:00,0.597613
...,...
2014-05-01 19:00,0.398408
2014-05-01 20:00,0.398408
2014-05-01 21:00,0.597613
2014-05-01 22:00,0.796817


In [29]:
# Shape of the sample array of the first forecast in `samples`.
samples[0].samples.shape

(128, 12)

In [30]:
# Display the first forecast object; its repr includes the sample array, item id and start date.
samples[0]

gluonts.model.forecast.SampleForecast(info=None, item_id='0', samples=array([[0.14920929, 0.19042899, 0.28593367, ..., 0.18095239, 0.2066782 ,
        0.29034165],
       [0.12254633, 0.14677063, 0.27427086, ..., 0.14345118, 0.21614675,
        0.36654797],
       [0.1085371 , 0.04453568, 0.22900346, ..., 0.19700179, 0.18823113,
        0.20362692],
       ...,
       [0.18475005, 0.15086094, 0.30809957, ..., 0.22552243, 0.29208207,
        0.25332835],
       [0.1783875 , 0.19683847, 0.15335591, ..., 0.181498  , 0.19806147,
        0.4102625 ],
       [0.17266878, 0.14753357, 0.287405  , ..., 0.12129243, 0.24648437,
        0.2652388 ]], dtype=float32), start_date=Period('2014-05-01 12:00', 'H'))

In [13]:
# Number of entries collected in `samples`.
len(samples)

319

In [14]:
# Number of entries collected in `realisations`.
len(realisations)

319

In [20]:
 # Import dataset
# Read the electricity train/validation splits, indexed by their "date" column.
# NOTE: this rebinds `train_data`, `val_data` and `stds` from the exchange-rate cell.
train_data, val_data = (
    pd.read_csv(csv_path, index_col="date")
    for csv_path in ("../datasets/electricity/train.csv", "../datasets/electricity/val.csv")
)

# Scale every column by its training-set standard deviation.
stds = train_data.std()
train_data = train_data / stds
val_data = val_data / stds

# Convert data into a GluonTS ListDataset
def get_dataset(df: pd.DataFrame, freq: str = "h", indices = (-1,)) -> ListDataset:
    """Build a multivariate ListDataset with one entry per value in `indices`.

    Each entry starts at `df.index[0]` and its target is `df.values[:i].T`,
    i.e. the first `i` rows of `df` transposed so that columns become the
    leading axis of the target.

    Parameters
    ----------
    df:
        Wide frame with one row per time step and one column per series.
    freq:
        Frequency string forwarded to `ListDataset`.
    indices:
        Exclusive row end-points, one dataset entry per value. These are used
        as ordinary Python slice ends, so the default `-1` drops the LAST row
        of `df`, and `None` keeps every row.

    Returns
    -------
    ListDataset
        Built with `one_dim_target=False`.
    """
    values = df.values  # hoisted: no need to re-materialise the array for every window
    return ListDataset(
        [
            {
                "start": df.index[0],
                "target": values[:stop].T,
            }
            for stop in indices
        ],
        freq=freq,
        one_dim_target=False
    )

# Training set: a single entry built with get_dataset's default `indices`.
train_dataset = get_dataset(train_data)

# 14 validation entries: the full frame first, then each one `prediction_length` rows shorter.
val_dataset_14 = get_dataset(
    val_data,
    indices=[val_data.shape[0] - k * prediction_length for k in range(14)],
)


In [21]:
# Build lazy forecast / ground-truth iterators over the validation windows.
# NOTE(review): `val_dataset_14` holds multivariate targets (one_dim_target=False);
# confirm the trained predictor accepts them - the recorded run of the next cell
# ends in a traceback.
forecast_it, ts_it = make_evaluation_predictions(
    dataset=val_dataset_14,
    predictor=predictor,  # the trained predictor from `model.train`, not the estimator `model`
    num_samples=128
)

In [22]:
# Consume both iterators (forecasts first) into plain lists.
samples = [*forecast_it]
realisations = [*ts_it]

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/rds/general/user/ejh19/home/Final-Year-Project/deepar/.venv/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3508, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/var/tmp/pbs.7723944.pbs/ipykernel_614275/579783497.py", line 1, in <module>
    samples = list(forecast_it)
  File "/rds/general/user/ejh19/home/Final-Year-Project/deepar/.venv/lib/python3.10/site-packages/gluonts/torch/model/predictor.py", line 85, in predict
  File "/rds/general/user/ejh19/home/Final-Year-Project/deepar/.venv/lib/python3.10/site-packages/gluonts/model/forecast_generator.py", line 154, in __call__
    for batch in inference_data_loader:
  File "/rds/general/user/ejh19/home/Final-Year-Project/deepar/.venv/lib/python3.10/site-packages/gluonts/transform/_base.py", line 111, in __iter__
    yield from self.transformation(
  File "/rds/general/user/ejh19/home/Final-Year-Project/deepar/.venv/lib/python3.10/site-packages/gluo

In [23]:
# Display the validation dataset entries; the repr prints every entry's start and target array.
val_dataset_14

[{'start': Period('2014-05-02 00:00', 'H'),
  'target': array([[0.39840835, 0.59761256, 0.59761256, ..., 2.191246  , 2.191246  ,
          2.191246  ],
         [3.7999337 , 3.5827944 , 3.1485164 , ..., 5.319907  , 5.2113376 ,
          5.862755  ],
         [0.12322228, 0.12322228, 0.12322228, ..., 0.18483342, 0.18483342,
          0.12322228],
         ...,
         [5.310667  , 5.2021627 , 4.478803  , ..., 3.5384352 , 3.5203512 ,
          2.8030193 ],
         [2.3278568 , 2.3784626 , 1.1892313 , ..., 0.7084782 , 0.6831754 ,
          0.7084782 ],
         [4.972996  , 4.825492  , 5.0362124 , ..., 5.6683726 , 6.158297  ,
          6.5007176 ]], dtype=float32)},
 {'start': Period('2014-05-02 00:00', 'H'),
  'target': array([[0.39840835, 0.59761256, 0.59761256, ..., 2.3904502 , 2.3904502 ,
          5.1793084 ],
         [3.7999337 , 3.5827944 , 3.1485164 , ..., 5.2113376 , 5.319907  ,
          5.862755  ],
         [0.12322228, 0.12322228, 0.12322228, ..., 0.06161114, 0.12322228,
 