In [1]:
import time
import sys
import os
import torch
import json
import pandas as pd
import numpy as np

from gluonts.dataset.pandas import PandasDataset
from gluonts.dataset.split import split
from gluonts.torch import DeepAREstimator
from gluonts.evaluation.backtest import make_evaluation_predictions
from gluonts.torch.distributions import StudentTOutput
from gluonts.torch.distributions import NormalOutput
from sklearn.metrics import mean_absolute_error, median_absolute_error, mean_squared_error


In [2]:
freq = "D"
prediction_length = 30
n_rolling_windows = 14  # number of progressively truncated evaluation datasets

# Import dataset
train_data = pd.read_csv("../datasets/exchange/train.csv")
val_data = pd.read_csv("../datasets/exchange/val.csv")
test_data = pd.read_csv("../datasets/exchange/test.csv")

# The CSVs are given a synthetic, contiguous daily index starting at the epoch
# so that train, validation and test follow one another on the time axis.
n_train, n_val, n_test = len(train_data), len(val_data), len(test_data)
dates = pd.date_range(start="1970-01-01", periods=n_train + n_val + n_test, freq=freq)

train_data.index = dates[:n_train]
val_data.index = dates[n_train:n_train + n_val]
test_data.index = dates[n_train + n_val:]

# Normalize the signal power of each column.
# Only the training statistics are used so that no information from the
# validation/test splits leaks into the scaling.
stds = train_data.std()
train_data /= stds
val_data /= stds
test_data /= stds


def _to_long(frame):
    """Reshape a wide (date x series) frame into long rows: date, series, value."""
    flat = frame.stack().reset_index()
    flat.columns = ["date", "series", "value"]
    return flat


def _to_dataset(flat):
    """Wrap a long-format frame (date/series/value columns) as a PandasDataset."""
    return PandasDataset.from_long_dataframe(
        flat, target="value", item_id="series", timestamp="date", freq=freq
    )


def _rolling_datasets(flat, n_windows, n_series):
    """Build `n_windows` datasets, each dropping one more trailing window.

    Dataset ``i`` omits the last ``i * prediction_length * n_series`` rows of
    ``flat``. Because ``stack()`` emits the rows date by date, this removes
    the last ``i * prediction_length`` dates of every series.
    NOTE(review): this assumes every date has exactly ``n_series`` rows, i.e.
    the frames contain no NaNs that ``stack()`` would drop -- confirm.
    """
    rows_per_window = prediction_length * n_series
    return [
        _to_dataset(flat if i == 0 else flat.iloc[:-i * rows_per_window])
        for i in range(n_windows)
    ]


# Get training, validation and test dataset
n_series = train_data.shape[1]

train_flat = _to_long(train_data)
train_dataset = _to_dataset(train_flat)

val_flat = _to_long(val_data)
val_dataset = _to_dataset(val_flat)
val_dataset_14 = _rolling_datasets(val_flat, n_rolling_windows, n_series)

# Fixed: the test datasets were previously built from `val_data` (and the
# untruncated one from `val_flat`), so the test split was never evaluated.
test_flat = _to_long(test_data)
test_dataset_14 = _rolling_datasets(test_flat, n_rolling_windows, n_series)

In [3]:
# Train the model and make predictions
# DeepAR hyperparameters, collected in one place so they are easy to inspect
# and tweak before the estimator is built.
deepar_config = dict(
    freq=freq,
    prediction_length=prediction_length,
    context_length=prediction_length,  # context window = 1 x forecast horizon
    num_layers=1,
    hidden_size=30,
    lr=1e-4,
    dropout_rate=0.01,
    distr_output=NormalOutput(),  # Gaussian output distribution
    trainer_kwargs={"max_epochs": 1},
)
model = DeepAREstimator(**deepar_config)

In [4]:
# Fit the estimator on the training split; the validation split is passed
# alongside it (the log below reports a `val_loss`).
predictor = model.train(
    training_data=train_dataset,
    validation_data=val_dataset,
    num_workers=3,
)

GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

  | Name  | Type        | Params | In sizes | Out sizes   
----------------------------------------------------------------
0 | model | DeepARModel | 8.3 K  | ?        | [1, 100, 30]
----------------------------------------------------------------
8.3 K     Trainable params
0         Non-trainable params
8.3 K     Total params
0.033     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Epoch 0, global step 50: 'val_loss' reached 2.54597 (best 2.54597), saving model to '/rds/general/user/ejh19/home/Final-Year-Project/deepar/lightning_logs/version_11/checkpoints/epoch=0-step=50.ckpt' as top 1
`Trainer.fit` stopped: `max_epochs=1` reached.


In [9]:
# Rolling evaluation: run the predictor on every dataset in `test_dataset_14`
# and collect, per dataset, the forecasts and the series they are compared to.
samples = []
realisations = []

# perf_counter() is a monotonic, high-resolution clock, so the measured
# interval cannot be skewed by system clock adjustments (unlike time.time()).
start = time.perf_counter()
for dataset in test_dataset_14:
    forecast_it, ts_it = make_evaluation_predictions(
        dataset=dataset,
        predictor=predictor,
        num_samples=128,
    )

    # list() materialises the returned iterators inside the timed region.
    samples.append(list(forecast_it))
    realisations.append(list(ts_it))
print(time.perf_counter() - start)

0.7281005382537842
