In [1]:
from itertools import islice

from matplotlib import pyplot as plt
import matplotlib.dates as mdates
from tqdm import tqdm

import torch
from gluonts.evaluation import make_evaluation_predictions, Evaluator
from gluonts.dataset.repository.datasets import get_dataset, dataset_names

from lag_llama.gluon.estimator import LagLlamaEstimator

In [2]:
# Select PyTorch's "medium" float32 matmul precision mode for the whole session
# (a speed/precision trade-off; see the PyTorch documentation for the exact semantics).
torch.set_float32_matmul_precision("medium")

# Load Data

In [3]:
# Show every dataset name exposed by the GluonTS dataset repository.
print(
    f"Available datasets: {dataset_names}"
)

Available datasets: ['constant', 'exchange_rate', 'solar-energy', 'electricity', 'traffic', 'exchange_rate_nips', 'electricity_nips', 'traffic_nips', 'solar_nips', 'wiki2000_nips', 'wiki-rolling_nips', 'taxi_30min', 'kaggle_web_traffic_with_missing', 'kaggle_web_traffic_without_missing', 'kaggle_web_traffic_weekly', 'm1_yearly', 'm1_quarterly', 'm1_monthly', 'nn5_daily_with_missing', 'nn5_daily_without_missing', 'nn5_weekly', 'tourism_monthly', 'tourism_quarterly', 'tourism_yearly', 'cif_2016', 'london_smart_meters_without_missing', 'wind_farms_without_missing', 'car_parts_without_missing', 'dominick', 'fred_md', 'pedestrian_counts', 'hospital', 'covid_deaths', 'kdd_cup_2018_without_missing', 'weather', 'm3_monthly', 'm3_quarterly', 'm3_yearly', 'm3_other', 'm4_hourly', 'm4_daily', 'm4_weekly', 'm4_monthly', 'm4_quarterly', 'm4_yearly', 'm5', 'uber_tlc_daily', 'uber_tlc_hourly', 'airpassengers', 'australian_electricity_demand', 'electricity_hourly', 'electricity_weekly', 'rideshare_wit

In [4]:
# Names of the GluonTS repository datasets used as the pre-training corpus.
# The ETT, Beijing Multisite, UCI and Huawei cloud datasets are missing from this list.
pretraining_datasets = [
    "traffic",
    "uber_tlc_hourly",
    "australian_electricity_demand",
    "electricity_hourly",
    "london_smart_meters_without_missing",
    "solar-energy",
    "wind_farms_without_missing",
    "kdd_cup_2018_without_missing",
    "sunspot_without_missing",
]

# Load each dataset by name, keeping the same order as the list above.
datasets = list(map(get_dataset, pretraining_datasets))

Download electricity_hourly_dataset.zip:: 11.3MB [00:03, 3.34MB/s]
creating json files: 100%|██████████| 321/321 [00:00<?, ?it/s]
Download london_smart_meters_dataset_without_missing_values.zip:: 209MB [00:40, 5.38MB/s]                            
creating json files: 100%|██████████| 5560/5560 [00:00<00:00, 50042.88it/s]
Download wind_farms_minutely_dataset_without_missing_values.zip:: 68.1MB [00:14, 4.85MB/s]                            
creating json files: 100%|██████████| 339/339 [00:00<?, ?it/s]
Download kdd_cup_2018_dataset_without_missing_values.zip:: 2.32MB [00:00, 3.04MB/s]
creating json files: 100%|██████████| 270/270 [00:00<00:00, 268929.49it/s]
Download sunspot_dataset_without_missing_values.zip:: 72.0kB [00:00, 109kB/s]
creating json files: 100%|██████████| 1/1 [00:00<?, ?it/s]


In [5]:
# Flatten the training split of every loaded dataset into a single list of series.
combined_ds = [
    series
    for dataset in datasets
    for series in dataset.train
]

# Pre-Training

In [6]:
# Run configuration.
# Prefer the GPU, but fall back to the CPU so the notebook still runs on
# machines without CUDA instead of failing wherever `device` is consumed.
device = "cuda" if torch.cuda.is_available() else "cpu"
nonnegative_pred_samples = True  # passed to the estimator option of the same name
batch_size = 256  # from paper
num_samples = 100  # from paper; used as the estimator's num_parallel_samples

In [7]:
# Number of future steps the estimator is configured to predict.
prediction_length = 24
# Number of past steps given to the estimator as context (72).
context_length = 3 * 24

In [8]:
# Configure the Lag-Llama estimator used for pre-training.
estimator = LagLlamaEstimator(
    # forecasting window
    context_length=context_length,
    prediction_length=prediction_length,

    # model architecture
    n_layer=8,  # from paper
    n_head=9,  # from paper
    n_embd_per_head=16,  # from paper
    input_size=1,  # presumably one (univariate) target value per step — TODO confirm
    time_feat=True,  # use time features
    scaling="robust",  # robust standardization

    # linear positional encoding scaling, relative to the
    # context length of 32 used in the paper (never below 1.0)
    rope_scaling={
        "type": "linear",
        "factor": max(1.0, (prediction_length + context_length) / 32),
    },

    # optimisation, augmentation and sampling
    lr=1e-4,  # from paper
    aug_prob=0.5,  # from paper
    batch_size=batch_size,
    nonnegative_pred_samples=nonnegative_pred_samples,
    num_parallel_samples=num_samples,

    # arguments handed to the Lightning trainer
    trainer_kwargs={"max_epochs": 50},
)

# Build the Lightning module, the input transformation and a predictor from the
# estimator. NOTE: `predictor` is rebound by the later `estimator.train(...)` call.
lightning_module = estimator.create_lightning_module()
transformation = estimator.create_transformation()
predictor = estimator.create_predictor(transformation, lightning_module)

In [9]:
# Fit the estimator on the combined series; the returned predictor replaces the
# one bound to `predictor` before this call.
# No validation data is passed here, so Lightning reports that it skips the val loop.
predictor = estimator.train(
    combined_ds,
    shuffle_buffer_length=1000,
    cache_data=True,
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
c:\Users\jakob\anaconda3\envs\lag-llama\Lib\site-packages\lightning\pytorch\trainer\connectors\logger_connector\logger_connector.py:67: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `lightning.pytorch` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default
c:\Users\jakob\anaconda3\envs\lag-llama\Lib\site-packages\lightning\pytorch\trainer\configuration_validator.py:74: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type               | Params
-------------------

Training: |          | 0/? [00:00<?, ?it/s]

  y = F.scaled_dot_product_attention(
Epoch 0, global step 50: 'train_loss' reached 1.26134 (best 1.26134), saving model to 'g:\\Meine Ablage\\Master\\Semester 2\\Advances in Deep Learning\\lag-llama\\lightning_logs\\version_3\\checkpoints\\epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached -1.07671 (best -1.07671), saving model to 'g:\\Meine Ablage\\Master\\Semester 2\\Advances in Deep Learning\\lag-llama\\lightning_logs\\version_3\\checkpoints\\epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached -2.21497 (best -2.21497), saving model to 'g:\\Meine Ablage\\Master\\Semester 2\\Advances in Deep Learning\\lag-llama\\lightning_logs\\version_3\\checkpoints\\epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached -5.08599 (best -5.08599), saving model to 'g:\\Meine Ablage\\Master\\Semester 2\\Advances in Deep Learning\\lag-llama\\lightning_logs\\version_3\\checkpoints\\epoch=3-step=200.ckpt' as top 1
Epoch 4, globa