In [1]:
!pip install pytorch_lightning --user
!pip install pytorch_forecasting==0.8.2 --user



In [2]:
import os
import warnings
# Suppress every warning category for the remainder of the session to keep cell
# output short. NOTE(review): this also hides deprecation warnings from the libraries
# imported below — consider narrowing the filter while debugging.
warnings.filterwarnings("ignore")
from pathlib import Path
import warnings
import datetime
import numpy as np
import pandas as pd
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger
import torch
import pickle
from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import SMAPE, PoissonLoss, QuantileLoss
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters
!curl https://raw.githubusercontent.com/BlonskiP/Warcraft_price_prediction/main/Token-prices.csv -o Token-prices.csv
pl.seed_everything(666)

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  2 22.1M    2  508k    0     0  2208k      0  0:00:10 --:--:--  0:00:10 2208k
 41 22.1M   41 9452k    0     0  7690k      0  0:00:02  0:00:01  0:00:01 7684k
 76 22.1M   76 16.8M    0     0  7732k      0  0:00:02  0:00:02 --:--:-- 7732k
 92 22.1M   92 20.4M    0     0  6482k      0  0:00:03  0:00:03 --:--:-- 6482k
100 22.1M  100 22.1M    0     0  5645k      0  0:00:04  0:00:04 --:--:-- 5645k


666

# Data and TimeSeriesDataSet

In [3]:
def between_week(date, dates):
  """Flag whether `date` lies within one week after any of the given start dates.

  Parameters
  ----------
  date : anything accepted by ``pd.to_datetime`` (e.g. an ISO date string)
      The moment to test.
  dates : iterable of values accepted by ``pd.to_datetime``
      Start dates of the one-week windows.

  Returns
  -------
  int
      1 if ``start <= date <= start + 7 days`` for at least one start date
      (both ends inclusive), otherwise 0. An empty `dates` yields 0.
  """
  date = pd.to_datetime(date)
  # The window length does not depend on the start date, so build it once.
  week = datetime.timedelta(weeks=1)
  for start_date in dates:
    # Convert each start date a single time and reuse the parsed value.
    start_date = pd.to_datetime(start_date)
    if start_date <= date <= start_date + week:
      return 1
  return 0

In [4]:
# ISO-formatted start dates of the one-week windows that the `expansion_week`
# feature marks with 1.
# NOTE(review): presumably these are game-expansion launch days — verify each date
# against the official release calendar (the 2020 entry in particular).
expansion_realise_dates = [
    "2014-11-13",
    "2016-08-30",
    "2018-08-13",
    "2020-08-26",
]

In [5]:
# Load the raw price history downloaded above.
df = pd.read_csv('Token-prices.csv')

# Parse the date column once and reuse it for every calendar-derived feature,
# instead of re-parsing the same strings for month, year and expansion_week.
parsed_dates = pd.to_datetime(df.date)

# Modelling frame: one row per observation, renamed to short feature names.
data = pd.DataFrame({
    'price': df['current_price'],
    'time_idx': df.index,  # row position is used as the integer time index
    'date': df.date,
    'day_of_week': df.day,
    'hour': df.hour,
    'change': df.last_change,
    'month': parsed_dates.dt.month,
    'year': parsed_dates.dt.year,
    'group': 0,  # constant: every row belongs to the same (single) series
    'avg_7': df['7_day_mean'],
    'avg_30': df['30_day_mean'],
    # 1 when the row falls inside a one-week window after an expansion date, else 0.
    'expansion_week': parsed_dates.apply(lambda x: between_week(x, expansion_realise_dates)),
})


In [6]:
# Columns later passed to the dataset as categorical features. Each one is turned
# into a categorical whose labels are strings (see the `str` shown as cell output).
# NOTE(review): presumably required because TimeSeriesDataSet rejects numeric
# categoricals — confirm against the pytorch_forecasting documentation.
categorical_columns = ["hour", "group", "day_of_week", "month", "year", "expansion_week"]
for column in categorical_columns:
    # One cast per column; the former extra `day_of_week` cast was overwritten
    # by the later one and has been dropped.
    data[column] = data[column].astype('category').apply(str)

# Sanity check: the first label is now a Python string.
type(data.hour[0])

str

In [7]:
# Window sizes, both counted in rows (steps of `time_idx`).
max_encoder_length = 500  # longest history the model may look back on
# 72 steps; described by the author as "1 day of samples"
# (presumably 3 samples per hour x 24 hours — confirm the sampling rate).
max_prediction_length = 24 * 3

# Everything up to this index is used for training; the last
# `max_prediction_length` steps are held out.
last_time_idx = data["time_idx"].max()
training_cutoff = last_time_idx - max_prediction_length

In [8]:
# Rows available for training: everything up to and including the cutoff index.
training_frame = data[data["time_idx"] <= training_cutoff]

# Target scaling: softplus transformation, normalised separately per group.
price_normalizer = GroupNormalizer(groups=["group"], transformation="softplus")

training = TimeSeriesDataSet(
    training_frame,
    # --- series layout ---------------------------------------------------
    time_idx="time_idx",
    target="price",
    group_ids=["group"],
    # --- window sizes ----------------------------------------------------
    # Keep the encoder long (at least half the maximum), as it is in the validation set.
    min_encoder_length=max_encoder_length // 2,
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    # --- static features -------------------------------------------------
    static_categoricals=["group"],
    static_reals=[],
    # --- features known in advance for the prediction window -------------
    time_varying_known_categoricals=["day_of_week", "hour", "expansion_week", "month", "year"],
    time_varying_known_reals=["time_idx"],
    # --- features only observed up to the present ------------------------
    time_varying_unknown_categoricals=[],
    time_varying_unknown_reals=["price", "change", "avg_7", "avg_30"],
    # Groups of categorical variables can be treated as one variable; none are used here.
    variable_groups={},
    # --- scaling and derived features -------------------------------------
    target_normalizer=price_normalizer,
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

In [9]:
# Validation set built from the full frame, reusing the configuration of `training`.
# NOTE(review): `predict=True` presumably restricts it to the final prediction window
# of each series, and `stop_randomization=True` disables random window sampling —
# confirm against the pytorch_forecasting documentation.
validation = TimeSeriesDataSet.from_dataset(
    training,
    data,
    predict=True,
    stop_randomization=True,
)

In [10]:
# Training batch size; values between 32 and 128 are the suggested range.
batch_size = 128

# Training loader; data is loaded in the main process (num_workers=0).
train_dataloader = training.to_dataloader(
    train=True,
    batch_size=batch_size,
    num_workers=0,
)
# Validation loader uses a 10x larger batch than training.
val_dataloader = validation.to_dataloader(
    train=False,
    batch_size=batch_size * 10,
    num_workers=0,
)

# Baseline model

In [11]:
# Collect the true targets of every validation batch into one tensor.
# Each batch is (inputs, (target, weight)); only the target is kept.
actuals = torch.cat([target for _, (target, _weight) in val_dataloader])

# Reference forecast from the library's `Baseline` model.
baseline_predictions = Baseline().predict(val_dataloader)

# Mean absolute error of the baseline, shown as the cell output.
baseline_abs_error = (actuals - baseline_predictions).abs()
baseline_abs_error.mean().item()


3811.02783203125

In [12]:
# Trainer used for the learning-rate search below; it is replaced by a fully
# configured trainer before the actual fit.
trainer = pl.Trainer(
    # Gradient clipping is a hyperparameter and is important to prevent
    # divergence of the gradient for recurrent neural networks.
    gradient_clip_val=0.1,
    gpus=0,  # no GPU is used
)

GPU available: False, used: False
TPU available: None, using: 0 TPU cores


In [13]:
# Temporal Fusion Transformer whose input/output layout is taken from `training`.
tft = TemporalFusionTransformer.from_dataset(
    training,
    # --- architecture ----------------------------------------------------
    hidden_size=16,  # most important hyperparameter apart from the learning rate
    hidden_continuous_size=8,  # keep this <= hidden_size
    attention_head_size=1,  # number of attention heads; up to 4 for large datasets
    dropout=0.1,  # values between 0.1 and 0.3 work well
    # --- optimisation ----------------------------------------------------
    # Not meaningful while searching for the learning rate, but otherwise very important.
    learning_rate=0.03,
    # Reduce the learning rate if validation loss has not improved after this many epochs.
    reduce_on_plateau_patience=4,
    # --- output ----------------------------------------------------------
    loss=QuantileLoss(),
    output_size=7,  # 7 quantiles by default
)
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")

Number of parameters in network: 23.2k


In [None]:
# Sweep the learning rate between `min_lr` and `max_lr` to find a good starting value.
res = trainer.tuner.lr_find(
    tft,
    train_dataloader=train_dataloader,
    val_dataloaders=val_dataloader,
    max_lr=10.0,
    min_lr=1e-6,
)

# Surface the outcome of the search; previously the result was computed but never
# inspected, so the sweep had no visible effect on the notebook.
print(f"suggested learning rate: {res.suggestion()}")


   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 361   
3  | prescalers                         | ModuleDict                      | 144   
4  | static_variable_selection          | VariableSelectionNetwork        | 1.8 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 4.9 K 
6  | decoder_variable_selection         | VariableSelectionNetwork        | 1.9 K 
7  | static_context_variable_selection  | GatedResidualNetwork            | 1.1 K 
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 1.1 K 
9  | static_context_initial_cell_lstm   | GatedResidualNetwork            | 1.1 

HBox(children=(FloatProgress(value=0.0, description='Finding best initial lr', style=ProgressStyle(description…

In [None]:
# --- callbacks and logging for the real training run ----------------------
lr_logger = LearningRateMonitor()  # logs the learning rate
early_stop_callback = EarlyStopping(
    monitor="val_loss",
    mode="min",
    min_delta=1e-4,
    patience=10,
    verbose=False,
)
logger = TensorBoardLogger("lightning_logs")  # results are logged to TensorBoard

# --- trainer ---------------------------------------------------------------
trainer = pl.Trainer(
    max_epochs=30,
    limit_train_batches=30,  # comment in for training: validation runs every 30 batches
    gradient_clip_val=0.1,
    gpus=0,
    weights_summary="top",
    logger=logger,
    callbacks=[lr_logger, early_stop_callback],
    # fast_dev_run=True,  # comment in to check that the network or dataset has no serious bugs
)

In [None]:
# Train the TFT on the training loader, validating on the held-out window.
trainer.fit(tft, train_dataloader=train_dataloader, val_dataloaders=val_dataloader)

In [None]:
# Hyperparameter search (Optuna-backed) over the TFT configuration.
study = optimize_hyperparameters(
    train_dataloader,
    val_dataloader,
    model_path="optuna_test",
    # --- search budget -----------------------------------------------------
    n_trials=200,
    max_epochs=50,
    trainer_kwargs=dict(limit_train_batches=30),
    # --- search space ------------------------------------------------------
    learning_rate_range=(0.001, 0.1),
    hidden_size_range=(8, 128),
    hidden_continuous_size_range=(8, 128),
    attention_head_size_range=(1, 4),
    dropout_range=(0.1, 0.3),
    gradient_clip_val_range=(0.01, 1.0),
    # --- fixed settings ----------------------------------------------------
    reduce_on_plateau_patience=4,
    # False: let Optuna pick the learning rate instead of the built-in learning rate finder.
    use_learning_rate_finder=False,
)

# Persist the study so its results are kept and tuning can be resumed later.
with open("test_study.pkl", "wb") as study_file:
    pickle.dump(study, study_file)

# Best hyperparameters found by the search.
print(study.best_trial.params)

In [None]:
# Path of the best checkpoint recorded by the trainer's checkpoint callback.
checkpoint_callback = trainer.checkpoint_callback
best_model_path = checkpoint_callback.best_model_path

# Restore the model weights from that checkpoint.
best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)

In [None]:
# True targets of the validation batches; element 0 of each batch's `y` is the target.
actuals = torch.cat([targets[0] for _, targets in val_dataloader])

# Point forecasts of the restored model.
predictions = best_tft.predict(val_dataloader)

# Mean absolute error on the validation set, shown as the cell output.
absolute_error = (actuals - predictions).abs()
absolute_error.mean()

# Predict and evaluate

In [None]:
# Raw network output together with the inputs `x` it was computed from;
# both are needed by the plotting call below.
raw_predictions, x = best_tft.predict(
    val_dataloader,
    mode="raw",
    return_x=True,
)


In [None]:
# Plot the forecast for the first validation sample (idx=0), with the loss in the
# title and the observed future values drawn alongside the prediction.
best_tft.plot_prediction(
    x,
    raw_predictions,
    idx=0,
    add_loss_to_title=True,
    show_future_observed=True,
)

In [None]:
# Point forecasts plus their inputs (this overwrites the earlier `predictions` and `x`).
predictions, x = best_tft.predict(val_dataloader, return_x=True)

# Aggregate predictions and actuals per input variable, then plot them.
predictions_vs_actuals = best_tft.calculate_prediction_actual_by_variable(
    x,
    predictions,
)
best_tft.plot_prediction_actual_by_variable(predictions_vs_actuals);