In [36]:
import os
import warnings
warnings.filterwarnings("ignore")  # avoid printing out absolute paths

In [3]:
import copy
from pathlib import Path
import warnings
import numpy as np
import pandas as pd
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger
import torch
from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import SMAPE, PoissonLoss, QuantileLoss
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

# Data loading

In [11]:
df = pd.read_csv('Token-prices.csv')

In [32]:
df.date=pd.to_datetime(df.date)
df.day=df.day.astype(str).astype("category")
df.hour=df.day.astype(str).astype("category")
df["month"]=df.date.dt.month.astype(str).astype("category")
df["time_idx"]=df.index
df["series_type"]="Token_price"

In [33]:
df.head(-5)

Unnamed: 0,current_price,time,date,day,hour,7_day_std,30_day_std,7_day_mean,30_day_mean,7_day_low,30_day_low,7_day_high,30_day_high,1_day_high,1_day_low,last_change,time_idx,series_type,month
0,35704,1429648533,2015-04-21 22:35:33,1,1,352.001894,352.001894,35351.333333,35351.333333,35000.0,35000.0,35704.0,35704.0,35704.0,35000.0,704.0,0,Token_price,4
1,36061,1429649396,2015-04-21 22:49:56,1,1,456.629226,456.629226,35528.750000,35528.750000,35000.0,35000.0,36061.0,36061.0,36061.0,35000.0,711.0,1,Token_price,4
2,36422,1429650302,2015-04-21 23:05:02,1,1,562.104795,562.104795,35707.400000,35707.400000,35000.0,35000.0,36422.0,36422.0,36422.0,35000.0,718.0,2,Token_price,4
3,36786,1429651238,2015-04-21 23:20:38,1,1,668.330582,668.330582,35887.166667,35887.166667,35000.0,35000.0,36786.0,36786.0,36786.0,35000.0,725.0,3,Token_price,4
4,37154,1429652111,2015-04-21 23:35:11,1,1,775.556666,775.556666,36068.142857,36068.142857,35000.0,35000.0,37154.0,37154.0,37154.0,35000.0,732.0,4,Token_price,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
133144,165111,1610349885,2021-01-11 08:24:45,0,0,2669.552958,2634.690165,159807.009960,160379.225632,155921.0,155921.0,167871.0,170309.0,165336.0,156615.0,1229.0,133144,Token_price,1
133145,165513,1610351085,2021-01-11 08:44:45,0,0,2669.996756,2638.319852,159807.219124,160381.808664,155921.0,155921.0,167871.0,170309.0,165513.0,156615.0,1030.0,133145,Token_price,1
133146,165816,1610352285,2021-01-11 09:04:45,0,0,2668.887533,2642.343148,159806.737052,160384.501805,155921.0,155921.0,167871.0,170309.0,165816.0,156615.0,705.0,133146,Token_price,1
133147,165918,1610353485,2021-01-11 09:24:45,0,0,2665.138322,2646.422899,159805.201195,160387.155235,155921.0,155921.0,167871.0,170309.0,165918.0,156615.0,405.0,133147,Token_price,1


# Dataset creating

In [34]:
max_prediction_length = 6
max_encoder_length = 24
training_cutoff = df["time_idx"].max() - max_prediction_length

In [37]:
training = TimeSeriesDataSet(
    df[lambda x: x.time_idx <= training_cutoff],
    time_idx="time_idx",
    target="current_price",
    group_ids=["series_type"], # If you have only one timeseries, set this to the name of column that is constant.
    min_encoder_length=max_encoder_length // 2,  # keep encoder length long (as it is in the validation set)
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    static_categoricals=[],
    static_reals=[],
    time_varying_known_categoricals=["day","month","hour"],
    variable_groups={},  # group of categorical variables can be treated as one variable
    time_varying_known_reals=[],
    time_varying_unknown_categoricals=[],
    time_varying_unknown_reals=[ #list of continuous variables that change over time and are not know in the future
        "current_price",
        "7_day_std",
        "30_day_std",
        "7_day_mean",
        "30_day_mean",
        "7_day_low",
        "30_day_low",
        "7_day_high",
        "30_day_high",
        "1_day_high",
        "1_day_low",
        "last_change"
        
    ],
    target_normalizer=GroupNormalizer(
        groups=[], transformation="softplus"
    ),  # use softplus and normalize by group
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

In [39]:
validation = TimeSeriesDataSet.from_dataset(training, df, predict=True, stop_randomization=True)

In [40]:
batch_size = 128  # set this between 32 to 128
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)

# Baseline model

In [41]:
actuals = torch.cat([y for x, (y, weight) in iter(val_dataloader)])
baseline_predictions = Baseline().predict(val_dataloader)
(actuals - baseline_predictions).abs().mean().item()

141.8333282470703

# Train the Temporal Fusion Transformer

In [42]:
# configure network and trainer
pl.seed_everything(42)
trainer = pl.Trainer(
    gpus=0,
    # clipping gradients is a hyperparameter and important to prevent divergance
    # of the gradient for recurrent neural networks
    gradient_clip_val=0.1,
)

GPU available: False, used: False
TPU available: None, using: 0 TPU cores


In [43]:
tft = TemporalFusionTransformer.from_dataset(
    training,
    # not meaningful for finding the learning rate but otherwise very important
    learning_rate=0.03,
    hidden_size=16,  # most important hyperparameter apart from learning rate
    # number of attention heads. Set to up to 4 for large datasets
    attention_head_size=1,
    dropout=0.1,  # between 0.1 and 0.3 are good values
    hidden_continuous_size=8,  # set to <= hidden_size
    output_size=7,  # 7 quantiles by default
    loss=QuantileLoss(),
    # reduce learning rate if no improvement in validation loss after x epochs
    reduce_on_plateau_patience=4,
)
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")

Number of parameters in network: 27.1k


In [44]:
# find optimal learning rate
res = trainer.tuner.lr_find(
    tft,
    train_dataloader=train_dataloader,
    val_dataloaders=val_dataloader,
    max_lr=10.0,
    min_lr=1e-6,
)


   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 142   
3  | prescalers                         | ModuleDict                      | 256   
4  | static_variable_selection          | VariableSelectionNetwork        | 1.7 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 10.1 K
6  | decoder_variable_selection         | VariableSelectionNetwork        | 916   
7  | static_context_variable_selection  | GatedResidualNetwork            | 1.1 K 
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 1.1 K 
9  | static_context_initial_cell_lstm   | GatedResidualNetwork            | 1.1 

HBox(children=(FloatProgress(value=0.0, description='Finding best initial lr', style=ProgressStyle(description…

Restored states from the checkpoint file at C:\Users\Wilk96\Warcraft_price_prediction\lr_find_temp_model.ckpt


In [45]:
early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=10, verbose=False, mode="min")
lr_logger = LearningRateMonitor()  # log the learning rate
logger = TensorBoardLogger("lightning_logs")  # logging results to a tensorboard

trainer = pl.Trainer(
    max_epochs=30,
    gpus=0,
    weights_summary="top",
    gradient_clip_val=0.1,
    limit_train_batches=30,  # coment in for training, running valiation every 30 batches
    # fast_dev_run=True,  # comment in to check that networkor dataset has no serious bugs
    callbacks=[lr_logger, early_stop_callback],
    logger=logger,
)


tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.03,
    hidden_size=16,
    attention_head_size=1,
    dropout=0.1,
    hidden_continuous_size=8,
    output_size=7,  # 7 quantiles by default
    loss=QuantileLoss(),
    log_interval=10,  # uncomment for learning rate finder and otherwise, e.g. to 10 for logging every 10 batches
    reduce_on_plateau_patience=4,
)
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")

GPU available: False, used: False
TPU available: None, using: 0 TPU cores


Number of parameters in network: 27.1k


In [46]:
# fit network
trainer.fit(
    tft,
    train_dataloader=train_dataloader,
    val_dataloaders=val_dataloader,
)


   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 142   
3  | prescalers                         | ModuleDict                      | 256   
4  | static_variable_selection          | VariableSelectionNetwork        | 1.7 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 10.1 K
6  | decoder_variable_selection         | VariableSelectionNetwork        | 916   
7  | static_context_variable_selection  | GatedResidualNetwork            | 1.1 K 
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 1.1 K 
9  | static_context_initial_cell_lstm   | GatedResidualNetwork            | 1.1 

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…



HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…




1

In [None]:
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

# create study
study = optimize_hyperparameters(
    train_dataloader,
    val_dataloader,
    model_path="optuna_test",
    n_trials=200,
    max_epochs=50,
    gradient_clip_val_range=(0.01, 1.0),
    hidden_size_range=(8, 128),
    hidden_continuous_size_range=(8, 128),
    attention_head_size_range=(1, 4),
    learning_rate_range=(0.001, 0.1),
    dropout_range=(0.1, 0.3),
    trainer_kwargs=dict(limit_train_batches=30),
    reduce_on_plateau_patience=4,
    use_learning_rate_finder=False,  # use Optuna to find ideal learning rate or use in-built learning rate finder
)

# save study results - also we can resume tuning at a later point in time
with open("test_study.pkl", "wb") as fout:
    pickle.dump(study, fout)

# show best hyperparameters
print(study.best_trial.params)

[32m[I 2021-01-12 16:14:23,674][0m A new study created in memory with name: no-name-1002849d-f342-4b0c-8dbf-c75d8cbdefd0[0m
GPU available: False, used: False
TPU available: None, using: 0 TPU cores
[32m[I 2021-01-12 16:20:54,164][0m Trial 0 finished with value: 82976.2734375 and parameters: {'gradient_clip_val': 0.7831082182688781, 'hidden_size': 40, 'dropout': 0.12632634587318897, 'hidden_continuous_size': 20, 'attention_head_size': 3, 'learning_rate': 0.0019485220957654288}. Best is trial 0 with value: 82976.2734375.[0m
GPU available: False, used: False
TPU available: None, using: 0 TPU cores
