### 1. Environment Setup

In [1]:
import sys
import os

# Make project-local modules importable from the notebook: the working
# directory, its parent, and the directory three levels up (the "master drive").
cwd = os.getcwd()
parent_dir = os.path.dirname(cwd)
master_dir = os.path.dirname(os.path.dirname(parent_dir))
for extra_path in (cwd, parent_dir, master_dir):
    sys.path.append(extra_path)

In [2]:
import pytest
import numpy as np
import pandas as pd
import torch
from omegaconf import OmegaConf
from tft_torch import tft
from typing import Dict,List,Tuple
from torch import nn
from torch import optim

In [3]:
# Prefer the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
is_cuda = torch.cuda.is_available()
if is_cuda:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

- Create Sample Data

In [4]:
n_rows = 500
n_classes = 4

# Seed NumPy's global RNG so the synthetic data (and every later np.random
# call in the notebook) is identical after "Restart Kernel -> Run All".
SEED = 42
np.random.seed(SEED)

# Synthetic daily data: one random target series, a random class label per
# day, and two random exogenous features, all sharing the same DatetimeIndex.
timestamp = pd.date_range(start='2020-01-01', periods=n_rows, freq='D')
time_series = pd.DataFrame({'values': np.random.randn(n_rows)}, index=timestamp)
labels = pd.DataFrame({'label': np.random.randint(0, n_classes, size=n_rows)}, index=timestamp)
ext_features = pd.DataFrame({
    'feature1': np.random.randn(n_rows),
    'feature2': np.random.randn(n_rows)
}, index=timestamp)

# Index-aligned join -> columns: values, label, feature1, feature2
combined_data = time_series.join(labels).join(ext_features)

In [5]:
# Sanity check of the joined frame: row count, column dtypes and null counts.
combined_data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 500 entries, 2020-01-01 to 2021-05-14
Freq: D
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   values    500 non-null    float64
 1   label     500 non-null    int32  
 2   feature1  500 non-null    float64
 3   feature2  500 non-null    float64
dtypes: float64(3), int32(1)
memory usage: 33.7 KB


- PyTorch: TFT Model Setup

- data properties

In [6]:
# Description of the model inputs: how many numeric / categorical channels
# exist for each input group (historical, static, future) and how many
# classes the classifier predicts.
data_props = {
    'num_historical_numeric': 2,
    'num_historical_categorical': 6,
    'num_static_numeric': 10,
    'num_static_categorical': 11,
    'num_future_numeric': 2,
    'num_future_categorical': 3,
}

# Cardinalities, i.e. how many categories each categorical variable has.
# The list length is derived from the matching `num_*_categorical` entry so
# the two can never drift apart; each cardinality is a random int in [1, 10].
for input_group in ('historical', 'static', 'future'):
    n_categorical = data_props[f'num_{input_group}_categorical']
    data_props[f'{input_group}_categorical_cardinalities'] = (
        1 + np.random.randint(10, size=n_categorical)
    ).tolist()

data_props['num_classes'] = 4

- define batch size and number of historical steps (to model) and future steps (to forecast)

In [8]:
# Window sizes used when building the model inputs.
# NOTE(review): `batch_size` is not read by the later cells visible here (the
# DataLoader uses 32 and the configuration carries its own value) — confirm
# whether it is still needed.
batch_size = 256
historical_steps = 90  # number of past steps fed to the model per observation
future_steps = 1  # number of steps ahead to predict

- read exogenous features and target label

In [9]:
# Exogenous features used as the historical numeric inputs.
feat = combined_data.loc[:, ['feature1', 'feature2']]
# Targets from row `historical_steps` onward (the first rows only serve as
# history). `iloc[h:]` replaces the negative-length slice `[-(len - h):]`,
# which silently returned the whole series when len == h (a `-0` slice).
label = combined_data['label'].iloc[historical_steps:]

- PyTorch: Data Setup (Static / Future / Observed)

`Please note: at the moment the static covariates (as well as the historical categorical and future inputs) are filled with random data. This may need to change in the future, e.g. to encode business days / holidays.`

In [10]:
from tft_train_utils import stack_past_values


def _random_categorical_channels(cardinalities, *leading_shape):
    """Draw one random integer channel per cardinality and stack them on the last axis."""
    channels = [torch.randint(cardinality, size=leading_shape) for cardinality in cardinalities]
    return torch.stack(channels, dim=-1).type(torch.LongTensor)


# One observation per row that has a full look-back window available.
n_obs = n_rows - historical_steps

# The entries are filled in a fixed order so the sequence of random draws is stable.
batch = {}
batch['static_feats_numeric'] = torch.rand(
    n_obs, data_props['num_static_numeric'], dtype=torch.float32)
batch['static_feats_categorical'] = _random_categorical_channels(
    data_props['static_categorical_cardinalities'], n_obs)

# The only real (non-random) model input: windows over the exogenous features.
batch['historical_ts_numeric'] = torch.tensor(
    stack_past_values(feat.values, historical_steps), dtype=torch.float32)

batch['historical_ts_categorical'] = _random_categorical_channels(
    data_props['historical_categorical_cardinalities'], n_obs, historical_steps)
batch['future_ts_numeric'] = torch.rand(
    n_obs, future_steps, data_props['num_future_numeric'], dtype=torch.float32)
batch['future_ts_categorical'] = _random_categorical_channels(
    data_props['future_categorical_cardinalities'], n_obs, future_steps)

# Class labels for the last `n_obs` rows, shaped (n_obs, future_steps).
batch['target'] = torch.tensor(
    label[-(n_obs):].values, dtype=torch.int64).reshape(n_obs, future_steps)

  from .autonotebook import tqdm as notebook_tqdm


### PyTorch - TFT Classification

- TFT: Model configuration

In [11]:
# Network hyperparameters. 'output_quantiles' (e.g. [0.1, 0.5, 0.9]) is
# intentionally left out of this configuration.
model_settings = dict(
    dropout=0.05,
    state_size=64,
    lstm_layers=2,
    attention_heads=4,
)

# Optimizer / training-loop settings.
optimization_settings = dict(
    batch_size=256,
    learning_rate=1e-3,
    max_grad_norm=1.0,
)

configuration = dict(
    model=model_settings,
    optimization=optimization_settings,
    # these arguments are related to possible extensions of the model class
    task_type='classification',
    target_window_start=None,
    data_props=data_props,
)

model = tft.TemporalFusionTransformer(OmegaConf.create(configuration))

### Training - Classification

- PyTorch-Lightning: Training

In [12]:
from tft_pl import TemporalFusionTransformer

- create an instance of the TFT PyTorch Lightning model

In [13]:
# Rebinds `model` to the TemporalFusionTransformer imported from `tft_pl`,
# replacing the `tft_torch` instance created in the configuration cell.
model = TemporalFusionTransformer(config=OmegaConf.create(configuration))

- build the dataloader

In [14]:
from torch.utils.data import DataLoader, TensorDataset

# Unpack the input tensors from `batch` under short aliases, in the order the
# dataset stores them.
(stat_feat_num, stat_feat_cat,
 hist_ts_num, hist_ts_cat,
 futr_ts_num, futr_ts_cat,
 target) = (
    batch[field] for field in (
        'static_feats_numeric', 'static_feats_categorical',
        'historical_ts_numeric', 'historical_ts_categorical',
        'future_ts_numeric', 'future_ts_categorical',
        'target',
    )
)

# Each dataset item is a 7-tuple: six input tensors followed by the target.
train_data = TensorDataset(
    stat_feat_num,
    stat_feat_cat,
    hist_ts_num,
    hist_ts_cat,
    futr_ts_num,
    futr_ts_cat,
    target,
)

# No shuffling: samples are served in their original (chronological) order.
train_dataloader = DataLoader(train_data, shuffle=False, batch_size=32)

- run the first small training sample

In [15]:
import pytorch_lightning as pl
# Short smoke-test run: 10 epochs on the full training loader with default
# Trainer settings (no validation loader is passed).
trainer = pl.Trainer(max_epochs=10)
trainer.fit(model, train_dataloader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

   | Name                                 | Type                            | Params
------------------------------------------------------------------------------------------
0  | static_transform                     | InputChannelEmbedding           | 5.7 K 
1  | historical_ts_transform              | InputChannelEmbedding           | 2.8 K 
2  | future_ts_transform                  | InputChannelEmbedding           | 1.3 K 
3  | static_selection                     | VariableSelectionNetwork        | 468 K 
4  | historical_ts_selection              | VariableSelectionNetwork        | 175 K 
5  | future_ts_selection                  | VariableSelectionNetwork        | 110 K 
6  | static_encoder_selection             | GatedResidualNetwork            | 16.8 K
7  | static_encoder_enrichment            | GatedResidualNetwor

Epoch 9: 100%|██████████| 13/13 [00:04<00:00,  2.84it/s, v_num=42, train_loss=1.140]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 13/13 [00:04<00:00,  2.73it/s, v_num=42, train_loss=1.140]


- Pytorch-Lightning: Hypertuning

In [None]:
from pytorch_lightning.callbacks import EarlyStopping
import torch
from sklearn.metrics import log_loss
import optuna
import numpy as np
from sklearn.model_selection import KFold
from torch.utils.data import Subset, DataLoader
from torch.nn.functional import nll_loss

def tft_objective(trial):
    """Optuna objective: mean validation loss of the TFT classifier over 5 folds.

    Samples a set of hyperparameters from `trial`, trains a fresh model on
    each fold of `train_data`, and scores it on the held-out fold.

    Args:
        trial: the `optuna` trial used to sample hyperparameters.

    Returns:
        float: the validation NLL loss averaged over the folds (lower is better).
    """
    import copy  # local import: only this function needs it

    # Names of the tensors in each dataset item, in the order `train_data` stores them.
    field_names = ['static_feats_numeric', 'static_feats_categorical',
                   'historical_ts_numeric', 'historical_ts_categorical',
                   'future_ts_numeric', 'future_ts_categorical', 'target']

    # Work on a private copy so that sampling hyperparameters does not rewrite
    # the notebook-level `configuration` dict as a side effect of each trial.
    config = copy.deepcopy(configuration)
    dataset = train_data

    # Suggest hyperparameters
    lr = trial.suggest_categorical('lr', [1e-5, 1e-3, 1e-2])
    num_heads = trial.suggest_categorical('num_heads', [1, 2, 4])
    dropout_prob = trial.suggest_categorical('dropout_prob', [0.1, 0.3, 0.5])
    hidden_units = trial.suggest_categorical('hidden_units', [64, 128, 256])
    lstm_layers = trial.suggest_categorical('lstm_layers', [1, 2, 4])
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128, 256])

    config['optimization']['learning_rate'] = lr
    config['optimization']['batch_size'] = batch_size
    config['model']['dropout'] = dropout_prob
    config['model']['state_size'] = hidden_units
    config['model']['lstm_layers'] = lstm_layers
    config['model']['attention_heads'] = num_heads

    # Contiguous, unshuffled folds.
    # NOTE(review): plain KFold still trains on samples that come after the
    # validation block; switch to a forward-chaining split if that matters.
    kfold = KFold(n_splits=5, shuffle=False)
    cv_scores = []

    for train_idx, val_idx in kfold.split(dataset):
        # A fresh model per fold: reusing one model would let later folds
        # validate on samples the model was already trained on.
        model = TemporalFusionTransformer(config=OmegaConf.create(config))

        train_subset = Subset(dataset, train_idx)
        val_subset = Subset(dataset, val_idx)

        train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=False)
        val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False)

        # Initialize trainer (quiet: no logger, checkpoints or model summary)
        trainer = pl.Trainer(
            max_epochs=10,
            callbacks=[EarlyStopping(monitor='train_loss', patience=5, mode='min')],
            logger=False,
            enable_checkpointing=False,
            enable_model_summary=False
        )

        # Train the model
        trainer.fit(model, train_loader)

        # Score the held-out fold
        model.eval()
        fold_scores = []
        fold_targets = []
        with torch.no_grad():
            for raw_batch in val_loader:
                named_batch = dict(zip(field_names, raw_batch))
                scores = model(named_batch)['predicted_quantiles']
                # Drop the singleton step axis (dim 1) of the model output.
                fold_scores.append(scores.squeeze(1).cpu())
                fold_targets.append(named_batch['target'].flatten().cpu())

        # NOTE(review): nll_loss expects log-probabilities. If the model
        # returns raw logits, this should be cross_entropy instead — confirm.
        val_loss = nll_loss(torch.cat(fold_scores), torch.cat(fold_targets))
        cv_scores.append(val_loss.item())

    # Return a plain Python float rather than a tensor / NumPy scalar.
    return float(np.mean(cv_scores))

- run hyperparameter tuning with Optuna

In [None]:
# Minimise the value returned by `tft_objective` over 5 trials.
study = optuna.create_study(direction='minimize')
study.optimize(tft_objective, n_trials=5)

- read the best set of parameters

In [None]:
# Show the hyperparameters of the best trial of the study.
print("Best hyperparameters:", study.best_params)

In [None]:
# Copy the best trial's values into the shared configuration.
# Each row is (configuration section, configuration key, Optuna parameter name).
for section, option, param_name in (
    ('optimization', 'learning_rate', 'lr'),
    ('optimization', 'batch_size', 'batch_size'),
    ('model', 'dropout', 'dropout_prob'),
    ('model', 'state_size', 'hidden_units'),
    ('model', 'lstm_layers', 'lstm_layers'),
    ('model', 'attention_heads', 'num_heads'),
):
    configuration[section][option] = study.best_params[param_name]

- Pytorch-Lightning: Train Final Model

In [None]:
# Fresh, untrained model built from the current `configuration`.
model = TemporalFusionTransformer(config=OmegaConf.create(configuration))

In [None]:
# Train the final model: at most 20 epochs, stopping early when the
# monitored `train_loss` has not improved for 10 consecutive checks.
trainer = pl.Trainer(
    max_epochs=20,
    callbacks=[EarlyStopping(monitor='train_loss', patience=10, mode='min')]
    )
trainer.fit(model, train_dataloader)

- Pytorch-Lightning: Perform Cross-Validation

In [16]:
# The CV splitter below needs 2-D inputs, so keep only index 0 along axis 1
# of the historical numeric tensor.
# NOTE(review): presumably axis 1 is the time-step axis — confirm against stack_past_values.
X = hist_ts_num.detach().numpy()[:,0,:]
# Flatten the targets into a 1-D label vector.
y = target.detach().numpy().flatten()

In [17]:
from CPCV.cpcv import CombPurgedKFoldCVLocal
from tft_train_utils import cross_validate_model

# Dates of the rows that remain after dropping the first `historical_steps` rows.
date_index = combined_data[historical_steps:].index

# Prediction and evaluation times are both set to the observation's own date.
pred_times = pd.Series(date_index, index=date_index)
eval_times = pd.Series(date_index, index=date_index)

# Construct CPCV in-line with DePrado method
# (10 groups, 1 test group per split, 2-day embargo).
cpcv = CombPurgedKFoldCVLocal(
    n_splits=10,
    n_test_splits=1,
    embargo_td=pd.Timedelta(days=2)
)

# NOTE(review): presumably yields (train indices, test indices) pairs — confirm in CPCV.cpcv.
cv_split = cpcv.split(
    pd.DataFrame(X, index=date_index), 
    pd.Series(y, index=date_index), 
    pred_times, 
    eval_times)


# Perform cross-validation
# NOTE(review): `model` is whatever instance is currently bound in the kernel;
# whether cross_validate_model re-initialises it per fold is not visible here.
cv_results = cross_validate_model(
    train_data,
    model=model,
    num_epochs=10,
    num_classes=data_props['num_classes'],
    cv_split=cv_split)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Fold 1/9
Sanity Checking DataLoader 0: 100%|██████████| 2/2 [00:00<00:00, 13.13it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 9: 100%|██████████| 24/24 [00:06<00:00,  3.65it/s, v_num=43, train_loss=0.763, val_loss=1.240]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 24/24 [00:06<00:00,  3.47it/s, v_num=43, train_loss=0.763, val_loss=1.240]
Cross-Validation results:
              precision    recall  f1-score   support

     Label_0       0.71      0.38      0.50        13
     Label_1       0.50      0.71      0.59         7
     Label_2       0.44      0.80      0.57        10
     Label_3       0.50      0.27      0.35        11

    accuracy                           0.51        41
   macro avg       0.54      0.54      0.50        41
weighted avg       0.55      0.51      0.49        41



- Pytorch-Lightning: MC Dropout Predictions

In [None]:
from mc_dropout_tft import mc_dropout
# Run 50 MC-dropout iterations over the training loader.
# NOTE(review): presumably returns the per-sample mean and standard deviation
# across iterations — confirm in mc_dropout_tft.
mean_predictions, std_predictions = mc_dropout(model, train_dataloader, mc_iterations=50)

In [None]:
# Index of the largest mean prediction along axis 1 for each row.
# NOTE(review): assumes mean_predictions is (n_samples, n_classes) — confirm.
predicted_labels = np.argmax(mean_predictions, axis=1)

- Export the results

In [None]:
import pandas as pd
# Save the MC-dropout predictions to a CSV.
# (They were computed on `train_dataloader`, despite the `test_df` name.)
# The number of Probability_* / Uncertainty_* columns follows the width of
# the prediction array instead of being hard-coded to four classes.
mean_array = np.asarray(mean_predictions)
std_array = np.asarray(std_predictions)
n_outputs = mean_array.shape[1]

export_columns = {'Prediction': predicted_labels}
for class_idx in range(n_outputs):
    export_columns[f'Probability_{class_idx}'] = mean_array[:, class_idx]
for class_idx in range(n_outputs):
    export_columns[f'Uncertainty_{class_idx}'] = std_array[:, class_idx]

test_df = pd.DataFrame(export_columns)

test_df.to_csv('tft_predictions.csv', index=False)

- Pytorch-Lightning: Save the model

In [None]:
from torch import save
# Persist only the model's state_dict (not the model object itself) to
# 'tft_classifier.pth' in the current working directory.
save(model.state_dict(), 'tft_classifier.pth')