### 1. Environment Setup

In [1]:
import sys
import os

# Make project modules importable from three locations: the notebook's working
# directory, its parent, and the directory three levels up (the "master drive").
_cwd = os.getcwd()
_parent_dir = os.path.dirname(_cwd)
_master_drive = os.path.dirname(os.path.dirname(_parent_dir))
for _search_dir in (_cwd, _parent_dir, _master_drive):
    sys.path.append(_search_dir)

In [2]:
import pytest
import numpy as np
import pandas as pd
import torch
from omegaconf import OmegaConf
from nhits_pl import NHITS
from typing import Dict,List,Tuple
from torch import nn
from torch import optim

In [3]:
# Run on the GPU when PyTorch can see a CUDA device; otherwise fall back to the CPU.
is_cuda = torch.cuda.is_available()
if is_cuda:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

- Create Sample Data

In [4]:
n_rows = 500
n_classes = 4
SEED = 42  # fixed seed so Restart & Run All reproduces the same synthetic data

# A dedicated, seeded generator keeps the sample data reproducible without
# touching NumPy's global random state.
rng = np.random.default_rng(SEED)

# Daily timestamps shared by every frame below.
timestamp = pd.date_range(start='2020-01-01', periods=n_rows, freq='D')

# Synthetic series, target and two exogenous features (standard-normal noise).
time_series = pd.DataFrame({'values': rng.standard_normal(n_rows)}, index=timestamp)
labels = pd.DataFrame({'label': rng.standard_normal(n_rows)}, index=timestamp)
ext_features = pd.DataFrame({
    'feature1': rng.standard_normal(n_rows),
    'feature2': rng.standard_normal(n_rows)
}, index=timestamp)

# Single frame with columns: values, label, feature1, feature2.
combined_data = time_series.join(labels).join(ext_features)

In [5]:
# Sanity-check the synthetic frame: column names, dtypes and non-null counts.
combined_data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 500 entries, 2020-01-01 to 2021-05-14
Freq: D
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   values    500 non-null    float64
 1   label     500 non-null    float64
 2   feature1  500 non-null    float64
 3   feature2  500 non-null    float64
dtypes: float64(4)
memory usage: 35.7 KB


- PyTorch: NHITS Model Setup

- define batch size and number of historical steps (to model) and future steps (to forecast)

In [8]:
# Batching / windowing constants.
# Number of samples per batch.
batch_size = 256
# Number of historical steps to model; the label cell below drops this many leading rows.
historical_steps = 90
# Number of future steps to forecast.
future_steps = 1

- read exogenous features and target label

In [9]:
# Exogenous regressors.
feat = combined_data.loc[:, ['feature1', 'feature2']]

# Target: drop the first `historical_steps` rows by position. Using an explicit
# positional slice avoids the negative-length arithmetic, which degenerates to
# `[-0:]` (the whole series) when `historical_steps == len(combined_data)`.
label = combined_data['label'].iloc[historical_steps:]

- PyTorch: Data Setup (Static / Future / Observed)

`Please note: at the moment we're filling the static covariates with random data, but this may need to change in the future to account for e.g. business days / holidays.`

In [10]:
from tft_train_utils import stack_past_values
# Every tensor in `batch` shares the same leading (sample) dimension, because the
# dataloader cell wraps them in a `TensorDataset`, which requires equal first dimensions.
n_obs = len(label)  # one sample per available target row
window_len = 24     # look-back length of each input window
n_features = 5      # number of input channels in X

batch = {
    # Seasonal component: period of 30 (monthly seasonality), repeated once per sample.
    # NOTE(review): confirm NHITS accepts a per-sample seasonality tensor.
    'seasonality': torch.full((n_obs,), 30, dtype=torch.int32),

    # Input features (random placeholder data).
    'X': torch.randn((n_obs, window_len, n_features), dtype=torch.float32),

    # sample_mask is used to specify which parts of the input data should be
    # considered for loss calculation or predictions. It helps in ignoring padding
    # or missing values in the input sequences during training.
    # (Written as a comment on purpose: a string literal placed here would be
    # concatenated with the key that follows it.)
    'sample_mask': torch.ones((n_obs, window_len)),  # 1s for valid data, 0s for padding/missing

    # available_mask is used to indicate the availability of data in the sequence.
    # It is crucial for handling missing data points effectively during both
    # training and evaluation.
    'available_mask': torch.ones((n_obs, window_len)),  # 1s for valid data, 0s for padding/missing

    # Regression target: kept as float32 so the real-valued labels are not truncated.
    'y': torch.tensor(label.values, dtype=torch.float32),
}

  from .autonotebook import tqdm as notebook_tqdm


### PyTorch - NHITS Regression

In [None]:
# Flat NHITS configuration; the hyperparameter-tuning cells overwrite
# 'learning_rate', 'dropout' and 'hidden_size' and add 'batch_size'.
model_config = {
    'time_in': torch.arange(-24, 0),  # Last 24 time steps as input context
    'time_out': torch.arange(0, 1),  # Next 1 time step to predict
    'n_pool_kernel': [2, 2, 1],  # The pooling size for downsampling in each stack. Higher values lead to more smoothing of the input.
    'n_freq_downsample': [2, 2, 1],  # Downsampling factor for each stack, determining how much interpolation is required (should be equal or higher than n_pool_kernel)
    'hidden_size': 128,  # The number of units in the hidden layers.
    'n_blocks': [3, 3, 3],  # Number of blocks in each stack, which controls the depth of the network.
    'n_layers': 2,  # Number of layers within each block.
    'downsample_frequencies': [2, 2, 1],  # Multiplier for downsampling output in each stack. It should match the n_freq_downsample parameter.
    'batch_normalization': False,  # Whether to apply batch normalization after each block.
    'dropout': 0.5,
    'activation': 'ReLU',
    'learning_rate': 1e-4,
    'loss': nn.MSELoss(),  # regression loss; `nn` is imported from torch in the import cell
    'log_interval': 100,  # Interval for logging model performance
}

### 

- PyTorch-Lightning: Training

- create an instance of the NHITS PyTorch Lightning model

In [13]:
# Instantiate NHITS directly from the plain `model_config` dict defined above.
model = NHITS(model_config)

- build the dataloader

In [14]:
from torch.utils.data import DataLoader, TensorDataset
# Pull the five tensors out of `batch` in the order the dataset stores them.
S, X, y, sample_mask, available_mask = (
    batch[field]
    for field in ('seasonality', 'X', 'y', 'sample_mask', 'available_mask')
)

# One dataset row = (seasonality, X, y, sample_mask, available_mask).
train_data = TensorDataset(S, X, y, sample_mask, available_mask)

# Keep temporal order: no shuffling.
train_dataloader = DataLoader(train_data, shuffle=False, batch_size=32)

- run the first small training sample

In [None]:
import pytorch_lightning as pl
# Smoke-test run: fit the model for at most 10 epochs on the training dataloader.
trainer = pl.Trainer(max_epochs=10)
trainer.fit(model, train_dataloader)

- PyTorch Lightning: Hyperparameter Tuning

In [None]:
from pytorch_lightning.callbacks import EarlyStopping
import torch
from sklearn.metrics import log_loss
import optuna
import numpy as np
from sklearn.model_selection import KFold
from torch.utils.data import Subset, DataLoader
from torch.nn import MSELoss
from CPCV.cpcv import CombinatorialPurgedGroupKFold

def tft_objective(trial):
    """Optuna objective: mean validation MSE of NHITS across purged CV folds.

    Reads the module-level ``model_config`` (base configuration) and
    ``train_data`` (the ``TensorDataset`` built in the dataloader cell).

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        float: mean of the per-fold validation MSE (lower is better).
    """
    # Order in which `train_data` stores its tensors.
    field_names = ['seasonality', 'X', 'y', 'sample_mask', 'available_mask']

    dataset = train_data

    # Suggest hyperparameters
    lr = trial.suggest_categorical('lr', [1e-5, 1e-3, 1e-2])
    # NOTE(review): 'num_heads' and 'lstm_layers' are not applied to the NHITS
    # config; they are still sampled so `study.best_params` keeps the same keys.
    trial.suggest_categorical('num_heads', [1, 2, 4])
    dropout_prob = trial.suggest_categorical('dropout_prob', [0.1, 0.3, 0.5])
    hidden_units = trial.suggest_categorical('hidden_units', [64, 128, 256])
    trial.suggest_categorical('lstm_layers', [1, 2, 4])
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128, 256])

    # Work on a copy so one trial never overwrites the shared base config, and
    # use the same flat keys that `model_config` defines.
    config = dict(model_config)
    config['learning_rate'] = lr
    config['batch_size'] = batch_size
    config['dropout'] = dropout_prob
    config['hidden_size'] = hidden_units

    # Time series split
    n_splits = 5
    n_test_splits = 1

    cpcv = CombinatorialPurgedGroupKFold(
        n_splits=n_splits,
        n_test_splits=n_test_splits,
        pctEmbargo=0.01
    )

    # Consecutive samples share a group id.
    groups = [i // n_splits for i in range(len(dataset))]
    loss_fn = MSELoss()
    cv_scores = []

    for train_idx, val_idx in cpcv.split(dataset, groups=groups):

        # Fresh model per fold so weights learned on one fold's training data
        # cannot leak into the evaluation of another fold.
        # NOTE(review): `config` contains torch tensors and a loss module;
        # confirm OmegaConf.create accepts these values.
        model = NHITS(config=OmegaConf.create(config))

        train_loader = DataLoader(Subset(dataset, train_idx), batch_size=batch_size, shuffle=False)
        val_loader = DataLoader(Subset(dataset, val_idx), batch_size=batch_size, shuffle=False)

        # Initialize trainer
        trainer = pl.Trainer(
            max_epochs=10,
            callbacks=[EarlyStopping(monitor='train_loss', patience=5, mode='min')],
            logger=False,
            enable_checkpointing=False,
            enable_model_summary=False
        )

        # Train the model
        trainer.fit(model, train_loader)

        # Validate the model
        model.eval()
        all_preds = []
        all_targets = []
        with torch.no_grad():
            for batch_tensors in val_loader:
                batch_dict = dict(zip(field_names, batch_tensors))
                logits = model(batch_dict)[0]
                all_preds.extend(logits.squeeze(1).cpu().numpy())
                all_targets.extend(batch_dict['y'].flatten().cpu().numpy())

        # Cast both sides to float32 so the loss never sees mismatched dtypes,
        # and store a plain Python float rather than a 0-d tensor.
        preds = torch.as_tensor(np.asarray(all_preds), dtype=torch.float32)
        targets = torch.as_tensor(np.asarray(all_targets), dtype=torch.float32)
        cv_scores.append(loss_fn(preds, targets).item())

    return float(np.mean(cv_scores))

- run hyperparameter tuning with Optuna

In [None]:
# Minimise the value returned by `tft_objective` over 5 trials.
study = optuna.create_study(direction='minimize')
study.optimize(tft_objective, n_trials=5)

- read the best set of parameters

In [None]:
# Show the parameter values of the best trial found by the study.
print("Best hyperparameters:", study.best_params)

In [None]:
# Write the winning Optuna values back into the flat model config.
# 'lstm_layers' and 'num_heads' are sampled by the study but intentionally not applied here.
best_params = study.best_params
model_config.update(
    learning_rate=best_params['lr'],
    batch_size=best_params['batch_size'],
    dropout=best_params['dropout_prob'],
    hidden_size=best_params['hidden_units'],
)

- Pytorch-Lightning: Train Final Model

In [None]:
# Rebuild the model from the tuned config, wrapped in an OmegaConf container.
# NOTE(review): `model_config` holds torch tensors and a loss module; confirm that
# OmegaConf.create accepts these values.
model = NHITS(config=OmegaConf.create(model_config))

In [None]:
# Train the model
trainer = pl.Trainer(
    max_epochs=20,
    callbacks=[EarlyStopping(monitor='train_loss', patience=10, mode='min')]
    ) # remember to add the callbacks
trainer.fit(model, train_dataloader)

- Pytorch-Lightning: Perform Cross-Validation

In [None]:
from CPCV.cpcv import CombinatorialPurgedGroupKFold
from tft_train_utils import cross_validate_model

# Timestamps of the rows that remain after dropping the first `historical_steps` rows.
date_index = combined_data[historical_steps:].index

# Prediction / evaluation times keyed by timestamp (not used further in this cell).
pred_times = pd.Series(date_index, index=date_index)
eval_times = pd.Series(date_index, index=date_index)

# Construct CPCV in-line with DePrado method
cpcv = CombinatorialPurgedGroupKFold(
    n_splits=10,
    n_test_splits=1,
    pctEmbargo=0.01
)

# pandas only accepts 2-D input for a DataFrame, so flatten each sample's
# (window, feature) block into a single row before handing it to the splitter.
X_flat = X.reshape(len(X), -1).numpy()

cv_split = cpcv.split(
    X=pd.DataFrame(X_flat, index=date_index),
    y=pd.Series(y.numpy(), index=date_index),
    groups=[i // 10 for i in range(len(X))])  # consecutive runs of 10 samples share a group id

# Perform cross-validation
cv_results = cross_validate_model(
    train_data,
    model=model,
    num_epochs=10,
    cv_split=cv_split)

- Pytorch-Lightning: MC Dropout Predictions

In [None]:
from mc_dropout_tft import mc_dropout
# Run MC dropout with 50 iterations over `train_dataloader`; the two returned
# values are exported below as the prediction and its uncertainty.
# NOTE(review): presumably the mean and standard deviation across the stochastic
# passes — confirm in mc_dropout_tft.
mean_predictions, std_predictions = mc_dropout(model, train_dataloader, mc_iterations=50)

- Export the results

In [None]:
import pandas as pd
# Write the MC-dropout outputs to CSV: one column for the prediction, one for
# its uncertainty, without the row index.
prediction_columns = {
    'Prediction': mean_predictions,
    'Uncertainty': std_predictions,
}
test_df = pd.DataFrame(prediction_columns)
test_df.to_csv('nhits_predictions_reg.csv', index=False)

- Pytorch-Lightning: Save the model

In [None]:
from torch import save
# Persist only the model's state_dict (not the full module object) to disk.
save(model.state_dict(), 'nhits_regressor.pth')