- Set up the environment

In [1]:
import sys
import os

# Make modules stored in the current working directory importable.
# The membership check keeps sys.path free of duplicate entries when this
# cell is executed more than once in the same kernel.
cur_dir = os.getcwd()
if cur_dir not in sys.path:
    sys.path.append(cur_dir)

- Load input data

Create sample data

In [2]:
import pandas as pd
import numpy as np

# Seed NumPy's global RNG so the synthetic data below (and every result derived
# from it) is the same on each "Restart & Run All".
SEED = 42
N_SAMPLES = 300
np.random.seed(SEED)

# Prepare sample data: one row per calendar day, starting 2020-01-01
timestamp = pd.date_range(start='2020-01-01', periods=N_SAMPLES, freq='D')
time_series = pd.DataFrame({'values': np.random.randn(N_SAMPLES)}, index=timestamp)
# Integer class labels drawn from {0, 1, 2}
labels = pd.DataFrame({'label': np.random.randint(0, 3, size=N_SAMPLES)}, index=timestamp)
ext_features = pd.DataFrame({
    'feature1': np.random.randn(N_SAMPLES),
    'feature2': np.random.randn(N_SAMPLES)
}, index=timestamp)

# One frame with the columns: values, label, feature1, feature2
combined_data = time_series.join(labels).join(ext_features)

Train / Validation / Test split

In [3]:
# Chronological split by row position: rows 0-99 train, 100-199 validation,
# 200 onwards test.
train_data, val_data, test_data = (
    combined_data.iloc[:100],
    combined_data.iloc[100:200],
    combined_data.iloc[200:],
)

# Targets: the 'label' column of each split as a NumPy array
train_target, val_target, test_target = (
    split['label'].to_numpy() for split in (train_data, val_data, test_data)
)

# Features: the two external feature columns of each split as a 2-D NumPy array
feature_columns = ['feature1', 'feature2']
train_features, val_features, test_features = (
    split[feature_columns].to_numpy() for split in (train_data, val_data, test_data)
)

In [4]:
from pl_model_utils import TimeSeriesDataModule

# Arrays are handed over positionally as (target, features) pairs for the
# train, validation and test splits, in that order.
split_arrays = (
    train_target, train_features,
    val_target, val_features,
    test_target, test_features,
)
data_module = TimeSeriesDataModule(*split_arrays, batch_size=16)

# Prepare the datasets inside the data module before any dataloader is requested
data_module.setup()

  from .autonotebook import tqdm as notebook_tqdm


## Classification with Basic Autoencoder

- Compile simple Autoencoder for Classification

In [5]:
from pl_autoencoder_classifiers import AutoencoderClassifier

# Basic autoencoder classifier: 2 input features, 3 classes, context length 1
ae_config = dict(context_length=1, num_classes=3, num_features=2)
ae_model = AutoencoderClassifier(**ae_config)

In [6]:
from pytorch_lightning.callbacks import ModelCheckpoint

# Keep the single best checkpoint (lowest monitored 'train_loss') and also the
# most recent one.
checkpoint_options = dict(
    monitor='train_loss',
    mode='min',
    save_top_k=1,
    save_last=True,
)
checkpoint_callback = ModelCheckpoint(**checkpoint_options)

- Train the Autoencoder

In [7]:
from pytorch_lightning import Trainer

# Fit the basic autoencoder for 20 epochs on the training dataloader only
# (no validation loader is passed to fit).
trainer = Trainer(callbacks=[checkpoint_callback], max_epochs=20)
train_loader = data_module.train_dataloader()
trainer.fit(ae_model, train_loader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type       | Params
------------------------------------------
0 | encoder    | Sequential | 8.8 K 
1 | decoder    | Sequential | 8.4 K 
2 | classifier | Sequential | 2.2 K 
3 | softmax    | Softmax    | 0     
------------------------------------------
19.4 K    Trainable params
0         Non-trainable params
19.4 K    Total params
0.078     Total estimated model params size (MB)
  rank_zero_warn(
  rank_zero_warn(


Epoch 19: 100%|██████████| 7/7 [00:00<00:00, 106.64it/s, v_num=52]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 7/7 [00:00<00:00, 84.29it/s, v_num=52] 


- Cross-Validation

In [8]:
from pl_model_utils import cross_validate_model

# Cross-validate on the validation split. The model class (not the trained
# ae_model instance) is passed together with its keyword arguments.
ae_cv_kwargs = dict(context_length=1, num_classes=3, num_features=2)
cv_results = cross_validate_model(
    data_module.val_features,
    data_module.val_target,
    AutoencoderClassifier,
    **ae_cv_kwargs,
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type       | Params
------------------------------------------
0 | encoder    | Sequential | 8.8 K 
1 | decoder    | Sequential | 8.4 K 
2 | classifier | Sequential | 2.2 K 
3 | softmax    | Softmax    | 0     
------------------------------------------
19.4 K    Trainable params
0         Non-trainable params
19.4 K    Total params
0.078     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 49: 100%|██████████| 2/2 [00:00<00:00, 66.78it/s, v_num=53] 

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 2/2 [00:00<00:00, 46.94it/s, v_num=53]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type       | Params
------------------------------------------
0 | encoder    | Sequential | 8.8 K 
1 | decoder    | Sequential | 8.4 K 
2 | classifier | Sequential | 2.2 K 
3 | softmax    | Softmax    | 0     
------------------------------------------
19.4 K    Trainable params
0         Non-trainable params
19.4 K    Total params
0.078     Total estimated model params size (MB)
  rank_zero_warn(
  rank_zero_warn(


Epoch 49: 100%|██████████| 3/3 [00:00<00:00, 72.41it/s, v_num=54] 

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 3/3 [00:00<00:00, 52.23it/s, v_num=54]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type       | Params
------------------------------------------
0 | encoder    | Sequential | 8.8 K 
1 | decoder    | Sequential | 8.4 K 
2 | classifier | Sequential | 2.2 K 
3 | softmax    | Softmax    | 0     
------------------------------------------
19.4 K    Trainable params
0         Non-trainable params
19.4 K    Total params
0.078     Total estimated model params size (MB)
  rank_zero_warn(
  rank_zero_warn(


Epoch 49: 100%|██████████| 4/4 [00:00<00:00, 61.84it/s, v_num=55] 

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 4/4 [00:00<00:00, 48.41it/s, v_num=55]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type       | Params
------------------------------------------
0 | encoder    | Sequential | 8.8 K 
1 | decoder    | Sequential | 8.4 K 
2 | classifier | Sequential | 2.2 K 
3 | softmax    | Softmax    | 0     
------------------------------------------
19.4 K    Trainable params
0         Non-trainable params
19.4 K    Total params
0.078     Total estimated model params size (MB)
  rank_zero_warn(
  rank_zero_warn(


Epoch 49: 100%|██████████| 5/5 [00:00<00:00, 58.53it/s, v_num=56] 

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 5/5 [00:00<00:00, 49.58it/s, v_num=56]

GPU available: False, used: False





TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type       | Params
------------------------------------------
0 | encoder    | Sequential | 8.8 K 
1 | decoder    | Sequential | 8.4 K 
2 | classifier | Sequential | 2.2 K 
3 | softmax    | Softmax    | 0     
------------------------------------------
19.4 K    Trainable params
0         Non-trainable params
19.4 K    Total params
0.078     Total estimated model params size (MB)
  rank_zero_warn(
  rank_zero_warn(


Epoch 49: 100%|██████████| 6/6 [00:00<00:00, 95.32it/s, v_num=57] 

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 6/6 [00:00<00:00, 78.14it/s, v_num=57]
Cross-Validation Classification Report:
           0.0       1.0       2.0  accuracy  macro avg  weighted avg
precision  1.0  0.960000  1.000000    0.9875   0.986667      0.990000
recall     1.0  1.000000  0.966667    0.9875   0.988889      0.987500
f1-score   1.0  0.977778  0.981818    0.9875   0.986532      0.987626
support    6.0  5.400000  4.600000    0.9875  16.000000     16.000000


- Predictions with MC Dropout Uncertainty

In [9]:
from pl_model_utils import mc_dropout_predictions

# MC Dropout predictions of the basic autoencoder on the test dataloader
test_loader = data_module.test_dataloader()
mc_predictions = mc_dropout_predictions(ae_model, test_loader)

# Aggregate along axis 0 (presumably the MC-pass axis - confirm against
# mc_dropout_predictions): the mean is the prediction, the std the uncertainty.
mean_predictions = mc_predictions.mean(axis=0)
std_predictions = mc_predictions.std(axis=0)

# Class label = index of the largest mean score in each row
predicted_labels = np.argmax(mean_predictions, axis=1)

## Classification with Attention-Enhanced Autoencoder

## Hyperparameter Tuning

In [11]:
from pytorch_lightning.callbacks import EarlyStopping
from pl_autoencoder_classifiers import AutoencoderAttentionClassifier
from pl_model_utils import TimeSeriesDataset
import pytorch_lightning as pl
import torch
from cpcv import CombPurgedKFoldCVLocal
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import log_loss
import optuna
import numpy as np

def ae_attention_objective(trial):
    """Optuna objective: mean cross-validated log-loss of the attention classifier.

    For the hyperparameters suggested by ``trial``, a new
    ``AutoencoderAttentionClassifier`` is trained on every training fold of a
    combinatorial purged K-fold split of the validation data and scored with
    ``sklearn.metrics.log_loss`` on the held-out fold.

    Relies on notebook-level names: ``data_module`` (source of the feature and
    target arrays) and ``val_data`` (source of the matching timestamps).

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to draw the hyperparameters.

    Returns
    -------
    float
        Mean log-loss over all folds (lower is better).

    Raises
    ------
    ValueError
        If the number of timestamps in ``val_data`` differs from the number of
        rows in ``data_module.val_features``.
    """
    context_length = 1
    num_classes = 3
    num_features = 2

    # Suggest hyperparameters
    lr = trial.suggest_categorical('lr', [1e-5, 1e-3, 1e-2])
    num_heads = trial.suggest_categorical('num_heads', [1, 2, 4])
    dropout_prob = trial.suggest_categorical('dropout_prob', [0.1, 0.3, 0.5])
    hidden_units = trial.suggest_categorical('hidden_units', [64, 128, 256])
    embed_dim = trial.suggest_categorical('embed_dim', [32, 64, 128])
    classifier_units = trial.suggest_categorical('classifier_units', [16, 32, 64])
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64])

    X, y = data_module.val_features, data_module.val_target

    # Timestamps of the rows being split. The original code read them from an
    # undefined name `df`; the arrays above originate from `val_data`, so its
    # index is used instead.
    sample_times = val_data.index
    if len(sample_times) != len(X):
        raise ValueError(
            f"val_data has {len(sample_times)} timestamps but "
            f"data_module.val_features has {len(X)} rows"
        )
    pred_times = pd.Series(sample_times, index=sample_times)
    eval_times = pd.Series(sample_times, index=sample_times)

    # Time series split
    cpcv = CombPurgedKFoldCVLocal(
        n_splits=10,
        n_test_splits=1,
        embargo_td=pd.Timedelta(days=2)
    )

    class_labels = list(range(num_classes))
    cv_scores = []

    for train_index, val_index in cpcv.split(X, y, pred_times, eval_times):
        X_train_fold, X_val_fold = X[train_index], X[val_index]
        y_train_fold, y_val_fold = y[train_index], y[val_index]

        # A new model per fold: reusing one instance would carry weights that
        # were already fitted on rows that are held out in later folds.
        model = AutoencoderAttentionClassifier(
            context_length=context_length,
            num_classes=num_classes,
            num_features=num_features,
            num_heads=num_heads,
            dropout_prob=dropout_prob,
            hidden_units=hidden_units,
            embed_dim=embed_dim,
            classifier_units=classifier_units,
            lr=lr
        )

        # Datasets take the targets first and the features second
        train_dataset = TimeSeriesDataset(
            torch.tensor(y_train_fold, dtype=torch.float32),
            torch.tensor(X_train_fold, dtype=torch.float32)
        )
        val_dataset = TimeSeriesDataset(
            torch.tensor(y_val_fold, dtype=torch.float32),
            torch.tensor(X_val_fold, dtype=torch.float32)
        )

        # shuffle=False keeps the rows in their original order
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

        # Initialize trainer
        trainer = pl.Trainer(
            max_epochs=10,
            callbacks=[EarlyStopping(monitor='train_loss', patience=3, mode='min')],
            logger=False,
            enable_checkpointing=False
        )

        # Train the model
        trainer.fit(model, train_loader)

        # Score the held-out fold with gradients disabled
        model.eval()
        all_preds = []
        all_targets = []
        with torch.no_grad():
            for targets, features in val_loader:
                # The model returns a pair; only the second element (the
                # classification output) is scored here.
                _, classification = model(targets, features)
                all_preds.extend(classification.cpu().numpy())
                all_targets.extend(targets.cpu().numpy())

        # Explicit labels keep log_loss working when a small fold does not
        # contain every class.
        val_loss = log_loss(all_targets, all_preds, labels=class_labels)
        cv_scores.append(val_loss)

    return np.mean(cv_scores)

In [None]:
# Search the hyperparameter space: lower objective values are better
n_optuna_trials = 100
study = optuna.create_study(direction='minimize')
study.optimize(ae_attention_objective, n_trials=n_optuna_trials)

In [None]:
# Report the parameter set of the best trial found by the study
best_params = study.best_params
print("Best hyperparameters:", best_params)

In [None]:
# Build the attention model from the tuned values. Only the keys listed here
# are forwarded; 'batch_size' is also tuned but is not a model argument.
tuned_model_keys = (
    'lr', 'num_heads', 'dropout_prob',
    'hidden_units', 'embed_dim', 'classifier_units',
)
ae_attention_model = AutoencoderAttentionClassifier(
    context_length=1,
    num_classes=3,
    num_features=2,
    **{key: study.best_params[key] for key in tuned_model_keys},
)

In [12]:
# Attention model with fixed hyperparameters (usable without running the study)
attention_config = {
    'context_length': 1,
    'num_classes': 3,
    'num_features': 2,
    'lr': 0.001,
    'num_heads': 1,
    'dropout_prob': 0.1,
    'hidden_units': 256,
    'embed_dim': 128,
    'classifier_units': 64,
}
ae_attention_model = AutoencoderAttentionClassifier(**attention_config)

In [13]:
from pytorch_lightning.callbacks import ModelCheckpoint

# Train the attention model with its own checkpoint callback.
# Reusing `checkpoint_callback` from the basic autoencoder run made this run
# write into that run's checkpoint directory (see the "Checkpoint directory ...
# exists and is not empty" warning), which puts the checkpoints of two
# different models in the same place.
attention_checkpoint_callback = ModelCheckpoint(
    monitor='train_loss',
    mode='min',
    save_top_k=1,
    save_last=True,
)
trainer = Trainer(max_epochs=20, callbacks=[attention_checkpoint_callback])
trainer.fit(ae_attention_model, data_module.train_dataloader())

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(
  rank_zero_warn(


Epoch 19: 100%|██████████| 7/7 [00:00<00:00, 37.59it/s, v_num=58]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 7/7 [00:00<00:00, 34.12it/s, v_num=58]


In [15]:
# Cross-validate the attention classifier on the validation split, forwarding
# the tuned hyperparameters from the Optuna study as keyword arguments.
cv_tuned_keys = (
    'lr', 'num_heads', 'dropout_prob',
    'hidden_units', 'embed_dim', 'classifier_units',
)
cv_results = cross_validate_model(
    data_module.val_features,
    data_module.val_target,
    AutoencoderAttentionClassifier,
    context_length=1,
    num_classes=3,
    num_features=2,
    **{key: study.best_params[key] for key in cv_tuned_keys},
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 49: 100%|██████████| 2/2 [00:00<00:00, 53.80it/s, v_num=59]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 2/2 [00:00<00:00, 36.77it/s, v_num=59]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(
  rank_zero_warn(


Epoch 49: 100%|██████████| 3/3 [00:00<00:00, 52.36it/s, v_num=60]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 3/3 [00:00<00:00, 40.40it/s, v_num=60]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(
  rank_zero_warn(


Epoch 49: 100%|██████████| 4/4 [00:00<00:00, 66.01it/s, v_num=61] 

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 4/4 [00:00<00:00, 52.06it/s, v_num=61]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(
  rank_zero_warn(


Epoch 49: 100%|██████████| 5/5 [00:00<00:00, 36.98it/s, v_num=62]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 5/5 [00:00<00:00, 32.12it/s, v_num=62]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type               | Params
--------------------------------------------------
0 | encoder    | Sequential         | 33.9 K
1 | attention  | MultiheadAttention | 66.0 K
2 | decoder    | Sequential         | 33.3 K
3 | classifier | Sequential         | 8.5 K 
4 | softmax    | Softmax            | 0     
--------------------------------------------------
141 K     Trainable params
0         Non-trainable params
141 K     Total params
0.567     Total estimated model params size (MB)
  rank_zero_warn(
  rank_zero_warn(


Epoch 49: 100%|██████████| 6/6 [00:00<00:00, 67.63it/s, v_num=63]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 6/6 [00:00<00:00, 56.48it/s, v_num=63]
Cross-Validation Classification Report:
           0.0  1.0  2.0  accuracy  macro avg  weighted avg
precision  1.0  1.0  1.0       1.0        1.0           1.0
recall     1.0  1.0  1.0       1.0        1.0           1.0
f1-score   1.0  1.0  1.0       1.0        1.0           1.0
support    6.0  5.4  4.6       1.0       16.0          16.0


In [16]:
# Show the report returned by cross_validate_model for the attention classifier
print(cv_results)

           0.0  1.0  2.0  accuracy  macro avg  weighted avg
precision  1.0  1.0  1.0       1.0        1.0           1.0
recall     1.0  1.0  1.0       1.0        1.0           1.0
f1-score   1.0  1.0  1.0       1.0        1.0           1.0
support    6.0  5.4  4.6       1.0       16.0          16.0


In [17]:
# MC Dropout predictions of the attention model on the test dataloader
attention_test_loader = data_module.test_dataloader()
mc_predictions = mc_dropout_predictions(ae_attention_model, attention_test_loader)

# Aggregate along axis 0 (presumably the MC-pass axis - confirm against
# mc_dropout_predictions): the mean is the prediction, the std the uncertainty.
mean_predictions = mc_predictions.mean(axis=0)
std_predictions = mc_predictions.std(axis=0)

# Class label = index of the largest mean score in each row
predicted_labels = np.argmax(mean_predictions, axis=1)

In [18]:
# One line per test sample: predicted label, mean scores and their std.
# The mean scores are printed as-is under the "Probabilities" heading; no extra
# softmax is applied here.
n_reported = min(len(mean_predictions), len(std_predictions))
for i in range(n_reported):
    mean = mean_predictions[i]
    std = std_predictions[i]
    print(f'Sample {i}: Predicted Label = {predicted_labels[i]}, Probabilities = {mean}, Uncertainty (std) = {std}')

Sample 0: Predicted Label = 0, Probabilities = [9.9999952e-01 5.3368467e-07 9.5207750e-14], Uncertainty (std) = [2.7844749e-06 2.7842830e-06 7.2244722e-13]
Sample 1: Predicted Label = 2, Probabilities = [3.140716e-13 2.622725e-06 9.999975e-01], Uncertainty (std) = [2.5197552e-12 1.9383302e-05 1.9386363e-05]
Sample 2: Predicted Label = 2, Probabilities = [3.4909291e-12 3.0534925e-06 9.9999708e-01], Uncertainty (std) = [1.5888625e-11 1.1350981e-05 1.1354489e-05]
Sample 3: Predicted Label = 1, Probabilities = [5.5348803e-05 9.9750680e-01 2.4377755e-03], Uncertainty (std) = [0.00031489 0.01133862 0.01134168]
Sample 4: Predicted Label = 1, Probabilities = [7.0818415e-04 9.9855006e-01 7.4174500e-04], Uncertainty (std) = [0.0025205  0.00299391 0.00154708]
Sample 5: Predicted Label = 1, Probabilities = [8.8841058e-05 9.9986959e-01 4.1567204e-05], Uncertainty (std) = [0.00022357 0.00025558 0.00013787]
Sample 6: Predicted Label = 1, Probabilities = [4.1749119e-04 9.9933887e-01 2.4378397e-04], Un

- Export the results

In [19]:
import numpy as np
import pandas as pd

# Save test predictions to a CSV.
# The number of classes is read from the second dimension of the prediction
# array instead of being hard-coded, so the export keeps working when
# num_classes changes. Column order: Prediction, Probability_0..k-1,
# Uncertainty_0..k-1.
n_export_classes = np.shape(mean_predictions)[1]

export_columns = {'Prediction': predicted_labels}
for class_idx in range(n_export_classes):
    export_columns[f'Probability_{class_idx}'] = [p[class_idx] for p in mean_predictions]
for class_idx in range(n_export_classes):
    export_columns[f'Uncertainty_{class_idx}'] = [u[class_idx] for u in std_predictions]

test_df = pd.DataFrame(export_columns)

test_df.to_csv('test_predictions.csv', index=False)

- Export the model

In [20]:
from torch import save

# Persist only the state_dict of the attention model, not the whole object
weights_path = 'autoencoder_attention_classifier.pth'
attention_state = ae_attention_model.state_dict()
save(attention_state, weights_path)

In [None]:
import h5py

# Write the validation arrays to one HDF5 file under the dataset names 'X'
# (features) and 'y' (targets); mode 'w' replaces an existing file.
with h5py.File('ae_data.h5', 'w') as f:
    for dataset_name, attribute in (('X', 'val_features'), ('y', 'val_target')):
        f.create_dataset(dataset_name, data=getattr(data_module, attribute))

## Explainable AI Part

In [None]:
import sys
import os

# Make modules in the current working directory importable. The membership
# check avoids adding the same entry again when this cell is re-run or when an
# earlier cell in the same kernel already added it.
if os.getcwd() not in sys.path:
    sys.path.append(os.getcwd())

In [None]:
import h5py

# Load both datasets fully into memory ([:]) while the file is still open
with h5py.File('ae_data.h5', 'r') as f:
    X, y = (f[dataset_name][:] for dataset_name in ('X', 'y'))

In [None]:
from pl_autoencoder_classifiers import AutoencoderAttentionClassifier
from pl_model_utils import TimeSeriesDataset
from torch.utils.data import DataLoader

In [None]:
# `torch` is used below but is not imported anywhere in this section, which
# otherwise re-imports everything it needs; import it here so the section also
# runs on a fresh kernel.
import torch

# Wrap the reloaded arrays as float32 tensors: targets first, features second
val_dataset = TimeSeriesDataset(
    torch.tensor(y, dtype=torch.float32),
    torch.tensor(X, dtype=torch.float32)
)

# shuffle=False keeps the rows in their stored order
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

In [None]:
# Take the first batch from the loader and unpack it into its two parts
targets, features = batch = next(iter(val_loader))

In [None]:
# Rebuild the attention classifier with the same fixed hyperparameters it was
# created with before training, then restore the saved weights.
reload_config = {
    'context_length': 1,
    'num_classes': 3,
    'num_features': 2,
    'lr': 0.001,
    'num_heads': 1,
    'dropout_prob': 0.1,
    'hidden_units': 256,
    'embed_dim': 128,
    'classifier_units': 64,
}
ae_model = AutoencoderAttentionClassifier(**reload_config)
saved_state = torch.load('autoencoder_attention_classifier.pth')
ae_model.load_state_dict(saved_state)
# Switch to evaluation mode (last expression, so the cell shows its result)
ae_model.eval()

In [None]:
import shap

# Build a SHAP DeepExplainer for the reloaded model, passing the feature tensor
# of the first validation batch as its second argument.
# NOTE(review): elsewhere in this notebook the model is called as
# model(targets, features) and its result is unpacked into two values; confirm
# that DeepExplainer can drive this model when given `features` alone.
e = shap.DeepExplainer(ae_model, features)

In [None]:
from transformers import modeling_tf_utils