References
WESAD: https://archive.ics.uci.edu/dataset/465/wesad+wearable+stress+and+affect+detection \
TabTransformer:\
https://aravindkolli.medium.com/mastering-tabular-data-with-tabtransformer-a-comprehensive-guide-119f6dbf5a79 \
https://medium.com/@cristianleo120/the-math-behind-tabtransformer-78b78c12cfc1 \
https://towardsdatascience.com/transformers-for-tabular-data-b3e196fab6f4/\
https://towardsdatascience.com/transformers-for-tabular-data-tabtransformer-deep-dive-5fb2438da820/


Steps:
1. Import Dataset
2. Train-test split and Data Loader
3. Transformer/ Neural network
    1) Create a model
    2) Choose a loss function
    3) Set an optimizer 
    4) Run a training loop
        Calculate loss (Forward pass)
        Compute gradients (Backpropagation)
        Updating model parameters
4. Evaluation

In [27]:
#1. Import Dataset
import os
import pickle
import numpy as np
from scipy.signal import resample
from scipy.stats import mode
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

In [46]:
from torch.utils.data import TensorDataset, Subset, DataLoader
import torch.nn as nn
import torch.optim as optim

In [29]:
class WESADDataset(Dataset):
    def __init__(self, data_path, window_size=128, overlap=0.0):
        self.data_path = data_path
        self.window_size = window_size
        self.overlap = overlap
        self.signal_names = ['ACC','Resp','EDA','Temp','ECG','EMG']  
        self.data, self.labels, self.subjects = self.load_dataset()
    
    def load_dataset(self):
        subjects = [f'S{i}' for i in range(1, 18) if i not in [1, 12]]  # S1 and S12 are not available (Problem with sensors)
        all_data = []
        all_labels = []
        all_subjects = []
        
        orig_fs = 700
        target_fs = 32
        
        for subject in subjects:
            subj_dir = os.path.join(self.data_path, subject)
            data_file = os.path.join(subj_dir, f'{subject}.pkl')
            
            if not os.path.exists(data_file):
                print(f'Warning: {data_file} does not exist')
                continue
            
            try:
                with open(data_file, 'rb') as f:
                    raw = torch.load(f) if self.data_path.endswith('.pt') else pickle.load(f, encoding='latin1')
                
                # Extract chest data and label
                chest_data = raw['signal']['chest']
                labels = raw['label']
                
                # Process signals
                signals = []
                for name in self.signal_names:
                    if name in chest_data:
                        sig = chest_data[name]
                        
                        # Handle multi-dimensional signals (like ACC with x,y,z components)
                        if len(sig.shape) > 1:
                            if name == 'ACC':
                                # For accelerometer, compute magnitude from 3D components
                                if sig.shape[1] == 3:  # x, y, z components
                                    sig = np.sqrt(np.sum(sig**2, axis=1))  # Magnitude
                                else:
                                    sig = sig.flatten()
                            else:
                                sig = sig.flatten()
                        
                        # Resample signal
                        sig_resampled = resample(sig, int(len(sig) * target_fs / orig_fs))
                        signals.append(sig_resampled)
                    else:
                        print(f'Warning: {name} missing for {subject}')
                
                if len(signals) != len(self.signal_names):
                    print(f'Skipping {subject} due to missing modalities')
                    continue
                
                # Ensure all signals have same length
                min_len = min(map(len, signals))
                signals = [s[:min_len] for s in signals]
                signal_matrix = np.stack(signals, axis=1)
                
                # Resample labels
                labels_resampled = resample(labels, min_len)
                labels_resampled = np.round(labels_resampled).astype(int)
                
                # Create sliding windows
                win_data, win_labels = self.create_windows(signal_matrix, labels_resampled)
                
                all_data.extend(win_data)
                all_labels.extend(win_labels)
                all_subjects.extend([subject]*len(win_data))
                
                print(f'Loaded {len(win_data)} sliding windows for {subject}')
                
            except Exception as e:
                print(f'Error processing {subject}: {e}')
                continue
        
        return np.array(all_data), np.array(all_labels), np.array(all_subjects)
    
    def create_windows(self, data, labels):
        step = int(self.window_size * (1 - self.overlap))
        windows = []
        window_labels = []
        
        for start in range(0, data.shape[0] - self.window_size + 1, step):
            end = start + self.window_size
            label_window = labels[start:end]
            
            # Handle newer scipy versions
            mode_result = mode(label_window, keepdims=True)
            lbl = int(mode_result[0][0])
            
            if lbl == 1:  # Baseline
                windows.append(data[start:end])
                window_labels.append(0)
            elif lbl == 2:  # Stress
                windows.append(data[start:end])
                window_labels.append(1)
        
        return windows, window_labels
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        return torch.tensor(self.data[idx], dtype=torch.float32), torch.tensor(self.labels[idx], dtype=torch.long)


In [30]:
DATASET_PATH = '/Users/kumar/Library/Mobile Documents/com~apple~CloudDocs/Phoenix/OVGU/HiWi2/Tasks/10_WESAD/WESAD.nosync'

ds = WESADDataset(DATASET_PATH)

Loaded 440 sliding windows for S2
Loaded 445 sliding windows for S3
Loaded 449 sliding windows for S4
Loaded 460 sliding windows for S5
Loaded 458 sliding windows for S6
Loaded 457 sliding windows for S7
Loaded 460 sliding windows for S8
Loaded 456 sliding windows for S9
Loaded 476 sliding windows for S10
Loaded 465 sliding windows for S11
Loaded 461 sliding windows for S13
Loaded 464 sliding windows for S14
Loaded 464 sliding windows for S15
Loaded 463 sliding windows for S16
Loaded 476 sliding windows for S17


In [31]:
# Window size: How many timesteps or consecutive records each sample contains
# Batch size: How many independent samples are processed in parallel

In [32]:
print(len(ds)) # How 6894?, all devices sampled at same rate?
input_sample, label_sample = ds[0]
print(len(input_sample)) # Time steps = 128, (window size is downsampled from 700 Hz to 32Hz, 128/32 = 4 seconds of data per window)
print(len(input_sample[0])) # ['ACC','Resp','EDA','Temp','ECG','EMG'], 6 sensors
print('Input sample:', input_sample) # 128 * 6
print('Label sample:', label_sample) # 0 for Baseline, 1 for Stress label for 4 seconds window

6894
128
6
Input sample: tensor([[ 9.5370e-01,  2.2468e+00,  5.5277e+00,  2.9131e+01, -1.4182e-01,
         -6.0475e-03],
        [ 9.1147e-01,  2.3274e+00,  5.5262e+00,  2.9136e+01, -1.3497e-01,
          6.8507e-05],
        [ 9.0827e-01,  2.3982e+00,  5.5229e+00,  2.9145e+01, -9.1329e-02,
         -3.4008e-03],
        [ 9.2792e-01,  2.4003e+00,  5.5208e+00,  2.9142e+01, -1.2794e-01,
          5.1459e-05],
        [ 9.3718e-01,  2.4020e+00,  5.5201e+00,  2.9131e+01, -1.3628e-01,
         -4.2756e-03],
        [ 9.3415e-01,  2.3529e+00,  5.5167e+00,  2.9139e+01, -5.8765e-02,
         -2.8011e-03],
        [ 9.2376e-01,  2.2870e+00,  5.5171e+00,  2.9137e+01,  7.2924e-02,
         -3.5566e-03],
        [ 9.2641e-01,  2.2015e+00,  5.5147e+00,  2.9140e+01,  6.3188e-02,
         -2.0663e-03],
        [ 9.3307e-01,  2.0973e+00,  5.5099e+00,  2.9133e+01,  3.0525e-02,
         -2.8087e-03],
        [ 9.3486e-01,  1.9369e+00,  5.5108e+00,  2.9139e+01,  1.3204e-02,
         -2.8071e-03],
     

In [43]:
print(ds.data)
print(ds.labels)

[[[ 0.75375696  0.59662189  0.25279726 -3.00844398 -0.60045045
   -0.78229919]
  [-0.65243759  0.61855696  0.2523696  -3.00569337 -0.57163441
    0.7494205 ]
  [-0.75893302  0.63781844  0.25144793 -2.99943331 -0.38829829
   -0.11945359]
  ...
  [-0.08670693  0.13346265  0.19340013 -2.99343917 -0.60667006
   -0.23141488]
  [-0.20789459  0.11483002  0.19284315 -2.99956     0.11112855
   -0.18645793]
  [-0.20855926  0.1065732   0.19205292 -2.99216232  0.11806932
    0.35170578]]

 [[-0.37928513  0.09292784  0.19144258 -2.99118525  0.13195381
   -0.24133015]
  [ 0.15649552  0.07157643  0.19062762 -2.99918748 -0.00515754
    0.22788466]
  [ 0.24676713  0.05596738  0.19096845 -2.99865191  0.0573649
   -0.35602722]
  ...
  [-0.07071888 -0.25929936  0.15638297 -2.98666235  0.11662972
    0.06545338]
  [-0.18940893 -0.26109408  0.15642603 -2.98560626  0.04263479
    0.05195681]
  [-0.00698545 -0.27210414  0.15667911 -2.98547582  0.04093985
    0.097237  ]]

 [[ 0.10037127 -0.27509729  0.1565567

In [44]:
# Normalise data

means = ds.data.mean(axis=(0, 1))   # shape: across batch and time steps
stds = ds.data.std(axis=(0, 1))

# apply normalization in-place
ds.data = (ds.data - means[None, None, :]) / stds[None, None, :]

In [47]:
# 2. Train test size

train_ratio, val_ratio, test_ratio = 0.7, 0.15, 0.15

n_total = len(ds)
n_train = int(train_ratio * total_size)
n_val = int(val_ratio * total_size)
n_test = total_size - train_size - valid_size

print(n_total, n_train, n_val, n_test)

6894 4825 1034 1035


In [49]:
# Train test split

train_ds = Subset(ds, range(0, n_train))
val_ds   = Subset(ds, range(n_train, n_train + n_val))
test_ds  = Subset(ds, range(n_train + n_val, n_total))

In [57]:
# Data Loaders
train_dataloader = DataLoader(train_ds, batch_size = 32, shuffle = False) # batch size = grouping 32 samples
val_dataloader = DataLoader(val_ds, batch_size = 32, shuffle = False)
test_dataloader = DataLoader(test_ds, batch_size = 32, shuffle = False)

# Sample batch
for batch_inputs, batch_labels in train_dataloader:
    print('batch_inputs:', batch_inputs)
    print('batch_labels:', batch_labels)
    print(len(batch_inputs))
    print(len(batch_inputs[0]))
    break

batch_inputs: tensor([[[ 7.5376e-01,  5.9662e-01,  2.5280e-01, -3.0084e+00, -6.0045e-01,
          -7.8230e-01],
         [-6.5244e-01,  6.1856e-01,  2.5237e-01, -3.0057e+00, -5.7163e-01,
           7.4942e-01],
         [-7.5893e-01,  6.3782e-01,  2.5145e-01, -2.9994e+00, -3.8830e-01,
          -1.1945e-01],
         ...,
         [-8.6707e-02,  1.3346e-01,  1.9340e-01, -2.9934e+00, -6.0667e-01,
          -2.3141e-01],
         [-2.0789e-01,  1.1483e-01,  1.9284e-01, -2.9996e+00,  1.1113e-01,
          -1.8646e-01],
         [-2.0856e-01,  1.0657e-01,  1.9205e-01, -2.9922e+00,  1.1807e-01,
           3.5171e-01]],

        [[-3.7929e-01,  9.2928e-02,  1.9144e-01, -2.9912e+00,  1.3195e-01,
          -2.4133e-01],
         [ 1.5650e-01,  7.1576e-02,  1.9063e-01, -2.9992e+00, -5.1575e-03,
           2.2788e-01],
         [ 2.4677e-01,  5.5967e-02,  1.9097e-01, -2.9987e+00,  5.7365e-02,
          -3.5603e-01],
         ...,
         [-7.0719e-02, -2.5930e-01,  1.5638e-01, -2.9867e+00,  1.

3. Transformer/ Neural network
    1) Create a model
    2) Choose a loss function
    3) Define a dataset
    4) Set an optimizer 
    5) Run a training loop
        Calculate loss (Forward pass)
        Compute gradients (Backpropagation)
        Updating model parameters

In [58]:
# num_heads = width of attention (how many perspectives are considered in parallel).
# num_layers = depth of reasoning (how many times the model refines its understanding)

In [59]:
# 3.1 TabTransformer model class, modified from Medium

class TabTransformer(nn.Module):
    def __init__(self, num_features, num_classes, dim_embedding, num_heads, num_layers):
        super(TabTransformer, self).__init__()
        self.embedding = nn.Linear(num_features, dim_embedding) # project input features -> embedding
        # transformer encoder (batch_first=True so input shape is [batch_size, timesteps, num_features/dim_embedding])
        encoder_layer = nn.TransformerEncoderLayer(d_model=dim_embedding,nhead=num_heads,dim_feedforward=dim_embedding * 4,batch_first=True)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.classifier = nn.Linear(dim_embedding, num_classes) # simple linear classifier

    def forward(self, x):
        # x: [batch, timesteps, features]
        x = self.embedding(x)            # -> [batch, timesteps, dim_embedding], project input to embedding
        x = self.transformer(x)          # -> [batch, timesteps, dim_embedding], passes through multiple [Attention + FFN + Norm] layers
        x = torch.mean(x, dim=1)         # -> [batch, dim_embedding], global mean pooling over timesteps 
        x = self.classifier(x)           # -> [batch, num_classes], final classification head
        return x

In [60]:
# Initialize model
model = TabTransformer(
    num_features = 6,        # 6 sensor features
    num_classes = 2,         # Binary classification
    dim_embedding = 64,      # Embedding dimension
    num_heads = 4,           # Number of attention heads
    num_layers = 4,          # Number of transformer layers
).to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))

print(model)

TabTransformer(
  (embedding): Linear(in_features=6, out_features=64, bias=True)
  (transformer): TransformerEncoder(
    (layers): ModuleList(
      (0-3): 4 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
        )
        (linear1): Linear(in_features=64, out_features=256, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=256, out_features=64, bias=True)
        (norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (classifier): Linear(in_features=64, out_features=2, bias=True)
)


In [63]:
# Loss function criterion and optimizer 
criterion = nn.CrossEntropyLoss() # measures the error between predicted and true
optimizer = optim.Adam(model.parameters(), lr=0.004) # updates the model weights by minimizing the loss

In [62]:
# Import additional libraries
from sklearn.metrics import accuracy_score, f1_score
import torch.nn.functional as F
from tqdm import tqdm

In [None]:
# Training loop 

for epoch in range(100):
    optimizer.zero_grad()
    output = model(X_train_tensor)
    loss = criterion(output, y_train_tensor)
    loss.backward()
    optimizer.step()

    

    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item()}')

NameError: name 'X_train_tensor' is not defined