STEP 5: Modeling with Transformers

In [None]:
from pathlib import Path

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
import torch.nn as nn

class TransformerBlock(nn.Module):
    """Post-norm Transformer encoder layer.

    Applies multi-head self-attention followed by a two-layer ReLU
    feed-forward network. Each sub-layer is wrapped as
    ``LayerNorm(input + Dropout(sublayer(input)))``.

    Args:
        embed_dim: Size of the last (feature) axis of the input and output.
        num_heads: Number of attention heads passed to ``nn.MultiheadAttention``.
        ff_dim: Hidden width of the feed-forward network.
        rate: Dropout probability used inside the attention module and on
            both residual branches.

    The input to ``forward`` is batch-first, ``(batch, seq_len, embed_dim)``,
    and the output has the same shape.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super().__init__()
        # Self-attention over batch-first tensors.
        self.att = nn.MultiheadAttention(
            embed_dim=embed_dim,
            num_heads=num_heads,
            dropout=rate,
            batch_first=True,
        )
        # Position-wise feed-forward network: expand -> ReLU -> project back.
        expand = nn.Linear(embed_dim, ff_dim)
        project = nn.Linear(ff_dim, embed_dim)
        self.ffn = nn.Sequential(expand, nn.ReLU(), project)
        # One normalisation layer and one dropout layer per residual branch.
        self.layernorm1 = nn.LayerNorm(embed_dim, eps=1e-6)
        self.layernorm2 = nn.LayerNorm(embed_dim, eps=1e-6)
        self.dropout1 = nn.Dropout(p=rate)
        self.dropout2 = nn.Dropout(p=rate)

    def forward(self, x):
        """Return the block output for ``x``; the shape is unchanged."""
        # Query, key and value are all `x` (self-attention); index 0 drops the attention weights.
        attended = self.att(x, x, x)[0]
        hidden = self.layernorm1(x + self.dropout1(attended))
        return self.layernorm2(hidden + self.dropout2(self.ffn(hidden)))

class TransformerClassifier(nn.Module):
    """Linear embedding -> stacked ``TransformerBlock``s -> mean pool -> linear head.

    Args:
        input_dim: Number of input features per token.
        embed_dim: Width of the embedding / transformer representation.
        num_heads: Attention heads per transformer block.
        ff_dim: Hidden width of each block's feed-forward network.
        num_layers: Number of stacked transformer blocks.
        num_classes: Number of output logits.
        rate: Dropout probability forwarded to every block.

    ``forward`` accepts either

    * ``(batch, input_dim)``: each sample becomes a sequence with a single
      token, so self-attention only ever sees that one token, or
    * ``(batch, seq_len, input_dim)``: an explicit sequence of tokens.

    In both cases it returns logits of shape ``(batch, num_classes)``.
    """

    def __init__(self, input_dim, embed_dim, num_heads, ff_dim, num_layers, num_classes, rate=0.1):
        super().__init__()
        self.embedding = nn.Linear(input_dim, embed_dim)
        self.transformer_blocks = nn.Sequential(*[
            TransformerBlock(embed_dim, num_heads, ff_dim, rate) for _ in range(num_layers)
        ])
        # Averages over the last axis, so it must be fed (batch, embed_dim, seq_len).
        self.global_avg_pool = nn.AdaptiveAvgPool1d(1)
        self.classifier = nn.Linear(embed_dim, num_classes)

    def forward(self, x):
        """Compute class logits for a 2-D or 3-D batch (see the class docstring)."""
        if x.dim() not in (2, 3):
            raise ValueError(
                "expected input of shape (batch, input_dim) or "
                f"(batch, seq_len, input_dim), got {tuple(x.shape)}"
            )
        x = self.embedding(x)
        if x.dim() == 2:
            # Flat feature vectors: wrap each sample as a one-token sequence
            # so the batch-first blocks receive (batch, 1, embed_dim).
            x = x.unsqueeze(1)
        x = self.transformer_blocks(x)
        # Mean over the sequence axis: (batch, seq_len, embed_dim) -> (batch, embed_dim).
        # With seq_len == 1 this is the identity, matching the previous behaviour.
        x = self.global_avg_pool(x.transpose(1, 2)).squeeze(-1)
        return self.classifier(x)

In [None]:
# Create dataset and dataloaders
# Combine all features for transformer input:
# the four scaled feature matrices (eeg, gsr, tiva, sr) are stacked column-wise,
# so every row is one sample and the columns are the concatenated features.
X_train_combined = np.hstack([X_eeg_train_scaled, X_gsr_train_scaled, X_tiva_train_scaled, X_sr_train_scaled])
X_test_combined  = np.hstack([X_eeg_test_scaled, X_gsr_test_scaled, X_tiva_test_scaled, X_sr_test_scaled])

# Fail early with a readable message if the splits are inconsistent.
assert X_train_combined.shape[1] == X_test_combined.shape[1], "train/test feature counts differ"
assert len(X_train_combined) == len(y_train), "train features and labels have different lengths"
assert len(X_test_combined) == len(y_test), "test features and labels have different lengths"

# float32 features for the model input, int64 (long) labels as required by nn.CrossEntropyLoss targets.
train_dataset_tf = TensorDataset(torch.tensor(X_train_combined, dtype=torch.float32), torch.tensor(y_train, dtype=torch.long))
test_dataset_tf  = TensorDataset(torch.tensor(X_test_combined, dtype=torch.float32), torch.tensor(y_test, dtype=torch.long))

# Shuffle only the training batches; keep the test order fixed.
train_loader_tf  = DataLoader(train_dataset_tf, batch_size=32, shuffle=True)
test_loader_tf   = DataLoader(test_dataset_tf, batch_size=32, shuffle=False)

print("Transformer train data shape:", X_train_combined.shape)
print("Transformer test data shape:", X_test_combined.shape)

Transformer train data shape: (3939, 42)
Transformer test data shape: (985, 42)


In [None]:
# Transformer model training
# Hyperparameters for the TransformerClassifier.
input_dim = X_train_combined.shape[1]  # number of columns in the combined feature matrix
embed_dim = 64 # Embedding dimension for features
num_heads = 4  # Number of attention heads
ff_dim = 128   # Hidden layer size in feedforward network inside transformer
num_layers = 2 # Number of transformer blocks
num_classes = 3 # Number of sentiment classes

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
transformer_model = TransformerClassifier(input_dim, embed_dim, num_heads, ff_dim, num_layers, num_classes).to(device)

criterion_tf = nn.CrossEntropyLoss()
optimizer_tf = optim.Adam(transformer_model.parameters(), lr=0.001)

epochs_tf = 50
for epoch in range(epochs_tf):
    # Optimisation pass over the training batches (dropout active).
    transformer_model.train()
    total_loss = 0.0
    for xb, yb in train_loader_tf:
        xb, yb = xb.to(device), yb.to(device)
        optimizer_tf.zero_grad()
        out = transformer_model(xb)
        loss = criterion_tf(out, yb)
        loss.backward()
        optimizer_tf.step()
        total_loss += loss.item()

    # Evaluate on test set.
    # NOTE: these per-epoch test metrics are for monitoring only; nothing in
    # this cell selects a checkpoint or stops early based on them.
    transformer_model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for xb, yb in test_loader_tf:
            xb, yb = xb.to(device), yb.to(device)
            outputs = transformer_model(xb)
            # Predicted class = index of the largest logit. Inside no_grad the
            # legacy `.data` access is unnecessary, so argmax is used directly.
            preds = outputs.argmax(dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(yb.cpu().numpy())

    test_accuracy = accuracy_score(all_labels, all_preds)
    test_macro_f1 = f1_score(all_labels, all_preds, average='macro')

    # Train loss is the mean of the per-batch mean losses; log every 10th epoch.
    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}/{epochs_tf}, Train Loss: {total_loss/len(train_loader_tf):.4f}, Test Accuracy: {test_accuracy:.4f}, Test Macro F1: {test_macro_f1:.4f}")

# all_labels / all_preds now hold the predictions from the final epoch.
print("\nTransformer Model Evaluation:")
print(classification_report(all_labels, all_preds))

Epoch 10/50, Train Loss: 0.6370, Test Accuracy: 0.7005, Test Macro F1: 0.3494
Epoch 20/50, Train Loss: 0.5307, Test Accuracy: 0.6579, Test Macro F1: 0.3943
Epoch 30/50, Train Loss: 0.3970, Test Accuracy: 0.6518, Test Macro F1: 0.4126
Epoch 40/50, Train Loss: 0.2708, Test Accuracy: 0.6284, Test Macro F1: 0.4206
Epoch 50/50, Train Loss: 0.2022, Test Accuracy: 0.6396, Test Macro F1: 0.4155

Transformer Model Evaluation:
              precision    recall  f1-score   support

           0       0.12      0.07      0.09        43
           1       0.74      0.76      0.75       684
           2       0.41      0.40      0.40       258

    accuracy                           0.64       985
   macro avg       0.42      0.41      0.42       985
weighted avg       0.63      0.64      0.63       985



In [None]:
# Save the trained Transformer model
transformer_model_path = "models/transformer__model.pt"
# torch.save does not create missing parent folders, so make sure the target directory exists first.
Path(transformer_model_path).parent.mkdir(parents=True, exist_ok=True)
# Only the state_dict is written; reloading requires building a
# TransformerClassifier with the same hyperparameters and calling load_state_dict.
torch.save(transformer_model.state_dict(), transformer_model_path)
print("Saved Transformer model to:", transformer_model_path)

Saved Transformer model to: models/transformer__model.pt
