In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import TensorDataset, DataLoader
from torchmetrics.classification import BinaryAccuracy, BinaryAveragePrecision, BinaryF1Score, BinaryPrecision, BinaryRecall, BinaryHammingDistance

# Import transformer model
from Transformer_Archs.Sparsemax_Enc import TimeseriesTransformer

# Ask PyTorch to release the cached, currently unused GPU memory it is holding
# (presumably left over from an earlier run in the same kernel — confirm).
torch.cuda.empty_cache()

In [None]:
# Suffix selecting which preprocessed attack-data variant to load.
# NOTE(review): the earlier note here listed 5% or 10%, yet '15' is selected — confirm.
option = '15'


def _load_split(dataset_path, config_path):
    """Load one split's saved dataset and DataLoader kwargs and build its loader.

    Returns the tuple ``(dataset, config, loader)``.
    """
    # NOTE(review): weights_only=False lets torch.load unpickle arbitrary
    # objects, so these files must come from a trusted source.
    dataset = torch.load(dataset_path, weights_only=False)
    config = torch.load(config_path, weights_only=False)
    return dataset, config, DataLoader(dataset, **config)


# Build the train / validation / test loaders from the files on disk
train_dataset, train_config, train_loader = _load_split(
    './Preprocessed_data/train_dataset_{}.pt'.format(option),
    './Preprocessed_data/train_config_{}.pt'.format(option),
)
val_dataset, val_config, val_loader = _load_split(
    './Preprocessed_data/val_dataset_{}.pt'.format(option),
    './Preprocessed_data/val_config_{}.pt'.format(option),
)
test_dataset, test_config, test_loader = _load_split(
    './Preprocessed_data/test_dataset_{}.pt'.format(option),
    './Preprocessed_data/test_config_{}.pt'.format(option),
)

# Input feature count, output (target) size and sequence length
num_features, out_features, seq_len = 373, 20, 12

In [None]:
# Transformer hyperparameters
d_model, nhead = 512, 4
num_encoder_layers, dim_feedforward = 4, 512

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

# Build the model and move it to the selected device.
# Positional arguments: input_feature_size, output_feature_size, d_model,
# nhead, num_encoder_layers, dim_feedforward
model = TimeseriesTransformer(
    num_features,
    out_features,
    d_model,
    nhead,
    num_encoder_layers,
    dim_feedforward,
).to(device)

# Loss function (binary cross-entropy computed from logits) and optimizer
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)  # other values noted: 0.001, 5e-4, 1e-3

# Scheduler that multiplies the learning rate by 0.5 once the value passed to
# scheduler.step() has stopped decreasing (patience=5)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)


In [None]:
class EarlyStopping:
    """Track a loss across epochs and flag when it has stopped improving.

    Call the instance once per epoch with the monitored loss. ``early_stop``
    becomes True once ``patience`` consecutive calls have passed without an
    improvement, and it is never reset afterwards.
    """

    def __init__(self, patience=5, min_delta=0):
        """
        Args:
            patience (int): How many epochs to wait after last time validation loss improved.
            min_delta (float): Minimum change in the monitored quantity to qualify as an improvement.
        """
        self.patience = patience
        self.min_delta = min_delta
        self.best_loss = None
        self.counter = 0
        self.early_stop = False

    def __call__(self, val_loss):
        """Record one epoch's loss and update ``counter`` / ``early_stop``.

        A loss counts as an improvement when it is at most
        ``best_loss - min_delta`` (or when no best loss has been recorded yet).
        A NaN loss is never an improvement.

        Args:
            val_loss: Scalar loss for the epoch that just finished.
        """
        # NaN is the only value that is not equal to itself. Without this
        # guard a NaN would fail every comparison, be stored as best_loss and
        # reset the counter, after which early stopping could never trigger.
        if val_loss != val_loss:
            improved = False
        elif self.best_loss is None:
            improved = True
        else:
            improved = val_loss <= self.best_loss - self.min_delta

        if improved:
            self.best_loss = val_loss
            self.counter = 0
            return

        self.counter += 1
        if self.counter >= self.patience:
            self.early_stop = True


In [None]:
def train(model, device, train_loader, optimizer, epoch, metric):
    """Run one training pass over ``train_loader`` and report averaged results.

    The loss function is read from the module-level ``criterion``.

    Args:
        model: Network to optimise; switched to training mode here.
        device: Device that each batch is moved to.
        train_loader: Iterable yielding ``(data, target)`` batches.
        optimizer: Optimizer stepped once per batch.
        epoch: Epoch number, used only in the printed summary.
        metric: Callable ``metric(output, target)`` returning a scalar tensor.

    Returns:
        Tuple ``(mean per-batch metric * 100, mean per-batch loss)``.
    """
    model.train()
    batch_losses, batch_scores = [], []

    for data, target in train_loader:
        data = data.to(device)
        target = target.to(device)
        # Swap the first two axes before the forward pass
        # NOTE(review): presumably (batch, seq, feature) -> (seq, batch, feature) — confirm
        data = data.permute(1, 0, 2)

        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        batch_scores.append(metric(output, target).item())
        batch_losses.append(loss.item())

    mean_loss = np.mean(batch_losses)
    mean_score_pct = np.mean(batch_scores) * 100
    print("Train Epoch: {} - Training Loss: {:.5f} Training accuracy: {:.3f}%".format(epoch, mean_loss, mean_score_pct))

    return mean_score_pct, mean_loss

def Validation(model, device, valid_loader, epoch, metric):
    """Evaluate ``model`` on ``valid_loader`` without updating its weights.

    The loss function is read from the module-level ``criterion``.

    Args:
        model: Network to evaluate; switched to eval mode here.
        device: Device that each batch is moved to.
        valid_loader: Iterable yielding ``(data, target)`` batches.
        epoch: Epoch number, used only in the printed summary.
        metric: Callable ``metric(output, target)`` returning a scalar tensor.

    Returns:
        Tuple ``(mean per-batch metric * 100, mean per-batch loss)``.
    """
    model.eval()
    accuracy_list, loss_list = [], []
    # No gradients are needed for evaluation; disabling them avoids building
    # an autograd graph (and holding its activations) for every batch.
    with torch.no_grad():
        for data, target in valid_loader:
            data, target = data.to(device), target.to(device)
            # Swap the first two axes before the forward pass
            # NOTE(review): presumably (batch, seq, feature) -> (seq, batch, feature) — confirm
            data = data.permute(1, 0, 2)
            output = model(data)
            loss = criterion(output, target)
            accuracy = metric(output, target)
            accuracy_list.append(accuracy.item())
            loss_list.append(loss.item())

    mean_loss = np.mean(loss_list)
    mean_accuracy_pct = np.mean(accuracy_list) * 100
    print("Valid Epoch: {} - Validation Loss: {:.5f} Validation accuracy: {:.3f}%".format(epoch, mean_loss, mean_accuracy_pct))

    return mean_accuracy_pct, mean_loss

In [None]:
# Initialize the early stopping mechanism
early_stopping = EarlyStopping(patience=10, min_delta=0.0001)

# Per-epoch history, filled in by the loop below
train_acc_list, val_acc_list = [], []
train_loss_list, val_loss_list = [], []

metric = BinaryAccuracy().to(device)

# Train the model
epochs = 150
for epoch in range(1, epochs + 1):
    train_acc, train_loss = train(model, device, train_loader, optimizer, epoch, metric)
    Val_acc, Val_loss = Validation(model, device, val_loader, epoch, metric)

    train_acc_list.append(train_acc)
    val_acc_list.append(Val_acc)
    train_loss_list.append(train_loss)
    val_loss_list.append(Val_loss)

    # Drive the LR schedule and early stopping from the validation loss:
    # the training loss can keep falling while the model overfits, so
    # monitoring it would not stop (or slow) training when generalisation
    # has stalled.
    scheduler.step(Val_loss)
    early_stopping(Val_loss)
    if early_stopping.early_stop:
        print("Early stopping triggered. Stopping training.")
        break



In [None]:
# Save the per-epoch training/validation curves and the trained weights.
# Create the output directory first so a finished training run is not lost
# to a missing folder when the first save is attempted.
os.makedirs('./Final_models/Sparsemax', exist_ok=True)

np.save('./Final_models/Sparsemax/train_loss_sparsemax_{}.npy'.format(option), np.array(train_loss_list))
np.save('./Final_models/Sparsemax/train_accuracy_sparsemax_{}.npy'.format(option), np.array(train_acc_list))
np.save('./Final_models/Sparsemax/val_loss_sparsemax_{}.npy'.format(option), np.array(val_loss_list))
np.save('./Final_models/Sparsemax/val_accuracy_sparsemax_{}.npy'.format(option), np.array(val_acc_list))
torch.save(model.state_dict(), "./Final_models/Sparsemax/transformer_sparsemax_{}.pth".format(option))

In [None]:
# Setup hyperparameter
# These are passed to the model constructor below, so they need to describe
# the same architecture the saved state dict was produced with.
d_model = 512
nhead = 4
num_encoder_layers = 4
dim_feedforward = 512

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Initialize the model
# Input style: [input_feature_size, output_feature_size, d_model, nhead, num_encoder_layers, dim_feedforward]
model = TimeseriesTransformer(num_features, out_features, d_model, nhead, num_encoder_layers, dim_feedforward).to(device)

# Loss function used for evaluation
criterion=nn.BCEWithLogitsLoss()

# Load model weights
# map_location remaps the stored tensors onto the current device, so a
# checkpoint saved from a GPU run can still be loaded on a CPU-only machine.
state_dict = torch.load(
    "./Final_models/Sparsemax/transformer_sparsemax_{}.pth".format(option),
    map_location=device,
    weights_only=True,
)
model.load_state_dict(state_dict)

In [None]:
# Evaluate on every batch of the test loader, not only the first one, so the
# reported numbers describe the whole test set.
model.eval()
accuracy_list = []  # per-batch accuracy values
batch_outputs, batch_targets = [], []
with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        # Swap the first two axes before the forward pass
        # NOTE(review): presumably (batch, seq, feature) -> (seq, batch, feature) — confirm
        data = data.permute(1, 0, 2)
        batch_output = model(data)
        accuracy_list.append(metric(batch_output, target).item())
        batch_outputs.append(batch_output)
        batch_targets.append(target)

    # Keep the full-set logits and targets in `output` / `target`; they are
    # reused when the remaining metrics are computed.
    output = torch.cat(batch_outputs, dim=0)
    target = torch.cat(batch_targets, dim=0)
    test_loss = criterion(output, target).item()
    accuracy = metric(output, target).item()

    print('Test set: Loss: {:.6f}, Accuracy: {:.3f}%\n'.format(
        test_loss, accuracy*100))

In [None]:
# Each metric is created on the evaluation device and applied straight away
# to the `output` / `target` tensors left by the test evaluation.

# Average precision is the only metric here that is given integer targets
avg_precision = BinaryAveragePrecision().to(device)
avg_precision_score = avg_precision(output, target.int()).item()

f1 = BinaryF1Score().to(device)
f1_score = f1(output, target).item()

precision = BinaryPrecision().to(device)
precision_score = precision(output, target).item()

recall = BinaryRecall().to(device)
recall_score = recall(output, target).item()

hamming = BinaryHammingDistance().to(device)
hamming_score = hamming(output, target).item()

# Report the results; `accuracy` comes from the test evaluation
print(f"Accuracy: {accuracy}")
print(f"Average Precision: {avg_precision_score}")
print(f"F1 Score: {f1_score}")
print(f"Precision: {precision_score}")
print(f"Recall: {recall_score}")
print(f"Hamming Distance: {hamming_score}")

In [None]:
# Plot the average training and validation accuracy per epoch.
# An explicit figure/axes pair keeps this plot independent of any other
# figure state in the kernel.
fig, ax = plt.subplots()
ax.grid(True)
ax.plot(train_acc_list, label=r"Training accuracy")
ax.plot(val_acc_list, label=r"Validation accuracy")
ax.set_xlabel(r"Epoch $\longrightarrow$")
ax.set_ylabel(r"Accuracy $\longrightarrow$")
ax.legend()
# The model trained here is the sparsemax variant, so label it as such
ax.set_title(r"Average accuracy per epoch - Sparsemax");

In [None]:
# Plot the average training and validation loss per epoch.
# An explicit figure/axes pair keeps this plot independent of any other
# figure state in the kernel.
fig, ax = plt.subplots()
ax.grid(True)
ax.plot(train_loss_list, label=r"Training loss")
ax.plot(val_loss_list, label=r"Validation loss")
ax.set_xlabel(r"Epoch $\longrightarrow$")
ax.set_ylabel(r"Loss $\longrightarrow$")
ax.legend(loc='best')
# The model trained here is the sparsemax variant, so label it as such
ax.set_title(r"Average loss per epoch - Sparsemax");