In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import TensorDataset, DataLoader

# Import transformer model
from Transformer_Archs.RST_Enc import TimeseriesTransformer

# Ask PyTorch's caching allocator to release GPU memory it is holding but not
# currently using (e.g. left over from an earlier run in the same kernel)
# before the datasets and the model are created.
torch.cuda.empty_cache()

In [None]:
# Select which preprocessed dataset variant to use (5% or 10% attack data).
# The value is substituted into every file name below and into the output
# paths used by the later save/load cells.
option = '5' 

# Load each split's serialized dataset together with the keyword arguments
# that were stored for its DataLoader, then build the loader from both.
# NOTE(review): weights_only=False lets torch.load unpickle arbitrary Python
# objects, which can execute code embedded in the file. Only load files that
# were produced by this project's own preprocessing step.
train_dataset = torch.load('./Preprocessed_data/train_dataset_{}.pt'.format(option), weights_only=False)
train_config = torch.load('./Preprocessed_data/train_config_{}.pt'.format(option), weights_only=False)
train_loader = DataLoader(train_dataset, **train_config)

val_dataset = torch.load('./Preprocessed_data/val_dataset_{}.pt'.format(option), weights_only=False)
val_config = torch.load('./Preprocessed_data/val_config_{}.pt'.format(option), weights_only=False)
val_loader = DataLoader(val_dataset, **val_config)

test_dataset = torch.load('./Preprocessed_data/test_dataset_{}.pt'.format(option), weights_only=False)
test_config = torch.load('./Preprocessed_data/test_config_{}.pt'.format(option), weights_only=False)
test_loader = DataLoader(test_dataset, **test_config)

# Set features and target size.
# num_features and out_features are passed to TimeseriesTransformer when the
# model is built; seq_len is not referenced in the visible cells.
# NOTE(review): presumably these must match the tensors stored in the
# preprocessed datasets -- confirm against the preprocessing notebook.
num_features = 86
out_features = 11
seq_len = 12

In [None]:
# Sparsemax hyperparameter (forwarded to TimeseriesTransformer as its last argument)
alpha = 2 # must be greater than 1

# Transformer architecture hyperparameters
d_model = 64
nhead = 4
num_encoder_layers = 2
dim_feedforward = 64

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Initialize the model
# Positional argument order as used here:
# [input_feature_size, output_feature_size, d_model, nhead, num_encoder_layers, dim_feedforward, alpha]
model = TimeseriesTransformer(num_features, out_features, d_model, nhead, num_encoder_layers, dim_feedforward, alpha).to(device)

# Loss function and optimizer. `criterion` is read as a notebook-level global
# by train() and Validation().
criterion=nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr= 1e-4) # other values noted by the author: 0.001, 5e-4, 1e-3

# Learning-rate scheduler: multiplies the learning rate by `factor` (0.5) once
# the metric passed to scheduler.step() has stopped decreasing (mode='min')
# for `patience` (5) epochs.
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)


In [None]:
class EarlyStopping:
    """Signal that training should stop once a monitored loss stops improving.

    Call the instance once per epoch with the loss to monitor. After
    ``patience`` consecutive calls without an improvement of at least
    ``min_delta`` the ``early_stop`` flag is set to ``True``.

    Attributes:
        patience (int): Number of consecutive non-improving calls tolerated.
        min_delta (float): Minimum decrease of the loss that counts as an improvement.
        best_loss: Lowest loss accepted so far, or ``None`` before the first valid call.
        counter (int): Consecutive non-improving calls since the last improvement.
        early_stop (bool): Becomes ``True`` once ``counter`` reaches ``patience``.
    """

    def __init__(self, patience=5, min_delta=0):
        """
        Args:
            patience (int): How many epochs to wait after last time validation loss improved.
            min_delta (float): Minimum change in the monitored quantity to qualify as an improvement.
        """
        self.patience = patience
        self.min_delta = min_delta
        self.best_loss = None
        self.counter = 0
        self.early_stop = False

    def __call__(self, val_loss):
        """Record one epoch's loss and update ``counter`` / ``early_stop``.

        Args:
            val_loss (float): Loss value for the epoch that just finished.
        """
        # NaN is the only value that is not equal to itself. A NaN loss (diverged
        # training) must never be stored as the best loss: every later comparison
        # against NaN is False, so it would otherwise block early stopping forever.
        is_nan = val_loss != val_loss

        improved = (not is_nan) and (
            self.best_loss is None
            or val_loss <= self.best_loss - self.min_delta
        )

        if improved:
            self.best_loss = val_loss
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True


In [None]:
def train(model, device, train_loader, optimizer, epoch, loss_fn=None):
    """Run one training epoch and report the mean loss and accuracy.

    Args:
        model: Network to optimise. It is called with each batch after the
            first two axes of the batch have been swapped via ``permute(1, 0, 2)``.
        device: Device the batch tensors are moved to before the forward pass.
        train_loader: Iterable yielding ``(data, target)`` batches.
        optimizer: Optimizer stepped once per batch.
        epoch: Epoch number, used only in the progress message.
        loss_fn: Loss callable ``loss_fn(output, target)``. When omitted, the
            notebook-level ``criterion`` is used, as before.

    Returns:
        tuple: ``(accuracy_percent, loss)`` where both values are the mean of
        the per-batch values over the epoch.
    """
    if loss_fn is None:
        # Backward-compatible fallback to the loss defined in the setup cell.
        loss_fn = criterion

    model.train()
    accuracy_list, loss_list = [], []
    for data, target in train_loader:
        # Swap the first two axes of the batch before feeding the model.
        data = data.to(device).permute(1, 0, 2)
        # Class indices must be int64 for the loss; cast directly on the
        # target device instead of going through a CPU tensor type.
        target = target.to(device, dtype=torch.long)

        optimizer.zero_grad()
        output = model(data)
        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step()

        pred = output.argmax(dim=1, keepdim=True)
        correct = pred.eq(target.view_as(pred)).sum().item()
        accuracy_list.append(correct / target.shape[0])
        loss_list.append(loss.item())

    mean_accuracy = np.mean(accuracy_list)*100
    mean_loss = np.mean(loss_list)
    print("Train Epoch: {} - Training Loss: {:.5f} Training accuracy: {:.3f}%".format(epoch, mean_loss, mean_accuracy))

    return mean_accuracy, mean_loss

def Validation(model, device, valid_loader, epoch, loss_fn=None):
    """Evaluate the model on a validation loader without tracking gradients.

    Args:
        model: Network to evaluate; switched to eval mode by this function.
            It is called with each batch after the first two axes of the
            batch have been swapped via ``permute(1, 0, 2)``.
        device: Device the batch tensors are moved to before the forward pass.
        valid_loader: Iterable yielding ``(data, target)`` batches.
        epoch: Epoch number, used only in the progress message.
        loss_fn: Loss callable ``loss_fn(output, target)``. When omitted, the
            notebook-level ``criterion`` is used, as before.

    Returns:
        tuple: ``(accuracy_percent, loss)`` where both values are the mean of
        the per-batch values over the loader.
    """
    if loss_fn is None:
        # Backward-compatible fallback to the loss defined in the setup cell.
        loss_fn = criterion

    model.eval()
    accuracy_list, loss_list = [], []
    # No parameter update happens here, so disable autograd: this avoids
    # building a computation graph for every validation batch.
    with torch.no_grad():
        for data, target in valid_loader:
            data = data.to(device).permute(1, 0, 2)
            # Class indices must be int64 for the loss; cast directly on the
            # target device instead of going through a CPU tensor type.
            target = target.to(device, dtype=torch.long)

            output = model(data)
            loss = loss_fn(output, target)

            pred = output.argmax(dim=1, keepdim=True)
            correct = pred.eq(target.view_as(pred)).sum().item()
            accuracy_list.append(correct / target.shape[0])
            loss_list.append(loss.item())

    mean_accuracy = np.mean(accuracy_list)*100
    mean_loss = np.mean(loss_list)
    print("Valid Epoch: {} - Validation Loss: {:.5f} Validation accuracy: {:.3f}%".format(epoch, mean_loss, mean_accuracy))

    return mean_accuracy, mean_loss

In [None]:
# Initialize the early stopping mechanism
early_stopping = EarlyStopping(patience=10, min_delta=0.0001)

# Per-epoch history, saved and plotted by later cells
train_acc_list, val_acc_list = [], []
train_loss_list, val_loss_list = [], []

epochs = 70
for epoch in range(1, epochs + 1):
    train_acc, train_loss = train(model, device, train_loader, optimizer, epoch)
    Val_acc, Val_loss = Validation(model, device, val_loader, epoch)

    train_acc_list.append(train_acc)
    val_acc_list.append(Val_acc)
    train_loss_list.append(train_loss)
    val_loss_list.append(Val_loss)

    # Drive both the LR schedule and early stopping from the validation loss.
    # Monitoring the training loss would keep training (and keep the learning
    # rate high) while the model is merely over-fitting the training set.
    scheduler.step(Val_loss)
    early_stopping(Val_loss)
    if early_stopping.early_stop:
        print("Early stopping triggered. Stopping training.")
        break



In [None]:
# Save the per-epoch training/validation curves and the trained weights.
# Create the output directory first so that a missing folder cannot make the
# save fail after a complete training run.
os.makedirs('./Final_models/RST', exist_ok=True)

np.save('./Final_models/RST/train_loss_RST_{}.npy'.format(option), np.array(train_loss_list))
np.save('./Final_models/RST/train_accuracy_RST_{}.npy'.format(option), np.array(train_acc_list))
np.save('./Final_models/RST/val_loss_RST_{}.npy'.format(option), np.array(val_loss_list))
np.save('./Final_models/RST/val_accuracy_RST_{}.npy'.format(option), np.array(val_acc_list))
torch.save(model.state_dict(), "./Final_models/RST/transformer_RST_{}.pth".format(option))

In [None]:
# Rebuild the model and load the saved weights so the evaluation cells below
# work from the checkpoint on disk. The hyperparameters must be the same as
# the ones used when the checkpoint was trained.

# Sparsemax hyperparameter
alpha = 2 # must be greater than 1

# Setup hyperparameter
d_model = 64
nhead = 4
num_encoder_layers = 2
dim_feedforward = 64

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Initialize the model
model = TimeseriesTransformer(num_features, out_features, d_model, nhead, num_encoder_layers, dim_feedforward, alpha).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Load model weights.
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU machine also loads on a CPU-only machine.
state_dict = torch.load(
    "./Final_models/RST/transformer_RST_{}.pth".format(option),
    map_location=device,
    weights_only=True,
)
model.load_state_dict(state_dict)

In [None]:
# Evaluate the model on the complete test set.
# Every batch of test_loader is processed (not just the first one) and the
# results are concatenated, so `output` and `target` cover all test samples.
# Later cells (confusion matrix, classification report) read these two names.
model.eval()
test_loss = 0
correct = 0
accuracy_list = []
batch_outputs, batch_targets = [], []
with torch.no_grad():
    for data, target in test_loader:
        data = data.to(device).permute(1, 0, 2)
        target = target.to(device, dtype=torch.long)
        batch_outputs.append(model(data))
        batch_targets.append(target)

    if not batch_outputs:
        raise RuntimeError("test_loader yielded no batches")

    # Stack the per-batch results along the sample axis.
    output = torch.cat(batch_outputs, dim=0)
    target = torch.cat(batch_targets, dim=0)

    # The loss is evaluated on all samples at once, which equals the
    # single-batch loss when the loader yields exactly one batch.
    test_loss += criterion(output, target).item()
    pred = output.argmax(dim=1, keepdim=True)
    correct = pred.eq(target.view_as(pred)).sum().item()
    accuracy = correct / target.shape[0]
    accuracy_list.append(accuracy)

    print('Test set: Loss: {:.6f}, Accuracy: {:.3f}%, Correct: [{:.0f}/{:.0f}]\n'.format(
        test_loss, accuracy*100, correct, target.shape[0]))

In [None]:
# Display name of each class; position i in the list labels class index i.
class_names = ['No Attack','Attack_Bus_15','Attack_Bus_18','Attack_Bus_19',
               'Attack_Bus_20','Attack_Bus_21', 'Attack_Bus_23',
               'Attack_Bus_24','Attack_Bus_26','Attack_Bus_29',
               'Attack_Bus_30']

# Pass the full label set explicitly so the matrix always has one row/column
# per class, even when a class never occurs in the targets or predictions.
# Without it the matrix shrinks and the tick labels no longer line up.
cm = confusion_matrix(
    target.cpu().numpy(),
    output.argmax(dim=1).cpu().numpy(),
    labels=np.arange(len(class_names)),
)

#plot confusion matrix
# Heatmap cells are centred at i + 0.5, so the ticks are placed there.
tick_positions = np.arange(len(class_names)) + 0.5

plt.figure(figsize=(15, 12))
sns.heatmap(cm, annot=True, fmt='g')
plt.xlabel('Predicted')
plt.ylabel('Truth')
plt.title('Confusion matrix for RST Transformer')
plt.xticks(ticks=tick_positions, labels=class_names)
plt.yticks(ticks=tick_positions, labels=class_names)
plt.savefig('Final_models/RST_confusion_matrix_{}.png'.format(option), dpi=500, bbox_inches='tight')
plt.show()

In [None]:
# Per-class precision / recall / F1 for the test predictions.
# zero_division=0 reports 0 instead of warning for undefined scores.
report_truth = target.cpu().numpy()
report_pred = output.argmax(dim=1, keepdim=True).cpu().numpy()
report_text = classification_report(report_truth, report_pred, zero_division=0)
print(report_text)

In [None]:
# One-vs-rest counts per class, derived from the confusion matrix
# (rows = true class, columns = predicted class).
tp = np.diag(cm)
fp = cm.sum(axis=0) - tp     # predicted as the class, but belonging to another one
fn = cm.sum(axis=1) - tp     # belonging to the class, but predicted as another one
tn = cm.sum() - (fp + fn + tp)

# False positive rate, printed as a percentage per class
fpr = fp/(fp + tn)
fpr_percent = fpr * 100
print("False Positive Rate\n No Attack: {:.3f}%, Bus15: {:.3f}%, Bus18: {:.3f}%, Bus19: {:.3f}%, Bus20: {:.3f}%, Bus21: {:.3f}%, "
"Bus23: {:.3f}%, Bus24: {:.3f}%, Bus26: {:.3f}%, Bus29: {:.3f}%,Bus30: {:.3f}%".format(*fpr_percent))

# False negative rate, printed as a percentage per class
fnr = fn/(fn + tp)
fnr_percent = fnr * 100
print("False Negative Rate\n No Attack: {:.3f}%, Bus15: {:.3f}%, Bus18: {:.3f}%, Bus19: {:.3f}%, Bus20: {:.3f}%, Bus21: {:.3f}%, "
"Bus23: {:.3f}%, Bus24: {:.3f}%, Bus26: {:.3f}%, Bus29: {:.3f}%,Bus30: {:.3f}%".format(*fnr_percent))

In [None]:
# Plot the average training and validation accuracy of every epoch
fig, ax = plt.subplots()
ax.grid()
ax.plot(train_acc_list, label=r"Training accuracy")
ax.plot(val_acc_list, label=r"Validation accuracy")
ax.set_xlabel(r"Epoch $\longrightarrow$")
ax.set_ylabel(r"Accuracy $\longrightarrow$")
ax.legend(loc='best')
ax.set_title(r"Average accuracy per epoch - RST");

In [None]:
# Plot the average training and validation loss of every epoch
fig, ax = plt.subplots()
ax.grid()
ax.plot(train_loss_list, label=r"Training loss")
ax.plot(val_loss_list, label=r"Validation loss")
ax.set_xlabel(r"Epoch $\longrightarrow$")
ax.set_ylabel(r"Loss $\longrightarrow$")
ax.legend(loc='best')
ax.set_title(r"Average loss per epoch - RST");