In [1]:
import os
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", DEVICE)

X_PATH = "processed/preprocessed_X_seq.npy"
Y_PATH = "processed/preprocessed_y_seq.npy"

BATCH_SIZE = 32
EPOCHS = 15
LR = 1e-3
SEQ_LEN = 32

os.makedirs("results", exist_ok=True)


Using device: cuda


In [2]:
X_seq = np.load(X_PATH)
y_seq = np.load(Y_PATH)

print("X:", X_seq.shape)
print("y:", y_seq.shape)

def create_splits(X, y, train_ratio=0.7, val_ratio=0.15):
    N = X.shape[0]
    n_train = int(N * train_ratio)
    n_val = int(N * val_ratio)
    return (
        (X[:n_train], y[:n_train]),
        (X[n_train:n_train+n_val], y[n_train:n_train+n_val]),
        (X[n_train+n_val:], y[n_train+n_val:])
    )

(X_train, y_train), (X_val, y_val), (X_test, y_test) = create_splits(X_seq, y_seq)
print(len(X_train), len(X_val), len(X_test))


X: (78773, 32, 52)
y: (78773, 32)
55141 11815 11817


In [3]:
class FlowDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y
    def __len__(self):
        return len(self.X)
    def __getitem__(self, idx):
        return torch.tensor(self.X[idx], dtype=torch.float32), torch.tensor(self.y[idx], dtype=torch.long)

train_loader = DataLoader(FlowDataset(X_train, y_train), batch_size=BATCH_SIZE, shuffle=True)
val_loader   = DataLoader(FlowDataset(X_val, y_val),     batch_size=BATCH_SIZE, shuffle=False)
test_loader  = DataLoader(FlowDataset(X_test, y_test),   batch_size=BATCH_SIZE, shuffle=False)


In [4]:
class SimpleTransformerIDS(nn.Module):
    def __init__(self, in_dim, emb_dim=128, n_heads=4, n_layers=2, num_classes=2, max_seq_len=32):
        super().__init__()
        self.embed = nn.Sequential(
            nn.Linear(in_dim, 128),
            nn.ReLU(),
            nn.Linear(128, emb_dim)
        )
        self.pos = nn.Embedding(max_seq_len, emb_dim)
        encoder = nn.TransformerEncoderLayer(d_model=emb_dim, nhead=n_heads,
                                             dim_feedforward=256, dropout=0.1,
                                             batch_first=True)
        self.encoder = nn.TransformerEncoder(encoder, num_layers=n_layers)
        self.classifier = nn.Linear(emb_dim, num_classes)

    def forward(self, x):
        B, L, _ = x.shape
        x = self.embed(x)
        pos = torch.arange(L, device=x.device).unsqueeze(0).expand(B, L)
        x = x + self.pos(pos)
        x = self.encoder(x)
        return self.classifier(x)


In [5]:
flat = y_train.reshape(-1)
counts = np.bincount(flat, minlength=2).astype(float)
weights = counts.sum() / (2 * counts + 1e-8)
class_weights = torch.tensor(weights, device=DEVICE, dtype=torch.float32)
print("Class weights:", class_weights)


Class weights: tensor([0.5757, 3.8037], device='cuda:0')


In [6]:
model = SimpleTransformerIDS(in_dim=X_seq.shape[-1]).to(DEVICE)
criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)

best_val_acc = -1
best_model = "results/transformer_best.pth"

def acc_fn(logits, y):
    preds = logits.argmax(dim=-1)
    return (preds == y).float().mean().item()

for epoch in range(1, EPOCHS+1):
    model.train()
    tl, ta = 0, 0
    for xb, yb in train_loader:
        xb, yb = xb.to(DEVICE), yb.to(DEVICE)
        logits = model(xb)
        loss = criterion(logits.view(-1,2), yb.view(-1))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        tl += loss.item()
        ta += acc_fn(logits, yb)
    tl /= len(train_loader); ta /= len(train_loader)

    model.eval()
    vl, va = 0, 0
    with torch.no_grad():
        for xb, yb in val_loader:
            xb, yb = xb.to(DEVICE), yb.to(DEVICE)
            logits = model(xb)
            loss = criterion(logits.view(-1,2), yb.view(-1))
            vl += loss.item()
            va += acc_fn(logits, yb)
    vl /= len(val_loader); va /= len(val_loader)

    if va > best_val_acc:
        best_val_acc = va
        torch.save(model.state_dict(), best_model)
        print(f"ðŸ’¾ best model saved at epoch {epoch} (Val Acc={va:.4f})")

    print(f"Epoch {epoch}/{EPOCHS} | Train Loss={tl:.4f} Acc={ta:.4f} | Val Loss={vl:.4f} Acc={va:.4f}")

print("Training complete.")


ðŸ’¾ best model saved at epoch 1 (Val Acc=0.7683)
Epoch 1/15 | Train Loss=0.0480 Acc=0.9867 | Val Loss=1.4317 Acc=0.7683
Epoch 2/15 | Train Loss=0.0292 Acc=0.9921 | Val Loss=1.9517 Acc=0.6787
ðŸ’¾ best model saved at epoch 3 (Val Acc=0.8042)
Epoch 3/15 | Train Loss=0.0242 Acc=0.9933 | Val Loss=1.2580 Acc=0.8042
Epoch 4/15 | Train Loss=0.0230 Acc=0.9935 | Val Loss=2.0830 Acc=0.6350
Epoch 5/15 | Train Loss=0.0246 Acc=0.9932 | Val Loss=1.8103 Acc=0.7241
Epoch 6/15 | Train Loss=0.0256 Acc=0.9928 | Val Loss=1.7197 Acc=0.7350
Epoch 7/15 | Train Loss=0.0249 Acc=0.9927 | Val Loss=1.3836 Acc=0.7601
Epoch 8/15 | Train Loss=0.0253 Acc=0.9925 | Val Loss=2.0902 Acc=0.7336
Epoch 9/15 | Train Loss=0.0274 Acc=0.9927 | Val Loss=1.8016 Acc=0.7304
Epoch 10/15 | Train Loss=0.0240 Acc=0.9934 | Val Loss=3.5309 Acc=0.5696
Epoch 11/15 | Train Loss=0.0252 Acc=0.9930 | Val Loss=2.0854 Acc=0.7076
Epoch 12/15 | Train Loss=0.0466 Acc=0.9899 | Val Loss=2.1567 Acc=0.6846
Epoch 13/15 | Train Loss=0.0295 Acc=0.9915 | 

In [7]:
model.load_state_dict(torch.load(best_model))
model.eval()

y_true = []
y_pred = []

with torch.no_grad():
    for xb, yb in test_loader:
        xb = xb.to(DEVICE)
        logits = model(xb)
        preds = logits.argmax(dim=-1)
        y_true.extend(yb.numpy().reshape(-1))
        y_pred.extend(preds.cpu().numpy().reshape(-1))

acc  = accuracy_score(y_true, y_pred)
prec = precision_score(y_true, y_pred, zero_division=0)
rec  = recall_score(y_true, y_pred, zero_division=0)
f1   = f1_score(y_true, y_pred, zero_division=0)

print("Transformer Test Metrics:")
print("Accuracy :", acc)
print("Precision:", prec)
print("Recall   :", rec)
print("F1-score :", f1)

import pandas as pd
df = pd.DataFrame([[ "Transformer", acc, prec, rec, f1 ]],
                  columns=["Model","Accuracy","Precision","Recall","F1"])
df.to_csv("results/Transformer.csv", index=False)
df


Transformer Test Metrics:
Accuracy : 0.9276386773292714
Precision: 0.930288637194008
Recall   : 0.5000687393208555
F1-score : 0.6504783680559991


Unnamed: 0,Model,Accuracy,Precision,Recall,F1
0,Transformer,0.927639,0.930289,0.500069,0.650478
