In [7]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score

import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader


In [8]:
csv_path = "../data/UNSW-NB15/UNSW_NB15_training-set.csv"
df = pd.read_csv(csv_path)
df.head()


Unnamed: 0,id,dur,proto,service,state,spkts,dpkts,sbytes,dbytes,rate,...,ct_dst_sport_ltm,ct_dst_src_ltm,is_ftp_login,ct_ftp_cmd,ct_flw_http_mthd,ct_src_ltm,ct_srv_dst,is_sm_ips_ports,attack_cat,label
0,1,1.1e-05,udp,-,INT,2,0,496,0,90909.0902,...,1,2,0,0,0,1,2,0,Normal,0
1,2,8e-06,udp,-,INT,2,0,1762,0,125000.0003,...,1,2,0,0,0,1,2,0,Normal,0
2,3,5e-06,udp,-,INT,2,0,1068,0,200000.0051,...,1,3,0,0,0,1,3,0,Normal,0
3,4,6e-06,udp,-,INT,2,0,900,0,166666.6608,...,1,3,0,0,0,2,3,0,Normal,0
4,5,1e-05,udp,-,INT,2,0,2126,0,100000.0025,...,1,3,0,0,0,2,3,0,Normal,0


In [9]:
feature_cols = [
    "dur",
    "spkts",
    "dpkts",
    "sbytes",
    "dbytes",
    "rate",
    "sttl",
    "dttl",
    "sload",
    "dload",
    "sloss",
    "dloss",
    "sinpkt",
    "dinpkt",
    "smean",
    "dmean",
    "ct_srv_src",
    "ct_state_ttl",
    "ct_dst_ltm",
    "ct_src_dport_ltm"
]

label_col = "label"

X = df[feature_cols].astype("float32").values
y = df[label_col].astype("int64").values

X.shape, y.shape

((82332, 20), (82332,))

In [10]:
X_train_np, X_val_np, y_train_np, y_val_np = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y
)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_np)
X_val_scaled = scaler.transform(X_val_np)

X_train_scaled.shape, X_val_scaled.shape


((65865, 20), (16467, 20))

In [11]:
X_train_tensor = torch.from_numpy(X_train_scaled).float()
y_train_tensor = torch.from_numpy(y_train_np).float()

X_val_tensor = torch.from_numpy(X_val_scaled).float()
y_val_tensor = torch.from_numpy(y_val_np).float()

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)

batch_size = 256

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)


In [12]:
class TranWallNet(nn.Module):
    def __init__(self, input_dim, d_model=64, nhead=8, num_layers=2, dim_feedforward=128, dropout=0.1):
        super().__init__()
        self.input_proj = nn.Linear(input_dim, d_model)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            batch_first=True
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.cls_head = nn.Sequential(
            nn.Linear(d_model, 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )

    def forward(self, x):
        x = self.input_proj(x)
        x = x.unsqueeze(1)
        x = self.transformer(x)
        x = x[:, 0, :]
        logits = self.cls_head(x)
        return logits.squeeze(1)


In [13]:
input_dim = X_train_tensor.shape[1]

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

model = TranWallNet(input_dim=input_dim).to(device)

criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)


In [15]:
num_epochs = 25
best_f1 = 0
patience = 5
patience_counter = 0
for epoch in range(1, num_epochs + 1):
    model.train()
    train_losses = []
    for xb, yb in train_loader:
        xb = xb.to(device)
        yb = yb.to(device)

        optimizer.zero_grad()
        logits = model(xb)
        loss = criterion(logits, yb)
        loss.backward()
        optimizer.step()
        train_losses.append(loss.item())

    model.eval()
    val_losses = []
    all_logits = []
    all_labels = []
    with torch.no_grad():
        for xb, yb in val_loader:
            xb = xb.to(device)
            yb = yb.to(device)
            logits = model(xb)
            loss = criterion(logits, yb)
            val_losses.append(loss.item())
            all_logits.append(logits.cpu())
            all_labels.append(yb.cpu())

    all_logits = torch.cat(all_logits)
    all_labels = torch.cat(all_labels)
    probs = torch.sigmoid(all_logits)
    preds = (probs >= 0.5).long()

    acc = accuracy_score(all_labels.numpy(), preds.numpy())
    f1 = f1_score(all_labels.numpy(), preds.numpy())

    if f1 > best_f1:
        best_f1 = f1
        patience_counter = 0  # reset counter when improvement occurs
    else:
        patience_counter += 1

    if patience_counter == patience:
        print("Early stopping triggered!")
        break


    print(f"Epoch {epoch}: train_loss={np.mean(train_losses):.4f} val_loss={np.mean(val_losses):.4f} acc={acc:.4f} f1={f1:.4f}")


Epoch 1: train_loss=0.1839 val_loss=0.1808 acc=0.9126 f1=0.9168
Epoch 2: train_loss=0.1843 val_loss=0.1752 acc=0.9136 f1=0.9190
Epoch 3: train_loss=0.1808 val_loss=0.1788 acc=0.9122 f1=0.9163
Epoch 4: train_loss=0.1802 val_loss=0.1825 acc=0.9076 f1=0.9120
Epoch 5: train_loss=0.1808 val_loss=0.1822 acc=0.9093 f1=0.9128
Epoch 6: train_loss=0.1805 val_loss=0.1752 acc=0.9127 f1=0.9178
Epoch 7: train_loss=0.1792 val_loss=0.1723 acc=0.9172 f1=0.9217
Epoch 8: train_loss=0.1775 val_loss=0.1836 acc=0.9083 f1=0.9140
Epoch 9: train_loss=0.1805 val_loss=0.1775 acc=0.9125 f1=0.9168
Epoch 10: train_loss=0.1781 val_loss=0.1731 acc=0.9144 f1=0.9182
Epoch 11: train_loss=0.1767 val_loss=0.1728 acc=0.9142 f1=0.9178
Early stopping triggered!


In [16]:
model.eval()
all_logits = []
all_labels = []

with torch.no_grad():
    for xb, yb in val_loader:
        xb = xb.to(device)
        yb = yb.to(device)
        logits = model(xb)
        all_logits.append(logits.cpu())
        all_labels.append(yb.cpu())

all_logits = torch.cat(all_logits)
all_labels = torch.cat(all_labels)
probs = torch.sigmoid(all_logits)
preds = (probs >= 0.5).long()

val_acc = accuracy_score(all_labels.numpy(), preds.numpy())
val_f1 = f1_score(all_labels.numpy(), preds.numpy())

val_acc, val_f1


(0.9167425760612133, 0.9204248650531082)

In [18]:
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode="max", factor=0.5, patience=2
)

best_f1 = -float("inf")
best_state = None
patience = 5
pat = 0

for epoch in range(1, 30):
    model.train()
    running_loss = 0.0
    for xb, yb in train_loader:
        xb = xb.to(device)
        yb = yb.to(device)

        optimizer.zero_grad()
        logits = model(xb)
        loss = criterion(logits, yb)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        running_loss += loss.item() * xb.size(0)

    epoch_loss = running_loss / len(train_loader.dataset)

    model.eval()
    all_probs = []
    all_targets = []
    with torch.no_grad():
        for xb, yb in val_loader:
            xb = xb.to(device)
            yb = yb.to(device)
            logits = model(xb)
            probs = torch.sigmoid(logits).cpu().numpy()
            all_probs.append(probs)
            all_targets.append(yb.cpu().numpy())
    all_probs = np.concatenate(all_probs)
    all_targets = np.concatenate(all_targets)

    preds = (all_probs >= 0.5).astype(int)
    acc = accuracy_score(all_targets, preds)
    val_f1 = f1_score(all_targets, preds)

    print(f"Epoch {epoch} - loss: {epoch_loss:.4f} acc: {acc:.4f}  val_f1: {val_f1:.4f}")

    if val_f1 > best_f1:
        best_f1 = val_f1
        best_state = model.state_dict()
        pat = 0
    else:
        pat += 1
        if pat >= patience:
            print("Early stopping.")
            break

if best_state is None:
    best_state = model.state_dict()

model.load_state_dict(best_state)

Epoch 1 - loss: 0.1732 acc: 0.9165  val_f1: 0.9205
Epoch 2 - loss: 0.1719 acc: 0.9099  val_f1: 0.9162
Epoch 3 - loss: 0.1719 acc: 0.9166  val_f1: 0.9231
Epoch 4 - loss: 0.1706 acc: 0.9168  val_f1: 0.9228
Epoch 5 - loss: 0.1695 acc: 0.9158  val_f1: 0.9199
Epoch 6 - loss: 0.1696 acc: 0.9172  val_f1: 0.9211
Epoch 7 - loss: 0.1693 acc: 0.9181  val_f1: 0.9220
Epoch 8 - loss: 0.1673 acc: 0.9214  val_f1: 0.9257
Epoch 9 - loss: 0.1683 acc: 0.9187  val_f1: 0.9227
Epoch 10 - loss: 0.1678 acc: 0.9192  val_f1: 0.9236
Epoch 11 - loss: 0.1673 acc: 0.9180  val_f1: 0.9230
Epoch 12 - loss: 0.1663 acc: 0.9186  val_f1: 0.9239
Epoch 13 - loss: 0.1670 acc: 0.9143  val_f1: 0.9196
Early stopping.


<All keys matched successfully>

In [19]:
save_path = "model/tranwall.pt"

os.makedirs("model", exist_ok=True)

checkpoint = {
    "model_state_dict": model.state_dict(),
    "input_dim": input_dim,
    "scaler_mean": scaler.mean_,
    "scaler_scale": scaler.scale_
}

torch.save(checkpoint, save_path)

save_path


'model/tranwall.pt'