In [4]:
import platform

import torch

# Record the runtime this notebook starts from: the preinstalled PyTorch build,
# the CUDA version that build was compiled against, and the host OS/kernel.
print(torch.__version__)
print(torch.version.cuda)
print(platform.system(), platform.release())

2.9.0+cu126
12.6
Linux 6.6.105+


In [8]:
!pip uninstall -y torch torchvision torchaudio

[0m

In [1]:
!pip install torch==2.3.1+cu118 torchvision==0.18.1+cu118 torchaudio==2.3.1 --index-url https://download.pytorch.org/whl/cu118


Looking in indexes: https://download.pytorch.org/whl/cu118


In [2]:
!pip install torch-scatter -f https://data.pyg.org/whl/torch-2.3.1+cu118.html
!pip install torch-sparse  -f https://data.pyg.org/whl/torch-2.3.1+cu118.html
!pip install torch-cluster -f https://data.pyg.org/whl/torch-2.3.1+cu118.html
!pip install torch-spline-conv -f https://data.pyg.org/whl/torch-2.3.1+cu118.html
!pip install torch-geometric

Looking in links: https://data.pyg.org/whl/torch-2.3.1+cu118.html
Collecting torch-scatter
  Downloading https://data.pyg.org/whl/torch-2.3.0%2Bcu118/torch_scatter-2.1.2%2Bpt23cu118-cp312-cp312-linux_x86_64.whl (10.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.2/10.2 MB[0m [31m45.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch-scatter
Successfully installed torch-scatter-2.1.2+pt23cu118
Looking in links: https://data.pyg.org/whl/torch-2.3.1+cu118.html
Collecting torch-sparse
  Downloading https://data.pyg.org/whl/torch-2.3.0%2Bcu118/torch_sparse-0.6.18%2Bpt23cu118-cp312-cp312-linux_x86_64.whl (5.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.0/5.0 MB[0m [31m25.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch-sparse
Successfully installed torch-sparse-0.6.18+pt23cu118
Looking in links: https://data.pyg.org/whl/torch-2.3.1+cu118.html
Collecting torch-cluster
  Downloading https://data.p

In [4]:
# src/gnn_model.py
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GCNConv, global_mean_pool
from torch_geometric.data import Data
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    roc_auc_score, precision_recall_curve, auc,
    accuracy_score, f1_score
)

# ============================= CONFIG =============================
# Directory containing train_processed.csv / test_processed.csv.
# Defaults to /content; set the DATA_PROCESSED environment variable to point
# elsewhere without editing the notebook.
DATA_PROCESSED = os.environ.get("DATA_PROCESSED", "/content")
RESULTS_DIR = "results"
MODELS_DIR = "models"
os.makedirs(RESULTS_DIR, exist_ok=True)
os.makedirs(MODELS_DIR, exist_ok=True)

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {DEVICE}")

WINDOW_SIZE = 10          # rows per sliding window (one graph per window)
STRIDE = 5                # rows between consecutive window starts
BATCH_SIZE = 16           # graphs per mini-batch
EPOCHS = 80               # full passes over the training graphs
MAX_ATTACK_RATIO = 0.30   # test windows at or below this attack fraction count as "clean"

# ============================= GRAPH BUILDER =============================
def build_graphs_with_ratio(df: pd.DataFrame, window_size=None, stride=None):
    """Slice ``df`` into sliding windows and convert each window into a graph.

    Every row of a window becomes one directed edge from ``<service>_src`` to
    ``<service>_dst``. Edge attributes are the row's values for the feature
    columns listed below. Each node gets the mean of those columns over the
    rows that touch it, plus the number of touching rows and a 0/1 flag for
    whether the node name contains ``'http'``.

    Args:
        df: frame with a ``service`` column, a ``label_binary`` column and the
            eight numeric feature columns listed in ``feature_cols``.
        window_size: rows per window; defaults to the module-level ``WINDOW_SIZE``.
        stride: rows between consecutive window starts; defaults to the
            module-level ``STRIDE``.

    Returns:
        A list of ``(graph, attack_ratio)`` tuples. ``graph.y`` is 1.0 when more
        than half of the window's rows are labelled ``"attack"``, else 0.0;
        ``attack_ratio`` is that fraction as a float. The list is empty when
        ``df`` has fewer than ``window_size`` rows.

    Raises:
        ValueError: if ``window_size`` or ``stride`` is smaller than 1.
    """
    # Resolve defaults at call time so the CONFIG constants remain the default.
    window_size = WINDOW_SIZE if window_size is None else window_size
    stride = STRIDE if stride is None else stride
    if window_size < 1 or stride < 1:
        raise ValueError(
            f"window_size and stride must be >= 1 (got {window_size}, {stride})"
        )

    label_col = "label_binary"
    feature_cols = [
        'duration', 'src_bytes', 'dst_bytes', 'count',
        'same_srv_rate', 'diff_srv_rate',
        'serror_rate', 'rerror_rate'
    ]

    graphs = []
    # The range bound guarantees every slice holds exactly ``window_size`` rows,
    # so no minimum-length check is needed inside the loop.
    for start in range(0, len(df) - window_size + 1, stride):
        win = df.iloc[start:start + window_size]

        # Node identifiers (kept as Series aligned with ``win`` for masking).
        service = win['service'].astype(str)
        src_nodes = service + '_src'
        dst_nodes = service + '_dst'

        # Explicit ordering: source nodes first, then destination nodes, each in
        # order of first appearance (independent of DataFrame memory layout).
        nodes = pd.unique(
            np.concatenate([src_nodes.to_numpy(), dst_nodes.to_numpy()])
        )
        if len(nodes) < 2:
            continue

        node2idx = {n: i for i, n in enumerate(nodes)}

        # One edge per row, built directly in (2, num_edges) layout.
        edge_index = torch.tensor(
            [
                [node2idx[n] for n in src_nodes],
                [node2idx[n] for n in dst_nodes],
            ],
            dtype=torch.long,
        )
        edge_attr = torch.tensor(
            win[feature_cols].to_numpy(dtype=float), dtype=torch.float
        )

        # -------- Node features (aggregation) --------
        node_features = []
        for node in nodes:
            # Every node comes from at least one row of this window, so the
            # selection below is never empty.
            node_data = win[(src_nodes == node) | (dst_nodes == node)]

            feats = [float(node_data[c].mean()) for c in feature_cols]
            feats.append(float(len(node_data)))               # degree proxy
            feats.append(1.0 if 'http' in str(node) else 0.0)
            node_features.append(feats)

        x = torch.tensor(node_features, dtype=torch.float)

        attack_ratio = float((win[label_col] == "attack").mean())
        y = 1 if attack_ratio > 0.5 else 0

        graph = Data(
            x=x,
            edge_index=edge_index,
            edge_attr=edge_attr,
            y=torch.tensor([y], dtype=torch.float)
        )

        graphs.append((graph, attack_ratio))

    return graphs

# ============================= NODE SCALING FIX =============================
def scale_graph_node_features(graphs, scaler=None, fit=False):
    """Standardise the node-feature tensor ``x`` of every graph, in place.

    Args:
        graphs: sequence of graph objects exposing a 2-D tensor attribute ``x``.
        scaler: object with a ``transform`` method, used when ``fit`` is false.
            Ignored (and replaced) when ``fit`` is true.
        fit: when true, fit a new ``StandardScaler`` on the node features of all
            ``graphs`` stacked row-wise, then apply it.

    Returns:
        The scaler that was applied, so it can be reused on other graph sets.

    Raises:
        ValueError: if ``fit`` is true and ``graphs`` is empty, or if ``fit`` is
            false and no ``scaler`` is supplied.
    """
    if fit:
        if len(graphs) == 0:
            raise ValueError("Cannot fit a scaler on an empty list of graphs.")
        # Only build the stacked matrix when it is needed for fitting.
        stacked = torch.cat([g.x for g in graphs], dim=0).cpu().numpy()
        scaler = StandardScaler()
        scaler.fit(stacked)
    elif scaler is None:
        raise ValueError("A fitted scaler must be provided when fit=False.")

    for g in graphs:
        scaled = scaler.transform(g.x.cpu().numpy())
        # Keep the tensor on whatever device the original features lived on.
        g.x = torch.tensor(scaled, dtype=torch.float, device=g.x.device)

    return scaler

# ============================= MODEL =============================
class GNNAnomalyDetector(nn.Module):
    """Graph-level scorer: stacked GCN layers, mean pooling, two-layer MLP head.

    Args:
        input_dim: number of features per node.
        hidden: output width of every GCN layer; the head narrows to ``hidden // 2``.
        layers: number of GCN layers requested (the first layer is always created).
    """

    def __init__(self, input_dim, hidden=128, layers=3):
        super().__init__()
        # Input widths per layer: input_dim first, then ``hidden`` for the rest.
        in_widths = [input_dim] + [hidden] * (layers - 1)
        self.convs = nn.ModuleList([GCNConv(width, hidden) for width in in_widths])
        self.lin1 = nn.Linear(hidden, hidden // 2)
        self.lin2 = nn.Linear(hidden // 2, 1)
        self.relu = nn.ReLU()
        self.drop = nn.Dropout(0.4)

    def forward(self, data):
        """Return one raw logit per graph in the batch ``data``."""
        h, edge_index, batch = data.x, data.edge_index, data.batch

        # Message passing: conv -> ReLU -> dropout for every layer.
        for conv in self.convs:
            h = conv(h, edge_index)
            h = self.relu(h)
            h = self.drop(h)

        # Collapse node embeddings into one vector per graph, then score it.
        pooled = global_mean_pool(h, batch)
        head = self.drop(self.relu(self.lin1(pooled)))
        logits = self.lin2(head)
        return logits.squeeze(-1)

# ============================= TRAINING & EVAL =============================
def _score_graphs(model, loader):
    """Run ``model`` over ``loader`` without gradients.

    Returns two NumPy arrays: the sigmoid of the model output for every graph,
    and the ``y`` stored on every graph cast to int. The caller is responsible
    for putting the model in eval mode.
    """
    scores, labels = [], []
    with torch.no_grad():
        for batch in loader:
            batch = batch.to(DEVICE)
            scores.extend(torch.sigmoid(model(batch)).cpu().numpy())
            labels.extend(batch.y.cpu().numpy())
    return np.asarray(scores), np.asarray(labels).astype(int)


def train_gnn(seed=42):
    """Train the GNN on normal-only windows and evaluate it on test windows.

    Steps: load the processed CSVs, build graphs, standardise node features
    (scaler fitted on the training graphs only), train with BCE against an
    all-zero target, save the weights, derive a decision threshold from the
    95th percentile of the training scores, and print evaluation metrics.

    Args:
        seed: seed for the torch and NumPy global RNGs so weight initialisation
            and batch shuffling are repeatable; pass ``None`` to skip seeding.

    Returns:
        dict with keys ``roc_auc``, ``pr_auc``, ``accuracy``, ``f1``,
        ``precision_at_10pct`` and ``threshold``. The two AUC values are NaN
        when the evaluation set contains only one class.

    Raises:
        ValueError: if no training graphs or no evaluation graphs were built.

    NOTE(review): every training target is 0, so the BCE loss is minimised by
    driving all logits strongly negative regardless of the input. The model is
    therefore not forced to learn what "normal" looks like, and any separation
    at test time is incidental. Consider a supervised objective on labelled
    windows or a reconstruction-based one-class objective.
    """
    if seed is not None:
        torch.manual_seed(seed)
        np.random.seed(seed)

    print("Loading processed data...")
    train_df = pd.read_csv(f"{DATA_PROCESSED}/train_processed.csv")
    test_df  = pd.read_csv(f"{DATA_PROCESSED}/test_processed.csv")

    # -------- Build graphs --------
    print("Building training graphs (normal only)...")
    train_graphs = [
        g for g, _ in build_graphs_with_ratio(
            train_df[train_df["label_binary"] == "normal"]
        )
    ]

    print("Building test graphs...")
    test_graphs_with_ratio = build_graphs_with_ratio(test_df)

    # Evaluation set: the first 100 low-attack windows plus the first 600
    # majority-attack windows, in file order. Windows in between are dropped.
    clean_windows = [g for g, r in test_graphs_with_ratio if r <= MAX_ATTACK_RATIO][:100]
    attack_windows = [g for g, _ in test_graphs_with_ratio if g.y.item() == 1][:600]
    final_test_graphs = clean_windows + attack_windows

    print(f"Train graphs: {len(train_graphs)}")
    print(f"Test graphs : {len(final_test_graphs)}")

    if not train_graphs:
        raise ValueError("No training graphs were built; check the training data.")
    if not final_test_graphs:
        raise ValueError("No evaluation graphs were selected; check the test data.")

    # -------- SCALE NODE FEATURES (FIX) --------
    # Fit on training graphs only, then reuse the same scaler for the test graphs.
    print("Scaling node features (graph-level)...")
    node_scaler = scale_graph_node_features(train_graphs, fit=True)
    scale_graph_node_features(final_test_graphs, scaler=node_scaler)

    # -------- DataLoaders --------
    train_loader = DataLoader(train_graphs, batch_size=BATCH_SIZE, shuffle=True)
    test_loader  = DataLoader(final_test_graphs, batch_size=BATCH_SIZE, shuffle=False)

    input_dim = train_graphs[0].x.shape[1]
    model = GNNAnomalyDetector(input_dim=input_dim).to(DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=5e-4, weight_decay=1e-5)
    criterion = nn.BCEWithLogitsLoss()

    # -------- TRAIN --------
    print("Starting GNN training...")
    for epoch in range(1, EPOCHS + 1):
        model.train()
        loss_sum = 0.0

        for data in train_loader:
            data = data.to(DEVICE)
            # All training windows are normal, so the target is 0 for every graph.
            targets = torch.zeros(data.num_graphs, device=DEVICE)

            logits = model(data)
            loss = criterion(logits, targets)

            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()

            loss_sum += loss.item()

        if epoch == 1 or epoch % 20 == 0:
            print(f"Epoch {epoch:3d} | Loss: {loss_sum / len(train_loader):.6f}")

    # Only the weights are persisted; the scaler and threshold are not saved here.
    torch.save(model.state_dict(), f"{MODELS_DIR}/gnn_detector.pth")
    print("Model saved.")

    # -------- EVAL --------
    model.eval()

    # Threshold = 95th percentile of scores on the (normal-only) training graphs.
    train_scores, _ = _score_graphs(model, train_loader)
    threshold = np.percentile(train_scores, 95)

    scores, labels = _score_graphs(model, test_loader)
    preds = (scores > threshold).astype(int)

    # Ranking metrics are undefined when only one class is present.
    if np.unique(labels).size == 2:
        roc = roc_auc_score(labels, scores)
        p, r, _ = precision_recall_curve(labels, scores)
        pr = auc(r, p)
    else:
        print("Warning: evaluation set contains a single class; AUC metrics are undefined.")
        roc = pr = float("nan")

    accuracy = accuracy_score(labels, preds)
    f1 = f1_score(labels, preds)

    # Fraction of true attacks among the top-scoring 10% (at least one graph).
    top_k = max(1, int(0.1 * len(scores)))
    precision_at_10 = np.mean(labels[np.argsort(scores)[-top_k:]])

    print("\n" + "="*60)
    print("GNN Anomaly Detector Results")
    print("="*60)
    print(f"ROC-AUC       : {roc:.6f}")
    print(f"PR-AUC        : {pr:.6f}")
    print(f"Accuracy      : {accuracy:.6f}")
    print(f"F1 Score      : {f1:.6f}")
    print(f"Precision@10% : {precision_at_10:.6f}")
    print(f"Threshold     : {threshold:.6f}")
    print("="*60)

    print("GNN training and evaluation completed successfully!")

    return {
        "roc_auc": float(roc),
        "pr_auc": float(pr),
        "accuracy": float(accuracy),
        "f1": float(f1),
        "precision_at_10pct": float(precision_at_10),
        "threshold": float(threshold),
    }

# ============================= RUN =============================
# Run the full train/evaluate pipeline when this code is executed directly
# (as a notebook cell or a script) rather than imported as a module.
if __name__ == "__main__":
    train_gnn()

Using device: cuda
Loading processed data...
Building training graphs (normal only)...
Building test graphs...
Train graphs: 13467
Test graphs : 700
Scaling node features (graph-level)...
Starting GNN training...
Epoch   1 | Loss: 0.017876
Epoch  20 | Loss: 0.000003
Epoch  40 | Loss: 0.000002
Epoch  60 | Loss: 0.000002
Epoch  80 | Loss: 0.000002
Model saved.

GNN Anomaly Detector Results
ROC-AUC       : 0.775783
PR-AUC        : 0.953251
Accuracy      : 0.677143
F1 Score      : 0.777120
Precision@10% : 0.985714
Threshold     : 0.000002
GNN training and evaluation completed successfully!
