# 1. Install dependencies


In [1]:
!pip install torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric -f https://data.pyg.org/whl/torch-2.8.0+cpu.html


Looking in links: https://data.pyg.org/whl/torch-2.8.0+cpu.html
Collecting torch-scatter
  Downloading https://data.pyg.org/whl/torch-2.8.0%2Bcpu/torch_scatter-2.1.2%2Bpt28cpu-cp312-cp312-linux_x86_64.whl (645 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m645.6/645.6 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch-sparse
  Downloading https://data.pyg.org/whl/torch-2.8.0%2Bcpu/torch_sparse-0.6.18%2Bpt28cpu-cp312-cp312-linux_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m21.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch-cluster
  Downloading https://data.pyg.org/whl/torch-2.8.0%2Bcpu/torch_cluster-1.6.3%2Bpt28cpu-cp312-cp312-linux_x86_64.whl (749 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m749.6/749.6 kB[0m [31m17.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch-spline-conv
  Downloading https://data.pyg.org/whl/torch-2.8.0%2Bcpu/torch_spline_conv-1.2.

# 2. Import packages

In [2]:
import os
os.chdir("/content/drive/MyDrive/Colab Notebooks/DLI")

import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import softmax
from torch_geometric.data import Data
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.feature_selection import mutual_info_classif
from nsl_preprocessing import UniversalNSLKDDPreprocessor
from tqdm import tqdm


# 3. Data Preprocessing

In [3]:
class PaperStylePreprocessor:
    def __init__(self, target_samples=10000, keep_most_informative=0.8):
        self.target_samples = target_samples
        self.keep_most_informative = keep_most_informative

    def preprocess(self, X_train, y_train, X_test, y_test):
        # Convert to numpy
        if hasattr(X_train, 'values'):
            X_train = X_train.values
        if hasattr(y_train, 'values'):
            y_train = y_train.values
        if hasattr(X_test, 'values'):
            X_test = X_test.values
        if hasattr(y_test, 'values'):
            y_test = y_test.values

        # Downsample training data
        if len(X_train) > self.target_samples:
            unique_classes, counts = np.unique(y_train, return_counts=True)
            sample_ratio = self.target_samples / len(X_train)

            indices = []
            for cls in unique_classes:
                cls_indices = np.where(y_train == cls)[0]
                n_samples = max(100, int(len(cls_indices) * sample_ratio))
                if n_samples <= len(cls_indices):
                    selected = np.random.choice(cls_indices, n_samples, replace=False)
                else:
                    selected = cls_indices
                indices.extend(selected)

            indices = np.array(indices)
            X_train = X_train[indices]
            y_train = y_train[indices]

        # Keep most informative features
        mi_scores = mutual_info_classif(X_train, y_train, random_state=42)
        n_keep = int(len(mi_scores) * self.keep_most_informative)
        most_informative_idx = np.argsort(mi_scores)[-n_keep:]

        X_train = X_train[:, most_informative_idx]
        X_test = X_test[:, most_informative_idx]

        return X_train, y_train, X_test, y_test

# Load and preprocess data
print("=== STEP 1: LOADING AND PREPROCESSING DATA ===")
train_path = "KDDTrain+.txt"
test_path = "KDDTest+.txt"

preprocessor = UniversalNSLKDDPreprocessor(
    n_features=30,
    sampling_strategy='none',
    scaler_type='robust'
)
X_train, y_train, X_test, y_test = preprocessor.fit_transform(train_path, test_path)

# Then: Paper-style preprocessing
paper_preprocessor = PaperStylePreprocessor()
X_train, y_train, X_test, y_test = paper_preprocessor.preprocess(X_train, y_train, X_test, y_test)

print(f"Final data shapes: Train {X_train.shape}, Test {X_test.shape}")
print(f"Class balance: Normal {np.sum(y_train==0)}, Attack {np.sum(y_train==1)}")

=== STEP 1: LOADING AND PREPROCESSING DATA ===
=== Universal NSL-KDD Preprocessing ===
1. Loading and cleaning data...
   Train: (125973, 40), Test: (22544, 40)
2. Engineering features...
   After engineering: (125973, 54)
3. Handling outliers...
4. Encoding categorical features...
5. Scaling features...
6. Selecting top 30 features...
Selected 30 features using consensus:
   1. service_freq
   2. flag_freq
   3. su_attempted
   4. protocol_type
   5. service
   6. flag
   7. src_bytes
   8. dst_bytes
   9. land
  10. wrong_fragment
  11. urgent
  12. hot
  13. num_failed_logins
  14. logged_in
  15. num_compromised
  16. root_shell
  17. duration
  18. num_root
  19. num_file_creations
  20. num_shells
  21. num_access_files
  22. is_guest_login
  23. count
  24. srv_count
  25. serror_rate
  26. srv_serror_rate
  27. rerror_rate
  28. srv_rerror_rate
  29. same_srv_rate
  30. diff_srv_rate
   Final: Train (125973, 30)
          Test (22544, 30)
   Class balance: Normal 67343, Attack 

# 4. Graph Building

In [4]:
def build_graph_fast(X, y, k=15, metric="euclidean"):
    if hasattr(X, "values"):
        X = X.values
    if hasattr(y, "values"):
        y = y.values

    n_samples = len(X)
    k = min(k, n_samples - 1)

    print(f"Building kNN graph (k={k}) for {n_samples} samples...")
    knn = NearestNeighbors(n_neighbors=k, metric=metric, n_jobs=-1)
    knn.fit(X)
    dists, indices = knn.kneighbors(X)

    # Create edges
    src = np.repeat(np.arange(n_samples), k)
    dst = indices.flatten()

    # Filter valid edges
    valid_mask = (src < n_samples) & (dst < n_samples) & (src != dst)
    src = src[valid_mask]
    dst = dst[valid_mask]

    edge_index = torch.tensor(np.vstack([src, dst]), dtype=torch.long)

    # RBF edge weights
    valid_dists = dists.flatten()[valid_mask]
    sigma = np.median(valid_dists[valid_dists > 0]) if len(valid_dists[valid_dists > 0]) > 0 else 1.0
    rbf_weights = np.exp(-valid_dists**2 / (2 * sigma**2))
    edge_weight = torch.tensor(rbf_weights, dtype=torch.float)

    x = torch.tensor(X, dtype=torch.float)
    y = torch.tensor(y, dtype=torch.long)

    print(f"Graph built: {x.shape[0]} nodes, {edge_index.shape[1]} edges")
    return Data(x=x, edge_index=edge_index, edge_attr=edge_weight, y=y)

def apply_graph_augmentation(graph, edge_perturb_rate=0.1, feature_mask_rate=0.2):
    num_nodes = graph.x.shape[0]

    # Edge perturbation
    num_edges = graph.edge_index.shape[1]
    num_perturb = int(num_edges * edge_perturb_rate)

    if num_perturb > 0:
        perturb_idx = torch.randperm(num_edges)[:num_perturb]
        perturb_edges = graph.edge_index[:, perturb_idx]

        valid_edge_mask = (perturb_edges[0] < num_nodes) & (perturb_edges[1] < num_nodes)
        perturb_edges = perturb_edges[:, valid_edge_mask]

        if perturb_edges.shape[1] > 0:
            new_edge_index = torch.cat([graph.edge_index, perturb_edges], dim=1)
            perturb_weights = graph.edge_attr[perturb_idx[valid_edge_mask]]
            new_edge_weight = torch.cat([graph.edge_attr, perturb_weights])
        else:
            new_edge_index = graph.edge_index
            new_edge_weight = graph.edge_attr
    else:
        new_edge_index = graph.edge_index
        new_edge_weight = graph.edge_attr

    # Feature masking
    x_aug = graph.x.clone()
    num_features = x_aug.shape[1]
    num_mask = int(num_features * feature_mask_rate)

    if num_mask > 0:
        for i in range(num_nodes):
            mask_idx = torch.randperm(num_features)[:num_mask]
            x_aug[i, mask_idx] = 0

    return Data(x=x_aug, edge_index=new_edge_index, edge_attr=new_edge_weight, y=graph.y)

print("\n=== STEP 2: BUILDING GRAPHS ===")
train_graph = build_graph_fast(X_train, y_train, k=15)
test_graph = build_graph_fast(X_test, y_test, k=15)


=== STEP 2: BUILDING GRAPHS ===
Building kNN graph (k=15) for 9999 samples...
Graph built: 9999 nodes, 140545 edges
Building kNN graph (k=15) for 22544 samples...
Graph built: 22544 nodes, 319048 edges


# 5. Model Definition

In [5]:
from torch_geometric.nn import GATv2Conv

class ContrastiveMemoryBank:
    def __init__(self, feature_dim, memory_size=1024, temperature=0.1):
        self.memory_size = memory_size
        self.temperature = temperature
        self.feature_dim = feature_dim
        self.memory = torch.randn(memory_size, feature_dim)
        self.labels = torch.randint(0, 2, (memory_size,))
        self.ptr = 0

    def update(self, features, labels):
        batch_size = features.shape[0]
        if self.ptr + batch_size > self.memory_size:
            self.ptr = 0

        end_ptr = min(self.ptr + batch_size, self.memory_size)
        actual_batch = end_ptr - self.ptr

        self.memory[self.ptr:end_ptr] = features[:actual_batch].detach()
        self.labels[self.ptr:end_ptr] = labels[:actual_batch].detach()
        self.ptr = end_ptr % self.memory_size

    def compute_contrastive_loss(self, features, labels, margin=0.5):
        if features.shape[0] == 0:
            return torch.tensor(0.0, device=features.device, requires_grad=True)

        features = F.normalize(features, dim=-1)
        memory_features = F.normalize(self.memory, dim=-1)

        # Move memory to same device as features
        if self.memory.device != features.device:
            self.memory = self.memory.to(features.device)
            self.labels = self.labels.to(features.device)
            memory_features = memory_features.to(features.device)

        similarities = torch.mm(features, memory_features.T) / self.temperature

        contrastive_loss = torch.tensor(0.0, device=features.device, requires_grad=True)
        for i in range(features.shape[0]):
            label = labels[i]

            # Positive pairs
            pos_mask = (self.labels == label)
            if pos_mask.sum() > 0:
                pos_sim = similarities[i][pos_mask]
                pos_loss = -torch.log(torch.exp(pos_sim).sum() + 1e-8)
                contrastive_loss = contrastive_loss + pos_loss.mean()

            # Negative pairs
            neg_mask = (self.labels != label)
            if neg_mask.sum() > 0:
                neg_sim = similarities[i][neg_mask]
                neg_loss = torch.clamp(margin - neg_sim, min=0).pow(2)
                contrastive_loss = contrastive_loss + neg_loss.mean()

        return contrastive_loss / max(features.shape[0], 1)

class CAGN_GAT_Fusion(nn.Module):
    def __init__(self, input_dim, hidden_dim=64, output_dim=2, memory_size=1024,
                 feat_dropout=0.3, attn_dropout=0.3):
        super().__init__()
        self.feat_dropout = nn.Dropout(feat_dropout)

        # CAGN branch (contrastive + attention)
        self.cagn_gat1 = GATv2Conv(input_dim, hidden_dim, heads=8, dropout=attn_dropout, concat=True,
                           edge_dim=1)
        self.cagn_gat2 = GATv2Conv(hidden_dim * 8, hidden_dim, heads=4, dropout=attn_dropout, concat=True,
                           edge_dim=1)
        self.cagn_gat3 = GATv2Conv(hidden_dim * 4, hidden_dim, heads=1, dropout=attn_dropout, concat=False,
                           edge_dim=1)

        self.cagn_norm1 = nn.LayerNorm(hidden_dim * 8)
        self.cagn_norm2 = nn.LayerNorm(hidden_dim * 4)
        self.cagn_norm3 = nn.LayerNorm(hidden_dim)

        # Standard GAT branch
        self.gat1 = GATv2Conv(input_dim, hidden_dim, heads=8, dropout=attn_dropout, concat=True,
                           edge_dim=1)
        self.gat2 = GATv2Conv(hidden_dim * 8, hidden_dim, heads=4, dropout=attn_dropout, concat=True,
                           edge_dim=1)
        self.gat3 = GATv2Conv(hidden_dim * 4, hidden_dim, heads=1, dropout=attn_dropout, concat=False,
                           edge_dim=1)

        self.gat_norm1 = nn.LayerNorm(hidden_dim * 8)
        self.gat_norm2 = nn.LayerNorm(hidden_dim * 4)
        self.gat_norm3 = nn.LayerNorm(hidden_dim)

        # Contrastive memory bank
        self.memory_bank = ContrastiveMemoryBank(hidden_dim, memory_size)

        # Fusion and output
        self.fusion_gate = nn.Linear(2 * hidden_dim, hidden_dim)
        self.classifier = nn.Linear(hidden_dim, output_dim)

    def forward(self, x, edge_index, edge_weight=None, return_embeddings=False):
        # CAGN branch
        cagn_h = self.cagn_gat1(x, edge_index, edge_weight.unsqueeze(-1))
        cagn_h = self.cagn_norm1(cagn_h)
        cagn_h = F.elu(cagn_h)
        cagn_h = self.feat_dropout(cagn_h)

        cagn_h = self.cagn_gat2(cagn_h, edge_index, edge_weight.unsqueeze(-1))
        cagn_h = self.cagn_norm2(cagn_h)
        cagn_h = F.elu(cagn_h)
        cagn_h = self.feat_dropout(cagn_h)

        cagn_embeddings = self.cagn_gat3(cagn_h, edge_index, edge_weight)
        cagn_embeddings = self.cagn_norm3(cagn_embeddings)

        # Standard GAT branch
        gat_h = self.gat1(x, edge_index, edge_weight)
        gat_h = self.gat_norm1(gat_h)
        gat_h = F.elu(gat_h)
        gat_h = self.feat_dropout(gat_h)

        gat_h = self.gat2(gat_h, edge_index, edge_weight)
        gat_h = self.gat_norm2(gat_h)
        gat_h = F.elu(gat_h)
        gat_h = self.feat_dropout(gat_h)

        gat_embeddings = self.gat3(gat_h, edge_index, edge_weight)
        gat_embeddings = self.gat_norm3(gat_embeddings)

        # Adaptive fusion using learnable gating
        fused_input = torch.cat([cagn_embeddings, gat_embeddings], dim=-1)
        gate = torch.sigmoid(self.fusion_gate(fused_input))
        final_embeddings = gate * cagn_embeddings + (1 - gate) * gat_embeddings

        if return_embeddings:
            return final_embeddings, cagn_embeddings

        return self.classifier(final_embeddings)

# 6. Model Building

In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"\n=== STEP 3: MODEL SETUP ===")
print(f"Using device: {device}")

model = CAGN_GAT_Fusion(
    input_dim=24,
    hidden_dim=64,
    output_dim=2,
    memory_size=1024
).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=300)

def train_one_epoch(model, graph, optimizer, device, epoch, contrastive_weight=0.1, contrastive_start=10):
    model.train()
    optimizer.zero_grad()

    # Apply augmentation
    aug_graph = graph

    # Forward pass
    final_embeddings, cagn_embeddings = model(
        aug_graph.x.to(device),
        aug_graph.edge_index.to(device),
        aug_graph.edge_attr.to(device),
        return_embeddings=True
    )

    # Classification loss
    out = model.classifier(final_embeddings)
    clf_loss = F.cross_entropy(out, aug_graph.y.to(device))

    # Contrastive loss (with warm-up)
    if epoch >= contrastive_start:
        contrastive_loss = model.memory_bank.compute_contrastive_loss(
            cagn_embeddings, aug_graph.y.to(device)
        )
        total_loss = clf_loss + contrastive_weight * contrastive_loss
    else:
        contrastive_loss = torch.tensor(0.0)
        total_loss = clf_loss

    total_loss.backward()
    optimizer.step()

    # Update memory bank
    model.memory_bank.update(cagn_embeddings.detach().cpu(), aug_graph.y.cpu())

    return total_loss.item(), clf_loss.item(), contrastive_loss.item()

def evaluate(model, graph, device):
    model.eval()
    with torch.no_grad():
        out = model(graph.x.to(device), graph.edge_index.to(device), graph.edge_attr.to(device))
        preds = out.argmax(dim=-1).cpu()
        labels = graph.y.cpu()
        acc = accuracy_score(labels, preds)
        f1 = f1_score(labels, preds, average="weighted")
        precision = precision_score(labels, preds, average="weighted")
        recall = recall_score(labels, preds, average="weighted")
    return acc, f1, precision, recall



=== STEP 3: MODEL SETUP ===
Using device: cpu


# 7. Model Training

In [None]:
print(f"\n=== STEP 4: TRAINING ===")
train_losses = []
clf_losses = []
cont_losses = []
test_accs = []
test_f1s = []
epochs_list = []
best_acc, best_f1 = 0, 0
best_precision, best_recall = 0, 0

for epoch in range(1, 301):
    total_loss, clf_loss, cont_loss = train_one_epoch(
        model, train_graph, optimizer, device, epoch
    )
    acc, f1, precision, recall = evaluate(model, test_graph, device)
    scheduler.step()

    train_losses.append(total_loss)
    clf_losses.append(clf_loss)
    cont_losses.append(cont_loss)
    test_accs.append(acc)
    test_f1s.append(f1)
    epochs_list.append(epoch)

    if acc > best_acc:
        best_acc = acc
        best_f1 = f1
        best_precision = precision
        best_recall = recall

    if epoch % 50 == 0 or epoch == 1:
        print(f"Epoch {epoch:03d} | Total: {total_loss:.4f} | Clf: {clf_loss:.4f} | Cont: {cont_loss:.4f}")
        print(f"          | Acc: {acc:.4f} | F1: {f1:.4f} | Prec: {precision:.4f} | Rec: {recall:.4f}")

print("\n=== FINAL RESULTS ===")
print(f"Best Accuracy:  {best_acc:.4f}")
print(f"Best F1:        {best_f1:.4f}")
print(f"Best Precision: {best_precision:.4f}")
print(f"Best Recall:    {best_recall:.4f}")


=== STEP 4: TRAINING ===
Epoch 001 | Total: 0.9009 | Clf: 0.9009 | Cont: 0.0000
          | Acc: 0.7331 | F1: 0.7298 | Prec: 0.7894 | Rec: 0.7331


# 8. Model Evaluation

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns

model.eval()
with torch.no_grad():
    final_out = model(test_graph.x.to(device), test_graph.edge_index.to(device), test_graph.edge_attr.to(device))
    final_preds = final_out.argmax(dim=-1).cpu().numpy()
    true_labels = test_graph.y.cpu().numpy()

# Create plots
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
fig.suptitle('CAGN-GAT Fusion Model Analysis', fontsize=16)

# 1. Loss curves
axes[0, 0].plot(epochs_list, train_losses, 'r-', label='Total Loss', linewidth=2)
axes[0, 0].plot(epochs_list, clf_losses, 'b-', label='Classification Loss', linewidth=2)
axes[0, 0].set_xlabel('Epoch')
axes[0, 0].set_ylabel('Loss')
axes[0, 0].set_title('Training Loss')
axes[0, 0].legend()
axes[0, 0].grid(True, alpha=0.3)

# 2. Accuracy over time
axes[0, 1].plot(epochs_list, test_accs, 'g-', label='Test Accuracy', linewidth=2)
axes[0, 1].axhline(y=best_acc, color='g', linestyle='--', alpha=0.7, label=f'Best: {best_acc:.3f}')
axes[0, 1].set_xlabel('Epoch')
axes[0, 1].set_ylabel('Accuracy')
axes[0, 1].set_title('Accuracy Evolution')
axes[0, 1].legend()
axes[0, 1].grid(True, alpha=0.3)

# 3. Confusion Matrix
cm = confusion_matrix(true_labels, final_preds)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=axes[1, 0],
            xticklabels=['Normal', 'Attack'], yticklabels=['Normal', 'Attack'])
axes[1, 0].set_xlabel('Predicted')
axes[1, 0].set_ylabel('Actual')
axes[1, 0].set_title('Confusion Matrix')

# 4. Contrastive Loss
axes[1, 1].plot(epochs_list, cont_losses, 'purple', linewidth=2)
axes[1, 1].axvline(x=10, color='red', linestyle='--', alpha=0.7, label='Contrastive Start')
axes[1, 1].set_xlabel('Epoch')
axes[1, 1].set_ylabel('Contrastive Loss')
axes[1, 1].set_title('Contrastive Learning')
axes[1, 1].legend()
axes[1, 1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print(f"\nFinal Results: Accuracy={best_acc:.4f}, F1={best_f1:.4f}")
print("\nClassification Report:")
print(classification_report(true_labels, final_preds, target_names=['Normal', 'Attack']))

In [5]:
%cd /content/drive/My Drive/Colab Notebooks/Intrusion-detection-system-dli

# Copy the Mateen folder and its contents into the current directory
!cp -r '/content/drive/My Drive/Colab Notebooks/DLI' .
# Add the new folder and its contents to the staging area
!git add DLI

# Commit the changes
!git commit -m "Added DLI project as a subfolder"

# Push the changes to GitHub
!git push

/content/drive/My Drive/Colab Notebooks/Intrusion-detection-system-dli
[main e5bc895] Added DLI project as a subfolder
 5 files changed, 149162 insertions(+)
 create mode 100644 DLI/DLI Assignment.ipynb
 create mode 100644 DLI/KDDTest+.txt
 create mode 100644 DLI/KDDTrain+.txt
 create mode 100644 DLI/__pycache__/nsl_preprocessing.cpython-312.pyc
 create mode 100644 DLI/nsl_preprocessing.py
remote: Invalid username or token. Password authentication is not supported for Git operations.
fatal: Authentication failed for 'https://github.com/Micro-Maxis/Intrusion-detection-system-dli.git/'


In [6]:
import os

# 🔴 Replace with your NEW token (not the leaked one)
os.environ["GITHUB_TOKEN"] = "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"

# Now clone with token
!git remote set-url origin https://$GITHUB_TOKEN@github.com/Micro-Maxis/Intrusion-detection-system-dli.git
