In [13]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/nsl-kdd-augmented/smote_augmented.csv
/kaggle/input/nslkdd/KDDTest+.arff
/kaggle/input/nslkdd/KDDTest-21.arff
/kaggle/input/nslkdd/KDDTest1.jpg
/kaggle/input/nslkdd/KDDTrain+.txt
/kaggle/input/nslkdd/KDDTrain+_20Percent.txt
/kaggle/input/nslkdd/KDDTest-21.txt
/kaggle/input/nslkdd/KDDTest+.txt
/kaggle/input/nslkdd/KDDTrain+.arff
/kaggle/input/nslkdd/index.html
/kaggle/input/nslkdd/KDDTrain+_20Percent.arff
/kaggle/input/nslkdd/KDDTrain1.jpg
/kaggle/input/nslkdd/nsl-kdd/KDDTest+.arff
/kaggle/input/nslkdd/nsl-kdd/KDDTest-21.arff
/kaggle/input/nslkdd/nsl-kdd/KDDTest1.jpg
/kaggle/input/nslkdd/nsl-kdd/KDDTrain+.txt
/kaggle/input/nslkdd/nsl-kdd/KDDTrain+_20Percent.txt
/kaggle/input/nslkdd/nsl-kdd/KDDTest-21.txt
/kaggle/input/nslkdd/nsl-kdd/KDDTest+.txt
/kaggle/input/nslkdd/nsl-kdd/KDDTrain+.arff
/kaggle/input/nslkdd/nsl-kdd/index.html
/kaggle/input/nslkdd/nsl-kdd/KDDTrain+_20Percent.arff
/kaggle/input/nslkdd/nsl-kdd/KDDTrain1.jpg


In [21]:
import pandas as pd
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from sklearn.metrics import classification_report
from tqdm import tqdm

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# ===========================================
# 1️⃣ Sampling: Strategic Over-Representation
# ===========================================
class_counts = np.bincount(y_train_enc)
# Q1 Tech: Custom frequency smoothing
# We use a 0.3 power to give even more weight to the rarest classes than before
weights = 1.0 / np.power(class_counts + 1, 0.7) 
samples_weight = torch.from_numpy(weights[y_train_enc])
sampler = WeightedRandomSampler(samples_weight, len(samples_weight))

train_loader = DataLoader(
    torch.utils.data.TensorDataset(torch.tensor(X_train_proc, dtype=torch.float32), torch.tensor(y_train_enc, dtype=torch.long)),
    batch_size=512, sampler=sampler
)

# ===========================================
# 2️⃣ NOVELTY: Margin-based Poly-Focal Loss
# ===========================================
class Q1_MarginPolyLoss(nn.Module):
    """
    Combines Focal Loss, Poly-1 Loss, and Class-Adaptive Margins.
    Specifically designed to decouple 'Normal' from 'R2L/U2R' attacks.
    """
    def __init__(self, class_counts, gamma=2.0, epsilon=1.0):
        super().__init__()
        self.gamma = gamma
        self.epsilon = epsilon
        # Pre-calculate margins: larger margins for minority classes
        self.margins = torch.tensor(1.0 / np.log1p(class_counts)).float().to(device)
        self.margins = (self.margins / self.margins.max()) * 2.0 

    def forward(self, logits, targets):
        # Apply Class-Adaptive Margin
        mask = F.one_hot(targets, num_classes=logits.shape[1]).float()
        logits = logits - (mask * self.margins)
        
        ce_loss = F.cross_entropy(logits, targets, reduction='none')
        pt = torch.exp(-ce_loss)
        
        # Poly-Focal Hybrid
        loss = (1 - pt)**self.gamma * ce_loss + self.epsilon * (1 - pt)
        return loss.mean()

# ===========================================
# 3️⃣ NOVELTY: LMD-Net (Latent Manifold Decoupler)
# ===========================================
class LMD_Net(nn.Module):
    def __init__(self, input_dim, num_classes):
        super().__init__()
        # Expansion Path: Finds hidden separation dimensions
        self.expansion = nn.Sequential(
            nn.Linear(input_dim, 1024),
            nn.GELU(),
            nn.LayerNorm(1024)
        )
        
        # Gated BottleNeck
        self.gate = nn.Sequential(nn.Linear(1024, 1024), nn.Sigmoid())
        
        self.fc_blocks = nn.Sequential(
            nn.Linear(1024, 512),
            nn.GELU(),
            nn.Dropout(0.4),
            nn.Linear(512, 256),
            nn.GELU(),
            nn.LayerNorm(256)
        )
        self.head = nn.Linear(256, num_classes)

    def forward(self, x):
        x = self.expansion(x)
        # Gating recalibrates the expanded manifold
        g = self.gate(x)
        x = x * g
        x = self.fc_blocks(x)
        return self.head(x)

# ===========================================
# 4️⃣ Execution
# ===========================================
model = LMD_Net(X_train_proc.shape[1], num_classes).to(device)
criterion = Q1_MarginPolyLoss(class_counts)
optimizer = torch.optim.AdamW(model.parameters(), lr=8e-4, weight_decay=1e-2)
# Cosine annealing helps find the tiny local minima for rare classes
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=15)

for epoch in range(20):
    model.train()
    for X_b, y_b in tqdm(train_loader, desc=f"Epoch {epoch+1}"):
        X_b, y_b = X_b.to(device), y_b.to(device)
        optimizer.zero_grad()
        loss = criterion(model(X_b), y_b)
        loss.backward()
        optimizer.step()
    scheduler.step()

# Evaluation
model.eval()
all_p, all_y = [], []
with torch.no_grad():
    for X_b, y_b in test_loader:
        out = model(X_b.to(device))
        all_p.extend(torch.argmax(out, dim=1).cpu().numpy())
        all_y.extend(y_b.numpy())

print("\n--- LMD-Net Q1 Results ---")
print(classification_report(all_y, all_p, labels=np.arange(num_classes), target_names=le.classes_, zero_division=0))

Epoch 1: 100%|██████████| 247/247 [00:01<00:00, 172.31it/s]
Epoch 2: 100%|██████████| 247/247 [00:01<00:00, 173.96it/s]
Epoch 3: 100%|██████████| 247/247 [00:01<00:00, 154.51it/s]
Epoch 4: 100%|██████████| 247/247 [00:01<00:00, 173.13it/s]
Epoch 5: 100%|██████████| 247/247 [00:01<00:00, 172.41it/s]
Epoch 6: 100%|██████████| 247/247 [00:01<00:00, 151.50it/s]
Epoch 7: 100%|██████████| 247/247 [00:01<00:00, 171.36it/s]
Epoch 8: 100%|██████████| 247/247 [00:01<00:00, 173.05it/s]
Epoch 9: 100%|██████████| 247/247 [00:01<00:00, 173.72it/s]
Epoch 10: 100%|██████████| 247/247 [00:01<00:00, 153.77it/s]
Epoch 11: 100%|██████████| 247/247 [00:01<00:00, 173.64it/s]
Epoch 12: 100%|██████████| 247/247 [00:01<00:00, 172.46it/s]
Epoch 13: 100%|██████████| 247/247 [00:01<00:00, 170.18it/s]
Epoch 14: 100%|██████████| 247/247 [00:01<00:00, 153.52it/s]
Epoch 15: 100%|██████████| 247/247 [00:01<00:00, 172.41it/s]
Epoch 16: 100%|██████████| 247/247 [00:01<00:00, 171.54it/s]
Epoch 17: 100%|██████████| 247/24


--- LMD-Net Q1 Results ---
                 precision    recall  f1-score   support

           back       0.99      0.80      0.89       359
buffer_overflow       0.71      0.50      0.59        20
      ftp_write       0.01      0.67      0.02         3
   guess_passwd       0.09      0.00      0.00      1231
           imap       0.00      0.00      0.00         1
        ipsweep       0.95      0.98      0.96       141
           land       1.00      1.00      1.00         7
     loadmodule       1.00      1.00      1.00         2
       multihop       0.00      0.00      0.00        18
        neptune       1.00      1.00      1.00      4657
           nmap       0.99      1.00      0.99        73
         normal       0.83      0.97      0.90      9711
           perl       0.50      0.50      0.50         2
            phf       0.33      0.50      0.40         2
            pod       0.72      0.95      0.82        41
      portsweep       0.64      0.94      0.76       157
  