In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/nsl-kdd-augmented/smote_augmented.csv
/kaggle/input/nslkdd/KDDTest+.arff
/kaggle/input/nslkdd/KDDTest-21.arff
/kaggle/input/nslkdd/KDDTest1.jpg
/kaggle/input/nslkdd/KDDTrain+.txt
/kaggle/input/nslkdd/KDDTrain+_20Percent.txt
/kaggle/input/nslkdd/KDDTest-21.txt
/kaggle/input/nslkdd/KDDTest+.txt
/kaggle/input/nslkdd/KDDTrain+.arff
/kaggle/input/nslkdd/index.html
/kaggle/input/nslkdd/KDDTrain+_20Percent.arff
/kaggle/input/nslkdd/KDDTrain1.jpg
/kaggle/input/nslkdd/nsl-kdd/KDDTest+.arff
/kaggle/input/nslkdd/nsl-kdd/KDDTest-21.arff
/kaggle/input/nslkdd/nsl-kdd/KDDTest1.jpg
/kaggle/input/nslkdd/nsl-kdd/KDDTrain+.txt
/kaggle/input/nslkdd/nsl-kdd/KDDTrain+_20Percent.txt
/kaggle/input/nslkdd/nsl-kdd/KDDTest-21.txt
/kaggle/input/nslkdd/nsl-kdd/KDDTest+.txt
/kaggle/input/nslkdd/nsl-kdd/KDDTrain+.arff
/kaggle/input/nslkdd/nsl-kdd/index.html
/kaggle/input/nslkdd/nsl-kdd/KDDTrain+_20Percent.arff
/kaggle/input/nslkdd/nsl-kdd/KDDTrain1.jpg


In [16]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report
from tqdm import tqdm

# ===========================================
# 1️⃣ Load & Stable Mapping (From your working code)
# ===========================================
local_path = "/kaggle/input/nslkdd/" 
columns = ['duration', 'protocol_type', 'service', 'flag', 'src_bytes', 'dst_bytes', 'land',
           'wrong_fragment', 'urgent', 'hot', 'num_failed_logins', 'logged_in', 'num_compromised',
           'root_shell', 'su_attempted', 'num_root', 'num_file_creations', 'num_shells',
           'num_access_files', 'num_outbound_cmds', 'is_host_login', 'is_guest_login', 'count',
           'srv_count', 'serror_rate', 'srv_serror_rate', 'rerror_rate', 'srv_rerror_rate',
           'same_srv_rate', 'diff_srv_rate', 'srv_diff_host_rate', 'dst_host_count',
           'dst_host_srv_count', 'dst_host_same_srv_rate', 'dst_host_diff_srv_rate',
           'dst_host_same_src_port_rate', 'dst_host_srv_diff_host_rate', 'dst_host_serror_rate',
           'dst_host_srv_serror_rate', 'dst_host_rerror_rate', 'dst_host_srv_rerror_rate',
           'outcome', 'level']

df_train = pd.read_csv(local_path + "KDDTrain+.txt", header=None, names=columns)
df_test  = pd.read_csv(local_path + "KDDTest+.txt", header=None, names=columns)

df_train['outcome'] = df_train['outcome'].astype(str)
df_test['outcome'] = df_test['outcome'].astype(str)
train_labels = set(df_train['outcome'].unique())
df_test = df_test[df_test['outcome'].isin(train_labels)].reset_index(drop=True)

# ===========================================
# 2️⃣ Hybrid Preprocessing for Transformer
# ===========================================
cat_cols = ['protocol_type', 'service', 'flag']
num_cols = [c for c in columns if c not in cat_cols + ['outcome', 'level']]

# Label Encode Categorical for Embeddings
cat_dims = []
for col in cat_cols:
    le_cat = LabelEncoder()
    df_train[col] = le_cat.fit_transform(df_train[col])
    df_test[col] = df_test[col].map(lambda s: s if s in le_cat.classes_ else le_cat.classes_[0])
    df_test[col] = le_cat.transform(df_test[col])
    cat_dims.append(len(le_cat.classes_))

# Scale Numerical
scaler = StandardScaler()
X_train_num = scaler.fit_transform(df_train[num_cols]).astype(np.float32)
X_test_num  = scaler.transform(df_test[num_cols]).astype(np.float32)

le = LabelEncoder()
y_train_enc = le.fit_transform(df_train['outcome'])
y_test_enc  = le.transform(df_test['outcome'])
num_classes = len(le.classes_)

# ===========================================
# 3️⃣ Novel Architecture: Gated-Transformer Fusion
# ===========================================
class GatedLinearUnit(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.fc = nn.Linear(input_dim, input_dim * 2)
    def forward(self, x):
        x = self.fc(x)
        x, gate = x.chunk(2, dim=-1)
        return x * torch.sigmoid(gate)

class GTFModel(nn.Module):
    def __init__(self, cat_dims, num_feat_dim, num_classes, emb_dim=32):
        super().__init__()
        # Categorical Path (Transformer)
        self.embs = nn.ModuleList([nn.Embedding(d, emb_dim) for d in cat_dims])
        layer = nn.TransformerEncoderLayer(d_model=emb_dim, nhead=8, batch_first=True)
        self.transformer = nn.TransformerEncoder(layer, num_layers=2)
        
        # Numerical Path (Gated)
        self.num_gate = nn.Sequential(
            nn.Linear(num_feat_dim, 128),
            GatedLinearUnit(128),
            nn.LayerNorm(128)
        )
        
        # Fusion
        self.classifier = nn.Sequential(
            nn.Linear(len(cat_dims)*emb_dim + 128, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, num_classes)
        )

    def forward(self, x_cat, x_num):
        x_c = torch.stack([emb(x_cat[:, i]) for i, emb in enumerate(self.embs)], dim=1)
        x_c = self.transformer(x_c).flatten(1)
        x_n = self.num_gate(x_num)
        return self.classifier(torch.cat([x_c, x_n], dim=1))

# ===========================================
# 4️⃣ Balanced Data Loading
# ===========================================
class HybridDS(Dataset):
    def __init__(self, c, n, y):
        self.c, self.n, self.y = torch.tensor(c), torch.tensor(n), torch.tensor(y)
    def __len__(self): return len(self.y)
    def __getitem__(self, i): return self.c[i], self.n[i], self.y[i]

class_counts = np.bincount(y_train_enc)
class_weights = 1.0 / np.sqrt(class_counts + 1)
sampler = WeightedRandomSampler(class_weights[y_train_enc], len(y_train_enc))

train_loader = DataLoader(HybridDS(df_train[cat_cols].values, X_train_num, y_train_enc), batch_size=256, sampler=sampler)
test_loader  = DataLoader(HybridDS(df_test[cat_cols].values, X_test_num, y_test_enc), batch_size=256, shuffle=False)

# ===========================================
# 5️⃣ Training Loop
# ===========================================
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = GTFModel(cat_dims, X_train_num.shape[1], num_classes).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)

# Use standard weighted CE (proven stable in your code)
loss_weights = torch.tensor(class_weights / class_weights.sum() * num_classes, dtype=torch.float32).to(device)
criterion = nn.CrossEntropyLoss(weight=loss_weights)

for epoch in range(15):
    model.train()
    for xc, xn, y in tqdm(train_loader, desc=f"Epoch {epoch+1}"):
        xc, xn, y = xc.to(device), xn.to(device), y.to(device)
        optimizer.zero_grad()
        loss = criterion(model(xc, xn), y)
        loss.backward()
        optimizer.step()

# ===========================================
# 6️⃣ Final Evaluation
# ===========================================
model.eval()
all_p, all_y = [], []
with torch.no_grad():
    for xc, xn, y in test_loader:
        out = model(xc.to(device), xn.to(device))
        all_p.extend(torch.argmax(out, 1).cpu().numpy())
        all_y.extend(y.numpy())

print("\n--- Final Q1 Results ---")
print(classification_report(all_y, all_p, labels=np.arange(num_classes), target_names=le.classes_, zero_division=0))

Epoch 1: 100%|██████████| 493/493 [00:04<00:00, 113.66it/s]
Epoch 2: 100%|██████████| 493/493 [00:04<00:00, 121.91it/s]
Epoch 3: 100%|██████████| 493/493 [00:04<00:00, 119.88it/s]
Epoch 4: 100%|██████████| 493/493 [00:04<00:00, 115.13it/s]
Epoch 5: 100%|██████████| 493/493 [00:04<00:00, 119.92it/s]
Epoch 6: 100%|██████████| 493/493 [00:04<00:00, 116.88it/s]
Epoch 7: 100%|██████████| 493/493 [00:04<00:00, 120.01it/s]
Epoch 8: 100%|██████████| 493/493 [00:04<00:00, 119.99it/s]
Epoch 9: 100%|██████████| 493/493 [00:04<00:00, 115.92it/s]
Epoch 10: 100%|██████████| 493/493 [00:04<00:00, 118.75it/s]
Epoch 11: 100%|██████████| 493/493 [00:04<00:00, 122.19it/s]
Epoch 12: 100%|██████████| 493/493 [00:04<00:00, 116.25it/s]
Epoch 13: 100%|██████████| 493/493 [00:04<00:00, 120.65it/s]
Epoch 14: 100%|██████████| 493/493 [00:04<00:00, 122.44it/s]
Epoch 15: 100%|██████████| 493/493 [00:04<00:00, 114.97it/s]



--- Final Q1 Results ---
                 precision    recall  f1-score   support

           back       0.99      0.97      0.98       359
buffer_overflow       0.71      0.50      0.59        20
      ftp_write       0.01      0.33      0.01         3
   guess_passwd       1.00      0.18      0.31      1231
           imap       0.00      0.00      0.00         1
        ipsweep       0.86      0.98      0.91       141
           land       1.00      1.00      1.00         7
     loadmodule       0.29      1.00      0.44         2
       multihop       0.01      0.11      0.02        18
        neptune       1.00      0.99      1.00      4657
           nmap       0.99      0.99      0.99        73
         normal       0.86      0.92      0.89      9711
           perl       0.50      0.50      0.50         2
            phf       0.50      0.50      0.50         2
            pod       0.72      0.95      0.82        41
      portsweep       0.66      0.89      0.76       157
    

In [20]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

# ===========================================
# 1️⃣ Load & Stable Mapping (Retained from your working code)
# ===========================================
local_path = "/kaggle/input/nslkdd/" 

columns = [
    'duration', 'protocol_type', 'service', 'flag', 'src_bytes', 'dst_bytes', 'land',
    'wrong_fragment', 'urgent', 'hot', 'num_failed_logins', 'logged_in', 'num_compromised',
    'root_shell', 'su_attempted', 'num_root', 'num_file_creations', 'num_shells',
    'num_access_files', 'num_outbound_cmds', 'is_host_login', 'is_guest_login', 'count',
    'srv_count', 'serror_rate', 'srv_serror_rate', 'rerror_rate', 'srv_rerror_rate',
    'same_srv_rate', 'diff_srv_rate', 'srv_diff_host_rate', 'dst_host_count',
    'dst_host_srv_count', 'dst_host_same_srv_rate', 'dst_host_diff_srv_rate',
    'dst_host_same_src_port_rate', 'dst_host_srv_diff_host_rate', 'dst_host_serror_rate',
    'dst_host_srv_serror_rate', 'dst_host_rerror_rate', 'dst_host_srv_rerror_rate',
    'outcome', 'level'
]

df_train = pd.read_csv(local_path + "KDDTrain+.txt", header=None, names=columns)
df_test  = pd.read_csv(local_path + "KDDTest+.txt", header=None, names=columns)

df_train['outcome'] = df_train['outcome'].astype(str)
df_test['outcome'] = df_test['outcome'].astype(str)

train_labels = set(df_train['outcome'].unique())
df_test = df_test[df_test['outcome'].isin(train_labels)].reset_index(drop=True)

# ===========================================
# 2️⃣ Preprocessing (Stable Mapping)
# ===========================================
target = 'outcome'
cat_cols = ['protocol_type', 'service', 'flag']
num_cols = [c for c in columns if c not in cat_cols + [target, 'level']]

preprocessor = ColumnTransformer([
    ('num', StandardScaler(), num_cols),
    ('cat', OneHotEncoder(handle_unknown='ignore', sparse_output=False), cat_cols)
])

X_train_proc = preprocessor.fit_transform(df_train.drop(columns=[target, 'level']))
X_test_proc  = preprocessor.transform(df_test.drop(columns=[target, 'level']))

le = LabelEncoder()
y_train_enc = le.fit_transform(df_train[target])
y_test_enc  = le.transform(df_test[target])
num_classes = len(le.classes_)

# ===========================================
# 3️⃣ Dataset & Weighted Sampling
# ===========================================
class_counts = np.bincount(y_train_enc)
class_weights = 1.0 / np.sqrt(class_counts + 1)
sample_weights = class_weights[y_train_enc]

sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(y_train_enc), replacement=True)

train_dataset = torch.utils.data.TensorDataset(torch.tensor(X_train_proc, dtype=torch.float32), 
                                               torch.tensor(y_train_enc, dtype=torch.long))
test_dataset  = torch.utils.data.TensorDataset(torch.tensor(X_test_proc, dtype=torch.float32), 
                                               torch.tensor(y_test_enc, dtype=torch.long))

train_loader = DataLoader(train_dataset, batch_size=256, sampler=sampler)
test_loader  = DataLoader(test_dataset, batch_size=256, shuffle=False)

# ===========================================
# 4️⃣ NOVEL ARCHITECTURE: Gated Residual MLP
# ===========================================
class GatedBlock(nn.Module):
    def __init__(self, input_dim, output_dim, dropout=0.2):
        super().__init__()
        self.fc = nn.Linear(input_dim, output_dim * 2) # double for gating
        self.ln = nn.LayerNorm(output_dim)
        self.drop = nn.Dropout(dropout)

    def forward(self, x):
        gate_input = self.fc(x)
        x, gate = gate_input.chunk(2, dim=-1)
        x = x * torch.sigmoid(gate) # Gating mechanism
        return self.drop(self.ln(x))

class Q1GatedModel(nn.Module):
    def __init__(self, input_dim, num_classes):
        super().__init__()
        self.stem = nn.Linear(input_dim, 512)
        
        # Gated layers provide better feature refinement than simple ReLU
        self.block1 = GatedBlock(512, 512)
        self.block2 = GatedBlock(512, 256)
        
        self.head = nn.Linear(256, num_classes)

    def forward(self, x):
        x = torch.relu(self.stem(x))
        x = self.block1(x)
        x = self.block2(x)
        return self.head(x)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = Q1GatedModel(X_train_proc.shape[1], num_classes).to(device)

# Normalized weights for stability
loss_weights = torch.tensor(class_weights / class_weights.sum() * num_classes, dtype=torch.float32).to(device)
criterion = nn.CrossEntropyLoss(weight=loss_weights)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-2)

# ===========================================
# 5️⃣ Training Loop
# ===========================================
EPOCHS = 20
for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0
    for X_batch, y_batch in tqdm(train_loader, desc=f"Epoch {epoch+1}"):
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    
    print(f"Epoch {epoch+1} Loss: {running_loss/len(train_loader):.4f}")

# ===========================================
# 6️⃣ Evaluation
# ===========================================
model.eval()
all_preds, all_labels = [], []

with torch.no_grad():
    for X_batch, y_batch in test_loader:
        outputs = model(X_batch.to(device))
        preds = torch.argmax(outputs, dim=1).cpu().numpy()
        all_preds.extend(preds)
        all_labels.extend(y_batch.numpy())

print("\n--- Evaluation Report (All Classes Retained) ---")
print(classification_report(all_labels, all_preds, 
                            labels=np.arange(len(le.classes_)), 
                            target_names=le.classes_, 
                            zero_division=0))

Epoch 1: 100%|██████████| 493/493 [00:02<00:00, 222.06it/s]


Epoch 1 Loss: 0.2010


Epoch 2: 100%|██████████| 493/493 [00:01<00:00, 247.82it/s]


Epoch 2 Loss: 0.0778


Epoch 3: 100%|██████████| 493/493 [00:01<00:00, 250.40it/s]


Epoch 3 Loss: 0.0591


Epoch 4: 100%|██████████| 493/493 [00:01<00:00, 247.11it/s]


Epoch 4 Loss: 0.0489


Epoch 5: 100%|██████████| 493/493 [00:02<00:00, 223.81it/s]


Epoch 5 Loss: 0.0422


Epoch 6: 100%|██████████| 493/493 [00:02<00:00, 240.56it/s]


Epoch 6 Loss: 0.0365


Epoch 7: 100%|██████████| 493/493 [00:02<00:00, 241.44it/s]


Epoch 7 Loss: 0.0366


Epoch 8: 100%|██████████| 493/493 [00:02<00:00, 225.39it/s]


Epoch 8 Loss: 0.0720


Epoch 9: 100%|██████████| 493/493 [00:02<00:00, 237.86it/s]


Epoch 9 Loss: 0.0352


Epoch 10: 100%|██████████| 493/493 [00:02<00:00, 241.21it/s]


Epoch 10 Loss: 0.0326


Epoch 11: 100%|██████████| 493/493 [00:02<00:00, 246.32it/s]


Epoch 11 Loss: 0.0367


Epoch 12: 100%|██████████| 493/493 [00:02<00:00, 227.98it/s]


Epoch 12 Loss: 0.0375


Epoch 13: 100%|██████████| 493/493 [00:02<00:00, 245.33it/s]


Epoch 13 Loss: 0.0357


Epoch 14: 100%|██████████| 493/493 [00:02<00:00, 245.94it/s]


Epoch 14 Loss: 0.0363


Epoch 15: 100%|██████████| 493/493 [00:02<00:00, 238.83it/s]


Epoch 15 Loss: 0.0319


Epoch 16: 100%|██████████| 493/493 [00:02<00:00, 228.77it/s]


Epoch 16 Loss: 0.0337


Epoch 17: 100%|██████████| 493/493 [00:02<00:00, 245.34it/s]


Epoch 17 Loss: 0.0537


Epoch 18: 100%|██████████| 493/493 [00:02<00:00, 239.28it/s]


Epoch 18 Loss: 0.0279


Epoch 19: 100%|██████████| 493/493 [00:02<00:00, 223.97it/s]


Epoch 19 Loss: 0.0358


Epoch 20: 100%|██████████| 493/493 [00:02<00:00, 240.80it/s]


Epoch 20 Loss: 0.0309

--- Evaluation Report (All Classes Retained) ---
                 precision    recall  f1-score   support

           back       0.99      0.87      0.93       359
buffer_overflow       0.54      0.35      0.42        20
      ftp_write       0.02      0.67      0.04         3
   guess_passwd       1.00      0.00      0.00      1231
           imap       0.00      0.00      0.00         1
        ipsweep       0.72      0.97      0.83       141
           land       1.00      1.00      1.00         7
     loadmodule       0.00      0.00      0.00         2
       multihop       0.01      0.11      0.02        18
        neptune       1.00      1.00      1.00      4657
           nmap       0.96      0.99      0.97        73
         normal       0.83      0.96      0.89      9711
           perl       1.00      0.50      0.67         2
            phf       0.50      0.50      0.50         2
            pod       0.71      0.95      0.81        41
      portsweep

In [21]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import classification_report
from tqdm import tqdm

# ===========================================
# 1️⃣ Stable Pipeline: Data & Mapping
# ===========================================
local_path = "/kaggle/input/nslkdd/" 
columns = [
    'duration', 'protocol_type', 'service', 'flag', 'src_bytes', 'dst_bytes', 'land',
    'wrong_fragment', 'urgent', 'hot', 'num_failed_logins', 'logged_in', 'num_compromised',
    'root_shell', 'su_attempted', 'num_root', 'num_file_creations', 'num_shells',
    'num_access_files', 'num_outbound_cmds', 'is_host_login', 'is_guest_login', 'count',
    'srv_count', 'serror_rate', 'srv_serror_rate', 'rerror_rate', 'srv_rerror_rate',
    'same_srv_rate', 'diff_srv_rate', 'srv_diff_host_rate', 'dst_host_count',
    'dst_host_srv_count', 'dst_host_same_srv_rate', 'dst_host_diff_srv_rate',
    'dst_host_same_src_port_rate', 'dst_host_srv_diff_host_rate', 'dst_host_serror_rate',
    'dst_host_srv_serror_rate', 'dst_host_rerror_rate', 'dst_host_srv_rerror_rate',
    'outcome', 'level'
]

df_train = pd.read_csv(local_path + "KDDTrain+.txt", header=None, names=columns)
df_test  = pd.read_csv(local_path + "KDDTest+.txt", header=None, names=columns)

df_train['outcome'] = df_train['outcome'].astype(str)
df_test['outcome'] = df_test['outcome'].astype(str)
train_labels = set(df_train['outcome'].unique())
df_test = df_test[df_test['outcome'].isin(train_labels)].reset_index(drop=True)

# Preprocessing
target = 'outcome'
cat_cols = ['protocol_type', 'service', 'flag']
num_cols = [c for c in columns if c not in cat_cols + [target, 'level']]

preprocessor = ColumnTransformer([
    ('num', StandardScaler(), num_cols),
    ('cat', OneHotEncoder(handle_unknown='ignore', sparse_output=False), cat_cols)
])

X_train_proc = preprocessor.fit_transform(df_train.drop(columns=[target, 'level']))
X_test_proc  = preprocessor.transform(df_test.drop(columns=[target, 'level']))

le = LabelEncoder()
y_train_enc = le.fit_transform(df_train[target])
y_test_enc  = le.transform(df_test[target])
num_classes = len(le.classes_)

# ===========================================
# 2️⃣ Sampler & Global Loss Adjustment
# ===========================================
class_counts = np.bincount(y_train_enc)
# Square root smoothing for stability
class_weights = 1.0 / np.sqrt(class_counts + 1)
sample_weights = class_weights[y_train_enc]

# NOVELTY: Cost-Sensitive adjustment term for Logits
# This helps the model "respect" rare classes without mapping errors
logit_adj = torch.tensor(class_counts + 1).float().log() * 0.5 

sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(y_train_enc), replacement=True)
train_loader = DataLoader(torch.utils.data.TensorDataset(torch.tensor(X_train_proc, dtype=torch.float32), 
                                                        torch.tensor(y_train_enc, dtype=torch.long)), 
                          batch_size=512, sampler=sampler)
test_loader = DataLoader(torch.utils.data.TensorDataset(torch.tensor(X_test_proc, dtype=torch.float32), 
                                                       torch.tensor(y_test_enc, dtype=torch.long)), 
                         batch_size=512, shuffle=False)

# ===========================================
# 3️⃣ Architecture: GMH-ResNet
# ===========================================
class GLULayer(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.fc = nn.Linear(dim, dim * 2)
    def forward(self, x):
        x = self.fc(x)
        content, gate = x.chunk(2, dim=-1)
        return content * torch.sigmoid(gate)

class ResidualBlock(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.glu = GLULayer(dim)
        self.ln = nn.LayerNorm(dim)
    def forward(self, x):
        return x + self.glu(self.ln(x))

class GMHResNet(nn.Module):
    def __init__(self, input_dim, num_classes):
        super().__init__()
        self.stem = nn.Linear(input_dim, 512)
        self.blocks = nn.Sequential(
            ResidualBlock(512),
            nn.Linear(512, 256),
            ResidualBlock(256),
            nn.Dropout(0.3)
        )
        self.head = nn.Linear(256, num_classes)

    def forward(self, x):
        x = torch.relu(self.stem(x))
        x = self.blocks(x)
        return self.head(x)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = GMHResNet(X_train_proc.shape[1], num_classes).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-2)
logit_adj = logit_adj.to(device)

# ===========================================
# 4️⃣ Training & Evaluation
# ===========================================
for epoch in range(20):
    model.train()
    for X_b, y_b in tqdm(train_loader, desc=f"Epoch {epoch+1}"):
        X_b, y_b = X_b.to(device), y_b.to(device)
        optimizer.zero_grad()
        logits = model(X_b)
        # Apply logit adjustment for imbalanced detection
        loss = nn.functional.cross_entropy(logits - logit_adj, y_b)
        loss.backward()
        optimizer.step()

model.eval()
all_p, all_y = [], []
with torch.no_grad():
    for X_b, y_b in test_loader:
        out = model(X_b.to(device))
        all_p.extend(torch.argmax(out, dim=1).cpu().numpy())
        all_y.extend(y_b.numpy())

print(classification_report(all_y, all_p, labels=np.arange(num_classes), target_names=le.classes_, zero_division=0))

Epoch 1: 100%|██████████| 247/247 [00:01<00:00, 177.82it/s]
Epoch 2: 100%|██████████| 247/247 [00:01<00:00, 177.22it/s]
Epoch 3: 100%|██████████| 247/247 [00:01<00:00, 158.35it/s]
Epoch 4: 100%|██████████| 247/247 [00:01<00:00, 177.70it/s]
Epoch 5: 100%|██████████| 247/247 [00:01<00:00, 175.02it/s]
Epoch 6: 100%|██████████| 247/247 [00:01<00:00, 159.69it/s]
Epoch 7: 100%|██████████| 247/247 [00:01<00:00, 174.24it/s]
Epoch 8: 100%|██████████| 247/247 [00:01<00:00, 177.01it/s]
Epoch 9: 100%|██████████| 247/247 [00:01<00:00, 175.82it/s]
Epoch 10: 100%|██████████| 247/247 [00:01<00:00, 156.25it/s]
Epoch 11: 100%|██████████| 247/247 [00:01<00:00, 176.74it/s]
Epoch 12: 100%|██████████| 247/247 [00:01<00:00, 175.85it/s]
Epoch 13: 100%|██████████| 247/247 [00:01<00:00, 176.79it/s]
Epoch 14: 100%|██████████| 247/247 [00:01<00:00, 153.40it/s]
Epoch 15: 100%|██████████| 247/247 [00:01<00:00, 178.41it/s]
Epoch 16: 100%|██████████| 247/247 [00:01<00:00, 178.28it/s]
Epoch 17: 100%|██████████| 247/24

                 precision    recall  f1-score   support

           back       1.00      0.91      0.95       359
buffer_overflow       0.50      0.05      0.09        20
      ftp_write       0.00      0.00      0.00         3
   guess_passwd       0.00      0.00      0.00      1231
           imap       0.00      0.00      0.00         1
        ipsweep       0.93      0.99      0.96       141
           land       1.00      1.00      1.00         7
     loadmodule       1.00      1.00      1.00         2
       multihop       0.00      0.00      0.00        18
        neptune       0.99      1.00      1.00      4657
           nmap       1.00      0.99      0.99        73
         normal       0.83      0.98      0.90      9711
           perl       0.50      0.50      0.50         2
            phf       0.50      0.50      0.50         2
            pod       0.75      0.93      0.83        41
      portsweep       0.48      0.88      0.62       157
        rootkit       0.00    

In [22]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import classification_report
from tqdm import tqdm

# ===========================================
# 1️⃣ Stable Pipeline: Data & Mapping
# ===========================================
local_path = "/kaggle/input/nslkdd/" 
columns = [
    'duration', 'protocol_type', 'service', 'flag', 'src_bytes', 'dst_bytes', 'land',
    'wrong_fragment', 'urgent', 'hot', 'num_failed_logins', 'logged_in', 'num_compromised',
    'root_shell', 'su_attempted', 'num_root', 'num_file_creations', 'num_shells',
    'num_access_files', 'num_outbound_cmds', 'is_host_login', 'is_guest_login', 'count',
    'srv_count', 'serror_rate', 'srv_serror_rate', 'rerror_rate', 'srv_rerror_rate',
    'same_srv_rate', 'diff_srv_rate', 'srv_diff_host_rate', 'dst_host_count',
    'dst_host_srv_count', 'dst_host_same_srv_rate', 'dst_host_diff_srv_rate',
    'dst_host_same_src_port_rate', 'dst_host_srv_diff_host_rate', 'dst_host_serror_rate',
    'dst_host_srv_serror_rate', 'dst_host_rerror_rate', 'dst_host_srv_rerror_rate',
    'outcome', 'level'
]

df_train = pd.read_csv(local_path + "KDDTrain+.txt", header=None, names=columns)
df_test  = pd.read_csv(local_path + "KDDTest+.txt", header=None, names=columns)

df_train['outcome'] = df_train['outcome'].astype(str)
df_test['outcome'] = df_test['outcome'].astype(str)
train_labels = set(df_train['outcome'].unique())
df_test = df_test[df_test['outcome'].isin(train_labels)].reset_index(drop=True)

# Preprocessing
target = 'outcome'
cat_cols = ['protocol_type', 'service', 'flag']
num_cols = [c for c in columns if c not in cat_cols + [target, 'level']]

preprocessor = ColumnTransformer([
    ('num', StandardScaler(), num_cols),
    ('cat', OneHotEncoder(handle_unknown='ignore', sparse_output=False), cat_cols)
])

X_train_proc = preprocessor.fit_transform(df_train.drop(columns=[target, 'level']))
X_test_proc  = preprocessor.transform(df_test.drop(columns=[target, 'level']))

le = LabelEncoder()
y_train_enc = le.fit_transform(df_train[target])
y_test_enc  = le.transform(df_test[target])
num_classes = len(le.classes_)

# ===========================================
# 2️⃣ Sampler & Global Loss Adjustment
# ===========================================
class_counts = np.bincount(y_train_enc)
# Square root smoothing for stability
class_weights = 1.0 / np.sqrt(class_counts + 1)
sample_weights = class_weights[y_train_enc]

# NOVELTY: Cost-Sensitive adjustment term for Logits
# logit_adj = log(prior_probability). This helps correct class imbalance at the boundary.
logit_adj = torch.tensor(class_counts + 1).float().log() * 0.5 

sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(y_train_enc), replacement=True)
train_loader = DataLoader(torch.utils.data.TensorDataset(torch.tensor(X_train_proc, dtype=torch.float32), 
                                                        torch.tensor(y_train_enc, dtype=torch.long)), 
                          batch_size=512, sampler=sampler)
test_loader = DataLoader(torch.utils.data.TensorDataset(torch.tensor(X_test_proc, dtype=torch.float32), 
                                                       torch.tensor(y_test_enc, dtype=torch.long)), 
                         batch_size=512, shuffle=False)

# ===========================================
# 3️⃣ Architecture: GMH-ResNet
# ===========================================
class GLULayer(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.fc = nn.Linear(dim, dim * 2)
    def forward(self, x):
        x = self.fc(x)
        content, gate = x.chunk(2, dim=-1)
        return content * torch.sigmoid(gate)

class ResidualBlock(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.glu = GLULayer(dim)
        self.ln = nn.LayerNorm(dim)
    def forward(self, x):
        return x + self.glu(self.ln(x))

class GMHResNet(nn.Module):
    def __init__(self, input_dim, num_classes):
        super().__init__()
        self.stem = nn.Linear(input_dim, 512)
        self.blocks = nn.Sequential(
            ResidualBlock(512),
            nn.Linear(512, 256),
            ResidualBlock(256),
            nn.Dropout(0.3)
        )
        self.head = nn.Linear(256, num_classes)

    def forward(self, x):
        x = torch.relu(self.stem(x))
        x = self.blocks(x)
        return self.head(x)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = GMHResNet(X_train_proc.shape[1], num_classes).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-2)
logit_adj = logit_adj.to(device)

# ===========================================
# 4️⃣ Training & Evaluation
# ===========================================
for epoch in range(20):
    model.train()
    for X_b, y_b in tqdm(train_loader, desc=f"Epoch {epoch+1}"):
        X_b, y_b = X_b.to(device), y_b.to(device)
        optimizer.zero_grad()
        logits = model(X_b)
        # Apply logit adjustment novelty: correct the bias toward majority classes
        loss = nn.functional.cross_entropy(logits - logit_adj, y_b)
        loss.backward()
        optimizer.step()

model.eval()
all_p, all_y = [], []
with torch.no_grad():
    for X_b, y_b in test_loader:
        out = model(X_b.to(device))
        all_p.extend(torch.argmax(out, dim=1).cpu().numpy())
        all_y.extend(y_b.numpy())

print(classification_report(all_y, all_p, labels=np.arange(num_classes), target_names=le.classes_, zero_division=0))

Epoch 1: 100%|██████████| 247/247 [00:01<00:00, 154.98it/s]
Epoch 2: 100%|██████████| 247/247 [00:01<00:00, 178.74it/s]
Epoch 3: 100%|██████████| 247/247 [00:01<00:00, 160.57it/s]
Epoch 4: 100%|██████████| 247/247 [00:01<00:00, 174.56it/s]
Epoch 5: 100%|██████████| 247/247 [00:01<00:00, 160.10it/s]
Epoch 6: 100%|██████████| 247/247 [00:01<00:00, 154.98it/s]
Epoch 7: 100%|██████████| 247/247 [00:01<00:00, 176.63it/s]
Epoch 8: 100%|██████████| 247/247 [00:01<00:00, 161.57it/s]
Epoch 9: 100%|██████████| 247/247 [00:01<00:00, 175.05it/s]
Epoch 10: 100%|██████████| 247/247 [00:01<00:00, 155.31it/s]
Epoch 11: 100%|██████████| 247/247 [00:01<00:00, 176.19it/s]
Epoch 12: 100%|██████████| 247/247 [00:01<00:00, 156.84it/s]
Epoch 13: 100%|██████████| 247/247 [00:01<00:00, 177.85it/s]
Epoch 14: 100%|██████████| 247/247 [00:01<00:00, 159.92it/s]
Epoch 15: 100%|██████████| 247/247 [00:01<00:00, 177.24it/s]
Epoch 16: 100%|██████████| 247/247 [00:01<00:00, 156.90it/s]
Epoch 17: 100%|██████████| 247/24

                 precision    recall  f1-score   support

           back       1.00      0.94      0.97       359
buffer_overflow       0.86      0.30      0.44        20
      ftp_write       0.01      0.33      0.02         3
   guess_passwd       0.00      0.00      0.00      1231
           imap       0.00      0.00      0.00         1
        ipsweep       0.89      0.98      0.93       141
           land       1.00      1.00      1.00         7
     loadmodule       1.00      1.00      1.00         2
       multihop       0.00      0.00      0.00        18
        neptune       1.00      0.99      1.00      4657
           nmap       1.00      0.99      0.99        73
         normal       0.81      0.97      0.88      9711
           perl       0.50      0.50      0.50         2
            phf       0.33      0.50      0.40         2
            pod       0.75      0.93      0.83        41
      portsweep       0.61      0.94      0.74       157
        rootkit       0.00    

In [24]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import classification_report
from tqdm import tqdm

# ===========================================
# 1️⃣ Stable Data Pipeline
# ===========================================
local_path = "/kaggle/input/nslkdd/" 
columns = [
    'duration', 'protocol_type', 'service', 'flag', 'src_bytes', 'dst_bytes', 'land',
    'wrong_fragment', 'urgent', 'hot', 'num_failed_logins', 'logged_in', 'num_compromised',
    'root_shell', 'su_attempted', 'num_root', 'num_file_creations', 'num_shells',
    'num_access_files', 'num_outbound_cmds', 'is_host_login', 'is_guest_login', 'count',
    'srv_count', 'serror_rate', 'srv_serror_rate', 'rerror_rate', 'srv_rerror_rate',
    'same_srv_rate', 'diff_srv_rate', 'srv_diff_host_rate', 'dst_host_count',
    'dst_host_srv_count', 'dst_host_same_srv_rate', 'dst_host_diff_srv_rate',
    'dst_host_same_src_port_rate', 'dst_host_srv_diff_host_rate', 'dst_host_serror_rate',
    'dst_host_srv_serror_rate', 'dst_host_rerror_rate', 'dst_host_srv_rerror_rate',
    'outcome', 'level'
]

df_train = pd.read_csv(local_path + "KDDTrain+.txt", header=None, names=columns)
df_test  = pd.read_csv(local_path + "KDDTest+.txt", header=None, names=columns)

df_train['outcome'] = df_train['outcome'].astype(str)
df_test['outcome'] = df_test['outcome'].astype(str)
train_labels = set(df_train['outcome'].unique())
df_test = df_test[df_test['outcome'].isin(train_labels)].reset_index(drop=True)

target = 'outcome'
cat_cols = ['protocol_type', 'service', 'flag']
num_cols = [c for c in columns if c not in cat_cols + [target, 'level']]

preprocessor = ColumnTransformer([
    ('num', StandardScaler(), num_cols),
    ('cat', OneHotEncoder(handle_unknown='ignore', sparse_output=False), cat_cols)
])

X_train_proc = preprocessor.fit_transform(df_train.drop(columns=[target, 'level']))
X_test_proc  = preprocessor.transform(df_test.drop(columns=[target, 'level']))

le = LabelEncoder()
y_train_enc = le.fit_transform(df_train[target])
y_test_enc  = le.transform(df_test[target])
num_classes = len(le.classes_)

# ===========================================
# 2️⃣ Sampler & Logit Adjustment Setup
# ===========================================
class_counts = np.bincount(y_train_enc)
# Logit adjustment: log(class_frequency). Helps adjust the decision boundary.
logit_adj = torch.tensor(class_counts + 1).float().log().to('cuda' if torch.cuda.is_available() else 'cpu')

class_weights = 1.0 / np.sqrt(class_counts + 1)
sampler = WeightedRandomSampler(weights=class_weights[y_train_enc], num_samples=len(y_train_enc), replacement=True)

train_loader = DataLoader(torch.utils.data.TensorDataset(torch.tensor(X_train_proc, dtype=torch.float32), 
                                                        torch.tensor(y_train_enc, dtype=torch.long)), 
                          batch_size=512, sampler=sampler)
test_loader = DataLoader(torch.utils.data.TensorDataset(torch.tensor(X_test_proc, dtype=torch.float32), 
                                                       torch.tensor(y_test_enc, dtype=torch.long)), 
                         batch_size=512, shuffle=False)

# ===========================================
# 3️⃣ Architecture: Gated Residual Net
# ===========================================
class GatedBlock(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.fc = nn.Linear(dim, dim * 2)
        self.ln = nn.LayerNorm(dim)

    def forward(self, x):
        res = x
        x = self.fc(x)
        x, gate = x.chunk(2, dim=-1)
        x = x * torch.sigmoid(gate)
        return self.ln(x + res)

class GatedResNet(nn.Module):
    def __init__(self, input_dim, num_classes):
        super().__init__()
        self.stem = nn.Linear(input_dim, 512)
        self.blocks = nn.Sequential(
            GatedBlock(512),
            nn.Linear(512, 256),
            GatedBlock(256),
            nn.Dropout(0.3)
        )
        self.head = nn.Linear(256, num_classes)

    def forward(self, x):
        x = torch.relu(self.stem(x))
        x = self.blocks(x)
        return self.head(x)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = GatedResNet(X_train_proc.shape[1], num_classes).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-2)

# ===========================================
# 4️⃣ Training with Logit Adjustment
# ===========================================
for epoch in range(20):
    model.train()
    for X_b, y_b in tqdm(train_loader, desc=f"Epoch {epoch+1}"):
        X_b, y_b = X_b.to(device), y_b.to(device)
        optimizer.zero_grad()
        logits = model(X_b)
        # Apply logit adjustment during loss to favor minority classes
        loss = nn.functional.cross_entropy(logits - 0.5 * logit_adj, y_b)
        loss.backward()
        optimizer.step()

# ===========================================
# 5️⃣ Final Evaluation
# ===========================================
model.eval()
all_p, all_y = [], []
with torch.no_grad():
    for X_b, y_b in test_loader:
        out = model(X_b.to(device))
        all_p.extend(torch.argmax(out, dim=1).cpu().numpy())
        all_y.extend(y_b.numpy())

print(classification_report(all_y, all_p, labels=np.arange(num_classes), target_names=le.classes_, zero_division=0))

Epoch 1: 100%|██████████| 247/247 [00:01<00:00, 156.08it/s]
Epoch 2: 100%|██████████| 247/247 [00:01<00:00, 175.84it/s]
Epoch 3: 100%|██████████| 247/247 [00:01<00:00, 156.02it/s]
Epoch 4: 100%|██████████| 247/247 [00:01<00:00, 174.84it/s]
Epoch 5: 100%|██████████| 247/247 [00:01<00:00, 154.22it/s]
Epoch 6: 100%|██████████| 247/247 [00:01<00:00, 156.84it/s]
Epoch 7: 100%|██████████| 247/247 [00:01<00:00, 174.81it/s]
Epoch 8: 100%|██████████| 247/247 [00:01<00:00, 158.87it/s]
Epoch 9: 100%|██████████| 247/247 [00:01<00:00, 176.45it/s]
Epoch 10: 100%|██████████| 247/247 [00:01<00:00, 159.97it/s]
Epoch 11: 100%|██████████| 247/247 [00:01<00:00, 176.81it/s]
Epoch 12: 100%|██████████| 247/247 [00:01<00:00, 154.73it/s]
Epoch 13: 100%|██████████| 247/247 [00:01<00:00, 175.57it/s]
Epoch 14: 100%|██████████| 247/247 [00:01<00:00, 159.22it/s]
Epoch 15: 100%|██████████| 247/247 [00:01<00:00, 178.40it/s]
Epoch 16: 100%|██████████| 247/247 [00:01<00:00, 158.88it/s]
Epoch 17: 100%|██████████| 247/24

                 precision    recall  f1-score   support

           back       0.99      0.90      0.94       359
buffer_overflow       0.86      0.30      0.44        20
      ftp_write       0.05      0.67      0.10         3
   guess_passwd       0.00      0.00      0.00      1231
           imap       0.00      0.00      0.00         1
        ipsweep       0.78      0.99      0.87       141
           land       0.00      0.00      0.00         7
     loadmodule       1.00      1.00      1.00         2
       multihop       0.00      0.00      0.00        18
        neptune       1.00      1.00      1.00      4657
           nmap       1.00      0.99      0.99        73
         normal       0.82      0.97      0.89      9711
           perl       0.50      0.50      0.50         2
            phf       0.50      0.50      0.50         2
            pod       0.72      0.95      0.82        41
      portsweep       0.76      0.90      0.82       157
        rootkit       0.00    

In [25]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import classification_report
from tqdm import tqdm

# ===========================================
# 1️⃣ Stable Data Pipeline (Fixed Mapping)
# ===========================================
local_path = "/kaggle/input/nslkdd/" 
columns = [
    'duration', 'protocol_type', 'service', 'flag', 'src_bytes', 'dst_bytes', 'land',
    'wrong_fragment', 'urgent', 'hot', 'num_failed_logins', 'logged_in', 'num_compromised',
    'root_shell', 'su_attempted', 'num_root', 'num_file_creations', 'num_shells',
    'num_access_files', 'num_outbound_cmds', 'is_host_login', 'is_guest_login', 'count',
    'srv_count', 'serror_rate', 'srv_serror_rate', 'rerror_rate', 'srv_rerror_rate',
    'same_srv_rate', 'diff_srv_rate', 'srv_diff_host_rate', 'dst_host_count',
    'dst_host_srv_count', 'dst_host_same_srv_rate', 'dst_host_diff_srv_rate',
    'dst_host_same_src_port_rate', 'dst_host_srv_diff_host_rate', 'dst_host_serror_rate',
    'dst_host_srv_serror_rate', 'dst_host_rerror_rate', 'dst_host_srv_rerror_rate',
    'outcome', 'level'
]

df_train = pd.read_csv(local_path + "KDDTrain+.txt", header=None, names=columns)
df_test  = pd.read_csv(local_path + "KDDTest+.txt", header=None, names=columns)

df_train['outcome'] = df_train['outcome'].astype(str)
df_test['outcome'] = df_test['outcome'].astype(str)
train_labels = set(df_train['outcome'].unique())
df_test = df_test[df_test['outcome'].isin(train_labels)].reset_index(drop=True)

target = 'outcome'
cat_cols = ['protocol_type', 'service', 'flag']
num_cols = [c for c in columns if c not in cat_cols + [target, 'level']]

preprocessor = ColumnTransformer([
    ('num', StandardScaler(), num_cols),
    ('cat', OneHotEncoder(handle_unknown='ignore', sparse_output=False), cat_cols)
])

X_train_proc = preprocessor.fit_transform(df_train.drop(columns=[target, 'level']))
X_test_proc  = preprocessor.transform(df_test.drop(columns=[target, 'level']))

le = LabelEncoder()
y_train_enc = le.fit_transform(df_train[target])
y_test_enc  = le.transform(df_test[target])
num_classes = len(le.classes_)

# ===========================================
# 2️⃣ Logit Adjustment & Sampler Setup
# ===========================================
class_counts = np.bincount(y_train_enc)
# Novelty: Logit Adjustment (LA) term - shifts decision boundary for minority classes
logit_adj = torch.tensor(class_counts + 1).float().log().to('cuda' if torch.cuda.is_available() else 'cpu')

class_weights = 1.0 / np.sqrt(class_counts + 1)
sampler = WeightedRandomSampler(weights=class_weights[y_train_enc], num_samples=len(y_train_enc), replacement=True)

train_loader = DataLoader(torch.utils.data.TensorDataset(torch.tensor(X_train_proc, dtype=torch.float32), 
                                                        torch.tensor(y_train_enc, dtype=torch.long)), 
                          batch_size=512, sampler=sampler)
test_loader = DataLoader(torch.utils.data.TensorDataset(torch.tensor(X_test_proc, dtype=torch.float32), 
                                                       torch.tensor(y_test_enc, dtype=torch.long)), 
                         batch_size=512, shuffle=False)

# ===========================================
# 3️⃣ Architecture: Gated Multi-Head Residual Net
# ===========================================
class GatedBlock(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.fc = nn.Linear(dim, dim * 2)
        self.ln = nn.LayerNorm(dim)

    def forward(self, x):
        res = x
        x = self.fc(x)
        content, gate = x.chunk(2, dim=-1)
        x = content * torch.sigmoid(gate) # GLU Gating
        return self.ln(x + res) # Residual Connection

class GMHResNet(nn.Module):
    def __init__(self, input_dim, num_classes):
        super().__init__()
        self.stem = nn.Linear(input_dim, 512)
        self.blocks = nn.Sequential(
            GatedBlock(512),
            nn.Linear(512, 256),
            GatedBlock(256),
            nn.Dropout(0.3)
        )
        self.head = nn.Linear(256, num_classes)

    def forward(self, x):
        x = torch.relu(self.stem(x))
        x = self.blocks(x)
        return self.head(x)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = GMHResNet(X_train_proc.shape[1], num_classes).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-2)

# ===========================================
# 4️⃣ Training with Logit Adjustment
# ===========================================
for epoch in range(20):
    model.train()
    for X_b, y_b in tqdm(train_loader, desc=f"Epoch {epoch+1}"):
        X_b, y_b = X_b.to(device), y_b.to(device)
        optimizer.zero_grad()
        logits = model(X_b)
        
        # Novelty: Applying logit adjustment in the loss to favor rare classes
        loss = nn.functional.cross_entropy(logits - 0.5 * logit_adj, y_b)
        
        loss.backward()
        optimizer.step()

# ===========================================
# 5️⃣ Evaluation
# ===========================================
model.eval()
all_p, all_y = [], []
with torch.no_grad():
    for X_b, y_b in test_loader:
        out = model(X_b.to(device))
        all_p.extend(torch.argmax(out, dim=1).cpu().numpy())
        all_y.extend(y_b.numpy())

print(classification_report(all_y, all_p, labels=np.arange(num_classes), target_names=le.classes_, zero_division=0))

Epoch 1: 100%|██████████| 247/247 [00:01<00:00, 171.57it/s]
Epoch 2: 100%|██████████| 247/247 [00:01<00:00, 157.15it/s]
Epoch 3: 100%|██████████| 247/247 [00:01<00:00, 178.38it/s]
Epoch 4: 100%|██████████| 247/247 [00:01<00:00, 158.70it/s]
Epoch 5: 100%|██████████| 247/247 [00:01<00:00, 178.12it/s]
Epoch 6: 100%|██████████| 247/247 [00:01<00:00, 158.17it/s]
Epoch 7: 100%|██████████| 247/247 [00:01<00:00, 159.23it/s]
Epoch 8: 100%|██████████| 247/247 [00:01<00:00, 173.63it/s]
Epoch 9: 100%|██████████| 247/247 [00:01<00:00, 159.86it/s]
Epoch 10: 100%|██████████| 247/247 [00:01<00:00, 176.42it/s]
Epoch 11: 100%|██████████| 247/247 [00:01<00:00, 159.17it/s]
Epoch 12: 100%|██████████| 247/247 [00:01<00:00, 178.02it/s]
Epoch 13: 100%|██████████| 247/247 [00:01<00:00, 158.61it/s]
Epoch 14: 100%|██████████| 247/247 [00:01<00:00, 178.35it/s]
Epoch 15: 100%|██████████| 247/247 [00:01<00:00, 155.84it/s]
Epoch 16: 100%|██████████| 247/247 [00:01<00:00, 173.40it/s]
Epoch 17: 100%|██████████| 247/24

                 precision    recall  f1-score   support

           back       1.00      0.74      0.85       359
buffer_overflow       0.00      0.00      0.00        20
      ftp_write       0.04      0.33      0.07         3
   guess_passwd       1.00      0.00      0.00      1231
           imap       0.00      0.00      0.00         1
        ipsweep       0.98      0.98      0.98       141
           land       1.00      1.00      1.00         7
     loadmodule       1.00      1.00      1.00         2
       multihop       0.00      0.00      0.00        18
        neptune       1.00      0.99      0.99      4657
           nmap       1.00      1.00      1.00        73
         normal       0.79      0.98      0.87      9711
           perl       0.50      0.50      0.50         2
            phf       0.50      0.50      0.50         2
            pod       0.72      0.95      0.82        41
      portsweep       0.74      0.89      0.81       157
        rootkit       0.00    

In [26]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import classification_report
from tqdm import tqdm

# ===========================================
# 1️⃣ Stable Data Pipeline
# ===========================================
local_path = "/kaggle/input/nslkdd/" 
columns = [
    'duration', 'protocol_type', 'service', 'flag', 'src_bytes', 'dst_bytes', 'land',
    'wrong_fragment', 'urgent', 'hot', 'num_failed_logins', 'logged_in', 'num_compromised',
    'root_shell', 'su_attempted', 'num_root', 'num_file_creations', 'num_shells',
    'num_access_files', 'num_outbound_cmds', 'is_host_login', 'is_guest_login', 'count',
    'srv_count', 'serror_rate', 'srv_serror_rate', 'rerror_rate', 'srv_rerror_rate',
    'same_srv_rate', 'diff_srv_rate', 'srv_diff_host_rate', 'dst_host_count',
    'dst_host_srv_count', 'dst_host_same_srv_rate', 'dst_host_diff_srv_rate',
    'dst_host_same_src_port_rate', 'dst_host_srv_diff_host_rate', 'dst_host_serror_rate',
    'dst_host_srv_serror_rate', 'dst_host_rerror_rate', 'dst_host_srv_rerror_rate',
    'outcome', 'level'
]

df_train = pd.read_csv(local_path + "KDDTrain+.txt", header=None, names=columns)
df_test  = pd.read_csv(local_path + "KDDTest+.txt", header=None, names=columns)

df_train['outcome'] = df_train['outcome'].astype(str)
df_test['outcome'] = df_test['outcome'].astype(str)
train_labels = set(df_train['outcome'].unique())
df_test = df_test[df_test['outcome'].isin(train_labels)].reset_index(drop=True)

target = 'outcome'
cat_cols = ['protocol_type', 'service', 'flag']
num_cols = [c for c in columns if c not in cat_cols + [target, 'level']]

preprocessor = ColumnTransformer([
    ('num', StandardScaler(), num_cols),
    ('cat', OneHotEncoder(handle_unknown='ignore', sparse_output=False), cat_cols)
])

X_train_proc = preprocessor.fit_transform(df_train.drop(columns=[target, 'level']))
X_test_proc  = preprocessor.transform(df_test.drop(columns=[target, 'level']))

le = LabelEncoder()
y_train_enc = le.fit_transform(df_train[target])
y_test_enc  = le.transform(df_test[target])
num_classes = len(le.classes_)

# ===========================================
# 2️⃣ Sampler & Logit Adjustment Setup
# ===========================================
class_counts = np.bincount(y_train_enc)
# Novelty: Logit adjustment term - log(class_frequency)
logit_adj = torch.tensor(class_counts + 1).float().log().to('cuda' if torch.cuda.is_available() else 'cpu')

class_weights = 1.0 / np.sqrt(class_counts + 1)
sampler = WeightedRandomSampler(weights=class_weights[y_train_enc], num_samples=len(y_train_enc), replacement=True)

train_loader = DataLoader(torch.utils.data.TensorDataset(torch.tensor(X_train_proc, dtype=torch.float32), 
                                                        torch.tensor(y_train_enc, dtype=torch.long)), 
                          batch_size=512, sampler=sampler)
test_loader = DataLoader(torch.utils.data.TensorDataset(torch.tensor(X_test_proc, dtype=torch.float32), 
                                                       torch.tensor(y_test_enc, dtype=torch.long)), 
                         batch_size=512, shuffle=False)

# ===========================================
# 3️⃣ Architecture: Gated Multi-Head Residual Net
# ===========================================
class GatedBlock(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.fc = nn.Linear(dim, dim * 2)
        self.ln = nn.LayerNorm(dim)

    def forward(self, x):
        res = x
        x = self.fc(x)
        content, gate = x.chunk(2, dim=-1)
        x = content * torch.sigmoid(gate) # GLU Gating novelty
        return self.ln(x + res) # Residual Connection novelty

class GMHResNet(nn.Module):
    def __init__(self, input_dim, num_classes):
        super().__init__()
        self.stem = nn.Linear(input_dim, 512)
        self.blocks = nn.Sequential(
            GatedBlock(512),
            nn.Linear(512, 256),
            GatedBlock(256),
            nn.Dropout(0.3)
        )
        self.head = nn.Linear(256, num_classes)

    def forward(self, x):
        x = torch.relu(self.stem(x))
        x = self.blocks(x)
        return self.head(x)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = GMHResNet(X_train_proc.shape[1], num_classes).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-2)

# ===========================================
# 4️⃣ Training with Logit Adjustment
# ===========================================
for epoch in range(20):
    model.train()
    for X_b, y_b in tqdm(train_loader, desc=f"Epoch {epoch+1}"):
        X_b, y_b = X_b.to(device), y_b.to(device)
        optimizer.zero_grad()
        logits = model(X_b)
        
        # Applying Logit Adjustment in loss calculation
        loss = nn.functional.cross_entropy(logits - 0.5 * logit_adj, y_b)
        
        loss.backward()
        optimizer.step()

# ===========================================
# 5️⃣ Evaluation
# ===========================================
model.eval()
all_p, all_y = [], []
with torch.no_grad():
    for X_b, y_b in test_loader:
        out = model(X_b.to(device))
        all_p.extend(torch.argmax(out, dim=1).cpu().numpy())
        all_y.extend(y_b.numpy())

print(classification_report(all_y, all_p, labels=np.arange(num_classes), target_names=le.classes_, zero_division=0))

Epoch 1: 100%|██████████| 247/247 [00:01<00:00, 176.64it/s]
Epoch 2: 100%|██████████| 247/247 [00:01<00:00, 154.03it/s]
Epoch 3: 100%|██████████| 247/247 [00:01<00:00, 172.10it/s]
Epoch 4: 100%|██████████| 247/247 [00:01<00:00, 155.95it/s]
Epoch 5: 100%|██████████| 247/247 [00:01<00:00, 175.88it/s]
Epoch 6: 100%|██████████| 247/247 [00:01<00:00, 157.34it/s]
Epoch 7: 100%|██████████| 247/247 [00:01<00:00, 176.42it/s]
Epoch 8: 100%|██████████| 247/247 [00:01<00:00, 155.08it/s]
Epoch 9: 100%|██████████| 247/247 [00:01<00:00, 177.62it/s]
Epoch 10: 100%|██████████| 247/247 [00:01<00:00, 157.68it/s]
Epoch 11: 100%|██████████| 247/247 [00:01<00:00, 176.71it/s]
Epoch 12: 100%|██████████| 247/247 [00:01<00:00, 156.82it/s]
Epoch 13: 100%|██████████| 247/247 [00:01<00:00, 173.46it/s]
Epoch 14: 100%|██████████| 247/247 [00:01<00:00, 157.72it/s]
Epoch 15: 100%|██████████| 247/247 [00:01<00:00, 156.00it/s]
Epoch 16: 100%|██████████| 247/247 [00:01<00:00, 176.29it/s]
Epoch 17: 100%|██████████| 247/24

                 precision    recall  f1-score   support

           back       0.99      0.91      0.95       359
buffer_overflow       0.80      0.20      0.32        20
      ftp_write       0.02      0.67      0.04         3
   guess_passwd       0.00      0.00      0.00      1231
           imap       0.00      0.00      0.00         1
        ipsweep       0.95      0.98      0.97       141
           land       1.00      1.00      1.00         7
     loadmodule       1.00      1.00      1.00         2
       multihop       0.00      0.00      0.00        18
        neptune       0.98      1.00      0.99      4657
           nmap       1.00      0.99      0.99        73
         normal       0.81      0.97      0.88      9711
           perl       0.50      0.50      0.50         2
            phf       0.50      0.50      0.50         2
            pod       0.72      0.95      0.82        41
      portsweep       0.61      0.96      0.75       157
        rootkit       0.00    

In [28]:
import pandas as pd
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import classification_report
from tqdm import tqdm

# ===========================================
# 1️⃣ Stable Pipeline: Data & Mapping
# ===========================================
local_path = "/kaggle/input/nslkdd/" 
columns = [
    'duration', 'protocol_type', 'service', 'flag', 'src_bytes', 'dst_bytes', 'land',
    'wrong_fragment', 'urgent', 'hot', 'num_failed_logins', 'logged_in', 'num_compromised',
    'root_shell', 'su_attempted', 'num_root', 'num_file_creations', 'num_shells',
    'num_access_files', 'num_outbound_cmds', 'is_host_login', 'is_guest_login', 'count',
    'srv_count', 'serror_rate', 'srv_serror_rate', 'rerror_rate', 'srv_rerror_rate',
    'same_srv_rate', 'diff_srv_rate', 'srv_diff_host_rate', 'dst_host_count',
    'dst_host_srv_count', 'dst_host_same_srv_rate', 'dst_host_diff_srv_rate',
    'dst_host_same_src_port_rate', 'dst_host_srv_diff_host_rate', 'dst_host_serror_rate',
    'dst_host_srv_serror_rate', 'dst_host_rerror_rate', 'dst_host_srv_rerror_rate',
    'outcome', 'level'
]

df_train = pd.read_csv(local_path + "KDDTrain+.txt", header=None, names=columns)
df_test  = pd.read_csv(local_path + "KDDTest+.txt", header=None, names=columns)

df_train['outcome'] = df_train['outcome'].astype(str)
df_test['outcome'] = df_test['outcome'].astype(str)
train_labels = set(df_train['outcome'].unique())
df_test = df_test[df_test['outcome'].isin(train_labels)].reset_index(drop=True)

target = 'outcome'
cat_cols = ['protocol_type', 'service', 'flag']
num_cols = [c for c in columns if c not in cat_cols + [target, 'level']]

# Stable ColumnTransformer
preprocessor = ColumnTransformer([
    ('num', StandardScaler(), num_cols),
    ('cat', OneHotEncoder(handle_unknown='ignore', sparse_output=False), cat_cols)
])

X_train_proc = preprocessor.fit_transform(df_train.drop(columns=[target, 'level']))
X_test_proc  = preprocessor.transform(df_test.drop(columns=[target, 'level']))

le = LabelEncoder()
y_train_enc = le.fit_transform(df_train[target])
y_test_enc  = le.transform(df_test[target])
num_classes = len(le.classes_)

# ===========================================
# 2️⃣ Balanced Batch Sampling & Logit Adj
# ===========================================
class_counts = np.bincount(y_train_enc)
# Novelty: Logit Adjustment shifts boundary for R2L/U2R
logit_adj = torch.tensor(class_counts + 1).float().log().to('cuda' if torch.cuda.is_available() else 'cpu')

# Stronger balancing (1/n) to force minority class learning
class_weights = 1.0 / (class_counts + 1e-6)
sample_weights = class_weights[y_train_enc]

sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(y_train_enc), replacement=True)

train_loader = DataLoader(torch.utils.data.TensorDataset(torch.tensor(X_train_proc, dtype=torch.float32), 
                                                        torch.tensor(y_train_enc, dtype=torch.long)), 
                          batch_size=512, sampler=sampler)
test_loader = DataLoader(torch.utils.data.TensorDataset(torch.tensor(X_test_proc, dtype=torch.float32), 
                                                       torch.tensor(y_test_enc, dtype=torch.long)), 
                         batch_size=512, shuffle=False)

# ===========================================
# 3️⃣ Architecture: Gated Residual Network
# ===========================================
class GLUBlock(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.fc = nn.Linear(dim, dim * 2)
        self.ln = nn.LayerNorm(dim)
    def forward(self, x):
        res = x
        x = self.fc(x)
        content, gate = x.chunk(2, dim=-1)
        x = content * torch.sigmoid(gate)
        return self.ln(x + res) # Residual connection

class GDPRNet(nn.Module):
    def __init__(self, input_dim, num_classes):
        super().__init__()
        self.stem = nn.Linear(input_dim, 1024)
        self.blocks = nn.Sequential(
            GLUBlock(1024),
            nn.Linear(1024, 512),
            GLUBlock(512),
            nn.Dropout(0.4)
        )
        self.head = nn.Linear(512, num_classes)
    def forward(self, x):
        x = F.gelu(self.stem(x))
        x = self.blocks(x)
        return self.head(x)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = GDPRNet(X_train_proc.shape[1], num_classes).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=0.05)

# ===========================================
# 4️⃣ Training with Boundary Refinement
# ===========================================
for epoch in range(25):
    model.train()
    for X_b, y_b in tqdm(train_loader, desc=f"Epoch {epoch+1}"):
        X_b, y_b = X_b.to(device), y_b.to(device)
        optimizer.zero_grad()
        logits = model(X_b)
        # Novelty: Logit Adjustment in CrossEntropy
        loss = F.cross_entropy(logits - 0.5 * logit_adj, y_b, label_smoothing=0.1)
        loss.backward()
        optimizer.step()

# ===========================================
# 5️⃣ Q1 Evaluation
# ===========================================
model.eval()
all_p, all_y = [], []
with torch.no_grad():
    for X_b, y_b in test_loader:
        out = model(X_b.to(device))
        all_p.extend(torch.argmax(out, dim=1).cpu().numpy())
        all_y.extend(y_b.numpy())

print(classification_report(all_y, all_p, labels=np.arange(num_classes), 
                            target_names=le.classes_, zero_division=0))

Epoch 1: 100%|██████████| 247/247 [00:01<00:00, 149.06it/s]
Epoch 2: 100%|██████████| 247/247 [00:01<00:00, 171.27it/s]
Epoch 3: 100%|██████████| 247/247 [00:01<00:00, 151.38it/s]
Epoch 4: 100%|██████████| 247/247 [00:01<00:00, 170.72it/s]
Epoch 5: 100%|██████████| 247/247 [00:01<00:00, 149.25it/s]
Epoch 6: 100%|██████████| 247/247 [00:01<00:00, 169.23it/s]
Epoch 7: 100%|██████████| 247/247 [00:01<00:00, 148.59it/s]
Epoch 8: 100%|██████████| 247/247 [00:01<00:00, 167.85it/s]
Epoch 9: 100%|██████████| 247/247 [00:01<00:00, 151.14it/s]
Epoch 10: 100%|██████████| 247/247 [00:01<00:00, 168.81it/s]
Epoch 11: 100%|██████████| 247/247 [00:01<00:00, 153.15it/s]
Epoch 12: 100%|██████████| 247/247 [00:01<00:00, 150.18it/s]
Epoch 13: 100%|██████████| 247/247 [00:01<00:00, 170.31it/s]
Epoch 14: 100%|██████████| 247/247 [00:01<00:00, 153.70it/s]
Epoch 15: 100%|██████████| 247/247 [00:01<00:00, 170.84it/s]
Epoch 16: 100%|██████████| 247/247 [00:01<00:00, 153.91it/s]
Epoch 17: 100%|██████████| 247/24

                 precision    recall  f1-score   support

           back       0.47      1.00      0.64       359
buffer_overflow       0.86      0.30      0.44        20
      ftp_write       0.06      0.33      0.11         3
   guess_passwd       1.00      0.00      0.00      1231
           imap       0.00      0.00      0.00         1
        ipsweep       0.85      0.96      0.90       141
           land       1.00      1.00      1.00         7
     loadmodule       1.00      1.00      1.00         2
       multihop       0.00      0.00      0.00        18
        neptune       0.99      1.00      1.00      4657
           nmap       0.95      0.99      0.97        73
         normal       0.80      0.93      0.86      9711
           perl       0.50      0.50      0.50         2
            phf       0.50      0.50      0.50         2
            pod       0.72      0.95      0.82        41
      portsweep       0.76      0.87      0.81       157
        rootkit       0.02    