In [13]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily enumerate every file beneath the Kaggle input directory and print
# its full path, one per line, in the order os.walk visits them.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/nsl-kdd-augmented/smote_augmented.csv
/kaggle/input/nslkdd/KDDTest+.arff
/kaggle/input/nslkdd/KDDTest-21.arff
/kaggle/input/nslkdd/KDDTest1.jpg
/kaggle/input/nslkdd/KDDTrain+.txt
/kaggle/input/nslkdd/KDDTrain+_20Percent.txt
/kaggle/input/nslkdd/KDDTest-21.txt
/kaggle/input/nslkdd/KDDTest+.txt
/kaggle/input/nslkdd/KDDTrain+.arff
/kaggle/input/nslkdd/index.html
/kaggle/input/nslkdd/KDDTrain+_20Percent.arff
/kaggle/input/nslkdd/KDDTrain1.jpg
/kaggle/input/nslkdd/nsl-kdd/KDDTest+.arff
/kaggle/input/nslkdd/nsl-kdd/KDDTest-21.arff
/kaggle/input/nslkdd/nsl-kdd/KDDTest1.jpg
/kaggle/input/nslkdd/nsl-kdd/KDDTrain+.txt
/kaggle/input/nslkdd/nsl-kdd/KDDTrain+_20Percent.txt
/kaggle/input/nslkdd/nsl-kdd/KDDTest-21.txt
/kaggle/input/nslkdd/nsl-kdd/KDDTest+.txt
/kaggle/input/nslkdd/nsl-kdd/KDDTrain+.arff
/kaggle/input/nslkdd/nsl-kdd/index.html
/kaggle/input/nslkdd/nsl-kdd/KDDTrain+_20Percent.arff
/kaggle/input/nslkdd/nsl-kdd/KDDTrain1.jpg


In [48]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from xgboost import XGBClassifier
from sklearn.metrics import classification_report
import numpy as np

# ===========================================
# 1️⃣ Path A: The PSMG Specialist (Updated Logic)
# ===========================================
def final_psmg_fusion(X_proc, df_orig, *, lso_model=None, xgb_model=None,
                      label_encoder=None, torch_device=None):
    """Fuse the neural specialist and the anchor classifier into class predictions.

    Every row receives the class chosen by the FIRST rule that matches,
    evaluated in this order:

    1. DoS anchor   - anchor probability of 'back' > 0.4 and ``src_bytes`` > 5000
                      -> 'back'.
    2. U2R guard    - (``root_shell`` > 0 or ``num_root`` > 0) and
                      ``protocol_type`` == 'tcp' -> whichever of rootkit /
                      buffer_overflow / loadmodule / perl the specialist rates highest.
    3. R2L guard    - ``num_failed_logins`` > 0 or ``hot`` > 0 -> whichever of
                      guess_passwd / warezmaster / ftp_write the specialist rates highest.
    4. Stability    - anchor's top probability > 0.95 -> the anchor's argmax.
    5. Default      - argmax of the equal-weight (0.5 / 0.5) average of both
                      probability vectors.

    Parameters
    ----------
    X_proc : array-like
        Feature matrix fed to both models (converted with ``torch.tensor`` for
        the specialist, passed as-is to ``predict_proba``).
    df_orig : pandas.DataFrame
        Frame providing ``protocol_type``, ``src_bytes``, ``root_shell``,
        ``num_root``, ``num_failed_logins`` and ``hot``. Rows are matched to
        ``X_proc`` by position, not by index label.
    lso_model, xgb_model, label_encoder, torch_device : optional, keyword-only
        Explicit replacements for the notebook globals ``model_lso``,
        ``anchor_model``, ``le`` and ``device``. When left as ``None`` the
        corresponding global is used, so existing two-argument calls behave
        as before.

    Returns
    -------
    numpy.ndarray
        One encoded class index per row of ``X_proc``.

    Raises
    ------
    ValueError
        If ``X_proc`` and ``df_orig`` do not have the same number of rows.

    Notes
    -----
    Assumes the probability columns of both models follow the label encoder's
    class order - TODO confirm against the training cells.
    """
    # Resolve collaborators; the globals are only touched when no override is given.
    lso_model = model_lso if lso_model is None else lso_model
    xgb_model = anchor_model if xgb_model is None else xgb_model
    label_encoder = le if label_encoder is None else label_encoder
    torch_device = device if torch_device is None else torch_device

    # Rows are paired positionally, so a length mismatch would silently
    # attach the wrong metadata to each prediction.
    n_rows = len(X_proc)
    if len(df_orig) != n_rows:
        raise ValueError(
            f"X_proc has {n_rows} rows but df_orig has {len(df_orig)}; "
            "rows must correspond one-to-one."
        )

    lso_model.eval()
    with torch.no_grad():
        features = torch.tensor(X_proc, dtype=torch.float32).to(torch_device)
        logits = lso_model(features)
        # Tempering: scaling logits by 1.5 sharpens the softmax distribution.
        probs_l = torch.softmax(logits * 1.5, dim=1).cpu().numpy()

    probs_x = np.asarray(xgb_model.predict_proba(X_proc))

    # Encoded indices of the classes the rules refer to.
    idx_back = label_encoder.transform(['back'])[0]
    u2r_idx = np.asarray(
        label_encoder.transform(['rootkit', 'buffer_overflow', 'loadmodule', 'perl'])
    )
    r2l_idx = np.asarray(
        label_encoder.transform(['guess_passwd', 'warezmaster', 'ftp_write'])
    )

    # --- Rule masks, computed once per column instead of once per row ---
    is_back = (probs_x[:, idx_back] > 0.4) & (df_orig['src_bytes'] > 5000).to_numpy()
    is_u2r = (
        ((df_orig['root_shell'] > 0) | (df_orig['num_root'] > 0))
        & (df_orig['protocol_type'] == 'tcp')
    ).to_numpy()
    is_r2l = ((df_orig['num_failed_logins'] > 0) | (df_orig['hot'] > 0)).to_numpy()
    is_stable = probs_x.max(axis=1) > 0.95

    # --- Candidate answer of each rule for every row ---
    back_choice = np.full(n_rows, idx_back)
    u2r_choice = u2r_idx[np.argmax(probs_l[:, u2r_idx], axis=1)]
    r2l_choice = r2l_idx[np.argmax(probs_l[:, r2l_idx], axis=1)]
    anchor_choice = np.argmax(probs_x, axis=1)
    # Default: both models are weighted equally.
    blend_choice = np.argmax(0.5 * probs_x + 0.5 * probs_l, axis=1)

    # np.select takes the first condition that holds, mirroring an if/elif chain.
    return np.select(
        [is_back, is_u2r, is_r2l, is_stable],
        [back_choice, u2r_choice, r2l_choice, anchor_choice],
        default=blend_choice,
    )

# ===========================================
# 2️⃣ Execution
# ===========================================
# Run the rule-based fusion over the test features.
print("Executing PSMG-Net Final Fusion...")
final_preds = final_psmg_fusion(X_test_proc, df_test)

# Report only on labels that appear in the ground truth or in the predictions,
# so each entry of target_names lines up with the matching entry of labels.
unique_test_classes = np.union1d(y_test_enc, final_preds)
target_names = [le.classes_[label] for label in unique_test_classes]

print("\n--- PSMG-Net Q1 FINAL RESULTS ---")
print(
    classification_report(
        y_test_enc,
        final_preds,
        labels=unique_test_classes,
        target_names=target_names,
        zero_division=0,
    )
)

Executing PSMG-Net Final Fusion...

--- PSMG-Net Q1 FINAL RESULTS ---
                 precision    recall  f1-score   support

           back       1.00      0.95      0.98       359
buffer_overflow       0.12      0.05      0.07        20
      ftp_write       0.01      0.33      0.02         3
   guess_passwd       0.85      0.38      0.53      1231
           imap       0.00      0.00      0.00         1
        ipsweep       0.99      0.99      0.99       141
           land       1.00      1.00      1.00         7
     loadmodule       0.00      0.00      0.00         2
       multihop       0.00      0.00      0.00        18
        neptune       1.00      1.00      1.00      4657
           nmap       1.00      1.00      1.00        73
         normal       0.90      0.97      0.93      9711
           perl       0.20      0.50      0.29         2
            phf       0.00      0.00      0.00         2
            pod       0.72      0.95      0.82        41
      portsweep  