In [13]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/nsl-kdd-augmented/smote_augmented.csv
/kaggle/input/nslkdd/KDDTest+.arff
/kaggle/input/nslkdd/KDDTest-21.arff
/kaggle/input/nslkdd/KDDTest1.jpg
/kaggle/input/nslkdd/KDDTrain+.txt
/kaggle/input/nslkdd/KDDTrain+_20Percent.txt
/kaggle/input/nslkdd/KDDTest-21.txt
/kaggle/input/nslkdd/KDDTest+.txt
/kaggle/input/nslkdd/KDDTrain+.arff
/kaggle/input/nslkdd/index.html
/kaggle/input/nslkdd/KDDTrain+_20Percent.arff
/kaggle/input/nslkdd/KDDTrain1.jpg
/kaggle/input/nslkdd/nsl-kdd/KDDTest+.arff
/kaggle/input/nslkdd/nsl-kdd/KDDTest-21.arff
/kaggle/input/nslkdd/nsl-kdd/KDDTest1.jpg
/kaggle/input/nslkdd/nsl-kdd/KDDTrain+.txt
/kaggle/input/nslkdd/nsl-kdd/KDDTrain+_20Percent.txt
/kaggle/input/nslkdd/nsl-kdd/KDDTest-21.txt
/kaggle/input/nslkdd/nsl-kdd/KDDTest+.txt
/kaggle/input/nslkdd/nsl-kdd/KDDTrain+.arff
/kaggle/input/nslkdd/nsl-kdd/index.html
/kaggle/input/nslkdd/nsl-kdd/KDDTrain+_20Percent.arff
/kaggle/input/nslkdd/nsl-kdd/KDDTrain1.jpg


In [14]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, ConcatDataset
from sklearn.preprocessing import RobustScaler, LabelEncoder
from sklearn.metrics import classification_report, f1_score
from sklearn.utils.class_weight import compute_class_weight
from tqdm import tqdm

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {DEVICE}")

columns = [
    'duration', 'protocol_type', 'service', 'flag', 'src_bytes', 'dst_bytes', 'land',
    'wrong_fragment', 'urgent', 'hot', 'num_failed_logins', 'logged_in', 'num_compromised',
    'root_shell', 'su_attempted', 'num_root', 'num_file_creations', 'num_shells',
    'num_access_files', 'num_outbound_cmds', 'is_host_login', 'is_guest_login', 'count',
    'srv_count', 'serror_rate', 'srv_serror_rate', 'rerror_rate', 'srv_rerror_rate',
    'same_srv_rate', 'diff_srv_rate', 'srv_diff_host_rate', 'dst_host_count',
    'dst_host_srv_count', 'dst_host_same_srv_rate', 'dst_host_diff_srv_rate',
    'dst_host_same_src_port_rate', 'dst_host_srv_diff_host_rate', 'dst_host_serror_rate',
    'dst_host_srv_serror_rate', 'dst_host_rerror_rate', 'dst_host_srv_rerror_rate',
    'outcome', 'level'
]

# ===========================================
# CRITICAL: LOAD ORIGINAL DATA FIRST
# ===========================================
print("Loading ORIGINAL unbalanced training data...")

# You'll need to load your original training data before SMOTE
# If you don't have it, we can use a subset of SMOTE data with duplicates removed
# For now, I'll assume you have the original file - adjust path as needed
try:
    df_train_orig = pd.read_csv("/kaggle/input/nslkdd/KDDTrain+.txt", header=None)
    df_train_orig.columns = columns
    print("✓ Loaded original training data")
    use_original = True
except:
    print("⚠ Original data not found, will use SMOTE with weighting")
    use_original = False

df_train_smote = pd.read_csv("/kaggle/input/nsl-kdd-augmented/smote_augmented.csv")
df_test = pd.read_csv("/kaggle/input/nslkdd/KDDTest+.txt", header=None)
df_test.columns = columns

# Filter test to only classes in training
if use_original:
    train_labels = set(df_train_orig['outcome'].unique())
else:
    train_labels = set(df_train_smote['outcome'].unique())
    
df_test = df_test[df_test['outcome'].isin(train_labels)].reset_index(drop=True)

cat_cols = ['protocol_type', 'service', 'flag']
num_cols = [c for c in df_train_smote.columns if c not in cat_cols + ['outcome', 'level']]

# ===========================================
# PREPROCESSING
# ===========================================

def preprocess_data(df, le_cat_dict=None, le_target=None, scaler=None, fit=False):
    """Preprocess data with optional fitting"""
    df = df.copy()
    
    # Categorical encoding
    if fit:
        le_cat_dict = {}
        cat_dims = []
        for col in cat_cols:
            le_c = LabelEncoder()
            df[col] = le_c.fit_transform(df[col].astype(str))
            le_cat_dict[col] = le_c
            cat_dims.append(len(le_c.classes_))
    else:
        cat_dims = []
        for col in cat_cols:
            train_classes = {cls: i for i, cls in enumerate(le_cat_dict[col].classes_)}
            df[col] = df[col].map(lambda x: train_classes.get(str(x), 0))
            cat_dims.append(len(le_cat_dict[col].classes_))
    
    # Numerical scaling
    if fit:
        scaler = RobustScaler()
        X_num = scaler.fit_transform(df[num_cols]).astype(np.float32)
    else:
        X_num = scaler.transform(df[num_cols]).astype(np.float32)
    
    # Target encoding
    if fit:
        le_target = LabelEncoder()
        y = le_target.fit_transform(df['outcome'])
    else:
        y = le_target.transform(df['outcome'])
    
    X_cat = df[cat_cols].values
    
    if fit:
        return X_cat, X_num, y, cat_dims, le_cat_dict, le_target, scaler
    else:
        return X_cat, X_num, y

# Fit on SMOTE data (has all classes)
X_cat_smote, X_num_smote, y_smote, cat_dims, le_cat_dict, le_target, scaler = \
    preprocess_data(df_train_smote, fit=True)

if use_original:
    X_cat_orig, X_num_orig, y_orig = preprocess_data(df_train_orig, le_cat_dict, le_target, scaler, fit=False)
    
X_cat_test, X_num_test, y_test = preprocess_data(df_test, le_cat_dict, le_target, scaler, fit=False)

num_classes = len(le_target.classes_)

if use_original:
    orig_class_counts = np.bincount(y_orig, minlength=num_classes)
    print("\nOriginal data distribution:")
    for i, (name, count) in enumerate(zip(le_target.classes_, orig_class_counts)):
        print(f"{name:20s}: {count:8d}")

test_class_counts = np.bincount(y_test, minlength=num_classes)
print(f"\nTest samples: {len(y_test)}")

# ===========================================
# ENSEMBLE ARCHITECTURE
# ===========================================

class AttentiveResidualBlock(nn.Module):
    """Residual block with channel attention"""
    def __init__(self, dim, dropout=0.3):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(dim, dim * 2),
            nn.BatchNorm1d(dim * 2),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(dim * 2, dim),
            nn.BatchNorm1d(dim)
        )
        # Channel attention
        self.attention = nn.Sequential(
            nn.Linear(dim, dim // 4),
            nn.ReLU(),
            nn.Linear(dim // 4, dim),
            nn.Sigmoid()
        )
        
    def forward(self, x):
        residual = x
        out = self.net(x)
        # Apply attention
        attn = self.attention(out)
        out = out * attn
        return F.gelu(out + residual)

class HybridNet(nn.Module):
    """
    Architecture optimized for hybrid training:
    1. Learn from real data first
    2. Fine-tune with synthetic augmentation
    """
    def __init__(self, cat_dims, num_feat_dim, num_classes, emb_dim=48, hidden_dim=384):
        super().__init__()
        
        # Categorical embeddings
        self.embeddings = nn.ModuleList([
            nn.Embedding(d, emb_dim) for d in cat_dims
        ])
        cat_total = emb_dim * len(cat_dims)
        
        # Input layers
        self.cat_proj = nn.Sequential(
            nn.BatchNorm1d(cat_total),
            nn.Linear(cat_total, hidden_dim // 2),
            nn.BatchNorm1d(hidden_dim // 2),
            nn.GELU(),
            nn.Dropout(0.3)
        )
        
        self.num_proj = nn.Sequential(
            nn.BatchNorm1d(num_feat_dim),
            nn.Linear(num_feat_dim, hidden_dim // 2),
            nn.BatchNorm1d(hidden_dim // 2),
            nn.GELU(),
            nn.Dropout(0.3)
        )
        
        # Fusion
        self.fusion = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.GELU(),
            nn.Dropout(0.3)
        )
        
        # Deep blocks with attention
        self.blocks = nn.ModuleList([
            AttentiveResidualBlock(hidden_dim, dropout=0.35) for _ in range(5)
        ])
        
        # Multi-head output
        self.head1 = nn.Sequential(
            nn.Dropout(0.4),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.BatchNorm1d(hidden_dim // 2),
            nn.GELU(),
            nn.Dropout(0.4),
            nn.Linear(hidden_dim // 2, num_classes)
        )
        
        self.head2 = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(hidden_dim, num_classes)
        )
        
    def forward(self, x_cat, x_num, return_features=False):
        # Process inputs
        cat_emb = torch.cat([emb(x_cat[:, i]) for i, emb in enumerate(self.embeddings)], dim=1)
        cat_feat = self.cat_proj(cat_emb)
        num_feat = self.num_proj(x_num)
        
        # Fuse
        x = torch.cat([cat_feat, num_feat], dim=1)
        x = self.fusion(x)
        
        # Deep processing
        for block in self.blocks:
            x = block(x)
        
        # Two heads for ensemble
        out1 = self.head1(x)
        out2 = self.head2(x)
        
        if return_features:
            return out1, out2, x
        return out1, out2

# ===========================================
# ADVANCED FOCAL LOSS
# ===========================================

class AdaptiveFocalLoss(nn.Module):
    """Focal loss with adaptive gamma based on class difficulty"""
    def __init__(self, class_counts, base_gamma=2.0):
        super().__init__()
        # Compute class weights
        weights = 1.0 / np.power(class_counts, 0.5)
        weights = weights / weights.min()
        self.weights = torch.tensor(weights, dtype=torch.float32)
        self.base_gamma = base_gamma
        
    def forward(self, pred, target):
        self.weights = self.weights.to(pred.device)
        
        ce_loss = F.cross_entropy(pred, target, reduction='none', weight=self.weights)
        pt = torch.exp(-ce_loss)
        
        # Adaptive gamma: harder samples get higher gamma
        gamma = self.base_gamma + (1 - pt)
        focal_loss = ((1 - pt) ** gamma) * ce_loss
        
        return focal_loss.mean()

# ===========================================
# DATASET
# ===========================================

class NSLDataset(Dataset):
    def __init__(self, c, n, y, is_synthetic=False):
        self.c = torch.tensor(c, dtype=torch.long)
        self.n = torch.tensor(n, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)
        self.is_synthetic = is_synthetic
    
    def __len__(self):
        return len(self.y)
    
    def __getitem__(self, i):
        return self.c[i], self.n[i], self.y[i]

# ===========================================
# HYBRID TRAINING STRATEGY
# ===========================================

def train_phase(model, train_loader, criterion, optimizer, scheduler, phase_name):
    """Train for one phase"""
    model.train()
    total_loss = 0
    
    pbar = tqdm(train_loader, desc=phase_name)
    for xc, xn, y in pbar:
        xc, xn, y = xc.to(DEVICE), xn.to(DEVICE), y.to(DEVICE)
        
        optimizer.zero_grad()
        out1, out2 = model(xc, xn)
        
        # Loss on both heads
        loss1 = criterion(out1, y)
        loss2 = criterion(out2, y)
        
        # Consistency loss
        loss_consist = F.kl_div(F.log_softmax(out1, dim=1), F.softmax(out2, dim=1), reduction='batchmean')
        
        loss = loss1 + 0.5 * loss2 + 0.2 * loss_consist
        
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        if scheduler is not None:
            scheduler.step()
        
        total_loss += loss.item()
        pbar.set_postfix({'loss': f'{loss.item():.4f}'})
    
    return total_loss / len(train_loader)

def evaluate(model, test_loader, le_target):
    """Evaluate model"""
    model.eval()
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        for xc, xn, y in test_loader:
            xc, xn = xc.to(DEVICE), xn.to(DEVICE)
            out1, out2 = model(xc, xn)
            # Ensemble both heads
            ensemble = (out1 + out2) / 2
            preds = torch.argmax(ensemble, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(y.numpy())
    
    macro_f1 = f1_score(all_labels, all_preds, average='macro', zero_division=0)
    weighted_f1 = f1_score(all_labels, all_preds, average='weighted', zero_division=0)
    
    return macro_f1, weighted_f1, all_preds, all_labels

# ===========================================
# MAIN TRAINING
# ===========================================

model = HybridNet(
    cat_dims=cat_dims,
    num_feat_dim=X_num_smote.shape[1],
    num_classes=num_classes,
    emb_dim=48,
    hidden_dim=384
).to(DEVICE)

print(f"\nModel parameters: {sum(p.numel() for p in model.parameters()):,}")

# Create datasets
test_dataset = NSLDataset(X_cat_test, X_num_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=1024, shuffle=False, num_workers=2)

best_macro_f1 = 0.0
best_model_state = None

print("\n" + "="*70)
print("HYBRID TRAINING STRATEGY")
print("="*70)

if use_original:
    # PHASE 1: Train on original real data with extreme focal loss
    print("\n>>> PHASE 1: Learning from REAL data (30 epochs)")
    print("="*70)
    
    orig_dataset = NSLDataset(X_cat_orig, X_num_orig, y_orig, is_synthetic=False)
    orig_loader = DataLoader(orig_dataset, batch_size=256, shuffle=True, num_workers=2)
    
    criterion_phase1 = AdaptiveFocalLoss(orig_class_counts, base_gamma=3.0)
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-3)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=30, eta_min=1e-6)
    
    for epoch in range(30):
        train_loss = train_phase(model, orig_loader, criterion_phase1, optimizer, scheduler, 
                                 f"Phase 1 - Epoch {epoch+1}/30")
        
        if (epoch + 1) % 5 == 0:
            macro_f1, weighted_f1, _, _ = evaluate(model, test_loader, le_target)
            print(f"  Validation - Macro F1: {macro_f1:.4f}, Weighted F1: {weighted_f1:.4f}")
            
            if macro_f1 > best_macro_f1:
                best_macro_f1 = macro_f1
                best_model_state = model.state_dict().copy()

# PHASE 2: Fine-tune with SMOTE data
print("\n>>> PHASE 2: Fine-tuning with SMOTE augmentation (40 epochs)")
print("="*70)

smote_dataset = NSLDataset(X_cat_smote, X_num_smote, y_smote, is_synthetic=True)

# Mix original (if available) and SMOTE with 30-70 ratio
if use_original and len(y_orig) > 0:
    # Sample original data
    orig_indices = np.random.choice(len(y_orig), size=min(len(y_orig), len(y_smote) // 2), replace=False)
    orig_subset = NSLDataset(X_cat_orig[orig_indices], X_num_orig[orig_indices], 
                            y_orig[orig_indices], is_synthetic=False)
    mixed_dataset = ConcatDataset([orig_subset, smote_dataset])
    print(f"Mixed dataset: {len(orig_subset)} original + {len(smote_dataset)} SMOTE")
else:
    mixed_dataset = smote_dataset

mixed_loader = DataLoader(mixed_dataset, batch_size=512, shuffle=True, num_workers=2)

smote_class_counts = np.bincount(y_smote, minlength=num_classes)
criterion_phase2 = AdaptiveFocalLoss(smote_class_counts, base_gamma=2.0)
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-4, weight_decay=1e-3)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=40, eta_min=1e-7)

patience = 15
patience_counter = 0

for epoch in range(40):
    train_loss = train_phase(model, mixed_loader, criterion_phase2, optimizer, scheduler,
                             f"Phase 2 - Epoch {epoch+1}/40")
    
    macro_f1, weighted_f1, all_preds, all_labels = evaluate(model, test_loader, le_target)
    
    per_class_f1 = f1_score(all_labels, all_preds, average=None, zero_division=0, labels=range(num_classes))
    testable = per_class_f1[test_class_counts > 0]
    good_classes = (testable > 0.3).sum()
    
    print(f"\n  Train Loss: {train_loss:.4f}")
    print(f"  Macro F1: {macro_f1:.4f} | Weighted F1: {weighted_f1:.4f}")
    print(f"  Classes F1 > 0.3: {good_classes}/{len(testable)} | Mean: {testable.mean():.4f}")
    
    if macro_f1 > best_macro_f1:
        best_macro_f1 = macro_f1
        best_model_state = model.state_dict().copy()
        patience_counter = 0
        print(f"  ✓ New best Macro F1: {best_macro_f1:.4f}")
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print(f"\nEarly stopping")
            break

# ===========================================
# FINAL EVALUATION
# ===========================================

print("\n" + "="*70)
print("FINAL EVALUATION")
print("="*70)

model.load_state_dict(best_model_state)
_, _, all_preds, all_labels = evaluate(model, test_loader, le_target)

labels_in_test = sorted(list(set(all_labels)))
target_names_filtered = [le_target.classes_[i] for i in labels_in_test]

print("\nCLASSIFICATION REPORT:")
print("="*70)
print(classification_report(all_labels, all_preds, labels=labels_in_test,
                          target_names=target_names_filtered, zero_division=0, digits=4))

print("\n" + "="*70)
print(f"BEST MACRO F1: {best_macro_f1:.4f}")
print("="*70)

Using device: cuda
Loading ORIGINAL unbalanced training data...
✓ Loaded original training data

Original data distribution:
back                :      956
buffer_overflow     :       30
ftp_write           :        8
guess_passwd        :       53
imap                :       11
ipsweep             :     3599
land                :       18
loadmodule          :        9
multihop            :        7
neptune             :    41214
nmap                :     1493
normal              :    67343
perl                :        3
phf                 :        4
pod                 :      201
portsweep           :     2931
rootkit             :       10
satan               :     3633
smurf               :     2646
spy                 :        2
teardrop            :      892
warezclient         :      890
warezmaster         :       20

Test samples: 18794

Model parameters: 3,614,138

HYBRID TRAINING STRATEGY

>>> PHASE 1: Learning from REAL data (30 epochs)


Phase 1 - Epoch 1/30: 100%|██████████| 493/493 [00:08<00:00, 60.21it/s, loss=0.0538]
Phase 1 - Epoch 2/30: 100%|██████████| 493/493 [00:08<00:00, 60.32it/s, loss=0.3026]
Phase 1 - Epoch 3/30: 100%|██████████| 493/493 [00:08<00:00, 59.55it/s, loss=1.3090]
Phase 1 - Epoch 4/30: 100%|██████████| 493/493 [00:08<00:00, 60.12it/s, loss=0.3785]
Phase 1 - Epoch 5/30: 100%|██████████| 493/493 [00:08<00:00, 60.89it/s, loss=0.1711]


  Validation - Macro F1: 0.4791, Weighted F1: 0.7989


Phase 1 - Epoch 6/30: 100%|██████████| 493/493 [00:08<00:00, 60.02it/s, loss=0.0279]
Phase 1 - Epoch 7/30: 100%|██████████| 493/493 [00:08<00:00, 59.63it/s, loss=0.0178]
Phase 1 - Epoch 8/30: 100%|██████████| 493/493 [00:08<00:00, 60.05it/s, loss=0.0084]
Phase 1 - Epoch 9/30: 100%|██████████| 493/493 [00:08<00:00, 60.22it/s, loss=0.0265]
Phase 1 - Epoch 10/30: 100%|██████████| 493/493 [00:08<00:00, 60.28it/s, loss=nan]   


  Validation - Macro F1: 0.0018, Weighted F1: 0.0007


Phase 1 - Epoch 11/30: 100%|██████████| 493/493 [00:08<00:00, 59.65it/s, loss=nan]
Phase 1 - Epoch 12/30: 100%|██████████| 493/493 [00:08<00:00, 60.07it/s, loss=nan]
Phase 1 - Epoch 13/30: 100%|██████████| 493/493 [00:08<00:00, 60.10it/s, loss=nan]
Phase 1 - Epoch 14/30: 100%|██████████| 493/493 [00:08<00:00, 59.41it/s, loss=nan]
Phase 1 - Epoch 15/30: 100%|██████████| 493/493 [00:08<00:00, 60.60it/s, loss=nan]


  Validation - Macro F1: 0.0018, Weighted F1: 0.0007


Phase 1 - Epoch 16/30: 100%|██████████| 493/493 [00:08<00:00, 59.44it/s, loss=nan]
Phase 1 - Epoch 17/30: 100%|██████████| 493/493 [00:08<00:00, 60.91it/s, loss=nan]
Phase 1 - Epoch 18/30: 100%|██████████| 493/493 [00:08<00:00, 59.84it/s, loss=nan]
Phase 1 - Epoch 19/30: 100%|██████████| 493/493 [00:08<00:00, 60.08it/s, loss=nan]
Phase 1 - Epoch 20/30: 100%|██████████| 493/493 [00:08<00:00, 60.09it/s, loss=nan]


  Validation - Macro F1: 0.0018, Weighted F1: 0.0007


Phase 1 - Epoch 21/30: 100%|██████████| 493/493 [00:08<00:00, 59.84it/s, loss=nan]
Phase 1 - Epoch 22/30: 100%|██████████| 493/493 [00:08<00:00, 60.85it/s, loss=nan]
Phase 1 - Epoch 23/30: 100%|██████████| 493/493 [00:08<00:00, 60.25it/s, loss=nan]
Phase 1 - Epoch 24/30: 100%|██████████| 493/493 [00:08<00:00, 60.43it/s, loss=nan]
Phase 1 - Epoch 25/30: 100%|██████████| 493/493 [00:08<00:00, 59.15it/s, loss=nan]


  Validation - Macro F1: 0.0018, Weighted F1: 0.0007


Phase 1 - Epoch 26/30: 100%|██████████| 493/493 [00:08<00:00, 60.19it/s, loss=nan]
Phase 1 - Epoch 27/30: 100%|██████████| 493/493 [00:08<00:00, 60.47it/s, loss=nan]
Phase 1 - Epoch 28/30: 100%|██████████| 493/493 [00:08<00:00, 60.50it/s, loss=nan]
Phase 1 - Epoch 29/30: 100%|██████████| 493/493 [00:08<00:00, 59.66it/s, loss=nan]
Phase 1 - Epoch 30/30: 100%|██████████| 493/493 [00:08<00:00, 60.43it/s, loss=nan]


  Validation - Macro F1: 0.0018, Weighted F1: 0.0007

>>> PHASE 2: Fine-tuning with SMOTE augmentation (40 epochs)
Mixed dataset: 125973 original + 557934 SMOTE


Phase 2 - Epoch 1/40: 100%|██████████| 1336/1336 [00:23<00:00, 56.82it/s, loss=nan]



  Train Loss: nan
  Macro F1: 0.0018 | Weighted F1: 0.0007
  Classes F1 > 0.3: 0/21 | Mean: 0.0018


Phase 2 - Epoch 2/40: 100%|██████████| 1336/1336 [00:23<00:00, 56.48it/s, loss=nan]



  Train Loss: nan
  Macro F1: 0.0018 | Weighted F1: 0.0007
  Classes F1 > 0.3: 0/21 | Mean: 0.0018


Phase 2 - Epoch 3/40: 100%|██████████| 1336/1336 [00:23<00:00, 56.07it/s, loss=nan]



  Train Loss: nan
  Macro F1: 0.0018 | Weighted F1: 0.0007
  Classes F1 > 0.3: 0/21 | Mean: 0.0018


Phase 2 - Epoch 4/40: 100%|██████████| 1336/1336 [00:23<00:00, 56.59it/s, loss=nan]



  Train Loss: nan
  Macro F1: 0.0018 | Weighted F1: 0.0007
  Classes F1 > 0.3: 0/21 | Mean: 0.0018


Phase 2 - Epoch 5/40: 100%|██████████| 1336/1336 [00:23<00:00, 57.65it/s, loss=nan]



  Train Loss: nan
  Macro F1: 0.0018 | Weighted F1: 0.0007
  Classes F1 > 0.3: 0/21 | Mean: 0.0018


Phase 2 - Epoch 6/40: 100%|██████████| 1336/1336 [00:23<00:00, 56.32it/s, loss=nan]



  Train Loss: nan
  Macro F1: 0.0018 | Weighted F1: 0.0007
  Classes F1 > 0.3: 0/21 | Mean: 0.0018


Phase 2 - Epoch 7/40: 100%|██████████| 1336/1336 [00:23<00:00, 57.04it/s, loss=nan]



  Train Loss: nan
  Macro F1: 0.0018 | Weighted F1: 0.0007
  Classes F1 > 0.3: 0/21 | Mean: 0.0018


Phase 2 - Epoch 8/40: 100%|██████████| 1336/1336 [00:23<00:00, 56.93it/s, loss=nan]



  Train Loss: nan
  Macro F1: 0.0018 | Weighted F1: 0.0007
  Classes F1 > 0.3: 0/21 | Mean: 0.0018


Phase 2 - Epoch 9/40: 100%|██████████| 1336/1336 [00:23<00:00, 56.53it/s, loss=nan]



  Train Loss: nan
  Macro F1: 0.0018 | Weighted F1: 0.0007
  Classes F1 > 0.3: 0/21 | Mean: 0.0018


Phase 2 - Epoch 10/40: 100%|██████████| 1336/1336 [00:23<00:00, 57.06it/s, loss=nan]



  Train Loss: nan
  Macro F1: 0.0018 | Weighted F1: 0.0007
  Classes F1 > 0.3: 0/21 | Mean: 0.0018


Phase 2 - Epoch 11/40: 100%|██████████| 1336/1336 [00:23<00:00, 57.40it/s, loss=nan]



  Train Loss: nan
  Macro F1: 0.0018 | Weighted F1: 0.0007
  Classes F1 > 0.3: 0/21 | Mean: 0.0018


Phase 2 - Epoch 12/40: 100%|██████████| 1336/1336 [00:23<00:00, 57.41it/s, loss=nan]



  Train Loss: nan
  Macro F1: 0.0018 | Weighted F1: 0.0007
  Classes F1 > 0.3: 0/21 | Mean: 0.0018


Phase 2 - Epoch 13/40: 100%|██████████| 1336/1336 [00:23<00:00, 57.39it/s, loss=nan]



  Train Loss: nan
  Macro F1: 0.0018 | Weighted F1: 0.0007
  Classes F1 > 0.3: 0/21 | Mean: 0.0018


Phase 2 - Epoch 14/40: 100%|██████████| 1336/1336 [00:23<00:00, 56.50it/s, loss=nan]



  Train Loss: nan
  Macro F1: 0.0018 | Weighted F1: 0.0007
  Classes F1 > 0.3: 0/21 | Mean: 0.0018


Phase 2 - Epoch 15/40: 100%|██████████| 1336/1336 [00:23<00:00, 56.67it/s, loss=nan]



  Train Loss: nan
  Macro F1: 0.0018 | Weighted F1: 0.0007
  Classes F1 > 0.3: 0/21 | Mean: 0.0018

Early stopping

FINAL EVALUATION

CLASSIFICATION REPORT:
                 precision    recall  f1-score   support

           back     0.0191    1.0000    0.0375       359
buffer_overflow     0.0000    0.0000    0.0000        20
      ftp_write     0.0000    0.0000    0.0000         3
   guess_passwd     0.0000    0.0000    0.0000      1231
           imap     0.0000    0.0000    0.0000         1
        ipsweep     0.0000    0.0000    0.0000       141
           land     0.0000    0.0000    0.0000         7
     loadmodule     0.0000    0.0000    0.0000         2
       multihop     0.0000    0.0000    0.0000        18
        neptune     0.0000    0.0000    0.0000      4657
           nmap     0.0000    0.0000    0.0000        73
         normal     0.0000    0.0000    0.0000      9711
           perl     0.0000    0.0000    0.0000         2
            phf     0.0000    0.0000    0.0