In [1]:
import os
import numpy as np
import pandas as pd
from collections import Counter
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
from torch.utils.data import Subset
import torch.optim as optim
import torch.nn.functional as F

In [2]:
# === CONFIG ===
INPUT_DIR = "/home/HardDisk/Satang/thesis_proj/New_45/15/split_tws/X_csv_split_31"
CHUNK_SIZE = 31
NUM_FEATURES = 8
NUM_EPOCHS = 50
BATCH_SIZE = 32
K_FOLDS = 5
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
# === HELPERS ===
def collate_fn(batch):
    streams, labels = zip(*batch)
    streams = list(zip(*streams))
    streams = [torch.stack(s) for s in streams]
    labels = torch.tensor(labels)
    return streams, labels

def compute_class_weights(labels, device):
    counter = Counter(labels)
    total = sum(counter.values())
    weights = [np.log(total / (counter[i] + 1)) for i in range(len(counter))]
    return torch.tensor(weights, dtype=torch.float).to(device)

def print_model_info(model):
    print("Model Architecture:")
    print(model)
    total_params = sum(p.numel() for p in model.parameters())
    print(f"Total parameters: {total_params:,}")

def plot_loss_curve(train_losses, val_losses):
    epochs = range(len(train_losses))
    plt.figure(figsize=(10, 5))
    plt.plot(epochs, train_losses, label='Train Loss', color='blue')
    plt.plot(epochs, val_losses, label='Val Loss', color='orange')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Training vs Validation Loss')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

def plot_accuracy_curve(train_acc, val_acc):
    epochs = range(len(train_acc))
    plt.figure(figsize=(10, 5))
    plt.plot(epochs, train_acc, label='Train Accuracy', color='green')
    plt.plot(epochs, val_acc, label='Val Accuracy', color='red')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.title('Training vs Validation Accuracy')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()


In [4]:
# === DATASET CLASS ===
class MultiStreamDataset(Dataset):
    def __init__(self, data, labels, label_encoder, augment=False):
        self.data = data
        self.labels = label_encoder.transform(labels)
        self.augment = augment

    def __len__(self):
        return len(self.data)

    def augment_stream(self, stream):
        jitter = np.random.normal(0, 0.01, stream.shape)
        scale = np.random.normal(1.0, 0.05, stream.shape)
        return stream * scale + jitter

    def __getitem__(self, idx):
        sample = self.data[idx]  # shape: (T, 8)
        
        if self.augment:
            # Apply augmentation to each feature independently
            jitter = np.random.normal(0, 0.01, sample.shape)
            scale = np.random.normal(1.0, 0.05, sample.shape)
            sample = sample * scale + jitter

        sample_tensor = torch.tensor(sample, dtype=torch.float32)  # shape: (T, 8)
        label_tensor = torch.tensor(self.labels[idx], dtype=torch.long)
    
        return sample_tensor, label_tensor


In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class PatchMLP(nn.Module):
    def __init__(self, dim, hidden_dim):
        super().__init__()
        self.mlp = nn.Sequential(
            nn.Linear(dim, hidden_dim),
            nn.GELU(),
            nn.Linear(hidden_dim, dim)
        )

    def forward(self, x):
        return self.mlp(x)

class ChannelMLP(nn.Module):
    def __init__(self, num_patches, hidden_dim):
        super().__init__()
        self.mlp = nn.Sequential(
            nn.Linear(num_patches, hidden_dim),
            nn.GELU(),
            nn.Linear(hidden_dim, num_patches)
        )

    def forward(self, x):
        return self.mlp(x)

class MLPMixerRansomwareClassifier(nn.Module):
    def __init__(self, seq_len=32, feature_dim=8, num_classes=12, hidden_dim=128):
        super().__init__()
        self.patch_mlp = PatchMLP(feature_dim, hidden_dim)
        self.channel_mlp = ChannelMLP(seq_len, hidden_dim)
        self.global_pool = nn.AdaptiveAvgPool1d(1)

        self.projection = nn.Linear(feature_dim, 128)  # for KD
        self.classifier = nn.Sequential(
            nn.LayerNorm(feature_dim),
            nn.Flatten(),
            nn.Linear(feature_dim, num_classes)
        )

    def forward(self, x, return_features=False):  # x: (B, T, 8)
        x = self.patch_mlp(x)          # (B, T, 8)
        x = x.permute(0, 2, 1)         # (B, 8, T)
        x = self.channel_mlp(x)        # (B, 8, T)

        x = self.global_pool(x)        # (B, 8, 1)
        feat = x.squeeze(-1)           # (B, 8)

        feat_proj = self.projection(feat)      # (B, 128)
        logits = self.classifier(feat)         # (B, num_classes)

        if return_features:
            return logits, feat_proj
        else:
            return logits


In [6]:

# # === COLLATE FUNCTION ===
# def collate_fn(batch):
#     streams, labels = zip(*batch)
#     streams = list(zip(*streams))
#     streams = [torch.stack(s) for s in streams]
#     labels = torch.tensor(labels)
#     return streams, labels

# === LOAD SPLIT FUNCTION ===
def load_split_from_folder(split_dir, expected_shape):
    X, y = [], []
    for class_name in sorted(os.listdir(split_dir)):
        class_path = os.path.join(split_dir, class_name)
        for fname in sorted(os.listdir(class_path)):
            if fname.endswith(".csv"):
                fpath = os.path.join(class_path, fname)
                chunk = pd.read_csv(fpath, header=None).values
                if chunk.shape == expected_shape:
                    X.append(chunk)
                    y.append(class_name)
    return np.array(X), np.array(y)

In [7]:
# === APPLY SMOTE ===
def apply_smote_on_training(X_chunks, y_labels, chunk_size, num_features):
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y_labels)

    smote_encoder = LabelEncoder()
    y_encoded_for_smote = smote_encoder.fit_transform(y_labels)

    X_flat = X_chunks.reshape(X_chunks.shape[0], -1)
    X_resampled, y_resampled = SMOTE().fit_resample(X_flat, y_encoded_for_smote)

    X_res = X_resampled.reshape(-1, chunk_size, num_features)
    y_res_str = smote_encoder.inverse_transform(y_resampled)

    return X_res, y_res_str, label_encoder

# # === LOAD DATASETS ===
# expected_shape = (CHUNK_SIZE, NUM_FEATURES)

# X_train_raw, y_train_raw = load_split_from_folder(os.path.join(INPUT_DIR, "train"), expected_shape)
# X_val_raw,   y_val_raw   = load_split_from_folder(os.path.join(INPUT_DIR, "val"), expected_shape)

# # === SMOTE ONLY ON TRAIN ===
# X_train_balanced, y_train_str, label_encoder = apply_smote_on_training(
#     X_train_raw, y_train_raw, CHUNK_SIZE, NUM_FEATURES
# )

# # === CREATE DATASETS ===
# train_dataset = MultiStreamDataset(X_train_balanced, y_train_str, label_encoder, augment=True)
# val_dataset   = MultiStreamDataset(X_val_raw, y_val_raw, label_encoder, augment=False)

# train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
# val_loader   = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)


In [8]:
# === CLASS WEIGHTING ===
def compute_class_weights(labels, device):
    from collections import Counter
    total = len(labels)
    counts = Counter(labels)
    weights = [np.log(total / (counts[i] + 1)) for i in range(len(counts))]
    return torch.tensor(weights, dtype=torch.float).to(device)

# class_weights_tensor = compute_class_weights(label_encoder.transform(y_train_str), device="cpu")

In [9]:
def train_model(model, train_loader, val_loader, device, epochs=50, lr=0.001,
                  class_weights=None, optimizer=None, scheduler=None,
                  best_model_path="mlpmixer_teacher.pth"):

    print("🔍 Class Weights Tensor:")
    print(class_weights)

    criterion_ce = nn.CrossEntropyLoss(weight=class_weights)

    if optimizer is None:
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    model.to(device)

    train_losses, val_losses = [], []
    train_accuracies, val_accuracies = [], []
    best_val_acc = 0

    for epoch in range(epochs):
        model.train()
        total_loss, correct = 0.0, 0

        if scheduler:
            scheduler.step()

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            logits = model(inputs)

            loss = criterion_ce(logits, labels)

            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5)
            optimizer.step()

            total_loss += loss.item()
            correct += (logits.argmax(1) == labels).sum().item()

        train_acc = correct / len(train_loader.dataset)
        train_losses.append(total_loss)
        train_accuracies.append(train_acc)

        # === Validation ===
        model.eval()
        val_loss, val_correct = 0.0, 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                logits = model(inputs)

                loss = criterion_ce(logits, labels)
                val_loss += loss.item()
                val_correct += (logits.argmax(1) == labels).sum().item()

        val_acc = val_correct / len(val_loader.dataset)
        val_losses.append(val_loss)
        val_accuracies.append(val_acc)

        print(f"Epoch {epoch+1}/{epochs} - "
              f"Train Loss: {total_loss:.4f} - Val Loss: {val_loss:.4f} - "
              f"Train Acc: {train_acc:.4f} - Val Acc: {val_acc:.4f}")

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), best_model_path)
            print(f"💾 Best model saved to: {best_model_path}")

    model.load_state_dict(torch.load(best_model_path))
    return train_accuracies, val_accuracies, train_losses, val_losses



In [10]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import DataLoader
from imblearn.over_sampling import SMOTE

# === 🧩 Config ===
NUM_EPOCHS = 50
BEST_LR = 0.01
NUM_FEATURES = 8  # Assuming you use 8D input
BATCH_SIZE = 64
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

base_dir = "/home/HardDisk/Satang/thesis_proj"
save_root = "/home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/teacher"
os.makedirs(save_root, exist_ok=True)

detection_times = [30, 45, 60]
window_sizes = [10, 15, 20]

for T_d in detection_times:
    for T_w in window_sizes:
        print(f"\n🚀 Running for Td={T_d}, Tw={T_w}")
        model_name = os.path.join(save_root, f"mlpmixer_Td{T_d}_Tw{T_w}.pth")
        T_len = T_d - T_w + 1
        folder_name = f"X_csv_split_{T_len}"

        input_dir = os.path.join(base_dir, f"New_{T_d}", f"{T_w}", "split_tws", folder_name)
        train_path = os.path.join(input_dir, "train")
        val_path   = os.path.join(input_dir, "val")

        # === 1. Load Data ===
        expected_shape = (T_len, NUM_FEATURES)
        X_train_raw, y_train_raw = load_split_from_folder(train_path, expected_shape)
        X_val_raw, y_val_raw     = load_split_from_folder(val_path, expected_shape)

        # === 2. SMOTE + Encode ===
        label_encoder = LabelEncoder()
        y_train_encoded = label_encoder.fit_transform(y_train_raw)
        X_train_flat = X_train_raw.reshape(X_train_raw.shape[0], -1)
        X_resampled, y_resampled = SMOTE().fit_resample(X_train_flat, y_train_encoded)
        X_train_bal = X_resampled.reshape(-1, expected_shape[0], NUM_FEATURES)
        y_train_str = label_encoder.inverse_transform(y_resampled)

        # === 3. Datasets + Loaders ===
        train_dataset = MultiStreamDataset(X_train_bal, y_train_str, label_encoder, augment=True)
        val_dataset   = MultiStreamDataset(X_val_raw, y_val_raw, label_encoder, augment=False)
        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
        val_loader   = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

        # === 4. MLP-Mixer Model ===
        model = MLPMixerRansomwareClassifier(
            seq_len=expected_shape[0],
            feature_dim=NUM_FEATURES,
            num_classes=len(label_encoder.classes_),
            hidden_dim=128
        ).to(device)
        optimizer = optim.AdamW(model.parameters(), lr=BEST_LR, weight_decay=1e-4)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=NUM_EPOCHS)
        class_weights_tensor = compute_class_weights(label_encoder.transform(y_train_str), device)

        # === 5. Train MLP-Mixer ===
        train_accs, val_accs, train_losses, val_losses = train_model(
            model=model,
            train_loader=train_loader,
            val_loader=val_loader,
            device=device,
            epochs=NUM_EPOCHS,
            class_weights=class_weights_tensor,
            lr=BEST_LR,
            optimizer=optimizer,
            scheduler=scheduler,
            best_model_path=model_name
        )

        print(f"✅ Finished Td={T_d}, Tw={T_w} — saved to {model_name}")



🚀 Running for Td=30, Tw=10
🔍 Class Weights Tensor:
tensor([2.4843, 2.4843, 2.4843, 2.4843, 2.4843, 2.4843, 2.4843, 2.4843, 2.4843,
        2.4843, 2.4843, 2.4843], device='cuda:0')




Epoch 1/50 - Train Loss: 336.0935 - Val Loss: 35.1825 - Train Acc: 0.5693 - Val Acc: 0.5486
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/teacher/mlpmixer_Td30_Tw10.pth
Epoch 2/50 - Train Loss: 221.1273 - Val Loss: 28.9831 - Train Acc: 0.7053 - Val Acc: 0.6187
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/teacher/mlpmixer_Td30_Tw10.pth
Epoch 3/50 - Train Loss: 190.3074 - Val Loss: 27.8167 - Train Acc: 0.7472 - Val Acc: 0.6508
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/teacher/mlpmixer_Td30_Tw10.pth
Epoch 4/50 - Train Loss: 165.7556 - Val Loss: 25.9019 - Train Acc: 0.7779 - Val Acc: 0.6628
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/teacher/mlpmixer_Td30_Tw10.pth
Epoch 5/50 - Train Loss: 146.8350 - Val Loss: 22.4268 - Train Acc: 0.8091 - Val Acc: 0.7425
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learni

  model.load_state_dict(torch.load(best_model_path))


🔍 Class Weights Tensor:
tensor([2.4844, 2.4844, 2.4844, 2.4844, 2.4844, 2.4844, 2.4844, 2.4844, 2.4844,
        2.4844, 2.4844, 2.4844], device='cuda:0')




Epoch 1/50 - Train Loss: 437.2708 - Val Loss: 42.4754 - Train Acc: 0.5970 - Val Acc: 0.6063
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/teacher/mlpmixer_Td30_Tw15.pth
Epoch 2/50 - Train Loss: 229.7026 - Val Loss: 36.3275 - Train Acc: 0.7827 - Val Acc: 0.6932
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/teacher/mlpmixer_Td30_Tw15.pth
Epoch 3/50 - Train Loss: 192.2737 - Val Loss: 29.7900 - Train Acc: 0.8171 - Val Acc: 0.7411
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/teacher/mlpmixer_Td30_Tw15.pth
Epoch 4/50 - Train Loss: 168.2614 - Val Loss: 27.3415 - Train Acc: 0.8400 - Val Acc: 0.7582
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/teacher/mlpmixer_Td30_Tw15.pth
Epoch 5/50 - Train Loss: 158.4143 - Val Loss: 27.5267 - Train Acc: 0.8510 - Val Acc: 0.7468
Epoch 6/50 - Train Loss: 145.4029 - Val Loss: 27.2810 - Train Acc: 0

  model.load_state_dict(torch.load(best_model_path))


🔍 Class Weights Tensor:
tensor([2.4847, 2.4847, 2.4847, 2.4847, 2.4847, 2.4847, 2.4847, 2.4847, 2.4847,
        2.4847, 2.4847, 2.4847], device='cuda:0')




Epoch 1/50 - Train Loss: 687.5276 - Val Loss: 73.6451 - Train Acc: 0.6747 - Val Acc: 0.6543
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/teacher/mlpmixer_Td30_Tw20.pth
Epoch 2/50 - Train Loss: 420.9087 - Val Loss: 68.8242 - Train Acc: 0.7937 - Val Acc: 0.6960
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/teacher/mlpmixer_Td30_Tw20.pth
Epoch 3/50 - Train Loss: 365.2520 - Val Loss: 67.2361 - Train Acc: 0.8237 - Val Acc: 0.7061
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/teacher/mlpmixer_Td30_Tw20.pth
Epoch 4/50 - Train Loss: 336.7744 - Val Loss: 63.5200 - Train Acc: 0.8405 - Val Acc: 0.7281
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/teacher/mlpmixer_Td30_Tw20.pth
Epoch 5/50 - Train Loss: 309.0777 - Val Loss: 53.0765 - Train Acc: 0.8547 - Val Acc: 0.7679
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learni

  model.load_state_dict(torch.load(best_model_path))


🔍 Class Weights Tensor:
tensor([2.4837, 2.4837, 2.4837, 2.4837, 2.4837, 2.4837, 2.4837, 2.4837, 2.4837,
        2.4837, 2.4837, 2.4837], device='cuda:0')




Epoch 1/50 - Train Loss: 222.0285 - Val Loss: 17.6494 - Train Acc: 0.5185 - Val Acc: 0.5862
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/teacher/mlpmixer_Td45_Tw10.pth
Epoch 2/50 - Train Loss: 108.6802 - Val Loss: 13.9528 - Train Acc: 0.7528 - Val Acc: 0.6628
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/teacher/mlpmixer_Td45_Tw10.pth
Epoch 3/50 - Train Loss: 80.0092 - Val Loss: 9.9733 - Train Acc: 0.8181 - Val Acc: 0.7742
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/teacher/mlpmixer_Td45_Tw10.pth
Epoch 4/50 - Train Loss: 66.0590 - Val Loss: 13.9503 - Train Acc: 0.8515 - Val Acc: 0.6996
Epoch 5/50 - Train Loss: 59.4781 - Val Loss: 8.4655 - Train Acc: 0.8666 - Val Acc: 0.7946
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/teacher/mlpmixer_Td45_Tw10.pth
Epoch 6/50 - Train Loss: 52.9078 - Val Loss: 7.7631 - Train Acc: 0.8794 -

  model.load_state_dict(torch.load(best_model_path))


🔍 Class Weights Tensor:
tensor([2.4840, 2.4840, 2.4840, 2.4840, 2.4840, 2.4840, 2.4840, 2.4840, 2.4840,
        2.4840, 2.4840, 2.4840], device='cuda:0')




Epoch 1/50 - Train Loss: 267.6854 - Val Loss: 23.5324 - Train Acc: 0.5562 - Val Acc: 0.6026
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/teacher/mlpmixer_Td45_Tw15.pth
Epoch 2/50 - Train Loss: 128.3177 - Val Loss: 18.0604 - Train Acc: 0.7711 - Val Acc: 0.6713
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/teacher/mlpmixer_Td45_Tw15.pth
Epoch 3/50 - Train Loss: 100.3712 - Val Loss: 15.2395 - Train Acc: 0.8209 - Val Acc: 0.7392
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/teacher/mlpmixer_Td45_Tw15.pth
Epoch 4/50 - Train Loss: 87.2376 - Val Loss: 16.2161 - Train Acc: 0.8456 - Val Acc: 0.7611
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/teacher/mlpmixer_Td45_Tw15.pth
Epoch 5/50 - Train Loss: 77.3550 - Val Loss: 13.0925 - Train Acc: 0.8638 - Val Acc: 0.7801
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning

  model.load_state_dict(torch.load(best_model_path))


🔍 Class Weights Tensor:
tensor([2.4841, 2.4841, 2.4841, 2.4841, 2.4841, 2.4841, 2.4841, 2.4841, 2.4841,
        2.4841, 2.4841, 2.4841], device='cuda:0')




Epoch 1/50 - Train Loss: 285.8109 - Val Loss: 23.2564 - Train Acc: 0.5979 - Val Acc: 0.6423
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/teacher/mlpmixer_Td45_Tw20.pth
Epoch 2/50 - Train Loss: 143.7862 - Val Loss: 19.8373 - Train Acc: 0.7836 - Val Acc: 0.6902
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/teacher/mlpmixer_Td45_Tw20.pth
Epoch 3/50 - Train Loss: 113.8500 - Val Loss: 19.9279 - Train Acc: 0.8278 - Val Acc: 0.7066
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/teacher/mlpmixer_Td45_Tw20.pth
Epoch 4/50 - Train Loss: 96.5533 - Val Loss: 15.6458 - Train Acc: 0.8563 - Val Acc: 0.7716
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/teacher/mlpmixer_Td45_Tw20.pth
Epoch 5/50 - Train Loss: 89.6440 - Val Loss: 15.3178 - Train Acc: 0.8650 - Val Acc: 0.7874
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning

  model.load_state_dict(torch.load(best_model_path))


🔍 Class Weights Tensor:
tensor([2.4832, 2.4832, 2.4832, 2.4832, 2.4832, 2.4832, 2.4832, 2.4832, 2.4832,
        2.4832, 2.4832, 2.4832], device='cuda:0')




Epoch 1/50 - Train Loss: 169.0825 - Val Loss: 13.7576 - Train Acc: 0.4900 - Val Acc: 0.4905
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/teacher/mlpmixer_Td60_Tw10.pth
Epoch 2/50 - Train Loss: 80.7735 - Val Loss: 9.2341 - Train Acc: 0.7529 - Val Acc: 0.6885
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/teacher/mlpmixer_Td60_Tw10.pth
Epoch 3/50 - Train Loss: 53.6513 - Val Loss: 7.9797 - Train Acc: 0.8339 - Val Acc: 0.7511
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/teacher/mlpmixer_Td60_Tw10.pth
Epoch 4/50 - Train Loss: 41.2439 - Val Loss: 6.6379 - Train Acc: 0.8689 - Val Acc: 0.7904
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/teacher/mlpmixer_Td60_Tw10.pth
Epoch 5/50 - Train Loss: 34.6265 - Val Loss: 6.9452 - Train Acc: 0.8912 - Val Acc: 0.7773
Epoch 6/50 - Train Loss: 30.1530 - Val Loss: 4.8365 - Train Acc: 0.9089 - Va

  model.load_state_dict(torch.load(best_model_path))


🔍 Class Weights Tensor:
tensor([2.4834, 2.4834, 2.4834, 2.4834, 2.4834, 2.4834, 2.4834, 2.4834, 2.4834,
        2.4834, 2.4834, 2.4834], device='cuda:0')




Epoch 1/50 - Train Loss: 189.2689 - Val Loss: 14.6601 - Train Acc: 0.4937 - Val Acc: 0.5823
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/teacher/mlpmixer_Td60_Tw15.pth
Epoch 2/50 - Train Loss: 83.4394 - Val Loss: 11.6205 - Train Acc: 0.7614 - Val Acc: 0.6468
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/teacher/mlpmixer_Td60_Tw15.pth
Epoch 3/50 - Train Loss: 61.9773 - Val Loss: 9.7807 - Train Acc: 0.8217 - Val Acc: 0.7051
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/teacher/mlpmixer_Td60_Tw15.pth
Epoch 4/50 - Train Loss: 50.6494 - Val Loss: 8.8200 - Train Acc: 0.8519 - Val Acc: 0.7582
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/teacher/mlpmixer_Td60_Tw15.pth
Epoch 5/50 - Train Loss: 44.5759 - Val Loss: 8.8688 - Train Acc: 0.8695 - Val Acc: 0.7646
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cros

  model.load_state_dict(torch.load(best_model_path))


🔍 Class Weights Tensor:
tensor([2.4835, 2.4835, 2.4835, 2.4835, 2.4835, 2.4835, 2.4835, 2.4835, 2.4835,
        2.4835, 2.4835, 2.4835], device='cuda:0')




Epoch 1/50 - Train Loss: 215.0630 - Val Loss: 16.3670 - Train Acc: 0.4426 - Val Acc: 0.5316
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/teacher/mlpmixer_Td60_Tw20.pth
Epoch 2/50 - Train Loss: 92.5499 - Val Loss: 11.1087 - Train Acc: 0.7667 - Val Acc: 0.6698
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/teacher/mlpmixer_Td60_Tw20.pth
Epoch 3/50 - Train Loss: 66.9137 - Val Loss: 8.6636 - Train Acc: 0.8243 - Val Acc: 0.7518
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/teacher/mlpmixer_Td60_Tw20.pth
Epoch 4/50 - Train Loss: 55.2634 - Val Loss: 7.6946 - Train Acc: 0.8544 - Val Acc: 0.7904
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/teacher/mlpmixer_Td60_Tw20.pth
Epoch 5/50 - Train Loss: 45.9841 - Val Loss: 8.2227 - Train Acc: 0.8823 - Val Acc: 0.7506
Epoch 6/50 - Train Loss: 43.4650 - Val Loss: 7.7386 - Train Acc: 0.8857 - V

  model.load_state_dict(torch.load(best_model_path))


In [11]:
import torch
import torch.nn as nn

class StudentCNN(nn.Module):
    def __init__(self, input_length, num_classes=12):
        super().__init__()
        self.streams = nn.ModuleList([
            nn.Sequential(
                nn.Conv1d(1, 4, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.Conv1d(4, 8, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.AdaptiveAvgPool1d(1),
                nn.Flatten()
            ) for _ in range(8)
        ])

        self.fc = nn.Sequential(
            nn.Linear(8 * 8, 128),
            nn.LayerNorm(128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, num_classes)
        )

        self.proj = nn.Linear(8 * 8, 128)

    def forward(self, x, return_features=False):
        B, T, C = x.shape
        assert C == 8, f"Expected 8 feature streams, got {C}"

        # Split into 8 streams (B, 1, T) → process → (B, 8)
        streams = [x[:, :, i].unsqueeze(1) for i in range(C)]
        features = [self.streams[i](streams[i]) for i in range(8)]

        x = torch.cat(features, dim=1)  # (B, 64)



        if return_features:
            feat_proj = self.proj(x)      # (B, 128)
            logits = self.fc(x)           # (B, num_classes)
            return logits, feat_proj
        else:
            return self.fc(x)


In [12]:
import torch.nn.functional as F

def distillation_loss(student_logits, teacher_logits, student_feat, teacher_feat, true_labels, T=3.0, alpha=0.5, beta=0.3, gamma=0.2):
    """
    alpha: weight for hard loss (CE)
    beta: weight for soft loss (KL)
    gamma: weight for feature-based distillation (MSE)
    T: temperature for soft distillation
    """
    # Hard loss
    ce_loss = F.cross_entropy(student_logits, true_labels)

    # Soft loss (logits)
    soft_teacher = F.softmax(teacher_logits / T, dim=1)
    soft_student = F.log_softmax(student_logits / T, dim=1)
    kl_loss = F.kl_div(soft_student, soft_teacher, reduction='batchmean') * (T * T)

    # Feature loss (projection-matched MSE)
    feat_loss = F.mse_loss(student_feat, teacher_feat)

    return alpha * ce_loss + beta * kl_loss + gamma * feat_loss


In [13]:
def train_distilled(student, teacher, train_loader, val_loader, device,
                    epochs=50, lr=0.0005, class_weights=None,
                    T=3.0, alpha=0.5, beta=0.3, gamma=0.2,
                    save_path="best_student.pth"):

    teacher.eval()
    student.to(device)
    teacher.to(device)

    optimizer = torch.optim.Adam(student.parameters(), lr=lr)
    ce_criterion = nn.CrossEntropyLoss(weight=class_weights)

    best_val_acc = 0
    train_losses, train_accuracies, val_losses, val_accuracies = [], [], [], []

    for epoch in range(epochs):
        student.train()
        total_loss, correct = 0.0, 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            # === Forward passes (both must return logits, features)
            student_logits, student_feat = student(inputs, return_features=True)
            with torch.no_grad():
                teacher_logits, teacher_feat = teacher(inputs, return_features=True)

            # === KD loss
            loss = distillation_loss(
                student_logits, teacher_logits,
                student_feat, teacher_feat,
                labels,
                T=T, alpha=alpha, beta=beta, gamma=gamma
            )

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            correct += (student_logits.argmax(1) == labels).sum().item()

        train_acc = correct / len(train_loader.dataset)
        train_losses.append(total_loss)
        train_accuracies.append(train_acc)

        # === Validation (using standard CE loss)
        student.eval()
        val_loss, val_correct = 0.0, 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)

                logits = student(inputs)  # ❗ Note: assuming return_features=False in eval
                loss = ce_criterion(logits, labels)
                val_loss += loss.item()
                val_correct += (logits.argmax(1) == labels).sum().item()

        val_acc = val_correct / len(val_loader.dataset)
        val_losses.append(val_loss)
        val_accuracies.append(val_acc)

        print(f"[Distill] Epoch {epoch+1:02d}/{epochs} - "
              f"Loss: {total_loss:.4f} - Train Acc: {train_acc:.4f} - Val Acc: {val_acc:.4f}")

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(student.state_dict(), save_path)
            print(f"💾 Best student model saved to: {save_path}")

    student.load_state_dict(torch.load(save_path))
    return train_accuracies, val_accuracies, train_losses, val_losses


In [24]:
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import SMOTE
from sklearn.metrics import classification_report
import os
import torch
from torch.utils.data import DataLoader

# === Configs ===
NUM_FEATURES = 8
BATCH_SIZE = 64
NUM_EPOCHS = 50
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

detection_times = [30, 45, 60]
window_sizes = [10, 15, 20]

base_dir = "/home/HardDisk/Satang/thesis_proj"
teacher_dir = os.path.join(base_dir, "Deep_Learning", "cross_archi","mlp", "teacher")  # ← not AE
student_save_dir = os.path.join(base_dir, "Deep_Learning", "cross_archi","mlp", "student")
os.makedirs(student_save_dir, exist_ok=True)

for T_d in detection_times:
    for T_w in window_sizes:
        T_len = T_d - T_w + 1
        expected_shape = (T_len, NUM_FEATURES)

        print(f"\n🚀 Distilling MLP-Mixer → StudentCNN for Td={T_d}, Tw={T_w} (T_len={T_len})")

        # === Paths
        folder_name = f"X_csv_split_{T_len}"
        input_dir = os.path.join(base_dir, f"New_{T_d}", f"{T_w}", "split_tws", folder_name)
        train_path = os.path.join(input_dir, "train")
        val_path   = os.path.join(input_dir, "val")
        teacher_path = os.path.join(teacher_dir, f"mlpmixer_Td{T_d}_Tw{T_w}.pth")
        student_path = os.path.join(student_save_dir, f"student_from_mlp_Td{T_d}_Tw{T_w}.pth")

        # === 1. Load Data
        X_train_raw, y_train_raw = load_split_from_folder(train_path, expected_shape)
        X_val_raw, y_val_raw     = load_split_from_folder(val_path, expected_shape)

        # === 2. Encode & Balance
        label_encoder = LabelEncoder()
        y_train_encoded = label_encoder.fit_transform(y_train_raw)
        X_train_flat = X_train_raw.reshape(X_train_raw.shape[0], -1)
        X_resampled, y_resampled = SMOTE().fit_resample(X_train_flat, y_train_encoded)
        X_train_bal = X_resampled.reshape(-1, T_len, NUM_FEATURES)
        y_train_str = label_encoder.inverse_transform(y_resampled)

        # === 3. Dataset & Dataloader
        train_dataset = MultiStreamDataset(X_train_bal, y_train_str, label_encoder, augment=True)
        val_dataset   = MultiStreamDataset(X_val_raw, y_val_raw, label_encoder, augment=False)
        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
        val_loader   = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

        class_weights_tensor = compute_class_weights(label_encoder.transform(y_train_str), device)

        # === 4. Load MLP-Mixer Teacher
        teacher = MLPMixerRansomwareClassifier(
            seq_len=T_len,
            feature_dim=NUM_FEATURES,
            num_classes=len(label_encoder.classes_),
            hidden_dim=128,

        )
        teacher.load_state_dict(torch.load(teacher_path, map_location=device))
        teacher.to(device)
        teacher.eval()
        for p in teacher.parameters():
            p.requires_grad = False

        # === 5. Init StudentCNN
        student = StudentCNN(
            input_length=T_len,
            num_classes=len(label_encoder.classes_)
        ).to(device)

        # === 6. Train with Knowledge Distillation
        train_accs, val_accs, train_losses, val_losses = train_distilled(
            student=student,
            teacher=teacher,
            train_loader=train_loader,
            val_loader=val_loader,
            device=device,
            epochs=NUM_EPOCHS,
            lr=0.01,
            class_weights=class_weights_tensor,
            T=3,
            alpha=0.6,
            beta=0.3,
            gamma=0.3,
            save_path=student_path
        )

        # === 7. Final Evaluation
        student.eval()
        all_preds, all_labels = [], []
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = student(inputs)  # (logits only)
                preds = torch.argmax(outputs, dim=1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        print("\n📊 Final Classification Report (StudentCNN):")
        print(classification_report(all_labels, all_preds, target_names=label_encoder.classes_))
        print(f"✅ Student model saved: {student_path}")



🚀 Distilling MLP-Mixer → StudentCNN for Td=30, Tw=10 (T_len=21)


  teacher.load_state_dict(torch.load(teacher_path, map_location=device))


[Distill] Epoch 01/50 - Loss: 103172.3219 - Train Acc: 0.4912 - Val Acc: 0.5541
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/student/student_from_mlp_Td30_Tw10.pth
[Distill] Epoch 02/50 - Loss: 51221.1766 - Train Acc: 0.6715 - Val Acc: 0.5902
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/student/student_from_mlp_Td30_Tw10.pth
[Distill] Epoch 03/50 - Loss: 43765.6398 - Train Acc: 0.6981 - Val Acc: 0.6358
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/student/student_from_mlp_Td30_Tw10.pth
[Distill] Epoch 04/50 - Loss: 41058.7328 - Train Acc: 0.7351 - Val Acc: 0.6478
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/student/student_from_mlp_Td30_Tw10.pth
[Distill] Epoch 05/50 - Loss: 39624.7720 - Train Acc: 0.7501 - Val Acc: 0.6498
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Dee

  student.load_state_dict(torch.load(save_path))


              precision    recall  f1-score   support

    AESCrypt       1.00      1.00      1.00        85
      Cerber       0.73      0.77      0.75       189
    Darkside       0.96      0.75      0.85       323
       Excel       1.00      1.00      1.00       147
     Firefox       1.00      0.99      1.00       204
   GandCrab4       0.71      0.78      0.74       319
        Ryuk       0.76      0.77      0.76       196
     SDelete       1.00      1.00      1.00        79
  Sodinokibi       0.79      0.73      0.76       205
  TeslaCrypt       0.65      1.00      0.79        85
    WannaCry       1.00      1.00      1.00        79
         Zip       1.00      1.00      1.00        85

    accuracy                           0.85      1996
   macro avg       0.88      0.90      0.89      1996
weighted avg       0.86      0.85      0.85      1996

✅ Student model saved: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/student/student_from_mlp_Td30_Tw10.pth

🚀 Dist

  teacher.load_state_dict(torch.load(teacher_path, map_location=device))


[Distill] Epoch 01/50 - Loss: 228792.3068 - Train Acc: 0.5144 - Val Acc: 0.6595
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/student/student_from_mlp_Td30_Tw15.pth
[Distill] Epoch 02/50 - Loss: 120378.6178 - Train Acc: 0.7307 - Val Acc: 0.7039
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/student/student_from_mlp_Td30_Tw15.pth
[Distill] Epoch 03/50 - Loss: 109528.4743 - Train Acc: 0.7721 - Val Acc: 0.7020
[Distill] Epoch 04/50 - Loss: 103541.0109 - Train Acc: 0.7985 - Val Acc: 0.7301
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/student/student_from_mlp_Td30_Tw15.pth
[Distill] Epoch 05/50 - Loss: 100620.9624 - Train Acc: 0.8126 - Val Acc: 0.7479
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/student/student_from_mlp_Td30_Tw15.pth
[Distill] Epoch 06/50 - Loss: 98312.2857 - Train Acc: 0.8203 - V

  student.load_state_dict(torch.load(save_path))



📊 Final Classification Report (StudentCNN):
              precision    recall  f1-score   support

    AESCrypt       0.97      0.99      0.98       112
      Cerber       0.88      0.85      0.87       249
    Darkside       0.97      0.83      0.90       427
       Excel       1.00      1.00      1.00       194
     Firefox       0.97      0.92      0.95       271
   GandCrab4       0.72      0.81      0.76       420
        Ryuk       0.79      0.69      0.73       258
     SDelete       1.00      1.00      1.00       103
  Sodinokibi       0.85      0.93      0.89       271
  TeslaCrypt       0.69      0.96      0.80       114
    WannaCry       1.00      1.00      1.00       103
         Zip       1.00      0.96      0.98       112

    accuracy                           0.88      2634
   macro avg       0.90      0.91      0.90      2634
weighted avg       0.89      0.88      0.88      2634

✅ Student model saved: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/s

  teacher.load_state_dict(torch.load(teacher_path, map_location=device))


[Distill] Epoch 01/50 - Loss: 1679728.3918 - Train Acc: 0.5655 - Val Acc: 0.5990
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/student/student_from_mlp_Td30_Tw20.pth
[Distill] Epoch 02/50 - Loss: 1008621.4836 - Train Acc: 0.7295 - Val Acc: 0.6384
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/student/student_from_mlp_Td30_Tw20.pth
[Distill] Epoch 03/50 - Loss: 856461.7225 - Train Acc: 0.7724 - Val Acc: 0.6832
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/student/student_from_mlp_Td30_Tw20.pth
[Distill] Epoch 04/50 - Loss: 799379.3102 - Train Acc: 0.8019 - Val Acc: 0.7138
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/student/student_from_mlp_Td30_Tw20.pth
[Distill] Epoch 05/50 - Loss: 769447.9376 - Train Acc: 0.8198 - Val Acc: 0.7723
💾 Best student model saved to: /home/HardDisk/Satang/thesis_pr

  student.load_state_dict(torch.load(save_path))



📊 Final Classification Report (StudentCNN):
              precision    recall  f1-score   support

    AESCrypt       0.97      0.98      0.98       221
      Cerber       0.95      0.84      0.89       496
    Darkside       0.95      0.83      0.89       850
       Excel       0.99      1.00      0.99       386
     Firefox       0.98      0.90      0.94       539
   GandCrab4       0.69      0.76      0.72       836
        Ryuk       0.67      0.71      0.69       514
     SDelete       1.00      0.99      0.99       204
  Sodinokibi       0.87      0.84      0.85       537
  TeslaCrypt       0.66      0.97      0.79       224
    WannaCry       1.00      1.00      1.00       203
         Zip       1.00      0.97      0.99       220

    accuracy                           0.86      5230
   macro avg       0.89      0.90      0.89      5230
weighted avg       0.87      0.86      0.86      5230

✅ Student model saved: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/s

  teacher.load_state_dict(torch.load(teacher_path, map_location=device))


[Distill] Epoch 01/50 - Loss: 18360.4200 - Train Acc: 0.5250 - Val Acc: 0.6056
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/student/student_from_mlp_Td45_Tw10.pth
[Distill] Epoch 02/50 - Loss: 7367.7193 - Train Acc: 0.7217 - Val Acc: 0.6599
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/student/student_from_mlp_Td45_Tw10.pth
[Distill] Epoch 03/50 - Loss: 5496.1423 - Train Acc: 0.7722 - Val Acc: 0.6657
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/student/student_from_mlp_Td45_Tw10.pth
[Distill] Epoch 04/50 - Loss: 4974.9322 - Train Acc: 0.8049 - Val Acc: 0.7587
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/student/student_from_mlp_Td45_Tw10.pth
[Distill] Epoch 05/50 - Loss: 4642.2274 - Train Acc: 0.8167 - Val Acc: 0.7083
[Distill] Epoch 06/50 - Loss: 4444.2775 - Train Acc: 0.8218 - Val Acc: 0.

  student.load_state_dict(torch.load(save_path))
  teacher.load_state_dict(torch.load(teacher_path, map_location=device))


[Distill] Epoch 01/50 - Loss: 60786.9074 - Train Acc: 0.4919 - Val Acc: 0.4923
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/student/student_from_mlp_Td45_Tw15.pth
[Distill] Epoch 02/50 - Loss: 20969.9231 - Train Acc: 0.7058 - Val Acc: 0.6647
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/student/student_from_mlp_Td45_Tw15.pth
[Distill] Epoch 03/50 - Loss: 17194.7648 - Train Acc: 0.7749 - Val Acc: 0.6932
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/student/student_from_mlp_Td45_Tw15.pth
[Distill] Epoch 04/50 - Loss: 15713.9803 - Train Acc: 0.8194 - Val Acc: 0.6801
[Distill] Epoch 05/50 - Loss: 14138.2543 - Train Acc: 0.8332 - Val Acc: 0.7283
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/student/student_from_mlp_Td45_Tw15.pth
[Distill] Epoch 06/50 - Loss: 13136.8051 - Train Acc: 0.8460 - Val Ac

  student.load_state_dict(torch.load(save_path))
  teacher.load_state_dict(torch.load(teacher_path, map_location=device))


[Distill] Epoch 01/50 - Loss: 62977.1840 - Train Acc: 0.4857 - Val Acc: 0.5672
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/student/student_from_mlp_Td45_Tw20.pth
[Distill] Epoch 02/50 - Loss: 27750.8771 - Train Acc: 0.6904 - Val Acc: 0.5943
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/student/student_from_mlp_Td45_Tw20.pth
[Distill] Epoch 03/50 - Loss: 23933.0349 - Train Acc: 0.7284 - Val Acc: 0.6852
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/student/student_from_mlp_Td45_Tw20.pth
[Distill] Epoch 04/50 - Loss: 22902.4106 - Train Acc: 0.7570 - Val Acc: 0.6675
[Distill] Epoch 05/50 - Loss: 22043.4213 - Train Acc: 0.7890 - Val Acc: 0.6940
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/student/student_from_mlp_Td45_Tw20.pth
[Distill] Epoch 06/50 - Loss: 21148.4081 - Train Acc: 0.8155 - Val Ac

  student.load_state_dict(torch.load(save_path))
  teacher.load_state_dict(torch.load(teacher_path, map_location=device))


[Distill] Epoch 01/50 - Loss: 11990.4833 - Train Acc: 0.3378 - Val Acc: 0.4003
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/student/student_from_mlp_Td60_Tw10.pth
[Distill] Epoch 02/50 - Loss: 5242.9965 - Train Acc: 0.5993 - Val Acc: 0.5051
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/student/student_from_mlp_Td60_Tw10.pth
[Distill] Epoch 03/50 - Loss: 4533.9770 - Train Acc: 0.6621 - Val Acc: 0.4934
[Distill] Epoch 04/50 - Loss: 4013.7594 - Train Acc: 0.6884 - Val Acc: 0.5284
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/student/student_from_mlp_Td60_Tw10.pth
[Distill] Epoch 05/50 - Loss: 3732.6768 - Train Acc: 0.7339 - Val Acc: 0.6608
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/student/student_from_mlp_Td60_Tw10.pth
[Distill] Epoch 06/50 - Loss: 3554.4155 - Train Acc: 0.7618 - Val Acc: 0.

  student.load_state_dict(torch.load(save_path))
  teacher.load_state_dict(torch.load(teacher_path, map_location=device))


[Distill] Epoch 01/50 - Loss: 8624.7069 - Train Acc: 0.6049 - Val Acc: 0.6405
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/student/student_from_mlp_Td60_Tw15.pth
[Distill] Epoch 02/50 - Loss: 3568.0302 - Train Acc: 0.7772 - Val Acc: 0.6949
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/student/student_from_mlp_Td60_Tw15.pth
[Distill] Epoch 03/50 - Loss: 3184.7275 - Train Acc: 0.8168 - Val Acc: 0.7468
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/student/student_from_mlp_Td60_Tw15.pth
[Distill] Epoch 04/50 - Loss: 2902.1499 - Train Acc: 0.8437 - Val Acc: 0.7987
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/student/student_from_mlp_Td60_Tw15.pth
[Distill] Epoch 05/50 - Loss: 2702.2410 - Train Acc: 0.8486 - Val Acc: 0.7519
[Distill] Epoch 06/50 - Loss: 2584.3240 - Train Acc: 0.8642 - Val Acc: 0.7

  student.load_state_dict(torch.load(save_path))
  teacher.load_state_dict(torch.load(teacher_path, map_location=device))


[Distill] Epoch 01/50 - Loss: 16039.2905 - Train Acc: 0.4151 - Val Acc: 0.3326
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/student/student_from_mlp_Td60_Tw20.pth
[Distill] Epoch 02/50 - Loss: 5970.4138 - Train Acc: 0.6829 - Val Acc: 0.6417
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/student/student_from_mlp_Td60_Tw20.pth
[Distill] Epoch 03/50 - Loss: 4851.9650 - Train Acc: 0.7727 - Val Acc: 0.7564
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/student/student_from_mlp_Td60_Tw20.pth
[Distill] Epoch 04/50 - Loss: 4442.7708 - Train Acc: 0.7982 - Val Acc: 0.7014
[Distill] Epoch 05/50 - Loss: 4041.2446 - Train Acc: 0.8319 - Val Acc: 0.7670
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/student/student_from_mlp_Td60_Tw20.pth
[Distill] Epoch 06/50 - Loss: 3780.3928 - Train Acc: 0.8505 - Val Acc: 0.

  student.load_state_dict(torch.load(save_path))


In [25]:
import torch
import torch.nn as nn
import torch.optim as optim

def train_student_baseline(student, train_loader, val_loader, device,
                           epochs=50, lr=0.0005, class_weights=None,
                           save_path="best_student_baseline.pth"):

    student.to(device)
    optimizer = optim.Adam(student.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss(weight=class_weights)

    best_val_acc = 0.0
    train_losses, val_losses = [], []
    train_accuracies, val_accuracies = [], []

    for epoch in range(epochs):
        student.train()
        running_loss, correct = 0.0, 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            logits = student(inputs)  # 🔹 logits only for baseline
            loss = criterion(logits, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * inputs.size(0)  # weighted sum
            correct += (logits.argmax(1) == labels).sum().item()

        train_loss = running_loss / len(train_loader.dataset)
        train_acc = correct / len(train_loader.dataset)
        train_losses.append(train_loss)
        train_accuracies.append(train_acc)

        # === Validation ===
        student.eval()
        val_running_loss, val_correct = 0.0, 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                logits = student(inputs)
                loss = criterion(logits, labels)

                val_running_loss += loss.item() * inputs.size(0)
                val_correct += (logits.argmax(1) == labels).sum().item()

        val_loss = val_running_loss / len(val_loader.dataset)
        val_acc = val_correct / len(val_loader.dataset)
        val_losses.append(val_loss)
        val_accuracies.append(val_acc)

        print(f"[Baseline] Epoch {epoch+1:02d}/{epochs} - "
              f"Train Loss: {train_loss:.4f} - Val Loss: {val_loss:.4f} - "
              f"Train Acc: {train_acc:.4f} - Val Acc: {val_acc:.4f}")

        # Save best model
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(student.state_dict(), save_path)
            print(f"💾 Best standalone student model saved to: {save_path}")

    student.load_state_dict(torch.load(save_path))
    return train_accuracies, val_accuracies, train_losses, val_losses


In [26]:
import os
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import SMOTE
from torch.utils.data import DataLoader

baseline_save_dir = "/home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/baseline_student"
os.makedirs(baseline_save_dir, exist_ok=True)

for T_d in detection_times:
    for T_w in window_sizes:
        T_len = T_d - T_w + 1
        expected_shape = (T_len, NUM_FEATURES)

        print(f"\n🚀 Training Standalone Student for Td={T_d}, Tw={T_w} (T_len={T_len})")

        # === Paths
        folder_name = f"X_csv_split_{T_len}"
        input_dir = os.path.join(base_dir, f"New_{T_d}", f"{T_w}", "split_tws", folder_name)
        train_path = os.path.join(input_dir, "train")
        val_path   = os.path.join(input_dir, "val")
        student_model_path = os.path.join(baseline_save_dir, f"student_baseline_Td{T_d}_Tw{T_w}.pth")

        # === 1. Load Raw Data
        X_train_raw, y_train_raw = load_split_from_folder(train_path, expected_shape)
        X_val_raw, y_val_raw     = load_split_from_folder(val_path, expected_shape)

        # === 2. Encode Labels & Apply SMOTE
        label_encoder = LabelEncoder()
        y_train_encoded = label_encoder.fit_transform(y_train_raw)
        X_train_flat = X_train_raw.reshape(X_train_raw.shape[0], -1)

        X_resampled, y_resampled = SMOTE().fit_resample(X_train_flat, y_train_encoded)
        X_train_bal = X_resampled.reshape(-1, T_len, NUM_FEATURES)
        y_train_str = label_encoder.inverse_transform(y_resampled)

        # === 3. Dataset & DataLoader
        train_dataset = MultiStreamDataset(X_train_bal, y_train_str, label_encoder, augment=True)
        val_dataset   = MultiStreamDataset(X_val_raw, y_val_raw, label_encoder, augment=False)

        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
        val_loader   = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

        # === 4. Class Weights
        class_weights_tensor = compute_class_weights(label_encoder.transform(y_train_str), device)

        # === 5. Initialize StudentCNN
        student_baseline = StudentCNN(
            input_length=T_len,
            num_classes=len(label_encoder.classes_)
        ).to(device)

        # === 6. Train Student Model (No KD)
        train_accs_b, val_accs_b, train_losses_b, val_losses_b = train_student_baseline(
            student=student_baseline,
            train_loader=train_loader,
            val_loader=val_loader,
            device=device,
            epochs=NUM_EPOCHS,
            lr=0.0001,
            class_weights=class_weights_tensor,
            save_path=student_model_path
        )

        print(f"✅ Saved standalone student model: {student_model_path}")



🚀 Training Standalone Student for Td=30, Tw=10 (T_len=21)


[Baseline] Epoch 01/50 - Train Loss: 2.2955 - Val Loss: 1.9555 - Train Acc: 0.1961 - Val Acc: 0.3141
💾 Best standalone student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/baseline_student/student_baseline_Td30_Tw10.pth
[Baseline] Epoch 02/50 - Train Loss: 1.5758 - Val Loss: 1.5551 - Train Acc: 0.4764 - Val Acc: 0.4765
💾 Best standalone student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/baseline_student/student_baseline_Td30_Tw10.pth
[Baseline] Epoch 03/50 - Train Loss: 1.2834 - Val Loss: 1.3521 - Train Acc: 0.5754 - Val Acc: 0.5476
💾 Best standalone student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/baseline_student/student_baseline_Td30_Tw10.pth
[Baseline] Epoch 04/50 - Train Loss: 1.0933 - Val Loss: 1.2085 - Train Acc: 0.6357 - Val Acc: 0.5691
💾 Best standalone student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/baseline_student/student_bas

  student.load_state_dict(torch.load(save_path))


[Baseline] Epoch 01/50 - Train Loss: 2.4214 - Val Loss: 2.2727 - Train Acc: 0.1514 - Val Acc: 0.2965
💾 Best standalone student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/baseline_student/student_baseline_Td30_Tw15.pth
[Baseline] Epoch 02/50 - Train Loss: 1.7378 - Val Loss: 1.6632 - Train Acc: 0.4360 - Val Acc: 0.4590
💾 Best standalone student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/baseline_student/student_baseline_Td30_Tw15.pth
[Baseline] Epoch 03/50 - Train Loss: 1.3216 - Val Loss: 1.4112 - Train Acc: 0.5892 - Val Acc: 0.5399
💾 Best standalone student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/baseline_student/student_baseline_Td30_Tw15.pth
[Baseline] Epoch 04/50 - Train Loss: 1.1076 - Val Loss: 1.2449 - Train Acc: 0.6565 - Val Acc: 0.5866
💾 Best standalone student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/baseline_student/student_bas

  student.load_state_dict(torch.load(save_path))


[Baseline] Epoch 01/50 - Train Loss: 1.9434 - Val Loss: 1.5120 - Train Acc: 0.3625 - Val Acc: 0.4929
💾 Best standalone student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/baseline_student/student_baseline_Td30_Tw20.pth
[Baseline] Epoch 02/50 - Train Loss: 1.1883 - Val Loss: 1.2060 - Train Acc: 0.6189 - Val Acc: 0.6096
💾 Best standalone student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/baseline_student/student_baseline_Td30_Tw20.pth
[Baseline] Epoch 03/50 - Train Loss: 0.9574 - Val Loss: 1.0419 - Train Acc: 0.6906 - Val Acc: 0.6421
💾 Best standalone student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/baseline_student/student_baseline_Td30_Tw20.pth
[Baseline] Epoch 04/50 - Train Loss: 0.8070 - Val Loss: 0.9267 - Train Acc: 0.7360 - Val Acc: 0.6728
💾 Best standalone student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/baseline_student/student_bas

  student.load_state_dict(torch.load(save_path))


[Baseline] Epoch 01/50 - Train Loss: 2.4644 - Val Loss: 2.3780 - Train Acc: 0.1291 - Val Acc: 0.2800
💾 Best standalone student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/baseline_student/student_baseline_Td45_Tw10.pth
[Baseline] Epoch 02/50 - Train Loss: 2.1455 - Val Loss: 1.9063 - Train Acc: 0.3244 - Val Acc: 0.4331
💾 Best standalone student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/baseline_student/student_baseline_Td45_Tw10.pth
[Baseline] Epoch 03/50 - Train Loss: 1.5489 - Val Loss: 1.5661 - Train Acc: 0.5639 - Val Acc: 0.5436
💾 Best standalone student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/baseline_student/student_baseline_Td45_Tw10.pth
[Baseline] Epoch 04/50 - Train Loss: 1.2418 - Val Loss: 1.3263 - Train Acc: 0.6606 - Val Acc: 0.6085
💾 Best standalone student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/baseline_student/student_bas

  student.load_state_dict(torch.load(save_path))


[Baseline] Epoch 01/50 - Train Loss: 2.4684 - Val Loss: 2.4031 - Train Acc: 0.1145 - Val Acc: 0.2330
💾 Best standalone student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/baseline_student/student_baseline_Td45_Tw15.pth
[Baseline] Epoch 02/50 - Train Loss: 2.1041 - Val Loss: 1.8262 - Train Acc: 0.2998 - Val Acc: 0.4449
💾 Best standalone student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/baseline_student/student_baseline_Td45_Tw15.pth
[Baseline] Epoch 03/50 - Train Loss: 1.3963 - Val Loss: 1.4145 - Train Acc: 0.6209 - Val Acc: 0.5763
💾 Best standalone student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/baseline_student/student_baseline_Td45_Tw15.pth
[Baseline] Epoch 04/50 - Train Loss: 1.1066 - Val Loss: 1.2706 - Train Acc: 0.6920 - Val Acc: 0.6012
💾 Best standalone student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/baseline_student/student_bas

  student.load_state_dict(torch.load(save_path))


[Baseline] Epoch 01/50 - Train Loss: 2.4238 - Val Loss: 2.3271 - Train Acc: 0.1606 - Val Acc: 0.2069
💾 Best standalone student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/baseline_student/student_baseline_Td45_Tw20.pth
[Baseline] Epoch 02/50 - Train Loss: 1.8648 - Val Loss: 1.7529 - Train Acc: 0.3842 - Val Acc: 0.4265
💾 Best standalone student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/baseline_student/student_baseline_Td45_Tw20.pth
[Baseline] Epoch 03/50 - Train Loss: 1.4441 - Val Loss: 1.5021 - Train Acc: 0.5550 - Val Acc: 0.5028
💾 Best standalone student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/baseline_student/student_baseline_Td45_Tw20.pth
[Baseline] Epoch 04/50 - Train Loss: 1.2256 - Val Loss: 1.3425 - Train Acc: 0.6208 - Val Acc: 0.5356
💾 Best standalone student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/baseline_student/student_bas

  student.load_state_dict(torch.load(save_path))


[Baseline] Epoch 01/50 - Train Loss: 2.4830 - Val Loss: 2.4601 - Train Acc: 0.1280 - Val Acc: 0.2285
💾 Best standalone student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/baseline_student/student_baseline_Td60_Tw10.pth
[Baseline] Epoch 02/50 - Train Loss: 2.3813 - Val Loss: 2.3890 - Train Acc: 0.2088 - Val Acc: 0.2300
💾 Best standalone student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/baseline_student/student_baseline_Td60_Tw10.pth
[Baseline] Epoch 03/50 - Train Loss: 2.0930 - Val Loss: 2.0003 - Train Acc: 0.3222 - Val Acc: 0.3275
💾 Best standalone student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/baseline_student/student_baseline_Td60_Tw10.pth
[Baseline] Epoch 04/50 - Train Loss: 1.5852 - Val Loss: 1.6131 - Train Acc: 0.5313 - Val Acc: 0.5546
💾 Best standalone student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/baseline_student/student_bas

  student.load_state_dict(torch.load(save_path))


[Baseline] Epoch 01/50 - Train Loss: 2.5073 - Val Loss: 2.4620 - Train Acc: 0.0994 - Val Acc: 0.1443
💾 Best standalone student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/baseline_student/student_baseline_Td60_Tw15.pth
[Baseline] Epoch 02/50 - Train Loss: 2.3875 - Val Loss: 2.3619 - Train Acc: 0.2119 - Val Acc: 0.2924
💾 Best standalone student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/baseline_student/student_baseline_Td60_Tw15.pth
[Baseline] Epoch 03/50 - Train Loss: 2.0764 - Val Loss: 1.9494 - Train Acc: 0.3823 - Val Acc: 0.4000
💾 Best standalone student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/baseline_student/student_baseline_Td60_Tw15.pth
[Baseline] Epoch 04/50 - Train Loss: 1.5613 - Val Loss: 1.6389 - Train Acc: 0.5268 - Val Acc: 0.5000
💾 Best standalone student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/baseline_student/student_bas

  student.load_state_dict(torch.load(save_path))


[Baseline] Epoch 01/50 - Train Loss: 2.4633 - Val Loss: 2.4337 - Train Acc: 0.1136 - Val Acc: 0.1370
💾 Best standalone student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/baseline_student/student_baseline_Td60_Tw20.pth
[Baseline] Epoch 02/50 - Train Loss: 2.2445 - Val Loss: 2.0877 - Train Acc: 0.2811 - Val Acc: 0.3407
💾 Best standalone student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/baseline_student/student_baseline_Td60_Tw20.pth
[Baseline] Epoch 03/50 - Train Loss: 1.6023 - Val Loss: 1.6121 - Train Acc: 0.5627 - Val Acc: 0.5375
💾 Best standalone student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/baseline_student/student_baseline_Td60_Tw20.pth
[Baseline] Epoch 04/50 - Train Loss: 1.2694 - Val Loss: 1.3703 - Train Acc: 0.6687 - Val Acc: 0.6077
💾 Best standalone student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/baseline_student/student_bas

  student.load_state_dict(torch.load(save_path))


In [27]:
import os
import torch
import pandas as pd
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F

# === Model Definition ===
class PatchMLP(nn.Module):
    def __init__(self, dim, hidden_dim):
        super().__init__()
        self.mlp = nn.Sequential(
            nn.Linear(dim, hidden_dim),
            nn.GELU(),
            nn.Linear(hidden_dim, dim)
        )

    def forward(self, x):
        return self.mlp(x)

class ChannelMLP(nn.Module):
    def __init__(self, num_patches, hidden_dim):
        super().__init__()
        self.mlp = nn.Sequential(
            nn.Linear(num_patches, hidden_dim),
            nn.GELU(),
            nn.Linear(hidden_dim, num_patches)
        )

    def forward(self, x):
        return self.mlp(x)

class MLPMixerRansomwareClassifier(nn.Module):
    def __init__(self, seq_len=32, feature_dim=8, num_classes=12, hidden_dim=128):
        super().__init__()
        self.patch_mlp = PatchMLP(feature_dim, hidden_dim)
        self.channel_mlp = ChannelMLP(seq_len, hidden_dim)
        self.global_pool = nn.AdaptiveAvgPool1d(1)
        self.projection = nn.Linear(feature_dim, 128)
        self.classifier = nn.Sequential(
            nn.LayerNorm(feature_dim),
            nn.Flatten(),
            nn.Linear(feature_dim, num_classes)
        )

    def forward(self, x, return_features=False):
        x = self.patch_mlp(x)
        x = x.permute(0, 2, 1)
        x = self.channel_mlp(x)
        x = self.global_pool(x)
        feat = x.squeeze(-1)
        feat_proj = self.projection(feat)
        logits = self.classifier(feat)
        return (logits, feat_proj) if return_features else logits

# === CONFIG ===
detection_times = [30, 45, 60]
window_sizes = [10, 15, 20]
NUM_FEATURES = 8
BATCH_SIZE = 32
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

base_dir = "/home/HardDisk/Satang/thesis_proj/"
base_teacher_dir = os.path.join(base_dir, "Deep_Learning", "cross_archi", "mlp", "teacher")
student_kd_dir = os.path.join(base_dir, "Deep_Learning","cross_archi", "mlp", "student")
student_baseline_dir = os.path.join(base_dir, "Deep_Learning", "cross_archi","mlp", "baseline_student")
csv_output_path = os.path.join(base_dir, "Deep_Learning", "cross_archi","mlp", "results", "model_eval_summary.csv")
os.makedirs(os.path.dirname(csv_output_path), exist_ok=True)

results = []

# === Evaluation Function ===
def evaluate_and_log(model, model_path, test_loader, label_encoder, T_d, T_w, model_type, results_list):
    model.load_state_dict(torch.load(model_path, map_location=DEVICE))
    model.to(DEVICE)
    model.eval()

    all_preds, all_labels = [], []
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            logits, _ = model(inputs, return_features=True)
            preds = torch.argmax(logits, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    report = classification_report(all_labels, all_preds, target_names=label_encoder.classes_, output_dict=True)
    acc = accuracy_score(all_labels, all_preds)

    results_list.append({
        "Td": T_d,
        "Tw": T_w,
        "T_len": T_d - T_w + 1,
        "Model": model_type,
        "Accuracy": round(acc, 4),
        "Precision_macro": round(report["macro avg"]["precision"], 4),
        "Recall_macro": round(report["macro avg"]["recall"], 4),
        "F1_macro": round(report["macro avg"]["f1-score"], 4),
        "F1_weighted": round(report["weighted avg"]["f1-score"], 4),
    })

# === MAIN EVALUATION LOOP ===
for T_d in detection_times:
    for T_w in window_sizes:
        T_len = T_d - T_w + 1
        expected_shape = (T_len, NUM_FEATURES)
        folder_name = f"X_csv_split_{T_len}"
        test_path = os.path.join(base_dir, f"New_{T_d}", f"{T_w}", "split_tws", folder_name, "test")

        # === Load Test Data ===
        X_test_raw, y_test_raw = load_split_from_folder(test_path, expected_shape)
        label_encoder = LabelEncoder()
        label_encoder.fit(y_test_raw)

        test_dataset = MultiStreamDataset(X_test_raw, y_test_raw, label_encoder, augment=False)
        test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

        # === Evaluate Teacher ===
        teacher_path = os.path.join(base_teacher_dir, f"mlpmixer_Td{T_d}_Tw{T_w}.pth")
        teacher = MLPMixerRansomwareClassifier(
            seq_len=T_len,
            feature_dim=NUM_FEATURES,
            num_classes=len(label_encoder.classes_),
            hidden_dim=128
        )
        evaluate_and_log(teacher, teacher_path, test_loader, label_encoder, T_d, T_w, "Teacher", results)

        # === Evaluate KD Student ===
        student_kd_path = os.path.join(student_kd_dir, f"student_from_mlp_Td{T_d}_Tw{T_w}.pth")
        student_kd = StudentCNN(input_length=T_len, num_classes=len(label_encoder.classes_)).to(DEVICE)
        evaluate_and_log(student_kd, student_kd_path, test_loader, label_encoder, T_d, T_w, "Student_KD", results)

        # === Evaluate Baseline Student ===
        student_b_path = os.path.join(student_baseline_dir, f"student_baseline_Td{T_d}_Tw{T_w}.pth")
        student_b = StudentCNN(input_length=T_len, num_classes=len(label_encoder.classes_)).to(DEVICE)
        evaluate_and_log(student_b, student_b_path, test_loader, label_encoder, T_d, T_w, "Student_Baseline", results)

# === Save to CSV ===
df_results = pd.DataFrame(results)
df_results.to_csv(csv_output_path, index=False)
print(f"\n✅ All evaluation results saved to: {csv_output_path}")


  model.load_state_dict(torch.load(model_path, map_location=DEVICE))
  model.load_state_dict(torch.load(model_path, map_location=DEVICE))
  model.load_state_dict(torch.load(model_path, map_location=DEVICE))
  model.load_state_dict(torch.load(model_path, map_location=DEVICE))
  model.load_state_dict(torch.load(model_path, map_location=DEVICE))
  model.load_state_dict(torch.load(model_path, map_location=DEVICE))
  model.load_state_dict(torch.load(model_path, map_location=DEVICE))
  model.load_state_dict(torch.load(model_path, map_location=DEVICE))
  model.load_state_dict(torch.load(model_path, map_location=DEVICE))
  model.load_state_dict(torch.load(model_path, map_location=DEVICE))
  model.load_state_dict(torch.load(model_path, map_location=DEVICE))
  model.load_state_dict(torch.load(model_path, map_location=DEVICE))
  model.load_state_dict(torch.load(model_path, map_location=DEVICE))
  model.load_state_dict(torch.load(model_path, map_location=DEVICE))
  model.load_state_dict(torch.load


✅ All evaluation results saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/mlp/results/model_eval_summary.csv


  model.load_state_dict(torch.load(model_path, map_location=DEVICE))
