In [62]:
import os
import numpy as np
import pandas as pd
from collections import Counter
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
from torch.utils.data import Subset
import torch.optim as optim
import torch.nn.functional as F

In [63]:
# === CONFIG ===
CHUNK_SIZE = 31
NUM_FEATURES = 8
NUM_EPOCHS = 50
BATCH_SIZE = 32
K_FOLDS = 5
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [64]:
# === HELPERS ===
def collate_fn(batch):
    streams, labels = zip(*batch)
    streams = list(zip(*streams))
    streams = [torch.stack(s) for s in streams]
    labels = torch.tensor(labels)
    return streams, labels

def compute_class_weights(labels, device):
    counter = Counter(labels)
    total = sum(counter.values())
    weights = [np.log(total / (counter[i] + 1)) for i in range(len(counter))]
    return torch.tensor(weights, dtype=torch.float).to(device)

def print_model_info(model):
    print("Model Architecture:")
    print(model)
    total_params = sum(p.numel() for p in model.parameters())
    print(f"Total parameters: {total_params:,}")

def plot_loss_curve(train_losses, val_losses):
    epochs = range(len(train_losses))
    plt.figure(figsize=(10, 5))
    plt.plot(epochs, train_losses, label='Train Loss', color='blue')
    plt.plot(epochs, val_losses, label='Val Loss', color='orange')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Training vs Validation Loss')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

def plot_accuracy_curve(train_acc, val_acc):
    epochs = range(len(train_acc))
    plt.figure(figsize=(10, 5))
    plt.plot(epochs, train_acc, label='Train Accuracy', color='green')
    plt.plot(epochs, val_acc, label='Val Accuracy', color='red')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.title('Training vs Validation Accuracy')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()


In [65]:
# === DATASET CLASS ===
class MultiStreamDataset(Dataset):
    def __init__(self, data, labels, label_encoder, augment=False):
        self.data = data
        self.labels = label_encoder.transform(labels)
        self.augment = augment

    def __len__(self):
        return len(self.data)

    def augment_stream(self, stream):
        jitter = np.random.normal(0, 0.01, stream.shape)
        scale = np.random.normal(1.0, 0.05, stream.shape)
        return stream * scale + jitter

    def __getitem__(self, idx):
        sample = self.data[idx]  # shape: (T, 8)
        
        if self.augment:
            # Apply augmentation to each feature independently
            jitter = np.random.normal(0, 0.01, sample.shape)
            scale = np.random.normal(1.0, 0.05, sample.shape)
            sample = sample * scale + jitter

        sample_tensor = torch.tensor(sample, dtype=torch.float32)  # shape: (T, 8)
        label_tensor = torch.tensor(self.labels[idx], dtype=torch.long)
        
        return sample_tensor, label_tensor


In [66]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class RansomwareTransformer(nn.Module):
    def __init__(self, input_dim=8, seq_len=32, num_classes=12,
                 d_model=32, nhead=2, num_layers=1, dim_feedforward=64, dropout=0.1):
        super(RansomwareTransformer, self).__init__()

        self.embedding = nn.Linear(input_dim, d_model)
        self.pos_encoding = nn.Parameter(torch.randn(1, seq_len, d_model))  # (1, T, d_model)
        
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model, nhead=nhead,
            dim_feedforward=dim_feedforward, dropout=dropout,
            batch_first=True
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

        self.pooling = nn.AdaptiveAvgPool1d(1)  # pool across time steps
        self.proj = nn.Linear(d_model, 128)     # ⬅️ For feature distillation (match student)

        self.fc = nn.Sequential(
            nn.LayerNorm(d_model),
            nn.Linear(d_model, 64),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(64, num_classes)
        )

    def forward(self, x, return_features=False):  # x: (B, T, 8)
        x = self.embedding(x) + self.pos_encoding[:, :x.size(1)]
        x = self.transformer(x)         # (B, T, d_model)
        x = x.transpose(1, 2)           # (B, d_model, T)
        x = self.pooling(x).squeeze(-1) # (B, d_model)

        if return_features:
            feat_proj = self.proj(x)    # (B, 128)
            return self.fc(x), feat_proj

        return self.fc(x)


In [67]:

# # === COLLATE FUNCTION ===
# def collate_fn(batch):
#     streams, labels = zip(*batch)
#     streams = list(zip(*streams))
#     streams = [torch.stack(s) for s in streams]
#     labels = torch.tensor(labels)
#     return streams, labels

# === LOAD SPLIT FUNCTION ===
def load_split_from_folder(split_dir, expected_shape):
    X, y = [], []
    for class_name in sorted(os.listdir(split_dir)):
        class_path = os.path.join(split_dir, class_name)
        for fname in sorted(os.listdir(class_path)):
            if fname.endswith(".csv"):
                fpath = os.path.join(class_path, fname)
                chunk = pd.read_csv(fpath, header=None).values
                if chunk.shape == expected_shape:
                    X.append(chunk)
                    y.append(class_name)
    return np.array(X), np.array(y)

In [68]:
# === APPLY SMOTE ===
def apply_smote_on_training(X_chunks, y_labels, chunk_size, num_features):
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y_labels)

    smote_encoder = LabelEncoder()
    y_encoded_for_smote = smote_encoder.fit_transform(y_labels)

    X_flat = X_chunks.reshape(X_chunks.shape[0], -1)
    X_resampled, y_resampled = SMOTE().fit_resample(X_flat, y_encoded_for_smote)

    X_res = X_resampled.reshape(-1, chunk_size, num_features)
    y_res_str = smote_encoder.inverse_transform(y_resampled)

    return X_res, y_res_str, label_encoder

# # === LOAD DATASETS ===
# expected_shape = (CHUNK_SIZE, NUM_FEATURES)

# X_train_raw, y_train_raw = load_split_from_folder(os.path.join(INPUT_DIR, "train"), expected_shape)
# X_val_raw,   y_val_raw   = load_split_from_folder(os.path.join(INPUT_DIR, "val"), expected_shape)

# # === SMOTE ONLY ON TRAIN ===
# X_train_balanced, y_train_str, label_encoder = apply_smote_on_training(
#     X_train_raw, y_train_raw, CHUNK_SIZE, NUM_FEATURES
# )

# # === CREATE DATASETS ===
# train_dataset = MultiStreamDataset(X_train_balanced, y_train_str, label_encoder, augment=True)
# val_dataset   = MultiStreamDataset(X_val_raw, y_val_raw, label_encoder, augment=False)

# train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn)
# val_loader   = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn)


In [69]:

# === CLASS WEIGHTING ===
def compute_class_weights(labels, device):
    from collections import Counter
    total = len(labels)
    counts = Counter(labels)
    weights = [np.log(total / (counts[i] + 1)) for i in range(len(counts))]
    return torch.tensor(weights, dtype=torch.float).to(device)

# class_weights_tensor = compute_class_weights(label_encoder.transform(y_train_str), device="cpu")

In [70]:
def train_model(model, train_loader, val_loader, device, epochs=50, lr=0.001,
                class_weights=None, optimizer=None, scheduler=None, best_model_path="transformer_model.pth"):

    print("🔍 Class Weights Tensor:")
    print(class_weights)

    criterion = nn.CrossEntropyLoss()
    if optimizer is None:
        optimizer = optim.Adam(model.parameters(), lr=lr)

    model.to(device)

    train_losses, val_losses = [], []
    train_accuracies, val_accuracies = [], []

    best_val_acc = 0

    for epoch in range(epochs):
        model.train()
        total_loss, correct = 0, 0

        if scheduler:
            scheduler.step()

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5)
            optimizer.step()

            total_loss += loss.item()
            correct += (outputs.argmax(1) == labels).sum().item()

        train_acc = correct / len(train_loader.dataset)
        train_losses.append(total_loss)
        train_accuracies.append(train_acc)

        # === Validation ===
        model.eval()
        val_loss, val_correct = 0, 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                val_correct += (outputs.argmax(1) == labels).sum().item()

        val_acc = val_correct / len(val_loader.dataset)
        val_losses.append(val_loss)
        val_accuracies.append(val_acc)

        print(f"Epoch {epoch+1}/{epochs} - Train Loss: {total_loss:.4f} - Val Loss: {val_loss:.4f} - Train Acc: {train_acc:.4f} - Val Acc: {val_acc:.4f}")

        # ✅ Save model ONLY if it's the best so far
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), best_model_path)
            print(f"💾 Best model saved to: {best_model_path}")

    model.load_state_dict(torch.load(best_model_path))
    return train_accuracies, val_accuracies, train_losses, val_losses


In [71]:

# === UTILS ===
def load_split_from_folder(split_dir, expected_shape):
    X, y = [], []
    for class_name in sorted(os.listdir(split_dir)):
        class_path = os.path.join(split_dir, class_name)
        for fname in sorted(os.listdir(class_path)):
            if fname.endswith(".csv"):
                fpath = os.path.join(class_path, fname)
                chunk = pd.read_csv(fpath, header=None).values
                if chunk.shape == expected_shape:
                    X.append(chunk)
                    y.append(class_name)
    return np.array(X), np.array(y)

# # === LOAD DATA FROM FOLDER STRUCTURE ===
# expected_shape = (CHUNK_SIZE, NUM_FEATURES)
# X_train_raw, y_train_raw = load_split_from_folder(os.path.join(INPUT_DIR, "train"), expected_shape)
# X_val_raw, y_val_raw     = load_split_from_folder(os.path.join(INPUT_DIR, "val"), expected_shape)

# # === ENCODE LABELS & APPLY SMOTE ===
# label_encoder = LabelEncoder()
# y_train_encoded = label_encoder.fit_transform(y_train_raw)

# X_train_flat = X_train_raw.reshape(X_train_raw.shape[0], -1)
# X_resampled, y_resampled = SMOTE().fit_resample(X_train_flat, y_train_encoded)
# X_train_bal = X_resampled.reshape(-1, CHUNK_SIZE, NUM_FEATURES)
# y_train_str = label_encoder.inverse_transform(y_resampled)

# # === BUILD DATASETS AND LOADERS ===
# train_dataset = MultiStreamDataset(X_train_bal, y_train_str, label_encoder, augment=True)
# val_dataset   = MultiStreamDataset(X_val_raw, y_val_raw, label_encoder, augment=False)

# train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
# val_loader   = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

# # === CLASS WEIGHTS ===
# class_weights_tensor = compute_class_weights(label_encoder.transform(y_train_str), device)

In [72]:
# from torch_lr_finder import LRFinder

# NUM_EPOCHS = 70

# print("\n=== 🧠 Retraining Transformer Model ===")
# model_for_lr = RansomwareTransformer(
#     input_dim=NUM_FEATURES,
#     seq_len=CHUNK_SIZE,
#     num_classes=len(label_encoder.classes_)
# ).to(device)

# # ✅ Use AdamW for LR Finder
# optimizer = torch.optim.AdamW(model_for_lr.parameters(), lr=1e-7, weight_decay=1e-4)
# criterion = nn.CrossEntropyLoss(weight=class_weights_tensor)

# print("🔍 Finding optimal learning rate...")
# lr_finder = LRFinder(model_for_lr, optimizer, criterion, device=device)
# lr_finder.range_test(train_loader, end_lr=1, num_iter=200)
# lr_finder.plot()  # Manually inspect the curve
# lr_finder.reset()

# # After inspecting the LR plot, update this
# BEST_LR = 0.01  # ✅ Replace this based on the plotted curve

# # === Train Final Transformer ===
# final_transformer = RansomwareTransformer(
#     input_dim=NUM_FEATURES,
#     seq_len=CHUNK_SIZE,
#     num_classes=len(label_encoder.classes_)
# ).to(device)

# optimizer = torch.optim.AdamW(final_transformer.parameters(), lr=BEST_LR, weight_decay=1e-4)
# scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=NUM_EPOCHS)

# train_accs, val_accs, train_losses, val_losses = train_model(
#     model=final_transformer,
#     train_loader=train_loader,
#     val_loader=val_loader,
#     device=device,
#     epochs=NUM_EPOCHS,
#     class_weights=class_weights_tensor,
#     lr=BEST_LR,
#     optimizer=optimizer,
#     scheduler=scheduler
# )

# # === Save Model ===
# torch.save(final_transformer.state_dict(), "transformer.pth")
# print("✅ Final transformer model saved as 'transformer.pth'")


In [73]:
# # === 🔍 Plot training curves
# plot_loss_curve(train_losses, val_losses)
# plot_accuracy_curve(train_accs, val_accs)

# # === 📊 Evaluate on validation set (same used during training)
# final_transformer.eval()
# all_preds, all_labels = [], []
# with torch.no_grad():
#     for inputs, labels in val_loader:  # inputs: (B, T, 8)
#         inputs = inputs.to(device)
#         labels = labels.to(device)
#         outputs = final_transformer(inputs)
#         preds = torch.argmax(outputs, dim=1)
#         all_preds.extend(preds.cpu().numpy())
#         all_labels.extend(labels.cpu().numpy())

# # === Confusion Matrix & Report
# cm = confusion_matrix(all_labels, all_preds)
# plt.figure(figsize=(10, 8))
# sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
#             xticklabels=label_encoder.classes_, yticklabels=label_encoder.classes_)
# plt.title('Teacher Model - Confusion Matrix (Validation Set)')
# plt.xlabel('Predicted')
# plt.ylabel('Actual')
# plt.tight_layout()
# plt.show()

# print("\nFinal Classification Report (Teacher):\n")
# print(classification_report(all_labels, all_preds, target_names=label_encoder.classes_))

# print(f"\nFinal Training Accuracy: {train_accs[-1]:.4f}")
# print(f"Final Validation Accuracy: {val_accs[-1]:.4f}")

In [74]:
base_dir = "/home/HardDisk/Satang/thesis_proj"
detection_times = [30, 45, 60]
window_sizes = [10, 15, 20]
save_root = "/home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer"
os.makedirs(save_root, exist_ok=True)
for T_d in detection_times:
    for T_w in window_sizes:
        print(f"\n🚀 Running for Td={T_d}, Tw={T_w}")
        model_name = os.path.join(save_root, f"teacher/transformer_Td{T_d}_Tw{T_w}.pth")
        T_len = T_d - T_w + 1  # This determines expected shape and directory name
        folder_name = f"X_csv_split_{T_len}"

        input_dir = os.path.join(base_dir, f"New_{T_d}", f"{T_w}", "split_tws", folder_name)
        train_path = os.path.join(input_dir, "train")
        val_path   = os.path.join(input_dir, "val")

        # === 1. Load Data ===
        expected_shape = (T_len, NUM_FEATURES)
        X_train_raw, y_train_raw = load_split_from_folder(train_path, expected_shape)
        X_val_raw, y_val_raw     = load_split_from_folder(val_path, expected_shape)

        # === 2. SMOTE + Encode ===
        label_encoder = LabelEncoder()
        y_train_encoded = label_encoder.fit_transform(y_train_raw)
        X_train_flat = X_train_raw.reshape(X_train_raw.shape[0], -1)
        X_resampled, y_resampled = SMOTE().fit_resample(X_train_flat, y_train_encoded)
        X_train_bal = X_resampled.reshape(-1, expected_shape[0], NUM_FEATURES)
        y_train_str = label_encoder.inverse_transform(y_resampled)

        # === 3. Datasets + Loaders ===
        train_dataset = MultiStreamDataset(X_train_bal, y_train_str, label_encoder, augment=True)
        val_dataset   = MultiStreamDataset(X_val_raw, y_val_raw, label_encoder, augment=False)

        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
        val_loader   = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

        # === 4. Transformer Model ===
        model = RansomwareTransformer(
            input_dim=NUM_FEATURES,
            seq_len=expected_shape[0],
            num_classes=len(label_encoder.classes_)
        ).to(device)

        optimizer = torch.optim.AdamW(model.parameters(), lr=0.01, weight_decay=1e-4)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=NUM_EPOCHS)
        class_weights_tensor = compute_class_weights(label_encoder.transform(y_train_str), device)

        train_accs, val_accs, train_losses, val_losses = train_model(
            model=model,
            train_loader=train_loader,
            val_loader=val_loader,
            device=device,
            epochs=NUM_EPOCHS,
            class_weights=class_weights_tensor,
            lr=0.01,
            optimizer=optimizer,
            scheduler=scheduler,
            best_model_path=model_name
        )



🚀 Running for Td=30, Tw=10


🔍 Class Weights Tensor:
tensor([2.4843, 2.4843, 2.4843, 2.4843, 2.4843, 2.4843, 2.4843, 2.4843, 2.4843,
        2.4843, 2.4843, 2.4843], device='cuda:0')




Epoch 1/50 - Train Loss: 624.6613 - Val Loss: 75.4990 - Train Acc: 0.5724 - Val Acc: 0.5245
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/teacher/transformer_Td30_Tw10.pth
Epoch 2/50 - Train Loss: 474.3682 - Val Loss: 59.0594 - Train Acc: 0.6746 - Val Acc: 0.6232
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/teacher/transformer_Td30_Tw10.pth
Epoch 3/50 - Train Loss: 413.8850 - Val Loss: 59.4414 - Train Acc: 0.7205 - Val Acc: 0.6263
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/teacher/transformer_Td30_Tw10.pth
Epoch 4/50 - Train Loss: 387.8245 - Val Loss: 49.5105 - Train Acc: 0.7374 - Val Acc: 0.6738
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/teacher/transformer_Td30_Tw10.pth
Epoch 5/50 - Train Loss: 373.1636 - Val Loss: 55.0459 - Train Acc: 0.7449 - Val Acc: 0.6438
Epoch 6/50 - Train Loss:

  model.load_state_dict(torch.load(best_model_path))


🔍 Class Weights Tensor:
tensor([2.4844, 2.4844, 2.4844, 2.4844, 2.4844, 2.4844, 2.4844, 2.4844, 2.4844,
        2.4844, 2.4844, 2.4844], device='cuda:0')




Epoch 1/50 - Train Loss: 702.7678 - Val Loss: 70.2160 - Train Acc: 0.6632 - Val Acc: 0.6800
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/teacher/transformer_Td30_Tw15.pth
Epoch 2/50 - Train Loss: 475.8542 - Val Loss: 67.5550 - Train Acc: 0.7658 - Val Acc: 0.6644
Epoch 3/50 - Train Loss: 440.9636 - Val Loss: 65.1731 - Train Acc: 0.7818 - Val Acc: 0.6997
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/teacher/transformer_Td30_Tw15.pth
Epoch 4/50 - Train Loss: 411.5334 - Val Loss: 70.1555 - Train Acc: 0.7971 - Val Acc: 0.6629
Epoch 5/50 - Train Loss: 376.9889 - Val Loss: 57.3830 - Train Acc: 0.8118 - Val Acc: 0.7213
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/teacher/transformer_Td30_Tw15.pth
Epoch 6/50 - Train Loss: 363.1743 - Val Loss: 57.7546 - Train Acc: 0.8211 - Val Acc: 0.7248
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep

  model.load_state_dict(torch.load(best_model_path))


🔍 Class Weights Tensor:
tensor([2.4847, 2.4847, 2.4847, 2.4847, 2.4847, 2.4847, 2.4847, 2.4847, 2.4847,
        2.4847, 2.4847, 2.4847], device='cuda:0')




Epoch 1/50 - Train Loss: 1227.0316 - Val Loss: 129.1422 - Train Acc: 0.7030 - Val Acc: 0.6899
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/teacher/transformer_Td30_Tw20.pth
Epoch 2/50 - Train Loss: 856.1394 - Val Loss: 106.6799 - Train Acc: 0.7887 - Val Acc: 0.7306
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/teacher/transformer_Td30_Tw20.pth
Epoch 3/50 - Train Loss: 769.6849 - Val Loss: 103.5620 - Train Acc: 0.8103 - Val Acc: 0.7465
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/teacher/transformer_Td30_Tw20.pth
Epoch 4/50 - Train Loss: 714.8654 - Val Loss: 125.1126 - Train Acc: 0.8242 - Val Acc: 0.7392
Epoch 5/50 - Train Loss: 679.0222 - Val Loss: 95.8768 - Train Acc: 0.8344 - Val Acc: 0.7807
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/teacher/transformer_Td30_Tw20.pth
Epoch 6/50 - Train 

  model.load_state_dict(torch.load(best_model_path))


🔍 Class Weights Tensor:
tensor([2.4837, 2.4837, 2.4837, 2.4837, 2.4837, 2.4837, 2.4837, 2.4837, 2.4837,
        2.4837, 2.4837, 2.4837], device='cuda:0')




Epoch 1/50 - Train Loss: 311.9090 - Val Loss: 29.9400 - Train Acc: 0.6420 - Val Acc: 0.6124
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/teacher/transformer_Td45_Tw10.pth
Epoch 2/50 - Train Loss: 194.2908 - Val Loss: 25.0553 - Train Acc: 0.7728 - Val Acc: 0.7510
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/teacher/transformer_Td45_Tw10.pth
Epoch 3/50 - Train Loss: 163.4268 - Val Loss: 25.6195 - Train Acc: 0.8060 - Val Acc: 0.6986
Epoch 4/50 - Train Loss: 140.8958 - Val Loss: 18.8329 - Train Acc: 0.8343 - Val Acc: 0.7994
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/teacher/transformer_Td45_Tw10.pth
Epoch 5/50 - Train Loss: 128.6935 - Val Loss: 20.5885 - Train Acc: 0.8485 - Val Acc: 0.7791
Epoch 6/50 - Train Loss: 121.0768 - Val Loss: 17.4014 - Train Acc: 0.8627 - Val Acc: 0.7955
Epoch 7/50 - Train Loss: 114.8772 - Val Loss: 16.0257 - Train

  model.load_state_dict(torch.load(best_model_path))


🔍 Class Weights Tensor:
tensor([2.4840, 2.4840, 2.4840, 2.4840, 2.4840, 2.4840, 2.4840, 2.4840, 2.4840,
        2.4840, 2.4840, 2.4840], device='cuda:0')




Epoch 1/50 - Train Loss: 395.9564 - Val Loss: 38.1755 - Train Acc: 0.6484 - Val Acc: 0.6852
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/teacher/transformer_Td45_Tw15.pth
Epoch 2/50 - Train Loss: 238.3484 - Val Loss: 47.2442 - Train Acc: 0.7788 - Val Acc: 0.6121
Epoch 3/50 - Train Loss: 209.9999 - Val Loss: 29.9208 - Train Acc: 0.8071 - Val Acc: 0.7283
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/teacher/transformer_Td45_Tw15.pth
Epoch 4/50 - Train Loss: 184.2100 - Val Loss: 29.4032 - Train Acc: 0.8289 - Val Acc: 0.7268
Epoch 5/50 - Train Loss: 175.9933 - Val Loss: 24.5001 - Train Acc: 0.8426 - Val Acc: 0.7874
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/teacher/transformer_Td45_Tw15.pth
Epoch 6/50 - Train Loss: 163.0370 - Val Loss: 31.4497 - Train Acc: 0.8537 - Val Acc: 0.7334
Epoch 7/50 - Train Loss: 157.3632 - Val Loss: 26.4075 - Train

  model.load_state_dict(torch.load(best_model_path))


🔍 Class Weights Tensor:
tensor([2.4841, 2.4841, 2.4841, 2.4841, 2.4841, 2.4841, 2.4841, 2.4841, 2.4841,
        2.4841, 2.4841, 2.4841], device='cuda:0')




Epoch 1/50 - Train Loss: 456.5804 - Val Loss: 40.9561 - Train Acc: 0.6529 - Val Acc: 0.6852
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/teacher/transformer_Td45_Tw20.pth
Epoch 2/50 - Train Loss: 273.0977 - Val Loss: 34.5022 - Train Acc: 0.7877 - Val Acc: 0.7180
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/teacher/transformer_Td45_Tw20.pth
Epoch 3/50 - Train Loss: 236.8279 - Val Loss: 46.4135 - Train Acc: 0.8118 - Val Acc: 0.6625
Epoch 4/50 - Train Loss: 212.2429 - Val Loss: 35.0282 - Train Acc: 0.8352 - Val Acc: 0.7401
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/teacher/transformer_Td45_Tw20.pth
Epoch 5/50 - Train Loss: 202.2456 - Val Loss: 30.5795 - Train Acc: 0.8444 - Val Acc: 0.7678
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/teacher/transformer_Td45_Tw20.pth
Epoch 6/50 - Train Loss:

  model.load_state_dict(torch.load(best_model_path))


🔍 Class Weights Tensor:
tensor([2.4832, 2.4832, 2.4832, 2.4832, 2.4832, 2.4832, 2.4832, 2.4832, 2.4832,
        2.4832, 2.4832, 2.4832], device='cuda:0')




Epoch 1/50 - Train Loss: 291.1608 - Val Loss: 25.7114 - Train Acc: 0.5266 - Val Acc: 0.5706
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/teacher/transformer_Td60_Tw10.pth
Epoch 2/50 - Train Loss: 136.1160 - Val Loss: 15.5064 - Train Acc: 0.7757 - Val Acc: 0.7540
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/teacher/transformer_Td60_Tw10.pth
Epoch 3/50 - Train Loss: 113.8349 - Val Loss: 14.1567 - Train Acc: 0.8134 - Val Acc: 0.7409
Epoch 4/50 - Train Loss: 98.4747 - Val Loss: 13.6073 - Train Acc: 0.8370 - Val Acc: 0.7380
Epoch 5/50 - Train Loss: 88.4874 - Val Loss: 15.8671 - Train Acc: 0.8559 - Val Acc: 0.6841
Epoch 6/50 - Train Loss: 82.8551 - Val Loss: 11.3984 - Train Acc: 0.8596 - Val Acc: 0.8108
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/teacher/transformer_Td60_Tw10.pth
Epoch 7/50 - Train Loss: 82.9998 - Val Loss: 15.3993 - Train Acc

  model.load_state_dict(torch.load(best_model_path))


🔍 Class Weights Tensor:
tensor([2.4834, 2.4834, 2.4834, 2.4834, 2.4834, 2.4834, 2.4834, 2.4834, 2.4834,
        2.4834, 2.4834, 2.4834], device='cuda:0')




Epoch 1/50 - Train Loss: 268.7120 - Val Loss: 23.8708 - Train Acc: 0.6108 - Val Acc: 0.6367
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/teacher/transformer_Td60_Tw15.pth
Epoch 2/50 - Train Loss: 143.7432 - Val Loss: 21.4690 - Train Acc: 0.7871 - Val Acc: 0.6861
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/teacher/transformer_Td60_Tw15.pth
Epoch 3/50 - Train Loss: 122.9996 - Val Loss: 18.8074 - Train Acc: 0.8133 - Val Acc: 0.7089
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/teacher/transformer_Td60_Tw15.pth
Epoch 4/50 - Train Loss: 111.3475 - Val Loss: 15.7998 - Train Acc: 0.8318 - Val Acc: 0.7266
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/teacher/transformer_Td60_Tw15.pth
Epoch 5/50 - Train Loss: 100.9744 - Val Loss: 15.6899 - Train Acc: 0.8505 - Val Acc: 0.7481
💾 Best model saved to: /

  model.load_state_dict(torch.load(best_model_path))


🔍 Class Weights Tensor:
tensor([2.4835, 2.4835, 2.4835, 2.4835, 2.4835, 2.4835, 2.4835, 2.4835, 2.4835,
        2.4835, 2.4835, 2.4835], device='cuda:0')




Epoch 1/50 - Train Loss: 300.8287 - Val Loss: 25.6997 - Train Acc: 0.5956 - Val Acc: 0.6218
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/teacher/transformer_Td60_Tw20.pth
Epoch 2/50 - Train Loss: 160.4579 - Val Loss: 22.8530 - Train Acc: 0.7868 - Val Acc: 0.6616
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/teacher/transformer_Td60_Tw20.pth
Epoch 3/50 - Train Loss: 134.4231 - Val Loss: 16.3399 - Train Acc: 0.8100 - Val Acc: 0.7717
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/teacher/transformer_Td60_Tw20.pth
Epoch 4/50 - Train Loss: 117.9616 - Val Loss: 14.8309 - Train Acc: 0.8365 - Val Acc: 0.7822
💾 Best model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/teacher/transformer_Td60_Tw20.pth
Epoch 5/50 - Train Loss: 104.6295 - Val Loss: 23.4134 - Train Acc: 0.8569 - Val Acc: 0.6710
Epoch 6/50 - Train Loss:

  model.load_state_dict(torch.load(best_model_path))


In [75]:
import torch
import torch.nn as nn

class StudentCNN(nn.Module):
    def __init__(self, input_length, num_classes=12):
        super().__init__()
        self.streams = nn.ModuleList([
            nn.Sequential(
                nn.Conv1d(1, 4, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.Conv1d(4, 8, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.AdaptiveAvgPool1d(1),
                nn.Flatten()
            ) for _ in range(8)
        ])

        self.fc = nn.Sequential(
            nn.Linear(8 * 8, 128),
            nn.LayerNorm(128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, num_classes)
        )

        # 🔄 Smaller projection for feature distillation
        self.proj = nn.Linear(8 * 8, 128)

    def forward(self, x, return_features=False):
        # ✅ x is shape (B, T, 8) — same as Transformer input
        B, T, C = x.shape
        assert C == 8, f"Expected 8 feature streams, got {C}"

        # Split into 8 (B, 1, T) tensors for each stream
        streams = [x[:, :, i].unsqueeze(1) for i in range(C)]  # (B, 1, T)

        # Apply each stream's CNN
        features = [self.streams[i](streams[i]) for i in range(8)]  # 8 × (B, 8)
        x = torch.cat(features, dim=1)  # (B, 64)

        if return_features:
            feat_proj = self.proj(x)  # (B, 128)
            return self.fc(x), feat_proj

        return self.fc(x)


In [76]:
import torch.nn.functional as F

def distillation_loss(student_logits, teacher_logits, student_feat, teacher_feat, true_labels, T=3.0, alpha=0.5, beta=0.3, gamma=0.2):
    """
    alpha: weight for hard loss (CE)
    beta: weight for soft loss (KL)
    gamma: weight for feature-based distillation (MSE)
    T: temperature for soft distillation
    """
    # Hard loss
    ce_loss = F.cross_entropy(student_logits, true_labels)

    # Soft loss (logits)
    soft_teacher = F.softmax(teacher_logits / T, dim=1)
    soft_student = F.log_softmax(student_logits / T, dim=1)
    kl_loss = F.kl_div(soft_student, soft_teacher, reduction='batchmean') * (T * T)

    # Feature loss (projection-matched MSE)
    feat_loss = F.mse_loss(student_feat, teacher_feat)

    return alpha * ce_loss + beta * kl_loss + gamma * feat_loss


In [77]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

def train_distilled(student, teacher, train_loader, val_loader, device,
                    epochs=50, lr=0.0005, class_weights=None,
                    T=2.0, alpha=0.2, beta=0.2, gamma=0.2,
                    save_path="best_student.pth"):

    teacher.eval()
    student.to(device)
    teacher.to(device)

    optimizer = optim.Adam(student.parameters(), lr=lr)
    ce_criterion = nn.CrossEntropyLoss(weight=class_weights)

    best_val_acc = 0
    train_losses, train_accuracies, val_accuracies, val_losses = [], [], [], []

    for epoch in range(epochs):
        student.train()
        total_loss, correct = 0, 0

        for inputs, labels in train_loader:  # ✅ inputs: (B, T, 8)
            inputs, labels = inputs.to(device), labels.to(device)
            student_logits, student_feat = student(inputs, return_features=True)
            with torch.no_grad():
                teacher_logits, teacher_feat = teacher(inputs, return_features=True)

            # Compute loss
            loss = distillation_loss(
                student_logits, teacher_logits,
                student_feat, teacher_feat,
                labels,
                T=T, alpha=alpha, beta=beta, gamma=gamma
            )

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            correct += (student_logits.argmax(1) == labels).sum().item()

        train_acc = correct / len(train_loader.dataset)
        train_losses.append(total_loss)
        train_accuracies.append(train_acc)

        # === Validation ===
        student.eval()
        val_correct = 0
        val_loss = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = student(inputs)
                loss = ce_criterion(outputs, labels)
                val_loss += loss.item()
                val_correct += (outputs.argmax(1) == labels).sum().item()

        val_acc = val_correct / len(val_loader.dataset)
        val_accuracies.append(val_acc)
        val_losses.append(val_loss)

        print(f"[Distill] Epoch {epoch+1:02d}/{epochs} - Loss: {total_loss:.4f} "
              f"- Train Acc: {train_acc:.4f} - Val Acc: {val_acc:.4f}")

        # Save best student model
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(student.state_dict(), save_path)
            print(f"💾 Best student model saved to: {save_path}")

    student.load_state_dict(torch.load(save_path))
    return train_accuracies, val_accuracies, train_losses, val_losses


In [111]:
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import SMOTE
from sklearn.metrics import classification_report
import os

detection_times = [30, 45, 60]
window_sizes = [10, 15, 20]

base_dir = "/home/HardDisk/Satang/thesis_proj"
base_teacher_dir = os.path.join(base_dir, "Deep_Learning","cross_archi", "transformer","teacher")
student_save_dir = os.path.join(base_dir, "Deep_Learning","cross_archi", "transformer","student")
os.makedirs(student_save_dir, exist_ok=True)

for T_d in detection_times:
    for T_w in window_sizes:
        T_len = T_d - T_w + 1
        expected_shape = (T_len, NUM_FEATURES)

        print(f"\n🚀 Training Student for Td={T_d}, Tw={T_w} (T_len={T_len})")

        # === Paths ===
        folder_name = f"X_csv_split_{T_len}"
        input_dir = os.path.join(base_dir, f"New_{T_d}", f"{T_w}", "split_tws", folder_name)
        train_path = os.path.join(input_dir, "train")
        val_path   = os.path.join(input_dir, "val")
        teacher_model_path = os.path.join(base_teacher_dir, f"transformer_Td{T_d}_Tw{T_w}.pth")
        student_model_path = os.path.join(student_save_dir, f"student_Td{T_d}_Tw{T_w}.pth")

        # === 1. Load Data ===
        X_train_raw, y_train_raw = load_split_from_folder(train_path, expected_shape)
        X_val_raw, y_val_raw     = load_split_from_folder(val_path, expected_shape)

        # === 2. Encode & Balance ===
        label_encoder = LabelEncoder()
        y_train_encoded = label_encoder.fit_transform(y_train_raw)

        X_train_flat = X_train_raw.reshape(X_train_raw.shape[0], -1)
        X_resampled, y_resampled = SMOTE().fit_resample(X_train_flat, y_train_encoded)
        X_train_bal = X_resampled.reshape(-1, T_len, NUM_FEATURES)
        y_train_str = label_encoder.inverse_transform(y_resampled)

        # === 3. Dataset & Dataloader ===
        train_dataset = MultiStreamDataset(X_train_bal, y_train_str, label_encoder, augment=True)
        val_dataset   = MultiStreamDataset(X_val_raw, y_val_raw, label_encoder, augment=False)

        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
        val_loader   = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

        class_weights_tensor = compute_class_weights(label_encoder.transform(y_train_str), device)

        # === 4. Load Teacher ===
        teacher = RansomwareTransformer(
            input_dim=NUM_FEATURES,
            seq_len=T_len,
            num_classes=len(label_encoder.classes_)
        )
        teacher.load_state_dict(torch.load(teacher_model_path, map_location=device))
        teacher.to(device)
        teacher.eval()
        for p in teacher.parameters():
            p.requires_grad = False

        # === 5. Init Student & Train ===
        student = StudentCNN(input_length=T_len, num_classes=len(label_encoder.classes_)).to(device)

        train_accs, val_accs, train_losses, val_losses = train_distilled(
            student=student,
            teacher=teacher,
            train_loader=train_loader,
            val_loader=val_loader,
            device=device,
            epochs=NUM_EPOCHS,
            lr=0.0001,
            class_weights=class_weights_tensor,
            T=2.5,
            alpha=0.8,
            beta=0.2,
            gamma=0.1,
            save_path=student_model_path
        )

        # === 6. Evaluate ===
        student.eval()
        all_preds, all_labels = [], []
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = student(inputs)
                preds = torch.argmax(outputs, dim=1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        print("\n📊 Final Classification Report (Student):")
        print(classification_report(all_labels, all_preds, target_names=label_encoder.classes_))
        print(f"✅ Model saved: {student_model_path}")



🚀 Training Student for Td=30, Tw=10 (T_len=21)


  teacher.load_state_dict(torch.load(teacher_model_path, map_location=device))


[Distill] Epoch 01/50 - Loss: 2187.9115 - Train Acc: 0.3338 - Val Acc: 0.4674
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/student/student_Td30_Tw10.pth
[Distill] Epoch 02/50 - Loss: 1409.1232 - Train Acc: 0.6088 - Val Acc: 0.5566
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/student/student_Td30_Tw10.pth
[Distill] Epoch 03/50 - Loss: 1117.8612 - Train Acc: 0.6663 - Val Acc: 0.5932
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/student/student_Td30_Tw10.pth
[Distill] Epoch 04/50 - Loss: 959.4028 - Train Acc: 0.6926 - Val Acc: 0.6037
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/student/student_Td30_Tw10.pth
[Distill] Epoch 05/50 - Loss: 857.2742 - Train Acc: 0.7111 - Val Acc: 0.6202
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/c

  student.load_state_dict(torch.load(save_path))
  teacher.load_state_dict(torch.load(teacher_model_path, map_location=device))


[Distill] Epoch 01/50 - Loss: 3094.0534 - Train Acc: 0.3354 - Val Acc: 0.4472
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/student/student_Td30_Tw15.pth
[Distill] Epoch 02/50 - Loss: 2161.1076 - Train Acc: 0.5753 - Val Acc: 0.5315
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/student/student_Td30_Tw15.pth
[Distill] Epoch 03/50 - Loss: 1761.1518 - Train Acc: 0.6456 - Val Acc: 0.5881
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/student/student_Td30_Tw15.pth
[Distill] Epoch 04/50 - Loss: 1450.7412 - Train Acc: 0.7032 - Val Acc: 0.6333
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/student/student_Td30_Tw15.pth
[Distill] Epoch 05/50 - Loss: 1233.0324 - Train Acc: 0.7429 - Val Acc: 0.6522
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning

  student.load_state_dict(torch.load(save_path))



📊 Final Classification Report (Student):
              precision    recall  f1-score   support

    AESCrypt       0.97      1.00      0.98       112
      Cerber       0.85      0.80      0.82       249
    Darkside       0.98      0.81      0.89       427
       Excel       1.00      0.99      0.99       194
     Firefox       0.97      0.94      0.95       271
   GandCrab4       0.67      0.77      0.72       420
        Ryuk       0.76      0.73      0.74       258
     SDelete       1.00      1.00      1.00       103
  Sodinokibi       0.84      0.83      0.83       271
  TeslaCrypt       0.66      0.93      0.77       114
    WannaCry       1.00      1.00      1.00       103
         Zip       1.00      0.96      0.98       112

    accuracy                           0.86      2634
   macro avg       0.89      0.90      0.89      2634
weighted avg       0.87      0.86      0.86      2634

✅ Model saved: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/stud

  teacher.load_state_dict(torch.load(teacher_model_path, map_location=device))


[Distill] Epoch 01/50 - Loss: 5329.7681 - Train Acc: 0.4408 - Val Acc: 0.5472
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/student/student_Td30_Tw20.pth
[Distill] Epoch 02/50 - Loss: 3268.5926 - Train Acc: 0.6654 - Val Acc: 0.5952
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/student/student_Td30_Tw20.pth
[Distill] Epoch 03/50 - Loss: 2575.1115 - Train Acc: 0.7172 - Val Acc: 0.6380
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/student/student_Td30_Tw20.pth
[Distill] Epoch 04/50 - Loss: 2179.6683 - Train Acc: 0.7522 - Val Acc: 0.6790
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/student/student_Td30_Tw20.pth
[Distill] Epoch 05/50 - Loss: 1912.3550 - Train Acc: 0.7779 - Val Acc: 0.6920
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning

  student.load_state_dict(torch.load(save_path))



📊 Final Classification Report (Student):
              precision    recall  f1-score   support

    AESCrypt       0.95      0.91      0.93       221
      Cerber       0.89      0.81      0.85       496
    Darkside       0.99      0.82      0.90       850
       Excel       1.00      0.99      0.99       386
     Firefox       0.99      0.96      0.97       539
   GandCrab4       0.75      0.81      0.78       836
        Ryuk       0.73      0.77      0.75       514
     SDelete       0.93      1.00      0.96       204
  Sodinokibi       0.80      0.86      0.83       537
  TeslaCrypt       0.68      0.95      0.80       224
    WannaCry       1.00      0.99      1.00       203
         Zip       1.00      0.95      0.97       220

    accuracy                           0.87      5230
   macro avg       0.89      0.90      0.89      5230
weighted avg       0.88      0.87      0.87      5230

✅ Model saved: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/stud

  teacher.load_state_dict(torch.load(teacher_model_path, map_location=device))


[Distill] Epoch 01/50 - Loss: 1506.5904 - Train Acc: 0.1672 - Val Acc: 0.2820
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/student/student_Td45_Tw10.pth
[Distill] Epoch 02/50 - Loss: 1207.0527 - Train Acc: 0.4259 - Val Acc: 0.5203
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/student/student_Td45_Tw10.pth
[Distill] Epoch 03/50 - Loss: 931.3287 - Train Acc: 0.6366 - Val Acc: 0.6037
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/student/student_Td45_Tw10.pth
[Distill] Epoch 04/50 - Loss: 778.4278 - Train Acc: 0.6938 - Val Acc: 0.6172
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/student/student_Td45_Tw10.pth
[Distill] Epoch 05/50 - Loss: 675.6561 - Train Acc: 0.7267 - Val Acc: 0.6328
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cr

  student.load_state_dict(torch.load(save_path))
  teacher.load_state_dict(torch.load(teacher_model_path, map_location=device))


[Distill] Epoch 01/50 - Loss: 1915.7062 - Train Acc: 0.1948 - Val Acc: 0.3747
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/student/student_Td45_Tw15.pth
[Distill] Epoch 02/50 - Loss: 1356.5061 - Train Acc: 0.5360 - Val Acc: 0.5427
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/student/student_Td45_Tw15.pth
[Distill] Epoch 03/50 - Loss: 1026.9329 - Train Acc: 0.6523 - Val Acc: 0.5661
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/student/student_Td45_Tw15.pth
[Distill] Epoch 04/50 - Loss: 875.0133 - Train Acc: 0.6916 - Val Acc: 0.5858
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/student/student_Td45_Tw15.pth
[Distill] Epoch 05/50 - Loss: 781.6161 - Train Acc: 0.7152 - Val Acc: 0.6172
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/c

  student.load_state_dict(torch.load(save_path))
  teacher.load_state_dict(torch.load(teacher_model_path, map_location=device))


[Distill] Epoch 01/50 - Loss: 2198.2554 - Train Acc: 0.2108 - Val Acc: 0.3842
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/student/student_Td45_Tw20.pth
[Distill] Epoch 02/50 - Loss: 1566.6506 - Train Acc: 0.5151 - Val Acc: 0.4789
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/student/student_Td45_Tw20.pth
[Distill] Epoch 03/50 - Loss: 1246.3492 - Train Acc: 0.6147 - Val Acc: 0.5287
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/student/student_Td45_Tw20.pth
[Distill] Epoch 04/50 - Loss: 1053.6591 - Train Acc: 0.6662 - Val Acc: 0.5609
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/student/student_Td45_Tw20.pth
[Distill] Epoch 05/50 - Loss: 929.8140 - Train Acc: 0.6953 - Val Acc: 0.5830
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/

  student.load_state_dict(torch.load(save_path))
  teacher.load_state_dict(torch.load(teacher_model_path, map_location=device))


[Distill] Epoch 01/50 - Loss: 1108.8767 - Train Acc: 0.1105 - Val Acc: 0.2256
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/student/student_Td60_Tw10.pth
[Distill] Epoch 02/50 - Loss: 1014.8261 - Train Acc: 0.2955 - Val Acc: 0.4192
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/student/student_Td60_Tw10.pth
[Distill] Epoch 03/50 - Loss: 746.1667 - Train Acc: 0.5683 - Val Acc: 0.5997
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/student/student_Td60_Tw10.pth
[Distill] Epoch 04/50 - Loss: 605.7265 - Train Acc: 0.7035 - Val Acc: 0.6434
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/student/student_Td60_Tw10.pth
[Distill] Epoch 05/50 - Loss: 523.0205 - Train Acc: 0.7375 - Val Acc: 0.6769
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cr

  student.load_state_dict(torch.load(save_path))
  teacher.load_state_dict(torch.load(teacher_model_path, map_location=device))


[Distill] Epoch 01/50 - Loss: 1220.4490 - Train Acc: 0.1594 - Val Acc: 0.1810
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/student/student_Td60_Tw15.pth
[Distill] Epoch 02/50 - Loss: 1002.0874 - Train Acc: 0.4420 - Val Acc: 0.5772
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/student/student_Td60_Tw15.pth
[Distill] Epoch 03/50 - Loss: 760.8072 - Train Acc: 0.6571 - Val Acc: 0.6089
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/student/student_Td60_Tw15.pth
[Distill] Epoch 04/50 - Loss: 644.6548 - Train Acc: 0.6991 - Val Acc: 0.6278
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/student/student_Td60_Tw15.pth
[Distill] Epoch 05/50 - Loss: 558.0945 - Train Acc: 0.7390 - Val Acc: 0.6430
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cr

  student.load_state_dict(torch.load(save_path))
  teacher.load_state_dict(torch.load(teacher_model_path, map_location=device))


[Distill] Epoch 01/50 - Loss: 1336.8097 - Train Acc: 0.1292 - Val Acc: 0.2354
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/student/student_Td60_Tw20.pth
[Distill] Epoch 02/50 - Loss: 1117.6365 - Train Acc: 0.4269 - Val Acc: 0.5433
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/student/student_Td60_Tw20.pth
[Distill] Epoch 03/50 - Loss: 806.2226 - Train Acc: 0.6621 - Val Acc: 0.5691
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/student/student_Td60_Tw20.pth
[Distill] Epoch 04/50 - Loss: 675.4467 - Train Acc: 0.6971 - Val Acc: 0.5925
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/student/student_Td60_Tw20.pth
[Distill] Epoch 05/50 - Loss: 592.1709 - Train Acc: 0.7220 - Val Acc: 0.6066
💾 Best student model saved to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cr

  student.load_state_dict(torch.load(save_path))


In [112]:
import torch
import torch.nn as nn
import torch.optim as optim

def train_student_baseline(student, train_loader, val_loader, device,
                           epochs=50, lr=0.0005, class_weights=None, save_path="best_student_baseline.pth",
                           patience=10):

    student.to(device)
    optimizer = optim.Adam(student.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss(weight=class_weights)

    best_val_acc = 0
    epochs_no_improve = 0
    train_losses, train_accuracies, val_accuracies, val_losses = [], [], [], []

    for epoch in range(epochs):
        student.train()
        total_loss, correct = 0, 0

        for inputs, labels in train_loader:  # ✅ inputs: (B, T, 8)
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = student(inputs)  # ✅ one-shot forward pass
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            correct += (outputs.argmax(1) == labels).sum().item()

        train_acc = correct / len(train_loader.dataset)
        train_losses.append(total_loss)
        train_accuracies.append(train_acc)

        # === Validation ===
        student.eval()
        val_correct, val_loss = 0, 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = student(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                val_correct += (outputs.argmax(1) == labels).sum().item()

        val_acc = val_correct / len(val_loader.dataset)
        val_accuracies.append(val_acc)
        val_losses.append(val_loss)

        print(f"[Baseline] Epoch {epoch+1:02d}/{epochs} - Loss: {total_loss:.4f} "
              f"- Train Acc: {train_acc:.4f} - Val Acc: {val_acc:.4f}")

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(student.state_dict(), save_path)
            print(f"💾 Saved best standalone student model to: {save_path}")
            epochs_no_improve = 0  # Reset counter
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print(f"⏹ Early stopping triggered at epoch {epoch+1} due to no improvement for {patience} consecutive epochs.")
                break

    student.load_state_dict(torch.load(save_path))
    return train_accuracies, val_accuracies, train_losses, val_losses


In [120]:
baseline_save_dir = "/home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/baseline_student"
os.makedirs(baseline_save_dir, exist_ok=True)
detection_times = [30, 45, 60]
window_sizes = [10, 15, 20]
base_dir = "/home/HardDisk/Satang/thesis_proj"
for T_d in detection_times:
    for T_w in window_sizes:
        T_len = T_d - T_w + 1
        expected_shape = (T_len, NUM_FEATURES)

        print(f"\n🚀 Training Standalone Student for Td={T_d}, Tw={T_w} (T_len={T_len})")

        # === Paths
        folder_name = f"X_csv_split_{T_len}"
        input_dir = os.path.join(base_dir, f"New_{T_d}", f"{T_w}", "split_tws", folder_name)
        train_path = os.path.join(input_dir, "train")
        val_path   = os.path.join(input_dir, "val")
        student_model_path = os.path.join(baseline_save_dir, f"student_baseline_Td{T_d}_Tw{T_w}.pth")

        # === Load Data
        X_train_raw, y_train_raw = load_split_from_folder(train_path, expected_shape)
        X_val_raw, y_val_raw     = load_split_from_folder(val_path, expected_shape)

        label_encoder = LabelEncoder()
        y_train_encoded = label_encoder.fit_transform(y_train_raw)

        X_train_flat = X_train_raw.reshape(X_train_raw.shape[0], -1)
        X_resampled, y_resampled = SMOTE().fit_resample(X_train_flat, y_train_encoded)
        X_train_bal = X_resampled.reshape(-1, T_len, NUM_FEATURES)
        y_train_str = label_encoder.inverse_transform(y_resampled)

        train_dataset = MultiStreamDataset(X_train_bal, y_train_str, label_encoder, augment=True)
        val_dataset   = MultiStreamDataset(X_val_raw, y_val_raw, label_encoder, augment=False)

        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
        val_loader   = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

        class_weights_tensor = compute_class_weights(label_encoder.transform(y_train_str), device)

        # === Initialize Student Model
        student_baseline = StudentCNN(input_length=T_len, num_classes=len(label_encoder.classes_)).to(device)

        # === Train (No KD)
        train_accs_b, val_accs_b, train_losses_b, val_losses_b = train_student_baseline(
            student=student_baseline,
            train_loader=train_loader,
            val_loader=val_loader,
            device=device,
            epochs=NUM_EPOCHS,
            lr=0.00005,
            class_weights=class_weights_tensor,
            save_path=student_model_path
        )

        print(f"✅ Saved standalone student model: {student_model_path}")



🚀 Training Standalone Student for Td=30, Tw=10 (T_len=21)
[Baseline] Epoch 01/50 - Loss: 1382.9703 - Train Acc: 0.1566 - Val Acc: 0.2119
💾 Saved best standalone student model to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/baseline_student/student_baseline_Td30_Tw10.pth
[Baseline] Epoch 02/50 - Loss: 984.7228 - Train Acc: 0.4519 - Val Acc: 0.4374
💾 Saved best standalone student model to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/baseline_student/student_baseline_Td30_Tw10.pth
[Baseline] Epoch 03/50 - Loss: 754.6250 - Train Acc: 0.5910 - Val Acc: 0.5010
💾 Saved best standalone student model to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/baseline_student/student_baseline_Td30_Tw10.pth
[Baseline] Epoch 04/50 - Loss: 666.4675 - Train Acc: 0.6286 - Val Acc: 0.5496
💾 Saved best standalone student model to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/baseline_student/student_bas

  student.load_state_dict(torch.load(save_path))


[Baseline] Epoch 01/50 - Loss: 1825.0223 - Train Acc: 0.1600 - Val Acc: 0.2737
💾 Saved best standalone student model to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/baseline_student/student_baseline_Td30_Tw15.pth
[Baseline] Epoch 02/50 - Loss: 1306.9692 - Train Acc: 0.4602 - Val Acc: 0.4609
💾 Saved best standalone student model to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/baseline_student/student_baseline_Td30_Tw15.pth
[Baseline] Epoch 03/50 - Loss: 999.0031 - Train Acc: 0.5871 - Val Acc: 0.5125
💾 Saved best standalone student model to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/baseline_student/student_baseline_Td30_Tw15.pth
[Baseline] Epoch 04/50 - Loss: 862.1184 - Train Acc: 0.6333 - Val Acc: 0.5619
💾 Saved best standalone student model to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/baseline_student/student_baseline_Td30_Tw15.pth
[Baseline] Epoch 05/50 - Loss: 767.751

  student.load_state_dict(torch.load(save_path))


[Baseline] Epoch 01/50 - Loss: 2924.0144 - Train Acc: 0.3669 - Val Acc: 0.5140
💾 Saved best standalone student model to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/baseline_student/student_baseline_Td30_Tw20.pth
[Baseline] Epoch 02/50 - Loss: 1792.7137 - Train Acc: 0.6387 - Val Acc: 0.5876
💾 Saved best standalone student model to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/baseline_student/student_baseline_Td30_Tw20.pth
[Baseline] Epoch 03/50 - Loss: 1447.5177 - Train Acc: 0.6964 - Val Acc: 0.6143
💾 Saved best standalone student model to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/baseline_student/student_baseline_Td30_Tw20.pth
[Baseline] Epoch 04/50 - Loss: 1250.0200 - Train Acc: 0.7264 - Val Acc: 0.6315
💾 Saved best standalone student model to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/baseline_student/student_baseline_Td30_Tw20.pth
[Baseline] Epoch 05/50 - Loss: 1123.

  student.load_state_dict(torch.load(save_path))


[Baseline] Epoch 01/50 - Loss: 795.1850 - Train Acc: 0.0982 - Val Acc: 0.1715
💾 Saved best standalone student model to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/baseline_student/student_baseline_Td45_Tw10.pth
[Baseline] Epoch 02/50 - Loss: 757.9555 - Train Acc: 0.1814 - Val Acc: 0.2820
💾 Saved best standalone student model to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/baseline_student/student_baseline_Td45_Tw10.pth
[Baseline] Epoch 03/50 - Loss: 660.4844 - Train Acc: 0.3393 - Val Acc: 0.4729
💾 Saved best standalone student model to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/baseline_student/student_baseline_Td45_Tw10.pth
[Baseline] Epoch 04/50 - Loss: 496.3854 - Train Acc: 0.5267 - Val Acc: 0.5126
💾 Saved best standalone student model to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/baseline_student/student_baseline_Td45_Tw10.pth
[Baseline] Epoch 05/50 - Loss: 423.6376 

  student.load_state_dict(torch.load(save_path))


[Baseline] Epoch 01/50 - Loss: 1021.2748 - Train Acc: 0.1151 - Val Acc: 0.1936
💾 Saved best standalone student model to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/baseline_student/student_baseline_Td45_Tw15.pth
[Baseline] Epoch 02/50 - Loss: 903.2810 - Train Acc: 0.2846 - Val Acc: 0.3535
💾 Saved best standalone student model to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/baseline_student/student_baseline_Td45_Tw15.pth
[Baseline] Epoch 03/50 - Loss: 646.2588 - Train Acc: 0.5116 - Val Acc: 0.5252
💾 Saved best standalone student model to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/baseline_student/student_baseline_Td45_Tw15.pth
[Baseline] Epoch 04/50 - Loss: 539.2468 - Train Acc: 0.6204 - Val Acc: 0.5515
💾 Saved best standalone student model to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/baseline_student/student_baseline_Td45_Tw15.pth
[Baseline] Epoch 05/50 - Loss: 474.6325

  student.load_state_dict(torch.load(save_path))


[Baseline] Epoch 01/50 - Loss: 1189.9786 - Train Acc: 0.1133 - Val Acc: 0.2114
💾 Saved best standalone student model to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/baseline_student/student_baseline_Td45_Tw20.pth
[Baseline] Epoch 02/50 - Loss: 990.4392 - Train Acc: 0.3388 - Val Acc: 0.4095
💾 Saved best standalone student model to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/baseline_student/student_baseline_Td45_Tw20.pth
[Baseline] Epoch 03/50 - Loss: 696.7639 - Train Acc: 0.5528 - Val Acc: 0.4890
💾 Saved best standalone student model to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/baseline_student/student_baseline_Td45_Tw20.pth
[Baseline] Epoch 04/50 - Loss: 589.5908 - Train Acc: 0.6185 - Val Acc: 0.5199
💾 Saved best standalone student model to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/baseline_student/student_baseline_Td45_Tw20.pth
[Baseline] Epoch 05/50 - Loss: 526.4869

  student.load_state_dict(torch.load(save_path))


[Baseline] Epoch 01/50 - Loss: 563.7924 - Train Acc: 0.0908 - Val Acc: 0.0975
💾 Saved best standalone student model to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/baseline_student/student_baseline_Td60_Tw10.pth
[Baseline] Epoch 02/50 - Loss: 548.1748 - Train Acc: 0.1426 - Val Acc: 0.2518
💾 Saved best standalone student model to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/baseline_student/student_baseline_Td60_Tw10.pth
[Baseline] Epoch 03/50 - Loss: 516.0055 - Train Acc: 0.2524 - Val Acc: 0.3217
💾 Saved best standalone student model to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/baseline_student/student_baseline_Td60_Tw10.pth
[Baseline] Epoch 04/50 - Loss: 425.7091 - Train Acc: 0.4012 - Val Acc: 0.4396
💾 Saved best standalone student model to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/baseline_student/student_baseline_Td60_Tw10.pth
[Baseline] Epoch 05/50 - Loss: 331.3293 

  student.load_state_dict(torch.load(save_path))


[Baseline] Epoch 01/50 - Loss: 616.3801 - Train Acc: 0.1204 - Val Acc: 0.2316
💾 Saved best standalone student model to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/baseline_student/student_baseline_Td60_Tw15.pth
[Baseline] Epoch 02/50 - Loss: 571.6800 - Train Acc: 0.2717 - Val Acc: 0.3063
💾 Saved best standalone student model to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/baseline_student/student_baseline_Td60_Tw15.pth
[Baseline] Epoch 03/50 - Loss: 475.4276 - Train Acc: 0.3946 - Val Acc: 0.4595
💾 Saved best standalone student model to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/baseline_student/student_baseline_Td60_Tw15.pth
[Baseline] Epoch 04/50 - Loss: 384.5420 - Train Acc: 0.5441 - Val Acc: 0.5278
💾 Saved best standalone student model to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/baseline_student/student_baseline_Td60_Tw15.pth
[Baseline] Epoch 05/50 - Loss: 330.1582 

  student.load_state_dict(torch.load(save_path))


[Baseline] Epoch 01/50 - Loss: 687.8594 - Train Acc: 0.1078 - Val Acc: 0.1874
💾 Saved best standalone student model to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/baseline_student/student_baseline_Td60_Tw20.pth
[Baseline] Epoch 02/50 - Loss: 661.6139 - Train Acc: 0.1793 - Val Acc: 0.3091
💾 Saved best standalone student model to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/baseline_student/student_baseline_Td60_Tw20.pth
[Baseline] Epoch 03/50 - Loss: 591.5781 - Train Acc: 0.3184 - Val Acc: 0.4450
💾 Saved best standalone student model to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/baseline_student/student_baseline_Td60_Tw20.pth
[Baseline] Epoch 04/50 - Loss: 442.2271 - Train Acc: 0.5093 - Val Acc: 0.5082
💾 Saved best standalone student model to: /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/baseline_student/student_baseline_Td60_Tw20.pth
[Baseline] Epoch 05/50 - Loss: 384.5699 

  student.load_state_dict(torch.load(save_path))


In [121]:
import os
import torch
import pandas as pd
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import DataLoader

# === CONFIG ===
detection_times = [30, 45, 60]
window_sizes = [10, 15, 20]
NUM_FEATURES = 8
BATCH_SIZE = 32
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

base_dir = "/home/HardDisk/Satang/thesis_proj/"
base_teacher_dir = os.path.join(base_dir, "Deep_Learning", "cross_archi","transformer", "teacher")
student_kd_dir = os.path.join(base_dir, "Deep_Learning","cross_archi","transformer", "student")
student_baseline_dir = os.path.join(base_dir, "Deep_Learning","cross_archi","transformer", "baseline_student")
csv_output_path = os.path.join(base_dir, "Deep_Learning","cross_archi","transformer", "results", "model_eval_summary.csv")
os.makedirs(os.path.dirname(csv_output_path), exist_ok=True)

# # === YOUR MODULE IMPORTS ===
# from your_module import load_split_from_folder, MultiStreamDataset, RansomwareTransformer, StudentCNN

results = []

def evaluate_and_log(model, model_path, test_loader, label_encoder, T_d, T_w, model_type, results_list):
    model.load_state_dict(torch.load(model_path, map_location=DEVICE))
    model.to(DEVICE)
    model.eval()

    all_preds, all_labels = [], []
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            outputs = model(inputs)
            preds = torch.argmax(outputs, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    report = classification_report(all_labels, all_preds, target_names=label_encoder.classes_, output_dict=True)
    acc = accuracy_score(all_labels, all_preds)
    
    results_list.append({
        "Td": T_d,
        "Tw": T_w,
        "T_len": T_d - T_w + 1,
        "Model": model_type,
        "Accuracy": round(acc, 4),
        "Precision_macro": round(report["macro avg"]["precision"], 4),
        "Recall_macro": round(report["macro avg"]["recall"], 4),
        "F1_macro": round(report["macro avg"]["f1-score"], 4),
        "F1_weighted": round(report["weighted avg"]["f1-score"], 4),
    })

# === MAIN EVALUATION LOOP ===
for T_d in detection_times:
    for T_w in window_sizes:
        T_len = T_d - T_w + 1
        expected_shape = (T_len, NUM_FEATURES)
        folder_name = f"X_csv_split_{T_len}"
        test_path = os.path.join(base_dir, f"New_{T_d}", f"{T_w}", "split_tws", folder_name, "test")

        # === Load test set ===
        X_test_raw, y_test_raw = load_split_from_folder(test_path, expected_shape)
        label_encoder = LabelEncoder()
        label_encoder.fit(y_test_raw)

        test_dataset = MultiStreamDataset(X_test_raw, y_test_raw, label_encoder, augment=False)
        test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

        # === Teacher ===
        teacher_path = os.path.join(base_teacher_dir, f"transformer_Td{T_d}_Tw{T_w}.pth")
        teacher = RansomwareTransformer(input_dim=NUM_FEATURES, seq_len=T_len, num_classes=len(label_encoder.classes_))
        evaluate_and_log(teacher, teacher_path, test_loader, label_encoder, T_d, T_w, "Teacher", results)

        # === Student with KD ===
        student_kd_path = os.path.join(student_kd_dir, f"student_Td{T_d}_Tw{T_w}.pth")
        student_kd = StudentCNN(input_length=T_len, num_classes=len(label_encoder.classes_))
        evaluate_and_log(student_kd, student_kd_path, test_loader, label_encoder, T_d, T_w, "Student_KD", results)

        # === Student Baseline ===
        student_b_path = os.path.join(student_baseline_dir, f"student_baseline_Td{T_d}_Tw{T_w}.pth")
        student_b = StudentCNN(input_length=T_len, num_classes=len(label_encoder.classes_))
        evaluate_and_log(student_b, student_b_path, test_loader, label_encoder, T_d, T_w, "Student_Baseline", results)

# === Save to CSV ===
df_results = pd.DataFrame(results)
df_results.to_csv(csv_output_path, index=False)
print(f"\n✅ All evaluation results saved to {csv_output_path}")


  model.load_state_dict(torch.load(model_path, map_location=DEVICE))
  model.load_state_dict(torch.load(model_path, map_location=DEVICE))
  model.load_state_dict(torch.load(model_path, map_location=DEVICE))
  model.load_state_dict(torch.load(model_path, map_location=DEVICE))
  model.load_state_dict(torch.load(model_path, map_location=DEVICE))
  model.load_state_dict(torch.load(model_path, map_location=DEVICE))
  model.load_state_dict(torch.load(model_path, map_location=DEVICE))
  model.load_state_dict(torch.load(model_path, map_location=DEVICE))
  model.load_state_dict(torch.load(model_path, map_location=DEVICE))
  model.load_state_dict(torch.load(model_path, map_location=DEVICE))
  model.load_state_dict(torch.load(model_path, map_location=DEVICE))
  model.load_state_dict(torch.load(model_path, map_location=DEVICE))
  model.load_state_dict(torch.load(model_path, map_location=DEVICE))
  model.load_state_dict(torch.load(model_path, map_location=DEVICE))
  model.load_state_dict(torch.load


✅ All evaluation results saved to /home/HardDisk/Satang/thesis_proj/Deep_Learning/cross_archi/transformer/results/model_eval_summary.csv
