In [None]:
import itertools
import json
import logging
import os
import pickle
import random
import shutil
import time
from collections import OrderedDict, defaultdict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F
import wandb
from kaggle_secrets import UserSecretsClient
from omegaconf import OmegaConf
from pytorch_lightning.loggers import TensorBoardLogger, WandbLogger
from scipy.sparse import csr_matrix, diags
from sklearn.metrics import classification_report, confusion_matrix, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.utils import class_weight as sklearn_class_weight
from torch.optim import SGD, Adam
from torch.utils.data import DataLoader, Dataset, WeightedRandomSampler

In [None]:
# # df = pd.read_parquet("/kaggle/input/cic-ids-2017-parquet/cic_ids_2017.parquet")
# df = pd.read_parquet("/kaggle/input/cic-ton-iot-parquet/cic_ton_iot.parquet")
# print(df.head())
# print("Shape:", df.shape)
# print("Columns:", df.columns.tolist())
# print("\nInfo:")
# print(df.info())
# print("\nDescription:")
# print(df.describe(include='all'))
# print("\nMissing Values:")
# print(df.isnull().sum())

In [None]:
# if os.path.isdir("processed_data"):
#     shutil.rmtree("processed_data")

# if os.path.isdir("logs"):
#     shutil.rmtree("logs")

# if os.path.isdir("temp"):
#     shutil.rmtree("temp")

In [None]:
# NOTE(review): `powerlaw` is not imported by any cell visible here; presumably it is a
# dependency of the centrality helper modules imported later — TODO confirm and pin a version.
!pip install powerlaw

In [None]:
# Architectures to train in this run; uncomment an entry to enable it.
models = [
    "mlp",
    # "cnn",
    # "cnn_lstm",
    # "cnn_gru",
    # "gru"
]

# --- Active dataset: CIC-IDS-2017 -------------------------------------------
name = "cic_ids_2017"
dataset_name = "cic_ids_2017"
dataset_name_d = "cic-ids-2017"
dataset_file = "cic_ids_2017.parquet"
original_datasets_files_path = "/kaggle/input/cic-ids-2017-parquet"

# --- Alternative dataset: CIC-ToN-IoT (uncomment all five lines to switch) ---
# name = "cic_ton_iot"
# dataset_name = "cic_ton_iot"
# dataset_name_d = "cic-ton-iot"
# dataset_file = "cic_ton_iot.parquet"
# original_datasets_files_path = "/kaggle/input/cic-ton-iot-parquet"

dataset_file_raw_type = "parquet"

# Timestamp captured once when this cell runs.
run_dtime = time.strftime("%Y%m%d-%H%M%S")

# Output locations: a processed-data directory (created if missing) and the
# paths for the enriched parquet file and the exported graph.
main_processed_dir = "processed_data"
os.makedirs(main_processed_dir, exist_ok=True)
# CHANGE 1.02
new_path = os.path.join("/kaggle/working/", f"{dataset_name}.parquet")
graph_path = os.path.join("/kaggle/working/", f"{dataset_name}.gpickle")

# Centrality measures to compute on the IP graph.
# cn_measures = ["degree", "betweenness", "closeness", "eigenvector"]
cn_measures = ["betweenness", "degree", "pagerank", "closeness", "k_truss"]

# One source-side and one destination-side column per measure, in the order
# src_<measure>, dst_<measure> for each entry of `cn_measures`.
network_features = [
    f"{endpoint}_{measure}"
    for measure in cn_measures
    for endpoint in ("src", "dst")
]

dataset_DF_name = "CICIDS2017" if dataset_name == "cic_ids_2017" else "CICTONIOT"
    

In [None]:
# import wandb
# print(wandb.__version__)
# from wandb.keras import WandbCallback


# Configurations

In [None]:
# Module-level logger for this notebook.
logger = logging.getLogger(__name__)

# The W&B API key is read from Kaggle's secret store under `secret_label`, so the
# key itself never appears in the notebook source.
# secret_label = "wandb-secret"
secret_label="mohammad_wandb_secret"
secret_value = UserSecretsClient().get_secret(secret_label)

# Close any W&B run that is still active in this kernel before logging in again.
if wandb.run:
    wandb.finish()

wandb.login(key=secret_value)

In [None]:
# Base configuration shared by every model: seed, training hyper-parameters,
# experiment-logging backends, and dataset schema.
base_cfg = OmegaConf.create({
        "random_seed": 42,
        "training": {
            "multi_class": True,
            "batch_size": 256,
            # "batch_size": 512,
            "max_epochs": 15,
            # "max_epochs": 10,
            # "max_epochs": 1,
            "optimizer": "adam",
            # "learning_rate": 0.0008,
            # for first dataset
            "learning_rate": 0.001,
            "lr_decay": True,
            "lr_decay_rate": 0.5,
            "weight_decay": 0.01,
            "LAMBD_2": 0.01,
            "dropout": True,
            "dropout_rate": 0.5,
            "batch_norm": True,
            "use_weighted_loss": False,
            # Must be one of the versions implemented by compute_class_weights.
            "weighted_loss_version": "v5",
            "loss_type": "cross_entropy",
            "sequence_length": 3,
            "stride": 1,
            "using_masking": False,
            "masked_class": 2,
            "oversample": True,
        },
        "logging": {
            # "selected_type": "tensorboard",
            "selected_type": "wandb",
            "wandb": {
                "project": f"DL-NIDS-2--{dataset_name_d}",
                "entity": "mtermos-cesi",
                # NOTE(review): the second tag is the literal string "dataset_name",
                # not the value of the `dataset_name` variable — confirm whether
                # downstream code substitutes it or whether the variable was intended.
                "tags": [ "dl_nids", "dataset_name" ],
                "save_dir": "logs/wandb_runs",
            },
            "tensorboard": {
                "project": f"DL-NIDS-2--{dataset_name_d}",
                "save_dir": "logs/tensorboard_runs"
            }
        },
        "dataset": {
            "name": dataset_name,
            "raw": dataset_file,
            "raw_type": dataset_file_raw_type,
            "test_size": 0.1
        },
        "dataset_properties": {
            "src_ip_col": "Src IP",
            "dst_ip_col": "Dst IP",
            "timestamp_col": "Timestamp",
            "flow_id_col": "Flow ID",
            "timestamp_format": "%d/%m/%Y %I:%M:%S %p",
            "label_col": "Label",
            "class_col": "Attack",
            "class_num_col": "Class",
            "val_size": 0.15,
            # "val_size": 0.1,
            "drop_columns": [ "Flow ID", "Src IP", "Dst IP", "Timestamp", "Src Port", "Dst Port", "Attack" ],
            # Fix: "Active Mean" and "Idle Mean" were previously fused into the single
            # string "Active Mean, Idle Mean", which cannot match either column.
            "weak_columns": [
                "Flow Duration", "Tot Bwd Pkts", "TotLen Bwd Pkts",
                "Fwd Pkt Len Max", "Fwd Pkt Len Mean",
                "Bwd Pkt Len Max", "Bwd Pkt Len Mean", "Bwd Pkt Len Std",
                "Flow Pkts/s", "Flow IAT Mean", "Flow IAT Max",
                "Fwd IAT Mean", "Bwd IAT Mean",
                "Pkt Len Max", "Pkt Len Mean", "Pkt Size Avg",
                "Fwd Byts/b Avg", "Fwd Pkts/b Avg", "Fwd Blk Rate Avg",
                "Active Mean", "Idle Mean",
            ]
        },
    })

# Per-architecture configuration, keyed by the model names listed in `models`.
# Each entry names the model, lists its layer groups in order, and provides one
# sub-section per layer group.
all_models_cfgs = {
    # # TRIAL ONE
    # "cnn": OmegaConf.create({
    #     "model": {"name": "cnn", "type": "cnn"},
    #     "layers": ["cnn", "dense"],
    #     "input_layer_norm": True,
    #     "cnn": {
    #         "filters": [32, 64, 128],
    #         "kernel_sizes": [7, 5, 3],
    #         "activation": "relu",
    #         "dropout": False,
    #         "dropout_rate": 0.0,
    #         "batch_norm": False,
    #         "layer_norm": False,
    #     },
    #     "dense": {
    #         "units": [128, 64],
    #         "activation": "relu",
    #         "dropout": True,
    #         "dropout_rate": 0.3,
    #         "batch_norm": True,
    #         "layer_norm": False,
    #     },
    # }),
#     # TRIAL TWO
#     "cnn": OmegaConf.create({
#     "model": {"name": "cnn", "type": "cnn"},
#     "layers": ["cnn", "dense"],
#     "input_layer_norm": False,
#     "cnn": {
#         "filters": [80, 160],
#         # "filters": [80, 160, 240],
#         "kernel_sizes": [7, 5],
#         # "kernel_sizes": [7, 5, 3],
#         "activation": "relu",
#         "dropout": True,
#         "dropout_rate": 0.2,
#         "batch_norm": True,
#         "layer_norm": False,
#     },
#     "dense": {
#         # "units": [160, 100, 80],
#         "units": [160, 80],
#         "activation": "relu",
#         "dropout": True,
#         "dropout_rate": 0.2,
#         "batch_norm": True,
#         "layer_norm": True,
#     },
# }),
# ORIGINAL
    "cnn": OmegaConf.create({
        "model": {"name": "cnn", "type": "cnn"},
        "layers": ["cnn", "dense"],
        "input_layer_norm": False,
        "cnn": {
            # "filters": [64, 128, 240],
            "filters": [80, 80],
            "kernel_sizes": [7, 7],
            # "kernel_sizes": [7, 5,3],
            # "activation": "leaky_relu",
            "activation": "relu",
            # "activation": "gelu",
            "dropout": True,
            "dropout_rate": 0.2,
            "batch_norm": True,
            "layer_norm": False,
        },
        "dense": {
            "units": [200, 100, 80],
            # "units": [240, 160, 80],
            # "units": [160, 80],
            # "units": [100],
            # "activation": "leaky_relu",
            "activation": "relu",
            # "activation": "gelu",
            "dropout": True,
            "dropout_rate": 0.2,
            "batch_norm": True,
            "layer_norm": False,
        },
    }),
    "mlp": OmegaConf.create({
        "model": {"name": "mlp", "type": "mlp"},
        "layers": ["dense"],
        "input_layer_norm": False,
        "dense": {
            # "units": [200,240, 160, 80],
            "units": [200, 100, 80],
            "activation": "relu",
            # "activation": "leaky_relu",
            "dropout": True,
            "dropout_rate": 0.25,
            "batch_norm": True,
            "layer_norm": False,
        },
    }),
    # NOTE(review): this entry uses a different schema from the "gru" section of
    # "cnn_gru" below (`hidden_units` / per-layer `dropout` list / `return_sequences`
    # versus `hidden_size` / `dropout` flag / `dropout_rate`), and its "dense" section
    # omits the dropout / batch_norm / layer_norm keys present in every other entry.
    # The class that consumes it is not visible here — confirm it expects these keys.
    "gru": OmegaConf.create({
        # model": {"name": "cnn", "type": "cnn"},
        # "layers": ["cnn", "dense"],
        # "input_layer_norm": False,
        # "cnn":
        "model": {"name": "gru", "type": "gru"},
        "layers": ["gru", "dense"],
        "input_layer_norm": False,
        "gru": {
            "type": "gru",
            "hidden_units": [100, 100, 50],  # Three GRU layers
            "dropout": [0.2, 0.1, 0.1],      # Dropout after each GRU layer
            "bidirectional": False,
            "return_sequences": [True, True, False],  # Only last layer returns final output
        },
        "dense": {
            "units": [240,160,80],
            "activation": "relu",
        }
    }),
    "cnn_lstm": OmegaConf.create({
        "model": {"name": "cnn_lstm", "type": "cnn_lstm"},
        "layers": ["cnn", "lstm", "dense"],
        "input_layer_norm": False,
        "cnn": {
            "filters": [80, 80],
            "kernel_sizes": [3, 3],
            # "activation": "leaky_relu",
            "activation": "relu",
            "dropout": True,
            "dropout_rate": 0.3,
            "batch_norm": True,
            "layer_norm": False,
        },
        "lstm": {
            "hidden_size": [80],
            "activation": "relu",
            # "activation": "leaky_relu",
            "dropout": True,
            "dropout_rate": 0.3,
            "batch_norm": True,
            "layer_norm": False,
        },
        "dense": {
            "units": [200, 200, 80],
            "activation": "relu",
            # "activation": "leaky_relu",
            "dropout": True,
            "dropout_rate": 0.3,
            "batch_norm": True,
            "layer_norm": False,
        },
    }),
     # Consumed by CNNGRU: `cnn`, `gru` and `dense` sections are read as
     # model_cfg.cnn.*, model_cfg.gru.* and model_cfg.dense.* respectively.
     "cnn_gru": OmegaConf.create({
        "model": {"name": "cnn_gru", "type": "cnn_gru"},
        "layers": ["cnn", "gru", "dense"],
        "input_layer_norm": False,
        "cnn": {
            "filters": [80, 80],
            "kernel_sizes": [3, 3],
            # "activation": "leaky_relu",
            "activation": "relu",
            "dropout": True,
            "dropout_rate": 0.3,
            "batch_norm": True,
            "layer_norm": False,
        },
        "gru": {
            "hidden_size": [80],
            "activation": "relu",
            # "activation": "leaky_relu",
            "dropout": True,
            "dropout_rate": 0.3,
            "batch_norm": True,
            "layer_norm": False,
        },
        "dense": {
            "units": [200, 200, 80],
            "activation": "relu",
            # "activation": "leaky_relu",
            "dropout": True,
            "dropout_rate": 0.3,
            "batch_norm": True,
            "layer_norm": False,
        },
    })
}

# Models

In [None]:
import networkx as nx
import igraph as ig
# change 1.02
# import /kaggle/input/centrality_network/pytorch/default/1 as network
import sys

# The centrality helper modules live in a Kaggle input directory rather than an
# installed package, so that directory is appended to sys.path before importing.
sys.path.append("/kaggle/input/centrality_network/pytorch/default/1")

from network_features import separate_graph, cal_betweenness_centrality, cal_k_core, cal_k_truss
from CommCentralityCode import comm_centreality
from modularity_vitality import modularity_vitality

# Same approach for the project repository that provides the `datasets` registry.
sys.path.append("/kaggle/input/githubrepofiles/pytorch/default/1")
from src.dataset.dataset_info import datasets

# change 1.04
def add_centralities(df, new_path, graph_path, dataset, cn_measures, network_features):
    """Compute node-centrality measures on the IP graph and attach them to each flow.

    A directed graph is built with one edge per (source IP, destination IP) pair
    found in ``df``. Communities are detected with igraph's Infomap, the requested
    centrality measures are stored as node attributes, and the graph is exported.
    Each requested ``src_<measure>`` / ``dst_<measure>`` column is then filled by
    looking up the flow's source / destination IP; IPs with no value get ``-1``.

    Parameters
    ----------
    df : pandas.DataFrame
        Flow records. **Modified in place**: one column is added per usable entry
        of ``network_features``.
    new_path : str
        Destination of the enriched DataFrame, written with ``DataFrame.to_parquet``.
    graph_path : str
        Destination of the annotated graph. It is written with ``nx.write_gexf``,
        i.e. in GEXF format regardless of the file extension.
    dataset : object
        Must expose ``src_ip_col`` and ``dst_ip_col`` (column names in ``df``).
    cn_measures : list[str]
        Measures to compute. Recognised names are the keys of the dispatch table
        below; unrecognised names are not computed and yield an empty lookup.
    network_features : list[str]
        Output column names of the form ``src_<measure>`` or ``dst_<measure>``.
        Entries that start with neither prefix are ignored.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the new columns.

    Raises
    ------
    KeyError
        If a ``src_``/``dst_`` feature refers to a measure missing from
        ``cn_measures``. Raised before any graph computation starts.
    """
    src_col = dataset.src_ip_col
    dst_col = dataset.dst_ip_col

    # Resolve (output column, IP column, measure) up front so a bad feature name
    # fails immediately instead of after the expensive centrality computations.
    column_plan = []
    for feature in network_features:
        prefix, measure = feature[:3], feature[4:]
        if prefix == "src":
            ip_col = src_col
        elif prefix == "dst":
            ip_col = dst_col
        else:
            continue
        if measure not in cn_measures:
            raise KeyError(
                f"network feature '{feature}' needs measure '{measure}', "
                f"which is not listed in cn_measures")
        column_plan.append((feature, ip_col, measure))

    # change 1.05
    G = nx.from_pandas_edgelist(
        df, source=src_col, target=dst_col, create_using=nx.DiGraph())
    G.remove_nodes_from(list(nx.isolates(G)))
    for node in G.nodes():
        G.nodes[node]['label'] = node

    # Mirror the graph in igraph (labels carry the original node ids) to run Infomap.
    G1 = ig.Graph.from_networkx(G)
    G1.vs['label'] = [G.nodes[node]['label'] for node in G.nodes()]

    part = G1.community_infomap()
    communities = [
        [G1.vs[node_index]['label'] for node_index in com] for com in part
    ]
    community_labels = {
        node: community_id
        for community_id, community in enumerate(communities)
        for node in community
    }
    nx.set_node_attributes(G, community_labels, "new_community")

    intra_graph, inter_graph = separate_graph(G, communities)

    # Measure name -> zero-argument callable returning {node: value}. Insertion
    # order is the order in which measures are computed. `local_*` variants run on
    # `intra_graph` and `global_*` variants on `inter_graph`.
    calculators = {
        "betweenness": lambda: cal_betweenness_centrality(G),
        "local_betweenness": lambda: cal_betweenness_centrality(intra_graph),
        "global_betweenness": lambda: cal_betweenness_centrality(inter_graph),
        "degree": lambda: nx.degree_centrality(G),
        "local_degree": lambda: nx.degree_centrality(intra_graph),
        "global_degree": lambda: nx.degree_centrality(inter_graph),
        "eigenvector": lambda: nx.eigenvector_centrality(G, max_iter=600),
        "local_eigenvector": lambda: nx.eigenvector_centrality(intra_graph),
        "global_eigenvector": lambda: nx.eigenvector_centrality(inter_graph),
        "closeness": lambda: nx.closeness_centrality(G),
        "local_closeness": lambda: nx.closeness_centrality(intra_graph),
        "global_closeness": lambda: nx.closeness_centrality(inter_graph),
        "pagerank": lambda: nx.pagerank(G, alpha=0.85),
        "local_pagerank": lambda: nx.pagerank(intra_graph, alpha=0.85),
        "global_pagerank": lambda: nx.pagerank(inter_graph, alpha=0.85),
        "k_core": lambda: cal_k_core(G),
        "k_truss": lambda: cal_k_truss(G),
        "Comm": lambda: comm_centreality(G, community_labels),
        "mv": lambda: modularity_vitality(G1, part),
    }
    for measure, compute in calculators.items():
        if measure in cn_measures:
            nx.set_node_attributes(G, compute(), measure)
            print(f"calculated {measure}")

    nx.write_gexf(G, graph_path)

    features_dicts = {}
    for measure in cn_measures:
        features_dicts[measure] = nx.get_node_attributes(G, measure)
        print(f"==>> features_dicts: {measure , len(features_dicts[measure])}")

    # Look up each flow's endpoint IP directly from its column. This yields the
    # same values as a row-wise `df.apply(..., axis=1)` without building a Series
    # per row, and it also behaves sensibly on an empty frame.
    for feature, ip_col, measure in column_plan:
        lookup = features_dicts[measure]
        df[feature] = [lookup.get(ip, -1) for ip in df[ip_col]]

    df.to_parquet(new_path)
    print(f"DataFrame written to {new_path}")
    return df

In [None]:
# Look up the entry for the selected dataset key (`name`) in the imported `datasets` mapping.
DATASET = datasets[name]

In [None]:
# Sanity check: show the selected dataset entry.
print(DATASET)

In [None]:
# Sanity check: the source-IP column name that add_centralities reads from the dataset entry.
print(DATASET.src_ip_col)

In [None]:
# utils

# Maps the activation names used in the model configs to torch.nn module
# classes (not instances). "none" maps to None, meaning "no activation".
ACTIVATIONS = dict(
    relu=nn.ReLU,
    leaky_relu=nn.LeakyReLU,
    gelu=nn.GELU,
    tanh=nn.Tanh,
    sigmoid=nn.Sigmoid,
    none=None,
)

class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy scalars and arrays to native Python types.

    Usage: ``json.dumps(payload, cls=NumpyEncoder)``.

    Handles ``np.bool_`` (-> bool), ``np.integer`` (-> int), ``np.floating``
    (-> float) and ``np.ndarray`` (-> nested list). Anything else is deferred to
    ``json.JSONEncoder.default``, which raises ``TypeError``.
    """

    def default(self, obj):
        # np.bool_ is not a subclass of np.integer, so it needs its own branch;
        # without it, boolean NumPy scalars make json.dumps raise TypeError.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)


def plot_confusion_matrix(cm,
                          target_names,
                          title='Confusion matrix',
                          cmap=None,
                          normalized=False,
                          file_path=None,
                          show_figure=True):
    """Render a confusion matrix as an annotated heat map.

    Parameters
    ----------
    cm : array-like of shape (n_classes, n_classes)
        Confusion matrix; rows are labelled "True label" and columns
        "Predicted label". Lists are accepted and converted to an ndarray.
    target_names : sequence of str or None
        Tick labels for both axes; ``None`` leaves the default numeric ticks.
    title : str
        Figure title.
    cmap : matplotlib colormap or None
        Colormap for the heat map; defaults to ``'Blues'``.
    normalized : bool
        Only affects cell text: ``True`` prints three decimals, ``False`` prints
        the value with thousands separators. ``cm`` itself is never rescaled.
    file_path : str or None
        If given, the figure is saved there.
    show_figure : bool
        If true, ``plt.show()`` is called.

    Returns
    -------
    matplotlib.figure.Figure
        The created figure. It is not closed here; the caller owns it.

    Notes
    -----
    The accuracy shown in the x-label is ``trace(cm) / sum(cm)`` of the matrix
    as passed, so it is only a true accuracy when ``cm`` holds raw counts.
    """
    cm = np.asarray(cm)

    # An all-zero matrix has no defined accuracy; report NaN without triggering
    # a division warning.
    total = float(np.sum(cm))
    accuracy = np.trace(cm) / total if total else float("nan")
    misclass = 1 - accuracy

    if cmap is None:
        cmap = plt.get_cmap('Blues')

    fig, ax = plt.subplots(figsize=(12, 12))
    image = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.set_title(title)
    fig.colorbar(image, ax=ax)

    if target_names is not None:
        tick_marks = np.arange(len(target_names))
        ax.set_xticks(tick_marks)
        ax.set_xticklabels(target_names, rotation=45)
        ax.set_yticks(tick_marks)
        ax.set_yticklabels(target_names)

    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max() / 2
    cell_format = "{:0.3f}" if normalized else "{:,}"
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        ax.text(j, i, cell_format.format(cm[i, j]),
                horizontalalignment="center",
                color="white" if cm[i, j] > thresh else "black")

    ax.set_ylabel('True label')
    ax.set_xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(
        accuracy, misclass))
    # Lay out only after the labels exist, otherwise the two-line x-label is not
    # accounted for and can be clipped in the saved image.
    fig.tight_layout()

    if file_path:
        fig.savefig(file_path)
    if show_figure:
        plt.show()
    return fig


def calculate_fpr_fnr_with_global(cm):
    """
    Derive one-vs-rest false-positive and false-negative rates from a confusion matrix.

    Parameters:
        cm (numpy.ndarray): Square confusion matrix, rows = true class,
            columns = predicted class.

    Returns:
        dict: ``{"per_class": {idx: {"FPR": ..., "FNR": ...}}, "global": {"FPR": ..., "FNR": ...}}``.
            Global rates are computed from the per-class counts summed over all
            classes. Any rate whose denominator is zero is reported as ``None``.
    """
    def _rate(errors, complement):
        # errors / (errors + complement), or None when the denominator is zero.
        denominator = errors + complement
        return errors / denominator if denominator != 0 else None

    # One-vs-rest counts for all classes at once.
    true_pos = np.diag(cm)
    false_neg = np.sum(cm, axis=1) - true_pos
    false_pos = np.sum(cm, axis=0) - true_pos
    true_neg = np.sum(cm) - (true_pos + false_pos + false_neg)

    per_class = {
        class_idx: {
            "FPR": _rate(false_pos[class_idx], true_neg[class_idx]),
            "FNR": _rate(false_neg[class_idx], true_pos[class_idx]),
        }
        for class_idx in range(cm.shape[0])
    }

    overall = {
        "FPR": _rate(false_pos.sum(), true_neg.sum()),
        "FNR": _rate(false_neg.sum(), true_pos.sum()),
    }

    return {"per_class": per_class, "global": overall}


def compute_class_weights(targets: pd.Series,
                          classes: np.ndarray,
                          version: str = 'v4',
                          device: torch.device = torch.device('cpu')) -> torch.Tensor:
    """
    Compute class weights for CrossEntropyLoss, selectable by `version`.

    Parameters
    ----------
    targets : pd.Series
        Series of integer class labels. Labels are used directly as indices
        into the weight vector, so they must lie in ``[0, len(classes))``.
    classes : ndarray of shape (num_classes,)
        Array of all possible class labels (e.g. np.arange(num_classes)).
        Only its length is used.
    version : str, one of {'v1', 'v2', 'v3', 'v4', 'v5'}
        Which weighting strategy to use:
          - v1: total/(num_classes*count); classes absent from `targets` get 0
          - v2: normalize(1/(count + eps)); an absent class gets 1/eps before
                normalization and therefore dominates the normalized vector
          - v3: raw inverse (1/count); absent classes get 0
          - v4: sklearn compute_class_weight('balanced') over the classes
                present in `targets`; absent classes get 0
          - v5: total/(num_classes*count + eps); like v1, but an absent class
                gets a very large weight (total/eps) instead of 0
    device : torch.device
        Where to put the resulting tensor.

    Returns
    -------
    torch.Tensor of shape (num_classes,)
        Float tensor of weights.

    Raises
    ------
    ValueError
        If `version` is not one of the supported strategies.
    """
    # get counts per class label
    counts = targets.value_counts().to_dict()
    num_classes = len(classes)
    counts_arr = np.zeros(num_classes, dtype=float)
    for lbl, cnt in counts.items():
        counts_arr[int(lbl)] = cnt

    if version == 'v1':
        # v1: total samples divided equally across classes
        total = counts_arr.sum()
        weights_arr = np.zeros_like(counts_arr)
        mask = counts_arr > 0
        weights_arr[mask] = total / (num_classes * counts_arr[mask])
        weight_tensor = torch.tensor(
            weights_arr, dtype=torch.float, device=device)

    elif version == 'v2':
        # v2: normalized inverse-frequency with epsilon
        class_counts = torch.tensor(
            counts_arr, dtype=torch.float, device=device)
        weight_tensor = 1.0 / (class_counts + 1e-6)
        weight_tensor = weight_tensor / weight_tensor.sum()

    elif version == 'v3':
        # v3: raw inverse-frequency
        counts_tensor = torch.tensor(
            counts_arr, dtype=torch.float, device=device)
        # avoid division by zero
        inv = torch.zeros_like(counts_tensor)
        mask = counts_tensor > 0
        inv[mask] = 1.0 / counts_tensor[mask]
        weight_tensor = inv

    elif version == 'v4':
        # v4: sklearn compute_class_weight, restricted to the labels that
        # actually occur (sklearn rejects classes missing from y)
        present = np.array(list(counts.keys()), dtype=int)
        w_present = sklearn_class_weight.compute_class_weight(
            class_weight='balanced', classes=present, y=targets.values
        )
        weights_arr = np.zeros_like(counts_arr)
        for cls, w in zip(present, w_present):
            weights_arr[int(cls)] = w
        weight_tensor = torch.tensor(
            weights_arr, dtype=torch.float, device=device)

    elif version == 'v5':
        # v5: balanced weights with an epsilon in the denominator instead of a mask
        class_counts = torch.tensor(
            counts_arr, dtype=torch.float, device=device)
        weight_tensor = class_counts.sum() / (num_classes * class_counts + 1e-6)

    else:
        # The message previously listed only v1..v4 although v5 is implemented.
        raise ValueError(
            f"Unknown version '{version}', choose one of {{'v1','v2','v3','v4','v5'}}")

    return weight_tensor


In [None]:
# normalization_layers

class ChannelLayerNorm(nn.Module):
    """Apply LayerNorm across channels for tensors laid out as (B, C, L)."""

    def __init__(self, num_channels, eps: float = 1e-5, elementwise_affine: bool = True):
        super().__init__()
        # nn.LayerNorm normalizes the trailing dimension, so forward() moves the
        # channel axis to the end before calling it.
        self.ln = nn.LayerNorm(
            normalized_shape=num_channels,
            eps=eps,
            elementwise_affine=elementwise_affine,
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        channels_last = x.swapaxes(1, 2)    # (B, C, L) -> (B, L, C)
        normalized = self.ln(channels_last)  # statistics taken over C
        return normalized.swapaxes(1, 2)    # restore (B, C, L)


class SequenceNorm1d(nn.Module):
    """Optional feature-wise normalization for sequences shaped (B, L, D).

    Batch norm takes precedence when both flags are set; with neither flag the
    module is a pass-through. Extra keyword arguments are forwarded to
    ``nn.BatchNorm1d`` only.
    """

    def __init__(
        self,
        dim: int,
        use_batch_norm: bool = False,
        use_layer_norm: bool = False,
        **bn_kwargs,
    ):
        super().__init__()
        if not (use_batch_norm or use_layer_norm):
            # Pass-through mode: `is_bn` is deliberately left undefined here.
            self.norm = None
            return

        if use_batch_norm:
            # BatchNorm1d wants the feature axis second: (B, D, L)
            self.norm = nn.BatchNorm1d(dim, **bn_kwargs)
        else:
            # LayerNorm acts on the trailing axis: (B, L, D)
            self.norm = nn.LayerNorm(dim)
        self.is_bn = bool(use_batch_norm)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: (B, L, D)
        if self.norm is None:
            return x
        if not self.is_bn:
            return self.norm(x)
        # Batch norm: move features to axis 1, normalize, then move them back.
        features_second = x.transpose(1, 2)
        return self.norm(features_second).transpose(1, 2)


In [None]:
# # CNN_LSTM model
# class CNNLSTM(nn.Module):
#     def __init__(self, model_cfg, num_features, num_classes):
#         super().__init__()

#         self.model_cfg = model_cfg

#         self.cnn_activation = ACTIVATIONS[model_cfg.cnn.activation]
#         self.lstm_activation = ACTIVATIONS[model_cfg.lstm.activation]
#         self.dense_activation = ACTIVATIONS[model_cfg.dense.activation]

#         conv_layers = []

#         in_channels = 1
#         for i, filter in enumerate(model_cfg.cnn.filters):
#             cnn = nn.Conv1d(
#                 in_channels=in_channels,
#                 out_channels=filter,
#                 kernel_size=model_cfg.cnn.kernel_sizes[i],
#                 padding=1
#             )
#             conv_layers.append(cnn)
#             conv_layers.append(self.cnn_activation())
#             if model_cfg.cnn.batch_norm:
#                 conv_layers.append(nn.BatchNorm1d(filter))
#             if model_cfg.cnn.layer_norm:
#                 conv_layers.append(ChannelLayerNorm(filter))
#             if model_cfg.cnn.dropout:
#                 conv_layers.append(nn.Dropout(model_cfg.cnn.dropout_rate))
#             in_channels = filter

#         self.features = nn.Sequential(*conv_layers)

#         self.lstm_layers = nn.ModuleList()
#         self.lstm_activations = nn.ModuleList()
#         self.lstm_normalization = nn.ModuleList()
#         lstm_input_size = filter

#         for hidden_dim in model_cfg.lstm.hidden_size:
#             self.lstm_layers.append(
#                 nn.LSTM(
#                     input_size=lstm_input_size,
#                     hidden_size=hidden_dim,
#                     num_layers=1,
#                     batch_first=True,
#                     dropout=model_cfg.lstm.dropout_rate if model_cfg.lstm.dropout else 0,
#                     bidirectional=False
#                 )
#             )
#             if self.lstm_activation:
#                 self.lstm_activations.append(self.lstm_activation())

#             self.lstm_normalization.append(
#                 SequenceNorm1d(
#                     dim=hidden_dim,
#                     use_batch_norm=model_cfg.lstm.batch_norm,
#                     use_layer_norm=model_cfg.lstm.layer_norm,
#                     # e.g. momentum=0.1, eps=1e-5 if you want custom BN args
#                 )
#             )

#             lstm_input_size = hidden_dim

#         # after LSTM, we'll take the last hidden‐state, so our
#         # `input_dim` for the dense layers is just the last hidden_dim
#         input_dim = model_cfg.lstm.hidden_size[-1]

#         fc_layers = []
#         for hidden_dim in model_cfg.dense.units:
#             fc_layers .append(nn.Linear(input_dim, hidden_dim))
#             fc_layers .append(self.dense_activation())
#             if model_cfg.dense.batch_norm:
#                 fc_layers .append(nn.BatchNorm1d(hidden_dim))
#             if model_cfg.dense.layer_norm:
#                 fc_layers.append(nn.LayerNorm(hidden_dim))
#             if model_cfg.dense.dropout:
#                 fc_layers .append(nn.Dropout(model_cfg.dense.dropout_rate))
#             input_dim = hidden_dim

#         fc_layers .append(nn.Linear(input_dim, num_classes))

#         self.classifier = nn.Sequential(*fc_layers)

#     def forward(self, x):
#         # x = x.view(x.size(0), 1, x.size(-1))
#         # → [batch, L_final, C_last]
#         x = self.features(x)
#         x = x.permute(0, 2, 1)

#         # pass through each LSTM layer
#         for i, lstm in enumerate(self.lstm_layers):
#             x, _ = lstm(x)                    # x: [batch, L, hidden_dim_i]

#             if i < len(self.lstm_activations):
#                 x = self.lstm_activations[i](x)

#             if len(self.lstm_normalization) > i:
#                 x = self.lstm_normalization[i](x)

#         # grab last time step
#         x = x[:, -1, :]                       # → [batch, hidden_dim_last]
#         return self.classifier(x)


In [None]:
class CNNGRU(nn.Module):
    """1D-CNN feature extractor followed by stacked GRU layers and a dense classifier.

    Parameters
    ----------
    model_cfg : config object
        Must expose ``cnn`` (filters, kernel_sizes, activation, batch_norm,
        layer_norm, dropout, dropout_rate), ``gru`` (hidden_size, activation,
        batch_norm, layer_norm, dropout, dropout_rate) and ``dense`` (units,
        activation, batch_norm, layer_norm, dropout, dropout_rate) sections.
        Activation names are resolved through ``ACTIVATIONS``; ``"none"``
        disables the activation of that section.
    num_features : int
        Accepted for interface compatibility; not used by this model.
    num_classes : int
        Size of the output layer.

    Notes
    -----
    * The first convolution is built with ``in_channels=1``, so ``forward``
      expects input shaped ``(batch, 1, length)``.
    * Each GRU is a separate single-layer ``nn.GRU``. PyTorch ignores the
      ``dropout`` argument of a single-layer GRU (and warns about it), so the
      configured GRU dropout is applied by explicit ``nn.Dropout`` modules after
      each GRU block. These modules have no parameters, so ``state_dict`` keys
      are unchanged and eval-mode outputs are unaffected.
    """

    def __init__(self, model_cfg, num_features, num_classes):
        super().__init__()

        self.model_cfg = model_cfg

        # Activation classes (or None for "none"); instantiated per layer below.
        self.cnn_activation = ACTIVATIONS[model_cfg.cnn.activation]
        self.gru_activation = ACTIVATIONS[model_cfg.gru.activation]
        self.dense_activation = ACTIVATIONS[model_cfg.dense.activation]

        # Build CNN feature extractor
        conv_layers = []
        in_channels = 1
        for i, out_channels in enumerate(model_cfg.cnn.filters):
            conv_layers.append(nn.Conv1d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=model_cfg.cnn.kernel_sizes[i],
                padding=1
            ))
            if self.cnn_activation:
                conv_layers.append(self.cnn_activation())
            if model_cfg.cnn.batch_norm:
                conv_layers.append(nn.BatchNorm1d(out_channels))
            if model_cfg.cnn.layer_norm:
                conv_layers.append(ChannelLayerNorm(out_channels))
            if model_cfg.cnn.dropout:
                conv_layers.append(nn.Dropout(model_cfg.cnn.dropout_rate))
            in_channels = out_channels

        self.features = nn.Sequential(*conv_layers)

        # Build GRU layers
        self.gru_layers = nn.ModuleList()
        self.gru_activations = nn.ModuleList()
        self.gru_normalization = nn.ModuleList()
        self.gru_dropouts = nn.ModuleList()

        gru_dropout_rate = model_cfg.gru.dropout_rate if model_cfg.gru.dropout else 0
        # Channel count leaving the CNN (1 if no conv layers are configured).
        gru_input_size = in_channels

        for hidden_dim in model_cfg.gru.hidden_size:
            self.gru_layers.append(
                nn.GRU(
                    input_size=gru_input_size,
                    hidden_size=hidden_dim,
                    num_layers=1,
                    batch_first=True,
                    bidirectional=False
                )
            )
            if self.gru_activation:
                self.gru_activations.append(self.gru_activation())

            self.gru_normalization.append(
                SequenceNorm1d(
                    dim=hidden_dim,
                    use_batch_norm=model_cfg.gru.batch_norm,
                    use_layer_norm=model_cfg.gru.layer_norm,
                )
            )
            self.gru_dropouts.append(nn.Dropout(gru_dropout_rate))
            gru_input_size = hidden_dim

        # Build classifier; its input is the last time step of the final GRU.
        input_dim = gru_input_size
        fc_layers = []
        for hidden_dim in model_cfg.dense.units:
            fc_layers.append(nn.Linear(input_dim, hidden_dim))
            if self.dense_activation:
                fc_layers.append(self.dense_activation())
            if model_cfg.dense.batch_norm:
                fc_layers.append(nn.BatchNorm1d(hidden_dim))
            if model_cfg.dense.layer_norm:
                fc_layers.append(nn.LayerNorm(hidden_dim))
            if model_cfg.dense.dropout:
                fc_layers.append(nn.Dropout(model_cfg.dense.dropout_rate))
            input_dim = hidden_dim

        fc_layers.append(nn.Linear(input_dim, num_classes))
        self.classifier = nn.Sequential(*fc_layers)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for x of shape (batch, 1, length)."""
        # CNN expects input as [batch_size, channels, time_steps]
        x = self.features(x)
        x = x.permute(0, 2, 1)  # [batch, sequence_len, channels] for GRU

        for i, gru in enumerate(self.gru_layers):
            x, _ = gru(x)

            # gru_activations is empty when the configured activation is "none"
            if i < len(self.gru_activations):
                x = self.gru_activations[i](x)

            x = self.gru_normalization[i](x)
            x = self.gru_dropouts[i](x)

        x = x[:, -1, :]  # last time step
        return self.classifier(x)


In [None]:
# CNN_LSTM model
class CNNLSTM(nn.Module):
    """1-D CNN feature extractor followed by stacked LSTMs and a dense head.

    The convolutional part consists of two stacks built from
    ``model_cfg.cnn.filters`` / ``model_cfg.cnn.kernel_sizes``. The first
    stack is followed by max pooling and the second by average pooling (both
    ``kernel_size=2``). The resulting feature map is read as a sequence by one
    single-layer ``nn.LSTM`` per entry of ``model_cfg.lstm.hidden_size``; the
    last time step of the final LSTM feeds the dense classifier.

    Args:
        model_cfg: Config node exposing ``cnn``, ``lstm`` and ``dense``
            sections (activation names, filters, kernel sizes, norm/dropout
            switches, hidden sizes, dense units).
        num_features: Accepted for signature parity with the other models;
            not used by this class.
        num_classes: Width of the final linear layer.
    """

    def __init__(self, model_cfg, num_features, num_classes):
        super().__init__()

        self.model_cfg = model_cfg

        self.cnn_activation = ACTIVATIONS[model_cfg.cnn.activation]
        self.lstm_activation = ACTIVATIONS[model_cfg.lstm.activation]
        self.dense_activation = ACTIVATIONS[model_cfg.dense.activation]

        # Two conv stacks; the input is expected to carry a single channel.
        conv_layers = []
        in_channels = 1
        for pool_cls in (nn.MaxPool1d, nn.AvgPool1d):
            in_channels = self._append_conv_stack(conv_layers, in_channels)
            conv_layers.append(pool_cls(kernel_size=2))

        self.features = nn.Sequential(*conv_layers)

        self.lstm_layers = nn.ModuleList()
        self.lstm_activations = nn.ModuleList()
        self.lstm_normalization = nn.ModuleList()
        self.lstm_dropouts = nn.ModuleList()

        # nn.LSTM's own ``dropout`` argument only acts *between* stacked
        # layers, so with num_layers=1 it is ignored (PyTorch just warns).
        # Apply the configured rate with an explicit Dropout module instead.
        lstm_dropout_rate = model_cfg.lstm.dropout_rate if model_cfg.lstm.dropout else 0

        lstm_input_size = in_channels  # channels produced by the last conv
        for hidden_dim in model_cfg.lstm.hidden_size:
            self.lstm_layers.append(
                nn.LSTM(
                    input_size=lstm_input_size,
                    hidden_size=hidden_dim,
                    num_layers=1,
                    batch_first=True,
                    bidirectional=False
                )
            )
            # ACTIVATIONS may map a name to a falsy value meaning "no activation".
            if self.lstm_activation:
                self.lstm_activations.append(self.lstm_activation())

            self.lstm_normalization.append(
                SequenceNorm1d(
                    dim=hidden_dim,
                    use_batch_norm=model_cfg.lstm.batch_norm,
                    use_layer_norm=model_cfg.lstm.layer_norm,
                )
            )
            # Dropout(0) is a no-op, so one module per layer keeps indexing simple.
            self.lstm_dropouts.append(nn.Dropout(lstm_dropout_rate))

            lstm_input_size = hidden_dim

        # Only the last time step is classified, so the dense head starts
        # from the width of the final LSTM layer.
        input_dim = model_cfg.lstm.hidden_size[-1]

        fc_layers = []
        for hidden_dim in model_cfg.dense.units:
            fc_layers.append(nn.Linear(input_dim, hidden_dim))
            fc_layers.append(self.dense_activation())
            if model_cfg.dense.batch_norm:
                fc_layers.append(nn.BatchNorm1d(hidden_dim))
            if model_cfg.dense.layer_norm:
                fc_layers.append(nn.LayerNorm(hidden_dim))
            if model_cfg.dense.dropout:
                fc_layers.append(nn.Dropout(model_cfg.dense.dropout_rate))
            input_dim = hidden_dim

        fc_layers.append(nn.Linear(input_dim, num_classes))

        self.classifier = nn.Sequential(*fc_layers)

    def _append_conv_stack(self, conv_layers, in_channels):
        """Append one conv/activation/norm/dropout stack and return its output channels."""
        cnn_cfg = self.model_cfg.cnn
        for i, out_channels in enumerate(cnn_cfg.filters):
            conv_layers.append(
                nn.Conv1d(
                    in_channels=in_channels,
                    out_channels=out_channels,
                    kernel_size=cnn_cfg.kernel_sizes[i],
                    # NOTE: padding is fixed at 1, so only kernel_size=3 keeps
                    # the sequence length; larger kernels shorten it.
                    padding=1
                )
            )
            conv_layers.append(self.cnn_activation())
            if cnn_cfg.batch_norm:
                conv_layers.append(nn.BatchNorm1d(out_channels))
            if cnn_cfg.layer_norm:
                conv_layers.append(ChannelLayerNorm(out_channels))
            if cnn_cfg.dropout:
                conv_layers.append(nn.Dropout(cnn_cfg.dropout_rate))
            in_channels = out_channels
        return in_channels

    def forward(self, x):
        """Classify a batch.

        Args:
            x: Input batch for the conv stack; the first convolution has
               ``in_channels=1``, so the channel dimension must be 1.

        Returns:
            Raw outputs of the final linear layer, one row per sample.
        """
        x = self.features(x)
        x = x.permute(0, 2, 1)  # channel-major -> [batch, L, C] for batch_first LSTMs

        for i, lstm in enumerate(self.lstm_layers):
            x, _ = lstm(x)  # x: [batch, L, hidden_dim_i]

            if i < len(self.lstm_activations):
                x = self.lstm_activations[i](x)

            x = self.lstm_normalization[i](x)
            x = self.lstm_dropouts[i](x)

        x = x[:, -1, :]  # last time step -> [batch, hidden_dim_last]
        return self.classifier(x)


In [None]:
# # CNN model
# class CNN(nn.Module):
#     def __init__(self, model_cfg, num_features, num_classes):
#         super().__init__()

#         if model_cfg.input_layer_norm:
#             self.input_norm = nn.LayerNorm(num_features)
#         else:
#             self.input_norm = nn.Identity()

#         self.cnn_activation = ACTIVATIONS[model_cfg.cnn.activation]
#         self.dense_activation = ACTIVATIONS[model_cfg.dense.activation]

#         conv_layers = []

#         in_channels = 1
#         for i, filter in enumerate(model_cfg.cnn.filters):
#             k = model_cfg.cnn.kernel_sizes[i]
#             cnn = nn.Conv1d(
#                 in_channels=in_channels,
#                 out_channels=filter,
#                 kernel_size=k,
#                 padding=(k - 1) // 2
#             )
#             conv_layers.append(cnn)
#             conv_layers.append(self.cnn_activation())
#             if model_cfg.cnn.batch_norm:
#                 conv_layers.append(nn.BatchNorm1d(filter))
#             if model_cfg.cnn.layer_norm:
#                 conv_layers.append(ChannelLayerNorm(filter))
#             if model_cfg.cnn.dropout:
#                 conv_layers.append(nn.Dropout(model_cfg.cnn.dropout_rate))
#             in_channels = filter

#         self.features = nn.Sequential(*conv_layers)
#         with torch.no_grad():
#             # make a dummy of shape (1, channels, length)
#             dummy = torch.zeros(1, 1, num_features)
#             feat = self.features(dummy)
#             # feat.shape -> [1, C_last, L_final]
#             input_dim = feat.size(1) * feat.size(2)

#         # print(f"==>> input_dim: {input_dim}")

#         # fc_layers = []
#         # for hidden_dim in model_cfg.dense.units:
#         #     fc_layers.append(nn.Linear(input_dim, hidden_dim))
#         #     fc_layers.append(self.dense_activation())
#         #     if model_cfg.dense.batch_norm:
#         #         fc_layers.append(nn.BatchNorm1d(hidden_dim))
#         #     if model_cfg.dense.layer_norm:
#         #         fc_layers.append(nn.LayerNorm(hidden_dim))
#         #     if model_cfg.dense.dropout:
#         #         fc_layers.append(nn.Dropout(model_cfg.dense.dropout_rate))
#         #     input_dim = hidden_dim

#         fc_layers = OrderedDict()
#         idx = 0
#         for i, hidden_dim in enumerate(model_cfg.dense.units, start=1):
#             fc_layers[f"linear{i}"] = nn.Linear(input_dim, hidden_dim)
#             idx += 1

#             fc_layers[f"act{i}"] = self.dense_activation()

#             if model_cfg.dense.batch_norm:
#                 fc_layers[f"bn{i}"] = nn.BatchNorm1d(hidden_dim)
#                 idx += 1

#             if model_cfg.dense.layer_norm:
#                 fc_layers[f"ln{i}"] = nn.LayerNorm(hidden_dim)
#                 idx += 1

#             if model_cfg.dense.dropout:
#                 fc_layers[f"drop{i}"] = nn.Dropout(
#                     model_cfg.dense.dropout_rate)
#                 idx += 1

#             input_dim = hidden_dim
#         # fc_layers .append(nn.Linear(input_dim, num_classes))
#         fc_layers["out"] = nn.Linear(input_dim, num_classes)

#         self.classifier = nn.Sequential(fc_layers)
#         # self.classifier = nn.Sequential(*fc_layers)

#     def forward(self, x):
#         x = self.input_norm(x)
#         # print(f"==>> x.shape: {x.shape}")
#         # x = x.view(x.size(0), 1, x.size(-1))
#         # print(f"==>> x.shape: {x.shape}")
#         x = self.features(x)
#         x = x.flatten(1)
#         return self.classifier(x)


In [None]:
import torch
import torch.nn as nn

class GRUModel(nn.Module):
    """Stack of single-layer GRUs followed by a linear classification head.

    Each entry of ``config.gru.hidden_units`` creates one ``nn.GRU``
    (``batch_first=True``) and a matching ``nn.Dropout`` whose rate is the
    entry at the same position of ``config.gru.dropout``. Intermediate layers
    pass their full output sequence on; the last layer contributes only its
    final time step.

    The head returns raw logits: the losses used by ``LitClassifier``
    (cross-entropy / focal) and its softmax-based AUC apply their own
    normalisation, so no output activation is added here.

    Args:
        config: Config node with ``gru.hidden_units`` and ``gru.dropout``
            (sequences of equal length).
        input_size: Number of features per time step.
        output_size: Number of output logits.
    """

    def __init__(self, config, input_size, output_size):
        super().__init__()
        hidden_units = config.gru.hidden_units
        dropout_rates = config.gru.dropout

        self.layers = nn.ModuleList()
        # Must be a ModuleList (not a plain list) so that train()/eval() and
        # .to(device) reach the dropout modules.
        self.dropouts = nn.ModuleList()

        prev_size = input_size
        for i, units in enumerate(hidden_units):
            self.layers.append(
                nn.GRU(
                    input_size=prev_size,
                    hidden_size=units,
                    batch_first=True
                )
            )
            self.dropouts.append(nn.Dropout(dropout_rates[i]))
            prev_size = units

        # Kept as a Sequential so parameter names stay ``final_dense.0.*``.
        self.final_dense = nn.Sequential(
            nn.Linear(prev_size, output_size)
        )

    def forward(self, x):
        """Return logits for a batch of sequences.

        Args:
            x: Tensor laid out ``[batch, seq_len, input_size]`` (the GRUs are
               created with ``batch_first=True``).

        Returns:
            Tensor of raw logits with ``output_size`` columns.
        """
        last = len(self.layers) - 1
        for i, (gru, dropout) in enumerate(zip(self.layers, self.dropouts)):
            x, _ = gru(x)
            if i == last:
                # The final GRU contributes only its last time step.
                x = x[:, -1, :]
            x = dropout(x)

        return self.final_dense(x)


# class GRUModel(nn.Module):
#     def __init__(self, config, input_size, num_classes):
#         super(GRUModel, self).__init__()
#         self.layers = nn.ModuleList()
#         self.dropouts = []

#         hidden_units = config.gru.hidden_units
#         dropout_rates = config.gru.dropout
#         return_sequences = config.gru.return_sequences  # can be used later if needed

#         prev_size = input_size
#         for i in range(len(hidden_units)):
#             gru_layer = nn.GRU(
#                 input_size=prev_size,
#                 hidden_size=hidden_units[i],
#                 batch_first=True
#             )
#             self.layers.append(gru_layer)
#             self.dropouts.append(nn.Dropout(dropout_rates[i]))
#             prev_size = hidden_units[i]

#         self.final_dense = nn.Sequential(
#             nn.Linear(prev_size, num_classes),
#             nn.Sigmoid()
#         )

#     def forward(self, x):
#         for i, gru in enumerate(self.layers):
#             x, _ = gru(x)
#             if i < len(self.layers) - 1:
#                 # Apply dropout after intermediate GRU layers
#                 x = self.dropouts[i](x)
#             else:
#                 # Take only the last timestep output for the last GRU layer
#                 x = x[:, -1, :]
#                 x = self.dropouts[i](x)

#         out = self.final_dense(x)
#         return out


In [None]:
class CNN(nn.Module):
    """Two-stage 1-D CNN with a fully connected classification head.

    Stage 1 is conv -> activation -> optional norms/dropout -> max pool;
    stage 2 has the same layout but ends in average pooling. Stage ``k`` uses
    ``filters[k]`` and ``kernel_sizes[k]`` from ``model_cfg.cnn``. The
    flattened feature map feeds the dense layers listed in
    ``model_cfg.dense.units`` and a final linear layer of ``num_classes``
    outputs.
    """

    def __init__(self, model_cfg, num_features, num_classes):
        super().__init__()

        self.input_norm = (
            nn.LayerNorm(num_features) if model_cfg.input_layer_norm else nn.Identity()
        )

        self.cnn_activation = ACTIVATIONS[model_cfg.cnn.activation]
        self.dense_activation = ACTIVATIONS[model_cfg.dense.activation]

        filters = model_cfg.cnn.filters
        kernel_sizes = model_cfg.cnn.kernel_sizes

        stages = []
        channels_in = 1
        # Stage 0 ends with max pooling, stage 1 with average pooling.
        for stage_idx, pool_cls in enumerate((nn.MaxPool1d, nn.AvgPool1d)):
            channels_out = filters[stage_idx]
            kernel = kernel_sizes[stage_idx]
            stages.append(
                nn.Conv1d(channels_in, channels_out, kernel, padding=(kernel - 1) // 2)
            )
            stages.append(self.cnn_activation())
            if model_cfg.cnn.batch_norm:
                stages.append(nn.BatchNorm1d(channels_out))
            if model_cfg.cnn.layer_norm:
                stages.append(ChannelLayerNorm(channels_out))
            if model_cfg.cnn.dropout:
                stages.append(nn.Dropout(model_cfg.cnn.dropout_rate))
            stages.append(pool_cls(kernel_size=2))
            channels_in = channels_out
        self.features = nn.Sequential(*stages)

        # Probe the extractor with a dummy sample to learn the flattened width.
        with torch.no_grad():
            probe = self.features(torch.zeros(1, 1, num_features))
            width = probe.size(1) * probe.size(2)

        # Dense head, with named sub-modules (linear1, act1, ..., out).
        head = []
        for i, hidden_dim in enumerate(model_cfg.dense.units, start=1):
            head.append((f"linear{i}", nn.Linear(width, hidden_dim)))
            head.append((f"act{i}", self.dense_activation()))
            if model_cfg.dense.batch_norm:
                head.append((f"bn{i}", nn.BatchNorm1d(hidden_dim)))
            if model_cfg.dense.layer_norm:
                head.append((f"ln{i}", nn.LayerNorm(hidden_dim)))
            if model_cfg.dense.dropout:
                head.append((f"drop{i}", nn.Dropout(model_cfg.dense.dropout_rate)))
            width = hidden_dim
        head.append(("out", nn.Linear(width, num_classes)))

        self.classifier = nn.Sequential(OrderedDict(head))

    def forward(self, x):
        """Normalise the input, extract conv features, flatten and classify."""
        normed = self.input_norm(x)
        feature_map = self.features(normed)
        return self.classifier(feature_map.flatten(1))


In [None]:
# MLP model
class MLP(nn.Module):
    """Fully connected classifier.

    An optional ``LayerNorm`` over the input is followed by one
    linear/activation block per entry of ``model_cfg.dense.units`` (each with
    optional batch norm, layer norm and dropout) and a final linear layer
    named ``out`` with ``num_classes`` outputs.
    """

    def __init__(self, model_cfg, num_features, num_classes):
        super().__init__()

        self.input_norm = (
            nn.LayerNorm(num_features) if model_cfg.input_layer_norm else nn.Identity()
        )

        self.dense_activation = ACTIVATIONS[model_cfg.dense.activation]

        # (name, module) pairs; the names end up as the state-dict prefixes.
        named_modules = []
        width = num_features
        for i, hidden_dim in enumerate(model_cfg.dense.units, start=1):
            named_modules.append((f"linear{i}", nn.Linear(width, hidden_dim)))
            named_modules.append((f"act{i}", self.dense_activation()))

            if model_cfg.dense.batch_norm:
                named_modules.append((f"bn{i}", nn.BatchNorm1d(hidden_dim)))

            if model_cfg.dense.layer_norm:
                named_modules.append((f"ln{i}", nn.LayerNorm(hidden_dim)))

            if model_cfg.dense.dropout:
                named_modules.append(
                    (f"drop{i}", nn.Dropout(model_cfg.dense.dropout_rate))
                )

            width = hidden_dim

        named_modules.append(("out", nn.Linear(width, num_classes)))

        self.network = nn.Sequential(OrderedDict(named_modules))

    def forward(self, x):
        """Apply the input normalisation and then the dense stack."""
        return self.network(self.input_norm(x))


In [None]:
# loss_functions
class FocalLoss(nn.Module):
    """
    Multi-class focal loss built on top of cross-entropy.

    Per sample the loss is ``alpha * (1 - pt) ** gamma * ce`` where ``ce`` is
    the cross-entropy and ``pt = exp(-ce)`` is the probability the model
    assigns to the true class.

    Args:
        alpha (float): constant factor applied to every sample's focal term.
        gamma (float): focusing exponent; larger values down-weight
            well-classified samples more strongly.
        reduction (str): 'none' | 'mean' | 'sum'

    Raises:
        ValueError: if ``reduction`` is not one of the three modes above.
    """

    def __init__(self, alpha: float = 1.0, gamma: float = 2.0, reduction: str = 'mean'):
        super().__init__()
        allowed = ('none', 'mean', 'sum')
        if reduction not in allowed:
            raise ValueError(f"Invalid reduction mode: {reduction}")
        self.alpha, self.gamma, self.reduction = alpha, gamma, reduction

    def forward(self, logits: torch.Tensor, targets: torch.Tensor) -> torch.Tensor:
        """
        logits: shape (batch_size, num_classes)
        targets: shape (batch_size,) with class indices

        Returns the reduced loss (scalar for 'mean'/'sum', per-sample vector
        for 'none').
        """
        # Unreduced cross-entropy, one value per sample.
        per_sample_ce = F.cross_entropy(logits, targets, reduction='none')
        # Probability of the true class, recovered from the cross-entropy.
        true_class_prob = torch.exp(-per_sample_ce)
        focal = self.alpha * (1 - true_class_prob) ** self.gamma * per_sample_ce

        if self.reduction == 'mean':
            return focal.mean()
        if self.reduction == 'sum':
            return focal.sum()
        return focal  # 'none'


In [None]:
# LitClassifier
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import label_binarize


class LitClassifier(pl.LightningModule):
    """Lightning wrapper that trains and evaluates one classifier network.

    Per batch it logs loss and accuracy (in percent); at the end of every
    train/validation/test epoch it logs the weighted F1 score over all
    predictions collected during that epoch. The test epoch additionally
    computes a weighted AUC, confusion matrices, a classification report and
    FPR/FNR figures, writes them to ``temp/<model_name>_results.json`` and,
    when ``using_wandb`` is set, uploads them to Weights & Biases.

    Args:
        model: Network mapping a feature batch to raw class logits.
        model_name: Name used in titles, W&B keys and the results file name.
        training_cfg: Config with ``learning_rate``, ``weight_decay``,
            ``multi_class``, ``batch_size``, ``optimizer`` ("adam" | "sgd"),
            ``loss_type`` ("focal" | "cross_entropy") and, for focal loss,
            ``focal_loss_alpha`` / ``focal_loss_gamma`` / ``focal_loss_reduction``.
        labels_mapping: Mapping from encoded class id to class name.
        weight_tensor: Optional per-class weights; only used by the
            cross-entropy loss.
        using_wandb: Whether to push artefacts to Weights & Biases.

    Raises:
        ValueError: if ``training_cfg.optimizer`` or ``training_cfg.loss_type``
            is not one of the supported values.
    """

    def __init__(self, model, model_name, training_cfg, labels_mapping, weight_tensor=None, using_wandb=False):

        super().__init__()
        # self.save_hyperparameters()

        self.model = model
        self.model_name = model_name
        self.learning_rate = training_cfg.learning_rate
        self.weight_decay = training_cfg.weight_decay
        self.labels = list(labels_mapping.values())
        self.labels_mapping = labels_mapping
        self.using_wandb = using_wandb
        self.multi_class = training_cfg.multi_class
        self.batch_size = training_cfg.batch_size

        # Fail at construction instead of leaving the attribute unset and
        # crashing much later inside the training loop.
        if training_cfg.optimizer == "adam":
            self.optimizer = Adam
        elif training_cfg.optimizer == "sgd":
            self.optimizer = SGD
        else:
            raise ValueError(
                f"Unsupported optimizer {training_cfg.optimizer!r}; expected 'adam' or 'sgd'")

        if training_cfg.loss_type == "focal":
            self.criterion = FocalLoss(alpha=training_cfg.focal_loss_alpha,
                                       gamma=training_cfg.focal_loss_gamma, reduction=training_cfg.focal_loss_reduction)
        elif training_cfg.loss_type == "cross_entropy":
            self.criterion = nn.CrossEntropyLoss(weight=weight_tensor)
        else:
            raise ValueError(
                f"Unsupported loss_type {training_cfg.loss_type!r}; expected 'focal' or 'cross_entropy'")

        self.train_epoch_metrics = {}
        self.val_epoch_metrics = {}
        # Per-epoch buffers; each one is reset in the matching *_epoch_end hook.
        self.train_outputs = {"preds": [], "targets": []}
        self.val_outputs = {"preds": [], "targets": []}
        self.test_outputs = {"preds": [], "targets": [], 'logits': []}

    def forward(self, x):
        """Delegate to the wrapped network."""
        return self.model(x)

    @staticmethod
    def _gather(outputs, key):
        """Concatenate the tensors buffered under ``key`` into one NumPy array."""
        return torch.cat(outputs[key]).detach().cpu().numpy()

    def training_step(self, batch, batch_idx):
        """Compute the loss for one batch, log loss/accuracy and buffer predictions."""
        x, y = batch
        pred = self(x)
        loss = self.criterion(pred, y)
        pred = pred.argmax(dim=1)
        acc = (pred == y).float().mean() * 100.0
        self.log('train_loss', loss, on_epoch=True,
                 prog_bar=True, batch_size=self.batch_size)
        self.log('train_acc', acc, on_epoch=True,
                 prog_bar=True, batch_size=self.batch_size)

        self.train_outputs["preds"].append(pred)
        self.train_outputs["targets"].append(y)

        return loss

    def on_train_epoch_end(self):
        """Log the weighted F1 score over the whole training epoch."""
        all_preds = self._gather(self.train_outputs, "preds")
        all_targets = self._gather(self.train_outputs, "targets")
        weighted_f1 = f1_score(all_targets, all_preds,
                               average="weighted") * 100.0
        self.log("train_f1_score", weighted_f1, on_epoch=True,
                 prog_bar=True, batch_size=self.batch_size)

        print(f"[INFO] Finished Epoch {self.current_epoch + 1}:")
        self.train_outputs = {"preds": [], "targets": []}

    def validation_step(self, batch, batch_idx):
        """Evaluate one validation batch, log loss/accuracy and buffer predictions."""
        x, y = batch
        pred = self(x)
        loss = self.criterion(pred, y)
        pred = pred.argmax(dim=1)
        acc = (pred == y).float().mean() * 100.0
        self.log('val_loss', loss, on_epoch=True,
                 prog_bar=True, batch_size=self.batch_size)
        self.log('val_acc', acc, on_epoch=True,
                 prog_bar=True, batch_size=self.batch_size)

        self.val_outputs["preds"].append(pred)
        self.val_outputs["targets"].append(y)

        return {"val_loss": loss, "val_acc": acc}

    def on_validation_epoch_end(self):
        """Log the weighted F1 score over the whole validation epoch."""
        all_preds = self._gather(self.val_outputs, "preds")
        all_targets = self._gather(self.val_outputs, "targets")
        weighted_f1 = f1_score(all_targets, all_preds,
                               average="weighted") * 100.0
        self.log("val_f1_score", weighted_f1, on_epoch=True,
                 prog_bar=True, batch_size=self.batch_size)

        self.val_outputs = {"preds": [], "targets": []}

    def test_step(self, batch, batch_idx):
        """Evaluate one test batch; buffers logits, predictions and targets."""
        x, y = batch
        # One forward pass serves the loss, the predictions and the AUC logits.
        logits = self(x)
        loss = self.criterion(logits, y)
        pred = logits.argmax(dim=1)
        acc = (pred == y).float().mean() * 100.0

        self.log("test_loss", loss, on_epoch=True,
                 prog_bar=True, batch_size=self.batch_size)
        self.log("test_acc", acc, on_epoch=True,
                 prog_bar=True, batch_size=self.batch_size)
        self.test_outputs['logits'].append(logits)
        self.test_outputs["preds"].append(pred)
        self.test_outputs["targets"].append(y)
        return {"test_loss": loss, "test_acc": acc, "preds": pred, "targets": y}

    def _weighted_auc(self, all_targets, all_logits):
        """Return the AUC in [0, 1], or ``None`` when sklearn cannot compute it.

        With two output columns the positive-class probability is scored
        directly: ``label_binarize`` yields a single column for two classes,
        which cannot be paired with a two-column probability matrix. With
        more columns a weighted one-vs-rest AUC over the binarised targets is
        used.
        """
        try:
            all_probs = torch.softmax(torch.tensor(all_logits), dim=1).numpy()
            num_outputs = all_probs.shape[1]
            if num_outputs == 2:
                return roc_auc_score(all_targets, all_probs[:, 1])
            y_true_bin = label_binarize(all_targets, classes=list(range(num_outputs)))
            return roc_auc_score(y_true_bin, all_probs, multi_class="ovr", average="weighted")
        except ValueError as e:
            # e.g. a class that never occurs in the test targets
            print(f"[WARN] Could not compute AUC: {e}")
            return None

    def on_test_epoch_end(self):
        """Aggregate the test epoch: metrics, JSON results file and confusion matrices."""
        all_preds = self._gather(self.test_outputs, "preds")
        all_logits = self._gather(self.test_outputs, "logits")
        all_targets = self._gather(self.test_outputs, "targets")
        self.test_outputs = {"preds": [], "targets": [], "logits": []}

        weighted_f1 = f1_score(all_targets, all_preds,
                               average="weighted") * 100.0
        self.log("test_f1", weighted_f1, on_epoch=True,
                 prog_bar=True, batch_size=self.batch_size)

        auc_score = self._weighted_auc(all_targets, all_logits)
        if auc_score is not None:
            self.log("test_auc", auc_score * 100.0, on_epoch=True, prog_bar=True, batch_size=self.batch_size)

        # Switch from encoded ids to class names for the human-readable reports.
        all_targets = np.vectorize(self.labels_mapping.get)(all_targets)
        all_preds = np.vectorize(self.labels_mapping.get)(all_preds)

        cm = confusion_matrix(all_targets, all_preds, labels=self.labels)

        cr = classification_report(
            all_targets, all_preds, digits=4, output_dict=True, zero_division=0)
        report = classification_report(
            all_targets, all_preds, digits=4, output_dict=False, zero_division=0)
        weighted_f1 = f1_score(all_targets, all_preds,
                               average="weighted") * 100

        results_fpr_fnr = calculate_fpr_fnr_with_global(cm)
        fpr = results_fpr_fnr["global"]["FPR"]
        fnr = results_fpr_fnr["global"]["FNR"]

        results = {
            "test_weighted_f1": weighted_f1,
            "test_auc": auc_score * 100 if auc_score is not None else None,
            "test_fpr": fpr,
            "test_fnr": fnr,
            "classification_report": cr,
            "results_fpr_fnr": results_fpr_fnr
        }

        os.makedirs("temp", exist_ok=True)
        json_path = os.path.join("temp", f"{self.model_name}_results.json")
        with open(json_path, "w") as f:
            json.dump(results, f, indent=4, cls=NumpyEncoder)

        if self.using_wandb:
            wandb.save(json_path)

        print("=== Test Evaluation Metrics ===")
        print("Classification Report:\n", report)

        cm_normalized = confusion_matrix(
            all_targets, all_preds, labels=self.labels, normalize="true")
        fig = plot_confusion_matrix(cm=cm,
                                    normalized=False,
                                    target_names=self.labels,
                                    title=f"Confusion Matrix of {self.model_name}",
                                    file_path=None,
                                    show_figure=False)

        if self.using_wandb:
            wandb.log({f"confusion_matrix_{self.model_name}": wandb.Image(
                fig), "epoch": self.current_epoch})
        fig = plot_confusion_matrix(cm=cm_normalized,
                                    normalized=True,
                                    target_names=self.labels,
                                    title=f"Confusion Matrix of {self.model_name}",
                                    file_path=None,
                                    show_figure=False)

        if self.using_wandb:
            wandb.log({f"confusion_matrix_{self.model_name}_normalized": wandb.Image(
                fig), "epoch": self.current_epoch})

    def configure_optimizers(self):
        """Instantiate the configured optimizer over the wrapped model's parameters."""
        optimizer = self.optimizer(
            self.model.parameters(), lr=self.learning_rate, weight_decay=self.weight_decay)
        return optimizer



In [None]:
# init_model
def init_model(training_cfg, model_cfg, num_features, labels_mapping, weight_tensor, using_wandb):
    """Build the network selected by ``model_cfg.model.type`` and wrap it in ``LitClassifier``.

    Args:
        training_cfg: Training config; ``multi_class`` decides the output width.
        model_cfg: Model config; ``model.type`` is one of "mlp", "cnn",
            "cnn_lstm", "cnn_gru" or "gru".
        num_features: Number of input features passed to the network constructor.
        labels_mapping: Mapping from encoded class id to class name.
        weight_tensor: Optional class weights forwarded to ``LitClassifier``.
        using_wandb: Forwarded to ``LitClassifier``.

    Returns:
        A ``LitClassifier`` wrapping the freshly built network.

    Raises:
        ValueError: if ``model_cfg.model.type`` is not a supported model type.
    """
    # Binary mode always has two outputs; multi-class has one per known label.
    num_classes = len(labels_mapping) if training_cfg.multi_class else 2

    model_type = model_cfg.model.type
    # An if-chain (rather than a dict of classes) keeps name lookup lazy, so
    # only the selected model class has to be defined in the notebook session.
    if model_type == "mlp":
        model = MLP(model_cfg, num_features, num_classes)
    elif model_type == "cnn":
        model = CNN(model_cfg, num_features, num_classes)
    elif model_type == "cnn_lstm":
        model = CNNLSTM(model_cfg, num_features, num_classes)
    elif model_type == "cnn_gru":
        model = CNNGRU(model_cfg, num_features, num_classes)
    elif model_type == "gru":
        model = GRUModel(model_cfg, num_features, num_classes)
    else:
        raise ValueError(
            f"Unknown model type {model_type!r}; expected one of "
            "'mlp', 'cnn', 'cnn_lstm', 'cnn_gru', 'gru'")

    return LitClassifier(model, model_type, training_cfg, labels_mapping, weight_tensor, using_wandb)


# Prepare dataframes and dataloaders

In [None]:
# read and prepare dataframes

def load_df(file_path, raw_type):
    """Read a raw dataset file into a DataFrame.

    Args:
        file_path: Path (or file-like object) handed to the pandas reader.
        raw_type: File format, either "parquet" or "csv".

    Returns:
        The loaded ``pandas.DataFrame``.

    Raises:
        ValueError: if ``raw_type`` is neither "parquet" nor "csv".
    """
    if raw_type == "parquet":
        return pd.read_parquet(file_path)
    if raw_type == "csv":
        return pd.read_csv(file_path)
    # Previously this fell through and returned None, which only surfaced
    # later as an AttributeError in the caller.
    raise ValueError(
        f"Unsupported raw_type {raw_type!r}; expected 'parquet' or 'csv'")

def create_processed_df(processed_dir, base_cfg):
    """Build the processed train/test frames from the raw dataset and cache them.

    Steps: load the raw file, turn +/-inf into NaN and drop incomplete rows,
    drop duplicate rows (ignoring the timestamp and flow-id columns), encode
    the class column into ``dp.class_num_col``, run ``add_centralities`` and
    make a stratified train/test split. The label mapping is pickled to
    ``labels_names.pkl`` and both splits are written as parquet files inside
    ``processed_dir``.

    Relies on notebook-level names defined elsewhere:
    ``original_datasets_files_path``, ``new_path``, ``graph_path``,
    ``DATASET``, ``cn_measures`` and ``network_features``.

    Returns:
        ``(train_df, test_df, labels_names)`` where ``labels_names`` maps the
        encoded integer id to the class name.
    """
    os.makedirs(processed_dir, exist_ok=True)

    dataset = base_cfg.dataset
    dp = base_cfg.dataset_properties

    raw_file = os.path.join(original_datasets_files_path, dataset.raw)
    df = load_df(raw_file, dataset.raw_type)

    # Infinite values are treated like missing ones and removed with them.
    df.replace([np.inf, -np.inf], np.nan, inplace=True)
    df.dropna(axis=0, how='any', inplace=True)

    # Rows are duplicates when they agree on everything except these two columns.
    dedup_columns = list(set(df.columns) - set([dp.timestamp_col, dp.flow_id_col]))
    df.drop_duplicates(subset=dedup_columns, keep="first", inplace=True)

    classes = df[dp.class_col].unique()

    label_encoder = LabelEncoder()
    label_encoder.fit(list(classes))
    encoded_ids = label_encoder.transform(label_encoder.classes_)
    labels_names = {
        int(code): str(class_name)
        for code, class_name in zip(encoded_ids, label_encoder.classes_)
    }

    labels_file = os.path.join(processed_dir, "labels_names.pkl")
    with open(labels_file, 'wb') as f:
        pickle.dump([labels_names, classes], f)

    df[dp.class_num_col] = label_encoder.transform(df[dp.class_col])
    df = add_centralities(
        df=df,
        new_path=new_path,
        graph_path=graph_path,
        dataset=DATASET,
        cn_measures=cn_measures,
        network_features=network_features
    )

    train_df, test_df = train_test_split(
        df,
        test_size=dataset.test_size,
        random_state=base_cfg.random_seed,
        stratify=df[dp.class_num_col],
    )

    for split_name, split_df in (("train", train_df), ("test", test_df)):
        split_df.to_parquet(os.path.join(processed_dir, f"{split_name}.parquet"))
    return train_df, test_df, labels_names

In [None]:
# load_processed_df 
def load_processed_df(base_cfg):
    """Return the processed train/test frames, building them when not cached.

    The cache lives in ``<main_processed_dir>/<base_cfg.dataset.name>`` and is
    only used when all three artefacts (``train.parquet``, ``test.parquet``
    and ``labels_names.pkl``) are present; otherwise the data is rebuilt via
    ``create_processed_df``.

    Returns:
        ``(train_df, test_df, labels_mapping)`` where ``labels_mapping`` maps
        the encoded class id to the class name.
    """
    processed_dir = os.path.join(main_processed_dir, base_cfg.dataset.name)
    train_path = os.path.join(processed_dir, "train.parquet")
    test_path = os.path.join(processed_dir, "test.parquet")
    labels_path = os.path.join(processed_dir, "labels_names.pkl")

    # Require every artefact: a partially written cache (e.g. an interrupted
    # run) would otherwise crash on the first missing file.
    if all(os.path.isfile(path) for path in (train_path, test_path, labels_path)):
        print("loading existing files")
        train_df = pd.read_parquet(train_path)
        test_df = pd.read_parquet(test_path)

        # NOTE: pickle executes code on load; this file is expected to be the
        # one written by create_processed_df, never an untrusted download.
        with open(labels_path, "rb") as f:
            labels_names = pickle.load(f)
        # The pickle holds [labels_names, classes]; only the mapping is needed.
        labels_mapping = labels_names[0]

        return train_df, test_df, labels_mapping

    print("creating files")
    return create_processed_df(processed_dir, base_cfg)


def load_dfs(base_cfg):
    """Load the processed splits and separate features from labels.

    The label column is ``dp.class_num_col`` in multi-class mode and
    ``dp.label_col`` otherwise. After the labels are taken, every column
    listed in ``dp.drop_columns`` / ``dp.weak_columns`` plus the class and
    label columns is removed from both frames (missing columns are ignored).

    Returns:
        ``(train_df, train_labels, test_df, test_labels, input_dim,
        labels_mapping)`` where ``input_dim`` is the number of remaining
        feature columns in the training frame.
    """
    dp = base_cfg.dataset_properties
    train_df, test_df, labels_mapping = load_processed_df(base_cfg)

    label_col = dp.class_num_col if base_cfg.training.multi_class else dp.label_col
    non_feature_columns = (
        dp.drop_columns + [dp.class_col, dp.class_num_col, dp.label_col] + dp.weak_columns
    )

    def _take_labels(frame):
        # Grab the labels first; the label column itself is dropped right after.
        labels = frame[label_col]
        frame.drop(columns=non_feature_columns, inplace=True, errors='ignore')
        return labels

    train_labels = _take_labels(train_df)
    test_labels = _take_labels(test_df)

    input_dim = train_df.shape[1]

    return train_df, train_labels, test_df, test_labels, input_dim, labels_mapping


In [None]:
# LightningDataModule

class FnnDataset(Dataset):
    """Tabular dataset returning one flat feature vector and its label per item."""

    def __init__(self, X, y):
        # Features become float32, labels int64 (class indices).
        features = torch.tensor(X, dtype=torch.float32)
        labels = torch.tensor(y, dtype=torch.long)
        self.X, self.y = features, labels

    def __len__(self):
        """Number of samples (rows of ``X``)."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for sample ``idx``."""
        sample = (self.X[idx], self.y[idx])
        return sample


class CnnDataset(Dataset):
    """Tabular dataset whose samples carry a leading channel axis for Conv1d models."""

    def __init__(self, X, y):
        # Features become float32, labels int64 (class indices).
        features = torch.tensor(X, dtype=torch.float32)
        labels = torch.tensor(y, dtype=torch.long)
        self.X, self.y = features, labels

    def __len__(self):
        """Number of samples (rows of ``X``)."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``(features, label)``; features gain a new first (channel) axis."""
        with_channel = self.X[idx].unsqueeze(0)
        label = self.y[idx]
        return with_channel, label


class SlidingWindowDataset(Dataset):
    """Dataset of fixed-length windows cut from a sequence of rows.

    Window ``i`` covers rows ``[i * stride, i * stride + seq_length)`` of
    ``data`` and is labelled with the target of its last row.

    Args:
        data: Row-indexable, sliceable container of samples.
        targets: Container of labels aligned with ``data``.
        seq_length: Number of rows per window.
        stride: Step between the starts of consecutive windows.
        transform: Optional callable applied to each window before it is
            converted to a tensor.
    """

    def __init__(self, data, targets, seq_length=10, stride=1, transform=None):
        super().__init__()
        self.data = data
        self.targets = targets
        self.seq_length = seq_length
        self.stride = stride
        self.transform = transform
        self.num_sequences = (len(self.data) - seq_length) // stride + 1
        assert self.num_sequences > 0, "Not enough data to form even one sequence!"

    def __len__(self):
        """Number of complete windows."""
        return self.num_sequences

    def __getitem__(self, idx):
        """Return ``(window, label)`` as ``(float32, int64)`` tensors.

        Negative indices count from the end, like for a list.

        Raises:
            IndexError: if ``idx`` does not address an existing window.
        """
        # Without this normalisation a negative idx yields a negative start
        # offset, and the slice below silently returns an empty/wrong window.
        if idx < 0:
            idx += self.num_sequences
        if not 0 <= idx < self.num_sequences:
            raise IndexError(
                f"Window index {idx} is out of range for {self.num_sequences} sequences.")

        start_idx = idx * self.stride
        end_idx = start_idx + self.seq_length
        # shape: (seq_length, num_features)
        x_seq = self.data[start_idx:end_idx]
        # The window is labelled with the target of its last row.
        y_label = self.targets[end_idx - 1]
        if self.transform:
            x_seq = self.transform(x_seq)
        # Convert to tensors
        x_seq = torch.as_tensor(x_seq, dtype=torch.float32)
        y_label = torch.as_tensor(y_label, dtype=torch.long)
        return x_seq, y_label


class LitDataModule(pl.LightningDataModule):
    """Lightning data module that standardises the splits and builds DataLoaders.

    The train/val/test arrays are kept on the instance, standardised once in
    ``setup`` with a ``StandardScaler`` fitted on the training split, and
    wrapped in the dataset class that matches ``model_type``:

    * ``"mlp"`` -> ``FnnDataset``
    * ``"cnn"``, ``"cnn_lstm"``, ``"cnn_gru"`` -> ``CnnDataset``
    * ``"lstm"``, ``"gru"`` -> ``SlidingWindowDataset`` (requires the
      ``sequence_length`` and ``stride`` entries of ``dataset_kwargs``)

    Args:
        X_train, y_train, X_val, y_val, X_test, y_test: Feature and label
            arrays for each split.
        dataset: Dataset configuration; stored but not used by this class.
        model_type: Model family name, used to select the dataset class.
        batch_size: Batch size for every DataLoader.
        oversample: If true, the training loader draws samples with a
            ``WeightedRandomSampler`` that favours rare classes.
        **dataset_kwargs: Extra options; only ``sequence_length`` and
            ``stride`` are read here.
    """

    def __init__(
        self,
        X_train,
        y_train,
        X_val,
        y_val,
        X_test,
        y_test,
        dataset,
        model_type,
        batch_size=128,
        oversample=False,
        **dataset_kwargs
    ):

        super().__init__()

        self.X_train = X_train
        self.y_train = y_train
        self.X_val = X_val
        self.y_val = y_val
        self.X_test = X_test
        self.y_test = y_test

        self.batch_size = batch_size
        self.oversample = oversample
        self.dataset = dataset
        self.dataset_kwargs = dataset_kwargs
        self.model_type = model_type
        self.num_features = 0
        # Set once the splits have been standardised, so setup() is idempotent.
        self._is_scaled = False

    def test_step(self, batch, batch_idx):
        # NOTE(review): this looks like a LightningModule hook pasted into the
        # data module. Nothing in this class defines `criterion` or makes the
        # instance callable, so calling it here would fail. Kept only so the
        # class's attribute surface is unchanged; consider deleting it.
        x, y = batch
        y_hat = self(x)
        loss = self.criterion(y_hat, y)

        self.log("test_loss", loss, prog_bar=True, on_epoch=True, logger=True)
        return {"test_loss": loss}

    def setup(self, stage=None):
        """Standardise the three splits with statistics from the training split.

        ``setup`` may run more than once on the same instance (a manual call
        followed by the Trainer's own calls for fit and test). The guard makes
        repeated calls no-ops so the scaler is never re-fitted on data that
        has already been scaled.
        """
        if getattr(self, "_is_scaled", False):
            return

        scaler = StandardScaler()
        self.X_train = scaler.fit_transform(self.X_train)
        self.X_val = scaler.transform(self.X_val)
        self.X_test = scaler.transform(self.X_test)
        self._is_scaled = True

    def _build_dataset(self, X, y):
        """Wrap ``X``/``y`` in the dataset class that matches ``self.model_type``."""
        if self.model_type == "mlp":
            return FnnDataset(X, y)
        if self.model_type in ["cnn", "cnn_lstm", "cnn_gru"]:
            return CnnDataset(X, y)
        if self.model_type in ["lstm", "gru"]:
            return SlidingWindowDataset(
                X, y, seq_length=self.dataset_kwargs["sequence_length"], stride=self.dataset_kwargs["stride"])
        raise ValueError(f"Unknown model_type: {self.model_type}")

    def _get_dataloader(self, split, sampler=None):
        """Build the DataLoader for ``split`` ("train", "val" or "test").

        When ``sampler`` is given it decides the sampling order; otherwise only
        the training split is shuffled.

        Raises:
            ValueError: If ``split`` or ``self.model_type`` is not recognised.
        """
        if split == "train":
            X, y = self.X_train, self.y_train
            shuffle = True
        elif split == "val":
            X, y = self.X_val, self.y_val
            shuffle = False
        elif split == "test":
            X, y = self.X_test, self.y_test
            shuffle = False
        else:
            raise ValueError(f"Unknown split: {split}")

        dataset = self._build_dataset(X, y)

        if sampler is not None:
            return DataLoader(dataset, batch_size=self.batch_size, sampler=sampler)
        return DataLoader(dataset, batch_size=self.batch_size, shuffle=shuffle, num_workers=0, drop_last=False)

    def _sampler_labels(self):
        """Return one label per training *sample* as served by the dataset.

        For the sliding-window models a sample is a window labelled by its
        last row, so there are fewer samples than rows in ``y_train``; the
        sampler must be sized to the windows, not to the raw rows.
        """
        labels = np.asarray(self.y_train)
        if self.model_type in ["lstm", "gru"]:
            seq_length = self.dataset_kwargs["sequence_length"]
            stride = self.dataset_kwargs["stride"]
            # Window i is labelled by row i * stride + seq_length - 1.
            labels = labels[seq_length - 1::stride]
        return labels

    def train_dataloader(self):
        """Return the training loader, optionally oversampling rare classes."""
        if not self.oversample:
            return self._get_dataloader("train")

        labels = self._sampler_labels()
        class_counts = np.bincount(labels)
        # Label ids that never occur get count 0; clamp to 1 to avoid a
        # divide-by-zero warning (their weight is never looked up anyway).
        inv_freq = 1.0 / np.maximum(class_counts, 1)
        # The 0.5 exponent softens inverse-frequency weighting.
        alpha = 0.5
        class_weights = inv_freq ** alpha

        sample_weights = class_weights[labels]
        sampler = WeightedRandomSampler(
            weights=sample_weights,
            num_samples=len(sample_weights),
            replacement=True,
        )
        return self._get_dataloader("train", sampler)

    def val_dataloader(self):
        """Return the (unshuffled) validation loader."""
        return self._get_dataloader("val")

    def test_dataloader(self):
        """Return the (unshuffled) test loader."""
        return self._get_dataloader("test")


# Main Code

In [None]:
import pandas as pd
import time
import psutil
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score, roc_auc_score, confusion_matrix, log_loss

def main(base_cfg, models):
    """Train and test every model in ``models`` and dump per-model run metrics.

    For each model name this builds a ``LitDataModule`` and a Lightning model
    (via ``init_model``), fits and tests it, and writes a
    ``<model>_metrics.json`` file (wall-clock seconds for fit + test, RSS
    memory delta in MB, test loss) under ``/kaggle/working/temp``.

    Args:
        base_cfg: Configuration object; the ``logging``, ``training``,
            ``dataset`` and ``dataset_properties`` sections are read here.
        models: Iterable of model names; each must be a key of the
            module-level ``all_models_cfgs`` mapping.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    models_cfg_mapping = {model: all_models_cfgs[model] for model in models}
    selected_logger = base_cfg.logging.selected_type
    logging_cfg = base_cfg.logging[selected_logger]
    use_wandb = selected_logger == "wandb"

    os.makedirs(logging_cfg.save_dir, exist_ok=True)

    # Create the metrics folder up front so a missing directory cannot make
    # the JSON dump fail after training has already finished.
    metrics_dir = "/kaggle/working/temp"
    os.makedirs(metrics_dir, exist_ok=True)

    X_tr, y_tr, X_test, y_test, input_dim, labels_mapping = load_dfs(base_cfg)

    X_train, X_val, y_train, y_val = train_test_split(X_tr, y_tr, test_size=base_cfg.dataset_properties.val_size)

    run_dtime = time.strftime("%Y%m%d-%H%M%S")

    # Identical for every model, so built once outside the loop.
    dataset_kwargs = dict(
        sequence_length=base_cfg.training.sequence_length,
        stride=base_cfg.training.stride,
        using_masking=base_cfg.training.using_masking,
        masked_class=base_cfg.training.masked_class,
        num_workers=3,
        device=device.type
    )

    for model_name in models:
        model_cfg = models_cfg_mapping[model_name]

        # Flat hyper-parameter record handed to the experiment logger.
        config = {
            "model_name": model_name,
            "input_dim": input_dim,
            "run_dtime": run_dtime,
            "input_layer_norm": model_cfg.input_layer_norm
        }
        config.update(base_cfg.training.items())
        for layer in model_cfg.layers:
            for attribute_name, attribute_value in model_cfg[layer].items():
                config[f"{model_name}_{layer}_{attribute_name}"] = attribute_value

        data_module = LitDataModule(
            X_train=np.array(X_train),
            y_train=np.array(y_train),
            X_val=np.array(X_val),
            y_val=np.array(y_val),
            X_test=np.array(X_test),
            y_test=np.array(y_test),
            dataset=base_cfg.dataset,
            batch_size=base_cfg.training.batch_size,
            multi_class=base_cfg.training.multi_class,
            model_type=model_name,
            oversample=base_cfg.training.oversample,
            **dataset_kwargs)
        data_module.setup()

        if base_cfg.training.use_weighted_loss:
            weight_tensor = compute_class_weights(
                np.array(y_train),
                np.array(list(labels_mapping.keys())),
                version=base_cfg.training.weighted_loss_version,
                device=device)
        else:
            weight_tensor = None

        pl_model = init_model(base_cfg.training, model_cfg, input_dim, labels_mapping,
                              weight_tensor, selected_logger is None)

        if use_wandb:
            logger = WandbLogger(
                project=logging_cfg.project,
                name=model_name,
                config=config,
                save_dir=f"{logging_cfg.save_dir}/{model_name}",
            )
        else:
            logger = TensorBoardLogger(
                f"{logging_cfg.save_dir}/{time.strftime('%Y%m%d-%H%M%S')}/{model_name}")

        trainer = pl.Trainer(
            max_epochs=base_cfg.training.max_epochs,
            num_sanity_val_steps=0,
            log_every_n_steps=0,
            enable_checkpointing=False,
            logger=logger,
        )

        # Assume failure until the whole fit/test/report sequence completes.
        exit_code = 1
        try:
            # Start timing & memory tracking (covers both fit and test).
            start_time = time.time()
            process = psutil.Process(os.getpid())
            mem_before = process.memory_info().rss / 1024 / 1024  # MB

            trainer.fit(pl_model, datamodule=data_module)
            print(f"[INFO] Finished training model: {model_name}")
            results = trainer.test(pl_model, datamodule=data_module)
            # Guard against an empty result list instead of raising IndexError.
            test_loss = results[0].get("test_loss", None) if results else None
            print(f"[INFO] Test Loss for {model_name}: {test_loss}")

            # End timing & memory tracking.
            elapsed_time = time.time() - start_time
            mem_after = process.memory_info().rss / 1024 / 1024
            mem_used = mem_after - mem_before

            metrics = {
                "Model": model_name,
                "Time_Seconds": elapsed_time,
                "Memory_MB": mem_used,
                "test_loss": test_loss
            }

            print(metrics)

            json_path = f"{metrics_dir}/{model_name}_metrics.json"
            with open(json_path, "w") as f:
                json.dump(metrics, f, indent=4)

            exit_code = 0
        finally:
            # Always close the W&B run, even on failure, so the next model
            # starts a fresh run instead of logging into this one.
            if use_wandb:
                wandb.finish(exit_code=exit_code)
        

In [None]:
# Run the train/test pipeline for every model name listed in `models`.
main(base_cfg, models)

In [None]:
# file_path = "/kaggle/working/temp/cnn_results.json"
# if os.path.exists(file_path):
#     os.remove(file_path)
#     print(f"Deleted: {file_path}")
# else:
#     print("File does not exist:", file_path)
# model_name="cnn"
# elapsed_time="100"
# mem_used="100"
# test_loss="100"
# metrics = {
#     "Model": model_name,
#     "Time_Seconds": elapsed_time,
#     "Memory_MB": mem_used,
#     "test_loss":test_loss
# }

# print(metrics)

# # Save metrics to JSON
# json_path = f"/kaggle/working/temp/{model_name}_metrics.json"
# with open(json_path, "w") as f:
#     json.dump(metrics, f, indent=4) 

In [None]:
import os
import json
import pandas as pd
# Lookup from class id (as a string) to the class name used in the reports.
# The position of each name in the selected list is its class id.
labels_map = {
    str(class_id): class_name
    for class_id, class_name in enumerate(
        [
            "Benign",
            "backdoor",
            "ddos",
            "dos",
            "injection",
            "mitm",
            "password",
            "ransomware",
            "scanning",
            "xss",
        ]
        if dataset_name == "cic_ton_iot"
        else [
            "BENIGN",
            "Bot",
            "DDoS",
            "DoS GoldenEye",
            "DoS Hulk",
            "DoS Slowhttptest",
            "DoS slowloris",
            "FTP-Patator",
            "Heartbleed",
            "Infiltration",
            "PortScan",
            "SSH-Patator",
            # The \ufffd replacement character is kept verbatim in these names.
            "Web Attack \ufffd Brute Force",
            "Web Attack \ufffd Sql Injection",
            "Web Attack \ufffd XSS",
        ]
    )
}
def json_to_csv_per_file(json_folder, output_folder, dataset_name=dataset_name):
    """Convert every ``*results.json`` in ``json_folder`` into CSV reports.

    For each results file the matching ``<model>_metrics.json`` (same folder)
    is read as well, and a CSV with a per-class section and a global section
    is written to ``output_folder``. Two aggregate CSVs covering all models
    (global benchmark metrics and macro averages) are also written to
    ``output_folder``.

    Class ids found in the results are translated to class names through the
    module-level ``labels_map``.

    Args:
        json_folder: Folder holding the ``*_results.json`` / ``*_metrics.json`` files.
        output_folder: Folder that receives every CSV; created if missing.
        dataset_name: ``"cic_ton_iot"`` selects the TON_IoT file names; any
            other value selects the CIC-IDS-2017 file names.

    Raises:
        KeyError: If a results file lacks the expected report entries.
        FileNotFoundError: If a results file has no matching metrics file.
    """
    os.makedirs(output_folder, exist_ok=True)

    benchmark_rows = []
    macro_rows = []

    # Sorted so the row order of the aggregate CSVs is reproducible.
    for filename in sorted(os.listdir(json_folder)):
        if not filename.endswith("results.json"):
            continue

        with open(os.path.join(json_folder, filename), "r") as f:
            data = json.load(f)

        model_stem = filename.replace("_results.json", "")
        metric_path = os.path.join(json_folder, f"{model_stem}_metrics.json")

        report = data["classification_report"]
        weighted_avg = report["weighted avg"]
        macro_avg = report["macro avg"]
        fpr_fnr = data.get("results_fpr_fnr") or {}

        # --- Per-class metrics ---
        class_rows = []
        fpr_values = []
        fnr_values = []
        for cls, rates in (fpr_fnr.get("per_class") or {}).items():
            attack = labels_map.get(cls, cls)
            # Fall back to the raw class id when the report is keyed by id.
            class_report = report[attack] if attack in report else report[cls]
            fpr = rates.get("FPR")
            fnr = rates.get("FNR")
            class_rows.append({
                "ATTACK TYPE": attack,
                "precision": class_report.get("precision"),
                "recall": class_report.get("recall"),
                "f1_score": class_report.get("f1-score"),
                "support": class_report.get("support"),
                "FPR": fpr,
                "FNR": fnr,
            })
            if fpr is not None:
                fpr_values.append(fpr)
            if fnr is not None:
                fnr_values.append(fnr)

        # Report "no value" rather than 0 (or a ZeroDivisionError) when no
        # per-class rates are available.
        macro_fpr = sum(fpr_values) / len(fpr_values) if fpr_values else None
        macro_fnr = sum(fnr_values) / len(fnr_values) if fnr_values else None

        with open(metric_path, "r") as f:
            device_data = json.load(f)
        memory_used = device_data["Memory_MB"]
        time_consumed = device_data["Time_Seconds"]
        test_loss = device_data["test_loss"]
        model_label = device_data["Model"]

        # --- Global / test metrics ---
        global_rates = fpr_fnr.get("global") or {}
        global_metrics = {
            "test_accuracy": report["accuracy"],
            "test_weighted_f1": weighted_avg["f1-score"],
            "test_loss": test_loss,
            "recall": weighted_avg["recall"],
            "precision": weighted_avg["precision"],
            "test_fpr": data.get("test_fpr"),
            "test_fnr": data.get("test_fnr"),
            "test_auc": data.get("test_auc"),
            "global_FPR": global_rates.get("FPR"),
            "global_FNR": global_rates.get("FNR"),
            "time_seconds": time_consumed,
            "memory_mb": memory_used,
        }
        # The benchmark row is the global row with the model name in front.
        benchmark_rows.append({"model_name": model_label, **global_metrics})

        macro_rows.append({
            "model": model_label,
            "F1": macro_avg["f1-score"],
            "Precision": macro_avg["precision"],
            "Recall": macro_avg["recall"],
            "Support": macro_avg["support"],
            "FNR": macro_fnr,
            "FPR": macro_fpr,
        })

        # --- Save the per-model CSV: two tables separated by marker lines ---
        csv_output = os.path.join(output_folder, filename.replace(".json", ".csv"))
        with open(csv_output, "w") as f:
            f.write("### Per-class Metrics\n")
        pd.DataFrame(class_rows).to_csv(csv_output, mode="a", index=False)
        with open(csv_output, "a") as f:
            f.write("\n### Global Metrics\n")
        pd.DataFrame([global_metrics]).to_csv(csv_output, mode="a", index=False)

        print(f"✅ CSV saved as {csv_output}")

    # --- Aggregate CSVs across all models ---
    if dataset_name == "cic_ton_iot":
        benchmark_name = "ML_BENCHMARKING_CIC_TON_IOT.csv"
        macro_name = "macro_results_CIC_TON_IOT.csv"
    else:
        benchmark_name = "ML_BENCHMARKING_CIC_IDS_2017.csv"
        macro_name = "macro_results_CIC_IDS_2017.csv"

    for rows, csv_name in ((benchmark_rows, benchmark_name), (macro_rows, macro_name)):
        csv_path = os.path.join(output_folder, csv_name)
        pd.DataFrame(rows).to_csv(csv_path, index=False)
        print(f"✅ CSV saved as {csv_path}")

# Convert the JSON files found in the working temp folder into CSV reports;
# the CSV output folder is a sub-folder of the JSON input folder.
json_folder = "/kaggle/working/temp/"
output_folder = "/kaggle/working/temp/csv_results"
json_to_csv_per_file(json_folder, output_folder,dataset_name=dataset_name)


In [None]:
# Show which dataset the notebook is currently configured for.
print(dataset_name)

In [None]:
# List all installed packages with versions
!pip list


In [None]:
# # Save full environment snapshot
# !pip list --format=freeze > /kaggle/working/environment_snapshot.txt
# print("✅ Environment snapshot saved to /kaggle/working/environment_snapshot.txt")
