In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset,TensorDataset, WeightedRandomSampler, RandomSampler
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score
import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
import wandb
from omegaconf import OmegaConf
import os
import warnings
from kaggle_secrets import UserSecretsClient
import time
import psutil
from sklearn.metrics import roc_auc_score, precision_score, recall_score
import networkx as nx
import igraph as ig
import json
# change 1.02
# import /kaggle/input/centrality_network/pytorch/default/1 as network
import sys
sys.path.append("/kaggle/input/centrality_network/pytorch/default/1")

from network_features import separate_graph, cal_betweenness_centrality, cal_k_core, cal_k_truss
from CommCentralityCode import comm_centreality
from modularity_vitality import modularity_vitality

sys.path.append("/kaggle/input/githubrepofiles/pytorch/default/1")
from src.dataset.dataset_info import datasets


In [2]:
# Dataset selector: keep exactly one `name` assignment active. The value is used
# as the key into `datasets` below and is read again by later cells.
# name = "cic_ton_iot_5_percent"
name = "cic_ton_iot"
# name = "cic_ids_2017_5_percent"
# name = "cic_ids_2017"
# Switch read by add_centralities():
#   True  -> centrality features are NOT used (the DataFrame is returned unchanged)
#   False -> centrality features are computed and added to the DataFrame
NO_NODE_FEATURE=True

# Descriptor of the selected dataset, looked up in the imported `datasets` mapping.
dataset = datasets[name]

In [3]:

# change 1.04
def add_centralities(df, new_path, graph_path, dataset, cn_measures, network_features):
    """Add per-flow centrality features of the source/destination IP graph to ``df``.

    When the notebook-level flag ``NO_NODE_FEATURE`` is true the function prints a
    notice and returns ``df`` untouched (nothing is written to disk).

    Otherwise a directed graph is built from the source/destination IP columns,
    Infomap communities are detected with igraph, every measure listed in
    ``cn_measures`` is stored as a node attribute, and each name in
    ``network_features`` becomes a new column of ``df``. The augmented frame is
    written to ``new_path`` as parquet.

    Args:
        df: Flow table containing the columns named by ``dataset.src_ip_col`` and
            ``dataset.dst_ip_col``. It is modified in place (columns are added).
        new_path: Destination of the parquet file holding the augmented frame.
        graph_path: Currently unused (the graph export is disabled); kept so the
            signature stays stable for callers.
        dataset: Object exposing ``src_ip_col`` and ``dst_ip_col``.
        cn_measures: Names of the node measures to compute. Names that are not
            in the table below are not computed and resolve to -1 for every flow.
        network_features: Column names of the form ``"src_<measure>"`` or
            ``"dst_<measure>"``. Names with any other 3-character prefix are ignored.

    Returns:
        The same ``df`` object, with the requested feature columns added.

    Raises:
        KeyError: If a ``src_``/``dst_`` feature refers to a measure that is not
            listed in ``cn_measures``.
    """
    if NO_NODE_FEATURE:
        print("NO node features added")
        return df

    G = nx.from_pandas_edgelist(
        df, source=dataset.src_ip_col, target=dataset.dst_ip_col, create_using=nx.DiGraph())
    G.remove_nodes_from(list(nx.isolates(G)))
    for node in G.nodes():
        G.nodes[node]['label'] = node

    # igraph copy of the same graph; the 'label' vertex attribute carries the
    # networkx node identity so igraph results can be mapped back.
    G1 = ig.Graph.from_networkx(G)
    G1.vs['label'] = [G.nodes[node]['label'] for node in G.nodes()]

    part = G1.community_infomap()
    communities = [[G1.vs[node_index]['label'] for node_index in com] for com in part]
    community_labels = {
        node: i for i, community in enumerate(communities) for node in community
    }
    nx.set_node_attributes(G, community_labels, "new_community")

    # presumably the within-community and between-community edge subgraphs —
    # TODO confirm against network_features.separate_graph
    intra_graph, inter_graph = separate_graph(G, communities)

    # Measure name -> zero-argument callable. Callables are only invoked for the
    # requested measures, and the dict order fixes the computation order.
    calculators = {
        "betweenness": lambda: cal_betweenness_centrality(G),
        "local_betweenness": lambda: cal_betweenness_centrality(intra_graph),
        "global_betweenness": lambda: cal_betweenness_centrality(inter_graph),
        "degree": lambda: nx.degree_centrality(G),
        "local_degree": lambda: nx.degree_centrality(intra_graph),
        "global_degree": lambda: nx.degree_centrality(inter_graph),
        "eigenvector": lambda: nx.eigenvector_centrality(G, max_iter=600),
        "local_eigenvector": lambda: nx.eigenvector_centrality(intra_graph),
        "global_eigenvector": lambda: nx.eigenvector_centrality(inter_graph),
        "closeness": lambda: nx.closeness_centrality(G),
        "local_closeness": lambda: nx.closeness_centrality(intra_graph),
        "global_closeness": lambda: nx.closeness_centrality(inter_graph),
        "pagerank": lambda: nx.pagerank(G, alpha=0.85),
        "local_pagerank": lambda: nx.pagerank(intra_graph, alpha=0.85),
        "global_pagerank": lambda: nx.pagerank(inter_graph, alpha=0.85),
        "k_core": lambda: cal_k_core(G),
        "k_truss": lambda: cal_k_truss(G),
        "Comm": lambda: comm_centreality(G, community_labels),
        "mv": lambda: modularity_vitality(G1, part),
    }
    for measure, compute in calculators.items():
        if measure in cn_measures:
            nx.set_node_attributes(G, compute(), measure)
            print(f"calculated {measure}")

    # nx.write_gexf(G, graph_path)

    features_dicts = {}
    for measure in cn_measures:
        features_dicts[measure] = nx.get_node_attributes(G, measure)
        print(f"==>> features_dicts: {measure , len(features_dicts[measure])}")

    for feature in network_features:
        prefix, measure = feature[:3], feature[4:]
        if prefix == "src":
            ip_col = dataset.src_ip_col
        elif prefix == "dst":
            ip_col = dataset.dst_ip_col
        else:
            continue
        lookup = features_dicts[measure]
        # Element-wise lookup on the IP column only; a row-wise apply(axis=1)
        # would build a Series per flow just to read one cell. IPs without a
        # value for the measure get -1.
        df[feature] = df[ip_col].map(lambda ip, lookup=lookup: lookup.get(ip, -1))

    print(f"--------------------------  writting the DataFrame to {new_path} ----------------------")
    df.to_parquet(new_path)
    print(f"--------------------------DataFrame written to {new_path} --------------------------")
    return df

In [4]:
# Silence all Python warnings from here on (process-wide filter).
warnings.filterwarnings('ignore')

# Output locations: the centrality-augmented table (also used as a cache by the
# data module) and the graph export path passed to add_centralities().
new_path = os.path.join("/kaggle/working/",f"{name}_with_centralities.parquet")
graph_path = os.path.join("/kaggle/working/",f"{name}_graph.gpickle")

# Node measures to compute, and the per-flow columns derived from them
# (one "src_" and one "dst_" column per measure, in that order).
# cn_measures = ["degree", "betweenness", "closeness", "eigenvector"]
cn_measures = ["betweenness", "degree", "pagerank", "closeness", "k_truss"]
network_features = [
    f"{side}_{measure}" for measure in cn_measures for side in ("src", "dst")
]


def _build_config(project, tags, raw, max_epochs):
    """Build the run configuration; only the arguments differ between datasets."""
    return OmegaConf.create({
        "wandb": {
            "project": project,
            "entity": "mohammad-fleity-lebanese-university",
            "tags": tags,
            "notes": "Optimized GRU for network intrusion detection",
        },
        "model": {
            "hidden_size": 128,          # GRU hidden width
            "num_layers": 2,             # stacked GRU layers
            "dropout": 0.4,
            "dense_units": [128, 64],    # widths of the two dense layers
            "learning_rate": 0.0001,
            "weight_decay": 1e-4
        },
        "training": {
            "sequence_length": 5,        # flows per input window
            "batch_size": 256,
            "max_epochs": max_epochs,
            "early_stopping_patience": 7,
            "oversample": True,          # class-balanced sampling of the training set
            "gpus": 1 if torch.cuda.is_available() else 0,
            "train_size": 0.7,
            "val_size": 0.15             # 70/15/15 split
        },
        "data": {
            "raw": raw,
            "num_workers": 4
        }
    })


# Per-dataset settings, keyed by the `name` chosen in the selector cell.
_DATASET_SETTINGS = {
    "cic_ids_2017": {
        "project": "DL-NIDS-2--cic-ids-2017",
        "tags": ["GRU", "CIC-IDS-2017", "PyTorch"],
        "raw": "cic_ids_2017.parquet",
        "max_epochs": 15,
        "dataset_name": "CIC_IDS_2017",
    },
    "cic_ton_iot": {
        "project": "DL-NIDS-2--cic-ton-iot",
        "tags": ["GRU", "CIC-TON-IOT", "PyTorch"],
        "raw": "cic_ton_iot.parquet",
        "max_epochs": 15,
        "dataset_name": "CIC_TON_IOT",
    },
    # THIS IS JUST FOR TESTING THE FUNCTIONALITIES FASTER (single epoch).
    # NOTE(review): project and tags point at the TON-IoT project although the
    # data is the CIC-IDS-2017 sample — looks like a copy-paste leftover; confirm.
    "cic_ids_2017_5_percent": {
        "project": "DL-NIDS-2--cic-ton-iot",
        "tags": ["GRU", "CIC-TON-IOT", "PyTorch"],
        "raw": "cic_ids_2017_5_percent.parquet",
        "max_epochs": 1,
        "dataset_name": "cic_ids_2017_5_percent",
    },
}

# Fail here instead of leaving `config` / `dataset_name` undefined (or stale from
# an earlier execution of this cell) when an unsupported name is selected.
if name not in _DATASET_SETTINGS:
    raise ValueError(
        f"No configuration defined for dataset {name!r}; "
        f"expected one of {sorted(_DATASET_SETTINGS)}"
    )

_settings = dict(_DATASET_SETTINGS[name])
dataset_name = _settings.pop("dataset_name")
config = _build_config(**_settings)


In [5]:
def calculate_fpr_fnr_with_global(cm):
    """
    Derive false-positive and false-negative rates from a multi-class confusion matrix.

    Each class is scored one-vs-rest: its row supplies the false negatives and its
    column the false positives. The global rates pool the one-vs-rest counts of
    all classes before dividing.

    Parameters:
        cm (numpy.ndarray): Confusion matrix of shape (num_classes, num_classes).

    Returns:
        dict: ``{"per_class": {class_idx: {"FPR": ..., "FNR": ...}},
        "global": {"FPR": ..., "FNR": ...}}``. A rate is ``None`` whenever its
        denominator is zero.
    """
    def _ratio(numerator, denominator):
        # Undefined rates are reported as None rather than dividing by zero.
        return numerator / denominator if denominator != 0 else None

    grand_total = np.sum(cm)
    per_class = {}
    pooled_tp = pooled_fp = pooled_fn = pooled_tn = 0

    for k in range(cm.shape[0]):
        hits = cm[k, k]
        misses = np.sum(cm[k, :]) - hits          # row k, off-diagonal
        false_alarms = np.sum(cm[:, k]) - hits    # column k, off-diagonal
        rejections = grand_total - (hits + false_alarms + misses)

        per_class[k] = {
            "FPR": _ratio(false_alarms, false_alarms + rejections),
            "FNR": _ratio(misses, hits + misses),
        }

        pooled_tp += hits
        pooled_fp += false_alarms
        pooled_fn += misses
        pooled_tn += rejections

    return {
        "per_class": per_class,
        "global": {
            "FPR": _ratio(pooled_fp, pooled_fp + pooled_tn),
            "FNR": _ratio(pooled_fn, pooled_fn + pooled_tp),
        },
    }



In [6]:

class NIDSDataModule(pl.LightningDataModule):
    """Loads, cleans, splits and scales the flow table and serves it as sliding windows.

    Besides ``config`` the class reads notebook-level globals defined in other
    cells: ``name``, ``dataset_name``, ``new_path``, ``graph_path``, ``dataset``,
    ``cn_measures``, ``network_features``, ``add_centralities`` and
    ``TimeSeriesDataset``.
    """

    # Identifier / label columns that must never reach the model as features.
    _NON_FEATURE_COLS = ['Label', 'Timestamp', 'Flow ID', 'Src IP',
                         'Src Port', 'Attack', 'Dst IP', 'Dst Port', 'Protocol']

    # Input directory of the raw parquet file for each supported dataset_name.
    _RAW_DIRS = {
        "CIC_IDS_2017": '/kaggle/input/cic-ids-2017-parquet',
        "CIC_TON_IOT": '/kaggle/input/cic-ton-iot-parquet',
        "cic_ids_2017_5_percent": '/kaggle/input/cic-ids-2017-5-percent',
    }

    def __init__(self, config):
        """Copy the loader/window settings out of ``config``."""
        super().__init__()
        self.config = config
        self.batch_size = config.training.batch_size
        self.sequence_length = config.training.sequence_length
        self.num_workers = config.data.num_workers
        self.oversample = config.training.oversample
        self.non_numeric_cols=[]
        self.scaler=None
        self.alpha = 0.5

    def prepare_data(self):
        """Load the flow table, encode labels, split 70/15/15 and scale the features.

        If ``new_path`` does not exist the raw parquet is read, cleaned (inf/NaN
        rows and duplicates dropped) and passed through ``add_centralities``;
        otherwise the previously written table at ``new_path`` is loaded.

        The label encoder, ``classes`` and ``non_numeric_cols`` are established on
        BOTH paths. Previously they were only set when the raw file was processed,
        so a fresh kernel that found an existing cache had no ``classes`` and tried
        to scale string columns.

        Sets ``X_train/y_train``, ``X_val/y_val`` and ``X_test/y_test``.

        Raises:
            ValueError: If ``dataset_name`` has no known raw-data directory.
        """
        print(f"--------------------- {name}-parquet -----------------------")
        cached = os.path.exists(new_path)
        if cached:
            print("--------------------- Centralities Extractions  -----------------------")
            df = pd.read_parquet(new_path)
        else:
            print("--------------------- Centralities Calculations  -----------------------")
            if dataset_name not in self._RAW_DIRS:
                raise ValueError(
                    f"Unknown dataset_name {dataset_name!r}; "
                    f"expected one of {sorted(self._RAW_DIRS)}"
                )
            df = pd.read_parquet(
                os.path.join(self._RAW_DIRS[dataset_name], self.config.data.raw))

            print("Clean data")
            df.replace([np.inf, -np.inf], np.nan, inplace=True)
            df.dropna(inplace=True)
            df.drop_duplicates(inplace=True)

            print("Reset index after cleaning")
            df = df.reset_index(drop=True)

        print("Identify non-numeric columns")
        self.non_numeric_cols = [col for col in self._NON_FEATURE_COLS if col in df.columns]

        print("Encode labels")
        # LabelEncoder orders classes by sorted unique value, so re-fitting on a
        # cached table reproduces the codes already stored in its Label_Num column.
        self.label_encoder = LabelEncoder()
        df['Label_Num'] = self.label_encoder.fit_transform(df['Attack'])
        self.classes = self.label_encoder.classes_

        if not cached:
            print("--------------------- Centralities Calculations starting-----------------------")
            df = add_centralities(
                df= df,
                new_path=new_path,
                graph_path=graph_path,
                dataset=dataset,
                cn_measures=cn_measures,
                network_features=network_features
            )

        # CHANGES TO REMOVE use only for test with 5_perecent
        if dataset_name=="cic_ids_2017_5_percent":
            multiplier = 5  # change as you wish

            target_substring = "Sql Injection"
            target_rows = df[df['Attack'].str.contains(target_substring, na=False)]

            if target_rows.empty:
                print(f"Warning: no rows found matching '{target_substring}' in train set.")
            else:
                # Append (multiplier - 1) extra copies so the class is present
                # `multiplier` times in total.
                duplicated_block = pd.concat([target_rows] * (multiplier - 1), ignore_index=True)
                df = pd.concat([df, duplicated_block], ignore_index=True)
                print(f"Duplicated target class rows by factor {multiplier}. New count:",
                  df['Attack'].str.contains(target_substring, na=False).sum())

        print("\n===== Class Distribution =====")
        counts = df["Attack"].value_counts()
        print(counts)

        print("Initialize scaler")
        self.scaler = StandardScaler()
        print("Train/Val/Test split")
        train_df, test_df = train_test_split(
            df,
            test_size=1 - self.config.training.train_size,
            random_state=42,
            stratify=df['Label_Num']
        )
        # The held-out part is halved into validation and test.
        val_df, test_df = train_test_split(
            test_df,
            test_size=0.5,
            random_state=42,
            stratify=test_df['Label_Num']
        )
        # Reset indices after splitting
        train_df = train_df.reset_index(drop=True)
        val_df = val_df.reset_index(drop=True)
        test_df = test_df.reset_index(drop=True)

        # Use `fit=True` only for training data
        self.X_train, self.y_train = self._prepare_features(train_df, fit=True)
        self.X_val, self.y_val = self._prepare_features(val_df, fit=False)
        self.X_test, self.y_test = self._prepare_features(test_df, fit=False)

    def _prepare_features(self, df, fit=False):
        """Return ``(X, y)``: scaled feature matrix and the ``Label_Num`` column.

        ``fit=True`` fits ``self.scaler`` on ``df`` before transforming; otherwise
        the already fitted scaler is applied.
        """
        X = df.drop(['Label_Num'] + self.non_numeric_cols, axis=1)
        y = df['Label_Num']
        if fit:
            X = self.scaler.fit_transform(X)
        else:
            X = self.scaler.transform(X)
        return X, y

    def create_sequences(self, X, y):
        """Materialise all windows of ``sequence_length`` rows as arrays.

        Window ``i`` covers rows ``i .. i+sequence_length-1`` and is labelled with
        the label of its last row. Not called by this class; the datasets built in
        ``setup`` create windows lazily instead.
        """
        sequences = []
        labels = []
        y_values = y.values if hasattr(y, 'values') else y  # Convert to numpy array if pandas Series
        for i in range(len(X) - self.sequence_length):
            seq = X[i:i+self.sequence_length]
            sequences.append(seq)
            labels.append(y_values[i+self.sequence_length-1])
        return np.array(sequences), np.array(labels)

    def _make_dataset(self, X, y):
        """Wrap one split as a TimeSeriesDataset of float features and integer labels."""
        return TimeSeriesDataset(
            data=torch.FloatTensor(X),
            sequence_length=self.sequence_length,
            # np.asarray drops the pandas index so labels are taken by position.
            target_idx=torch.LongTensor(np.asarray(y))
        )

    def setup(self, stage=None):
        """Build the train/val/test datasets from the arrays made by ``prepare_data``.

        The features are already scaled at this point, so the fitted
        ``self.scaler`` is left untouched (it used to be replaced here by a fresh,
        unfitted instance, which made it unusable afterwards).
        """
        self.train_dataset = self._make_dataset(self.X_train, self.y_train)
        self.val_dataset = self._make_dataset(self.X_val, self.y_val)
        self.test_dataset = self._make_dataset(self.X_test, self.y_test)

    def train_dataloader(self):
        """Training loader; with ``oversample`` each sample is drawn with
        probability inversely proportional to the size of its class."""
        if self.oversample:
            labels = np.asarray(self.y_train)
            class_counts = np.bincount(labels)
            weights = 1. / class_counts[labels]
            sampler = WeightedRandomSampler(weights, len(weights), replacement=True)
        else:
            sampler = RandomSampler(self.train_dataset)

        return DataLoader(
            self.train_dataset,
            batch_size=self.batch_size,
            sampler=sampler,
            num_workers=self.num_workers,
            # DataLoader rejects persistent_workers=True when there are no workers.
            persistent_workers=self.num_workers > 0,
            pin_memory=True
        )

    def val_dataloader(self):
        """Validation loader, in dataset order."""
        return DataLoader(
            self.val_dataset,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=self.num_workers,
            pin_memory=True
            # pin_memory=False # better for tpu
        )

    def test_dataloader(self):
        """Test loader, in dataset order."""
        return DataLoader(
            self.test_dataset,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=self.num_workers,
            pin_memory=True
            # pin_memory=False # TPU
        )

In [7]:
class TimeSeriesDataset(Dataset):
    """Sliding-window view over a 2-D tensor; windows are created on the fly.

    Item ``i`` is the window of ``sequence_length`` rows that ENDS at row ``i``.
    Windows near the start of the data are left-padded with zero rows, so every
    row of ``data`` yields exactly one item.
    """

    def __init__(self, data, sequence_length, target_idx=None):
        """
        Args:
            data: Input data tensor of shape (n_samples, n_features)
            sequence_length: Length of sequences to create
            target_idx: Optional tensor holding one target per row of ``data``.
                If None, every item gets the placeholder target -1.
        """
        self.data = data
        self.sequence_length = sequence_length
        self.target_idx = target_idx

    def __len__(self):
        """One item per row of ``data``."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(sequence, target)`` for the window ending at row ``idx``.

        Negative indices count from the end.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        n_samples = len(self.data)
        # Without this, a negative idx produced an empty slice and therefore an
        # all-zero window paired with a real target.
        if idx < 0:
            idx += n_samples
        if not 0 <= idx < n_samples:
            raise IndexError(f"index {idx} is out of range for {n_samples} samples")

        # Calculate start index for the sequence
        start_idx = max(0, idx - self.sequence_length + 1)

        # Get the sequence
        sequence = self.data[start_idx:idx + 1]

        # Ensure sequence is 2D [seq_len, features]
        if sequence.dim() == 1:
            sequence = sequence.unsqueeze(0)  # Add sequence length dimension if missing

        # Pad the beginning if needed
        missing = self.sequence_length - len(sequence)
        if missing > 0:
            # new_zeros inherits dtype and device from the data, so padded and
            # unpadded windows always have the same dtype and can be batched.
            padding = sequence.new_zeros(missing, sequence.shape[1])
            sequence = torch.cat([padding, sequence], dim=0)

        # Get target
        target = self.target_idx[idx] if self.target_idx is not None else -1

        return sequence, target



class GRUModel(pl.LightningModule):
    """GRU sequence classifier: GRU -> LayerNorm -> two dense blocks -> linear head.

    Only the GRU output at the last time step is classified. The loss is
    cross-entropy with label smoothing 0.1.
    """

    def __init__(self, input_size, num_classes, config):
        """Build the layers from ``config.model``; constructor arguments are
        stored through ``save_hyperparameters``."""
        super().__init__()
        self.save_hyperparameters()

        # Per-batch statistics, buffered until the matching epoch-end hook
        self.train_outputs = []
        self.val_outputs = []

        model_cfg = config.model
        is_stacked = model_cfg.num_layers > 1

        self.gru = nn.GRU(
            input_size=input_size,
            hidden_size=model_cfg.hidden_size,
            num_layers=model_cfg.num_layers,
            batch_first=True,
            # dropout is only passed to the GRU when there is more than one layer
            dropout=model_cfg.dropout if is_stacked else 0
        )

        self.gru_ln = nn.LayerNorm(model_cfg.hidden_size)

        # Two identical blocks: Linear -> LayerNorm -> ReLU -> Dropout
        blocks = []
        width_in = model_cfg.hidden_size
        for width_out in (model_cfg.dense_units[0], model_cfg.dense_units[1]):
            blocks.extend([
                nn.Linear(width_in, width_out),
                nn.LayerNorm(width_out),
                nn.ReLU(),
                nn.Dropout(model_cfg.dropout),
            ])
            width_in = width_out
        self.dense = nn.Sequential(*blocks)

        self.output = nn.Linear(width_in, num_classes)
        self.criterion = nn.CrossEntropyLoss(label_smoothing=0.1)

    def forward(self, x):
        """Return class logits for a batch-first batch of sequences."""
        hidden_states, _ = self.gru(x)
        last_step = self.gru_ln(hidden_states[:, -1, :])
        return self.output(self.dense(last_step))

    def _score_batch(self, batch):
        """Run the model on one batch; returns (loss, predictions, targets)."""
        inputs, targets = batch
        logits = self(inputs)
        return self.criterion(logits, targets), logits.argmax(dim=1), targets

    def training_step(self, batch, batch_idx):
        """Log step/epoch loss and accuracy and buffer the batch statistics."""
        loss, preds, targets = self._score_batch(batch)
        hits = preds == targets
        acc = hits.float().mean()

        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True)
        self.log('train_acc', acc, on_step=True, on_epoch=True, prog_bar=True)

        # Save for epoch-end
        self.train_outputs.append({
            'loss': loss.detach(),
            'correct': hits.sum().detach(),
            'total': len(targets)
        })

        return loss

    def on_train_epoch_end(self):
        """Log mean batch loss and accuracy (in percent) over the buffered batches."""
        print(f"✅ Finished Epoch {self.current_epoch+1}")
        buffered = self.train_outputs
        if not buffered:
            return

        avg_loss = torch.stack([entry['loss'] for entry in buffered]).mean()
        correct = sum(entry['correct'] for entry in buffered)
        total = sum(entry['total'] for entry in buffered)
        epoch_acc = correct / total

        self.log('train_epoch_loss', avg_loss, prog_bar=True)
        self.log('train_acc_epoch', epoch_acc*100, prog_bar=True)
        buffered.clear()

    def validation_step(self, batch, batch_idx):
        """Log batch loss/accuracy (accuracy as a fraction) and buffer the statistics."""
        loss, preds, targets = self._score_batch(batch)
        hits = preds == targets
        acc = hits.float().mean()

        self.log('val_loss', loss, prog_bar=True)
        self.log('val_acc', acc, prog_bar=True)

        # Save for epoch-end
        self.val_outputs.append({
            'val_loss': loss.detach(),
            'correct': hits.sum().detach(),
            'total': len(targets)
        })

        return loss

    def on_validation_epoch_end(self):
        """Log mean batch loss and accuracy in percent.

        NOTE(review): the keys 'val_loss' and 'val_acc' are the same ones logged
        in validation_step, where accuracy is a fraction rather than a percentage.
        """
        buffered = self.val_outputs
        if not buffered:
            return

        avg_loss = torch.stack([entry['val_loss'] for entry in buffered]).mean()
        correct = sum(entry['correct'] for entry in buffered)
        total = sum(entry['total'] for entry in buffered)
        epoch_acc = (correct / total)*100

        self.log('val_loss', avg_loss, prog_bar=True)
        self.log('val_acc', epoch_acc, prog_bar=True)
        buffered.clear()

    def test_step(self, batch, batch_idx):
        """Log test loss and accuracy (percent); return loss, predictions and targets."""
        loss, preds, targets = self._score_batch(batch)
        acc = (preds == targets).float().mean()

        self.log('test_loss', loss)
        self.log('test_acc', acc*100)

        return {'test_loss': loss, 'preds': preds, 'targets': targets}

    def configure_optimizers(self):
        """AdamW with the learning rate and weight decay from the stored config."""
        model_cfg = self.hparams.config.model
        return optim.AdamW(
            self.parameters(),
            lr=model_cfg.learning_rate,
            weight_decay=model_cfg.weight_decay
        )


def init_wandb():
    """Log in to Weights & Biases and open a run described by the global ``config``.

    The API key is read from the Kaggle secret "mohammad_wandb_secret".

    Returns:
        tuple: ``(wandb_logger, run)`` — a ``WandbLogger`` bound to the new run
        (``log_model='all'``) and the run object itself.
    """
    api_key = UserSecretsClient().get_secret("mohammad_wandb_secret")
    wandb.login(key=api_key)

    # input_size / num_classes are placeholders (None) at this point.
    run_config = {
        "input_size": None,
        "num_classes": None,
        "sequence_length": config.training.sequence_length,
        "model_config": dict(config.model),
        "training_config": dict(config.training),
    }
    wandb_cfg = config.wandb
    run = wandb.init(
        project=wandb_cfg.project,
        entity=wandb_cfg.entity,
        tags=wandb_cfg.tags,
        notes=wandb_cfg.notes,
        config=run_config,
    )

    return WandbLogger(experiment=run, log_model='all'), run

In [8]:
import threading, time, psutil

# NVML is optional: GPU memory is only monitored when pynvml can be imported and
# initialised. `except Exception` covers a missing package and NVML init errors
# without also swallowing KeyboardInterrupt / SystemExit like a bare `except:`.
try:
    import pynvml
    pynvml.nvmlInit()
    GPU_AVAILABLE = True
except Exception:
    GPU_AVAILABLE = False

def monitor_resources(process, peaks, interval=0.2):
    """Poll memory usage and keep the highest readings seen, in MiB.

    Intended as a thread target: polling continues while the current thread's
    ``running`` attribute is truthy (a missing attribute counts as true) and
    stops early if ``process`` no longer exists.

    Args:
        process: Object whose ``memory_info().rss`` gives the resident set size
            in bytes (a ``psutil.Process`` in this notebook).
        peaks: Dict updated in place; ``peaks["cpu"]`` and ``peaks["gpu"]`` are
            reset to 0.0 and then raised to the running maxima.
        interval: Seconds to sleep between two polls.
    """
    bytes_per_mib = 1024 ** 2
    for key in ("cpu", "gpu"):
        peaks[key] = 0.0

    # GPU readings come from device 0 and stay at 0.0 when NVML is unavailable.
    handle = pynvml.nvmlDeviceGetHandleByIndex(0) if GPU_AVAILABLE else None
    this_thread = threading.current_thread()

    while getattr(this_thread, "running", True):
        try:
            cpu_mem = process.memory_info().rss / bytes_per_mib
            if GPU_AVAILABLE:
                gpu_mem = pynvml.nvmlDeviceGetMemoryInfo(handle).used / bytes_per_mib
            else:
                gpu_mem = 0.0
            peaks["cpu"] = max(peaks["cpu"], cpu_mem)
            peaks["gpu"] = max(peaks["gpu"], gpu_mem)
        except psutil.NoSuchProcess:
            break
        time.sleep(interval)


In [9]:
# print(f"{not NO_NODE_FEATURE}_1")

In [None]:
def main():
    """Train and test the GRU classifier, compute metrics and write a CSV report.

    Steps:
      1. Build the data module and infer ``input_size`` / ``num_classes``.
      2. Train and test with a Lightning ``Trainer`` while a background thread
         records peak CPU/GPU memory.
      3. Re-run the test loader manually to collect predictions and
         probabilities, then compute accuracy, F1, recall, precision, AUC and
         false-positive / false-negative rates.
      4. Merge the per-class classification report with the scalar metrics and
         write them to ``/kaggle/working/``.

    Relies on notebook-level names: ``config``, ``NIDSDataModule``, ``GRUModel``,
    ``GPU_AVAILABLE``, ``monitor_resources``, ``dataset_name`` and
    ``NO_NODE_FEATURE``.
    """
    # wandb_logger, run = init_wandb()

    data_module = NIDSDataModule(config)
    data_module.prepare_data()
    data_module.setup()

    # The size of the last axis of one training batch is used as the input size.
    sample_x, _ = next(iter(data_module.train_dataloader()))
    input_size = sample_x.shape[2]
    num_classes = len(data_module.classes)

    # run.config.update({
    #     "input_size": input_size,
    #     "num_classes": num_classes
    # })

    model = GRUModel(input_size, num_classes, config)

    early_stopping = EarlyStopping(
        monitor='val_loss',
        patience=config.training.early_stopping_patience,
        mode='min'
    )

    checkpoint_callback = ModelCheckpoint(
        monitor='val_acc',
        mode='max',
        save_top_k=1,
        dirpath='checkpoints',
        filename='best_model'
    )

    trainer = pl.Trainer(
        precision=16,
        # logger=wandb_logger,
        max_epochs=config.training.max_epochs,
        callbacks=[early_stopping, checkpoint_callback],
        deterministic=True,
        gradient_clip_val=1.0,
        enable_progress_bar=True,
        log_every_n_steps=1000
    )

    print("------------- start training ---------------------")

    # Baseline memory before training, in MiB.
    process = psutil.Process(os.getpid())
    cpu_mem_before = process.memory_info().rss / (1024 ** 2)
    # Default to 0 so the "used" delta below is well defined without a GPU
    # (previously this name was unbound on CPU-only machines -> NameError).
    gpu_mem_before = 0.0
    if GPU_AVAILABLE:
        handle = pynvml.nvmlDeviceGetHandleByIndex(0)
        gpu_mem_before = pynvml.nvmlDeviceGetMemoryInfo(handle).used / (1024 ** 2)

    peaks = {}
    # daemon=True so a stray monitor can never keep the interpreter alive.
    monitor_thread = threading.Thread(
        target=monitor_resources, args=(process, peaks), daemon=True
    )
    monitor_thread.running = True
    monitor_thread.start()

    start_time = time.time()
    try:
        # Train model
        trainer.fit(model, datamodule=data_module)
        print("------------- end training ---------------------")
        print("------------- start testing ---------------------")
        # Test model
        test_results = trainer.test(model, datamodule=data_module)
        elapsed_time = time.time() - start_time
    finally:
        # Always stop the monitor, even when training/testing raises;
        # otherwise the polling loop would run forever in the kernel.
        monitor_thread.running = False
        monitor_thread.join()

    peak_cpu = peaks.get("cpu", 0.0)
    peak_gpu = peaks.get("gpu", 0.0)
    cpu_mem_used = peak_cpu - cpu_mem_before
    gpu_mem_used = peak_gpu - gpu_mem_before

    print("------------- end testing ---------------------")
    # Collect all predictions, targets and class probabilities for the final evaluation.
    test_loader = data_module.test_dataloader()
    all_preds = []
    all_targets = []
    all_probs = []
    criterion = torch.nn.CrossEntropyLoss()
    test_loss_sum = 0
    total_samples = 0

    model.eval()
    with torch.no_grad():
        for x, y in test_loader:
            y_hat = model(x)
            preds = torch.argmax(y_hat, dim=1)
            probs = torch.softmax(y_hat, dim=1).cpu().numpy()

            # CrossEntropyLoss returns the batch mean; weight it by batch size
            # so the final value is a per-sample average.
            loss = criterion(y_hat, y)
            test_loss_sum += loss.item() * x.size(0)
            total_samples += x.size(0)

            all_probs.extend(probs)
            all_preds.extend(preds.cpu().numpy())
            all_targets.extend(y.cpu().numpy())

    # metrics calculation
    print("------------- metric calculation ---------------------")
    test_acc = accuracy_score(all_targets, all_preds)
    test_f1 = f1_score(all_targets, all_preds, average='weighted')
    test_recall = recall_score(all_targets, all_preds, average='weighted')
    test_precision = precision_score(all_targets, all_preds, average='weighted')
    # Loss reported by trainer.test (if logged) vs. the manually averaged loss.
    test_loss_OLD = test_results[0]['test_loss'] if 'test_loss' in test_results[0] else None
    test_loss = test_loss_sum / total_samples
    print(test_loss_OLD)
    print(test_loss)

    all_probs = np.array(all_probs)

    try:
        if all_probs.shape[1] == 2:
            # Binary classification -> probability of the positive class
            auc_score = roc_auc_score(all_targets, all_probs[:, 1])
        else:
            # Multi-class classification
            auc_score = roc_auc_score(all_targets, all_probs, multi_class='ovr')
    except Exception as e:
        print(f"AUC computation failed: {e}")
        auc_score = None

    # False Positive / False Negative Rate derived from the confusion matrix
    # (rows = true class, columns = predicted class).
    cm = confusion_matrix(all_targets, all_preds)
    FP = cm.sum(axis=0) - np.diag(cm)
    FN = cm.sum(axis=1) - np.diag(cm)
    TP = np.diag(cm)
    TN = cm.sum() - (FP + FN + TP)
    # Global rates pool the counts over all classes.
    fpr = FP.sum() / (FP.sum() + TN.sum())
    fnr = FN.sum() / (FN.sum() + TP.sum())
    # --- Per-class FPR/FNR ---
    per_class_fpr = np.where((FP + TN) > 0, FP / (FP + TN), 0.0)
    per_class_fnr = np.where((FN + TP) > 0, FN / (FN + TP), 0.0)

    metrics_dict = {
        "Test Accuracy": test_acc,
        "F1 Score": test_f1,
        "Test Loss": test_loss,
        "Recall": test_recall,
        "Precision": test_precision,
        "AUC": auc_score,
        "False Positive Rate": fpr,
        "False Negative Rate": fnr,
        "Time Consumption (s)": elapsed_time,
        "CPU_Peak_MB": peak_cpu,
        "GPU_Peak_MB": peak_gpu,
        "cpu_mem_used": cpu_mem_used,
        "gpu_mem_used": gpu_mem_used,
    }
    # Print metrics
    print("------------- final evaluation metric ---------------------")
    for k, v in metrics_dict.items():
        print(f"{k}: {v}")

    class_names = [str(c) for c in data_module.label_encoder.classes_]
    print("--------------- class names --------------------")
    print(class_names)
    # Generate the classification report dict once; zero_division=0 yields the
    # same values as the default but without the undefined-metric warnings.
    report = classification_report(
        all_targets,
        all_preds,
        target_names=class_names,
        output_dict=True,
        zero_division=0
    )
    for i, cls in enumerate(class_names):
        report[cls]["FPR"] = per_class_fpr[i]
        report[cls]["FNR"] = per_class_fnr[i]

    # Also store the global rates. NOTE: the values placed in the "macro avg"
    # row are the pooled (count-summed) fpr/fnr computed above, not the
    # unweighted mean of the per-class rates.
    report["macro avg"]["FPR"] = fpr
    report["macro avg"]["FNR"] = fnr
    report["weighted avg"]["FPR"] = np.average(per_class_fpr, weights=cm.sum(axis=1))
    report["weighted avg"]["FNR"] = np.average(per_class_fnr, weights=cm.sum(axis=1))

    print("Classification Report:")
    print(classification_report(
        all_targets, all_preds, target_names=class_names, zero_division=0))

    print("Confusion Matrix:")
    print(cm)

    print("------------- classification report dict -------------")
    print(report)
    final_report = pd.DataFrame(report).transpose()
    metrics_df = pd.DataFrame([metrics_dict])
    merged = pd.concat([final_report, metrics_df], axis=0)

    merged.to_csv(os.path.join("/kaggle/working/",f"GRU_{dataset_name}__NODEFEATURES-{not NO_NODE_FEATURE}__report.csv"))
    # Create a wandb Table for the classification report
    # report_table = wandb.Table(columns=["Class", "Precision", "Recall", "F1-Score", "Support"])
    # for class_name in class_names:
    #     report_table.add_data(
    #         class_name,
    #         report[class_name]["precision"],
    #         report[class_name]["recall"],
    #         report[class_name]["f1-score"],
    #         report[class_name]["support"]
    #     )

    # # Add weighted averages
    # report_table.add_data(
    #     "Weighted Avg",
    #     report["weighted avg"]["precision"],
    #     report["weighted avg"]["recall"],
    #     report["weighted avg"]["f1-score"],
    #     report["weighted avg"]["support"]
    # )
    # wandb.log({"classification_report": report_table})

    # Finish wandb run
    # wandb.finish()
if __name__ == "__main__":
    main()

--------------------- cic_ids_2017-parquet -----------------------
--------------------- Centralities Calculations  -----------------------
Clean data
Reset index after cleaning
Identify non-numeric columns
Encode labels
--------------------- Centralities Calculations starting-----------------------
calculated betweenness
calculated degree
calculated closeness
calculated pagerank
calculated k_truss
==>> features_dicts: ('betweenness', 19129)
==>> features_dicts: ('degree', 19129)
==>> features_dicts: ('pagerank', 19129)
==>> features_dicts: ('closeness', 19129)
==>> features_dicts: ('k_truss', 19129)
--------------------------  writting the DataFrame to /kaggle/working/cic_ids_2017_with_centralities.parquet ----------------------
--------------------------DataFrame written to /kaggle/working/cic_ids_2017_with_centralities.parquet --------------------------

===== Class Distribution =====
Attack
BENIGN                        2271122
DoS Hulk                       230123
PortScan        

2025-10-27 09:04:38.946803: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1761555879.389046      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1761555879.515571      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

✅ Finished Epoch 1


Validation: |          | 0/? [00:00<?, ?it/s]

✅ Finished Epoch 2


Validation: |          | 0/? [00:00<?, ?it/s]

✅ Finished Epoch 3


Validation: |          | 0/? [00:00<?, ?it/s]

✅ Finished Epoch 4


Validation: |          | 0/? [00:00<?, ?it/s]

✅ Finished Epoch 5


Validation: |          | 0/? [00:00<?, ?it/s]

✅ Finished Epoch 6


In [None]:
print(new_path)
if not os.path.exists(new_path):
    print("it didn't exist yet")