In [None]:
from pathlib import Path
from typing import List, Tuple
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    confusion_matrix,
    ConfusionMatrixDisplay,
    roc_curve,
    auc,
    precision_recall_curve,
    PrecisionRecallDisplay,
    RocCurveDisplay,
)
from susi.SOMClassifier import SOMClassifier
import time
import psutil
import os

In [None]:
def som_cross_validate(df: pd.DataFrame, feature_cols: List[str], grid: Tuple[int, int] = (22, 22)) -> List[float]:
    """Cross-validate a supervised SOM over the folds pre-assigned in ``df["Fold"]``.

    For every distinct value of the ``Fold`` column the rows with that value are
    held out for validation and the remaining rows are used for training.
    Features are standardised per fold (the scaler is fitted on the training
    rows only), a ``SOMClassifier`` is trained, and its accuracy on the held-out
    rows is recorded.

    Side effects: prints per-fold progress, an accuracy summary and resource
    statistics, and writes one ``som_umatrix_fold_<k>.png`` per fold into the
    current working directory, where ``<k>`` is the 1-based position of the fold
    in sorted order.

    Args:
        df: Data containing ``feature_cols`` plus a ``Label`` and a ``Fold`` column.
        feature_cols: Names of the columns used as model inputs.
        grid: ``(rows, columns)`` of the SOM grid.

    Returns:
        One accuracy per fold, in sorted fold order.

    Raises:
        KeyError: If ``Fold``, ``Label`` or any feature column is missing.
        ValueError: If a fold leaves no rows to train on (only one distinct fold).
    """
    # Fail fast, before any timing or training, and name every missing column at once.
    required_cols = ["Fold", "Label", *feature_cols]
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        raise KeyError(f"Missing required column(s): {missing_cols}")

    accuracies: List[float] = []  # Accuracy per fold, in sorted fold order
    process = psutil.Process(os.getpid())  # Handle used to read this process's RAM

    # Fold labels are computed once; they are never assumed to be 0-based integers.
    folds = sorted(df["Fold"].unique())
    n_folds = len(folds)

    # Prime the CPU counter without blocking: the next interval=None call then
    # reports system-wide utilisation averaged since this point. The value
    # returned by this first call is meaningless and is deliberately discarded.
    psutil.cpu_percent(interval=None)
    overall_start_ram = process.memory_info().rss / 1024 / 1024
    # Start the clock last so that only the cross-validation loop is timed.
    overall_start_time = time.perf_counter()

    # Perform manual K-fold cross-validation
    for fold_number, fold in enumerate(folds, start=1):
        # Positional numbering keeps this banner, the PNG name and the summary
        # below consistent regardless of how the fold labels are encoded.
        print(f"\n── Fold {fold_number} / {n_folds} ────────────────")

        # Split data into training and validation sets
        is_val = df["Fold"] == fold
        train_df = df[~is_val]
        val_df = df[is_val]
        if train_df.empty:
            raise ValueError(
                "Cross-validation needs at least two distinct 'Fold' values; "
                f"holding out fold {fold!r} leaves no training rows."
            )

        # Normalize feature columns; statistics come from the training rows only
        scaler = StandardScaler()
        X_train = scaler.fit_transform(train_df[feature_cols])
        X_val = scaler.transform(val_df[feature_cols])
        y_train = train_df["Label"].values
        y_val = val_df["Label"].values

        # NOTE(review): the keyword names below are passed through unchanged from
        # the original code — confirm they match the installed susi version.
        som = SOMClassifier(
            n_rows=grid[0],
            n_columns=grid[1],
            neighborhood_function='gaussian',       # Use Gaussian neighborhood function
            learning_rate=0.5,                       # Learning rate for SOM map updates
            n_iter_unsupervised=10000,               # Unsupervised SOM training steps
            n_iter_supervised=2000,                  # Supervised learning phase iterations
            supervised_learning_rate=0.01            # Learning rate for label association
        )

        # Train the SOM on training data
        som.fit(X_train, y_train)

        # Flatten the predictions: an (n, 1) array compared against the (n,)
        # labels would broadcast to an (n, n) matrix and corrupt the accuracy.
        y_pred = np.asarray(som.predict(X_val)).ravel()

        # Plot and save U-Matrix (distance map) for this fold
        # NOTE(review): presumably plot_distance_map() draws onto the current
        # pyplot figure — confirm against the susi API.
        plt.figure(figsize=(10, 8))
        plt.title(f"SOM U-Matrix - Fold {fold_number}")
        som.plot_distance_map()
        plt.savefig(f"som_umatrix_fold_{fold_number}.png", dpi=300, bbox_inches="tight")
        plt.close()

        # Calculate and record accuracy for this fold
        acc = (y_pred == y_val).mean()
        accuracies.append(float(acc))
        print(f"Accuracy: {acc:.4f}")

    # Track ending resource usage; stop the clock before any further bookkeeping
    overall_elapsed = time.perf_counter() - overall_start_time
    overall_end_ram = process.memory_info().rss / 1024 / 1024
    overall_avg_cpu = psutil.cpu_percent(interval=None)  # Average since the priming call

    # Output fold-wise and overall performance metrics
    print("\n══════ SOM Validation Summary ══════")
    for i, a in enumerate(accuracies, 1):
        print(f"Fold {i}: {a:.4f}")
    print(f"Mean Accuracy: {np.mean(accuracies):.4f}")
    print(f"Standard Deviation: {np.std(accuracies):.4f}")
    print("\n Overall Training Stats ")
    print(f"Total Training Time: {overall_elapsed:.2f} seconds")
    print(f"Total RAM Usage Increase: {overall_end_ram - overall_start_ram:.2f} MB")
    print(f"Average CPU Usage During Training: {overall_avg_cpu}%")

    return accuracies


In [None]:
def som_visual_evaluation(df: pd.DataFrame, feature_cols: List[str], grid: Tuple[int, int] = (22, 22), save_dir="som_plots") -> None:
    """Train a supervised SOM on all rows of ``df`` and save evaluation plots.

    The model is fitted and evaluated on the same data, so the saved plots
    describe training-set fit rather than held-out performance.

    Files written into ``save_dir`` (created, including parents, if missing):
      * ``som_confusion.png`` — always.
      * ``som_roc.png`` and ``som_pr.png`` — only when ``Label`` has exactly two
        distinct values.

    Args:
        df: Data containing ``feature_cols`` and a ``Label`` column.
        feature_cols: Names of the columns used as model inputs.
        grid: ``(rows, columns)`` of the SOM grid.
        save_dir: Output directory for the PNG files.

    Raises:
        KeyError: If ``Label`` or any feature column is missing from ``df``.
    """
    # Validate before touching the filesystem or starting a long training run.
    missing_cols = [col for col in ["Label", *feature_cols] if col not in df.columns]
    if missing_cols:
        raise KeyError(f"Missing required column(s): {missing_cols}")

    out_dir = Path(save_dir)
    out_dir.mkdir(parents=True, exist_ok=True)  # parents=True allows nested output paths

    # Normalize feature columns
    scaler = StandardScaler()
    X = scaler.fit_transform(df[feature_cols])
    y = df["Label"].values
    classes = np.unique(y)  # Sorted distinct labels

    # Train supervised SOM on full dataset
    # NOTE(review): the keyword names below are passed through unchanged from
    # the original code — confirm they match the installed susi version.
    som = SOMClassifier(
        n_rows=grid[0],
        n_columns=grid[1],
        neighborhood_function='gaussian',
        learning_rate=0.5,
        n_iter_unsupervised=10000,
        n_iter_supervised=2000,
        supervised_learning_rate=0.01
    )
    som.fit(X, y)
    # Flatten so an (n, 1) prediction array lines up element-wise with y.
    y_pred = np.asarray(som.predict(X)).ravel()

    # Confusion Matrix visualization
    cm = confusion_matrix(y, y_pred)
    # Show the real class labels on the axes. If the predictions introduced a
    # label that is absent from y, the matrix is larger than `classes`, so fall
    # back to the default index ticks rather than mislabel it.
    tick_labels = classes if cm.shape[0] == len(classes) else None
    cm_disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=tick_labels).plot(cmap="Blues")
    cm_disp.ax_.set_title("Confusion Matrix")
    # Save and close the figure this display created instead of relying on
    # whatever pyplot currently considers the active figure.
    cm_disp.figure_.savefig(out_dir / "som_confusion.png", dpi=300, bbox_inches="tight")
    plt.close(cm_disp.figure_)

    # ROC and PR curves are shown only for binary classification
    if len(classes) == 2:
        # NOTE(review): assumes predict_proba columns follow sorted class order,
        # so column 1 belongs to classes[1] — confirm against the susi API.
        pos_label = classes[1]
        probs = som.predict_proba(X)[:, 1]

        # ROC Curve. An explicit pos_label keeps this working for label
        # encodings other than {0, 1} / {-1, 1}, such as string class names.
        fpr, tpr, _ = roc_curve(y, probs, pos_label=pos_label)
        roc_auc = auc(fpr, tpr)
        roc_disp = RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc).plot()
        roc_disp.ax_.set_title(f"ROC Curve (AUC = {roc_auc:.4f})")
        roc_disp.figure_.savefig(out_dir / "som_roc.png", dpi=300, bbox_inches="tight")
        plt.close(roc_disp.figure_)

        # Precision-Recall Curve
        precision, recall, _ = precision_recall_curve(y, probs, pos_label=pos_label)
        pr_disp = PrecisionRecallDisplay(precision=precision, recall=recall).plot()
        pr_disp.ax_.set_title("Precision-Recall Curve")
        pr_disp.figure_.savefig(out_dir / "som_pr.png", dpi=300, bbox_inches="tight")
        plt.close(pr_disp.figure_)


In [None]:
def main(
    data_path: str = "D:/Coding Projects/Detection-of-SYN-Flood-Attacks-Using-Machine-Learning-and-Deep-Learning-Techniques-with-Feature-Base/Taulant Matarova/your_data.csv",
    n_features: int = 12,
) -> None:
    """Load the dataset, cross-validate the SOM, then save the evaluation plots.

    Args:
        data_path: CSV file to load. The default is the original hard-coded,
            machine-specific location; pass another path to run elsewhere.
        n_features: How many leading feature columns to use.
    """
    # Load dataset from CSV file
    df = pd.read_csv(data_path)

    # Select the first `n_features` columns in file order, skipping 'Label' and
    # 'Fold'. A plain filter is used because Index.difference() sorts its result
    # by default, which would pick the alphabetically first columns instead.
    non_feature_cols = ("Label", "Fold")
    feat_cols = [col for col in df.columns if col not in non_feature_cols][:n_features]

    # Run cross-validation to assess performance on different folds
    accs = som_cross_validate(df, feat_cols)
    print("\nFinal SOM Cross-Validation Results:")
    print(f"Fold Accuracies: {accs}")

    # Train final model on full data and generate evaluation plots
    som_visual_evaluation(df, feat_cols)

# Entry point: run the full pipeline (cross-validation, then the evaluation
# plots) when this code executes as the top-level module. A Jupyter kernel also
# sets __name__ to "__main__", so running this cell triggers main() as well.
if __name__ == "__main__":
    main()
