# Test du HSJ sur différents modèles

Import des librairies

In [None]:
import sys
import os

# Rebuild the repository root from the current working directory: keep the
# text that precedes the first "AdversarialNIDS" occurrence and re-append the
# directory name, then register that path for the project-local imports.
root_dir = os.getcwd().partition("AdversarialNIDS")[0] + "AdversarialNIDS"
sys.path.append(root_dir)

import numpy as np
import torch

from CICIDS2017.dataset import CICIDS2017
from UNSWNB15.dataset import UNSWNB15

from scripts.logger import SimpleLogger

from art.attacks.evasion import HopSkipJump
from art.estimators.classification import SklearnClassifier, PyTorchClassifier

from NIDS_attacks.bounds_constrains import apply_bounds_constraints
from NIDS_attacks.integers_constrains import apply_integer_constraints

from scripts.models.decision_tree.decision_tree import train_decision_tree
from scripts.models.random_forest.random_forest import train_random_forest
from scripts.models.knn.knn import train_knn

from scripts.analysis.model_analysis import perform_model_analysis

from NIDS_attacks.hsj_attack_generalized import hsj_attack_generalized

Import du dataset

In [24]:
# Build the CICIDS2017 dataset object and run its memory-optimisation and
# encoding steps one at a time.
# (Alternative dataset: start from UNSWNB15() instead of CICIDS2017().)
ds = CICIDS2017()
ds = ds.optimize_memory()
ds = ds.encode()

# Restrict the experiment to a multi-class subset requested with size=10000.
ds = ds.subset(multi_class=True, size=10000)

# Train/test split with test_size=0.2 and SMOTE enabled via apply_smote=True.
X_train, X_test, y_train, y_test = ds.split(apply_smote=True, test_size=0.2)

[INFO] Downloading dataset: sweety18/cicids2017-full-dataset
[INFO] Loading dataset into DataFrame
[DEBUG] /home/loup/.cache/kagglehub/datasets/sweety18/cicids2017-full-dataset/versions/1/combine.csv
[INFO] Loading dataset into DataFrame
[DEBUG] /home/loup/.cache/kagglehub/datasets/sweety18/cicids2017-full-dataset/versions/1/combine.csv
[INFO] Initial dimensions: 2,214,469 rows x 79 columns = 174,943,051 cells
[DEBUG] Cleaning column names
[INFO] Initial dimensions: 2,214,469 rows x 79 columns = 174,943,051 cells
[DEBUG] Cleaning column names
[DEBUG] Removing duplicate rows
[DEBUG] Removing duplicate rows
[DEBUG] Removed 271,598 duplicate rows. Remaining: 1,942,871
[DEBUG] Removing rows with missing values (initial pass)
[DEBUG] Removed 271,598 duplicate rows. Remaining: 1,942,871
[DEBUG] Removing rows with missing values (initial pass)
[DEBUG] Removed 178 rows with missing values. Remaining: 1,942,693
[DEBUG] Checking for infinite values in numeric columns
[DEBUG] Removed 178 rows wit

Choix des contraintes

In [25]:
# Feature indices handed to the attack as integer-constrained (example values).
integer_indices = [0, 2, 5, 10, 15]

# Feature indices handed to the attack as modifiable: 0 through 19.
modifiable_indices = [*range(20)]

## 1. Decision Tree

In [31]:
# Train the decision tree; the second value returned by the trainer is unused.
model_dt, _ = train_decision_tree(X_train, y_train, max_depth=10)

# Run the generalized HopSkipJump attack against the decision tree.
# (Pass dataset="UNSWNB15" instead when working on the other dataset.)
results_dt = hsj_attack_generalized(
    model=model_dt,
    dataset="CICIDS2017",
    X_test=X_test,
    y_test=y_test,
    nb_samples=20,  # kept small for testing
    apply_constraints=True,
    modifiable_indices=modifiable_indices,
    integer_indices=integer_indices,
    per_sample_visualization=False,  # disabled to reduce output noise
)

[INFO] Starting Generalized HopSkipJump attack on CICIDS2017
[INFO] Using targeted class: 0
[INFO] Applying constraints with 20 modifiable features
[INFO] Integer constraints on 5 features
[INFO] Initial accuracy: 0.994
[INFO] Attacking 20 attack samples to make them appear benign...
[INFO] Target classes being attacked: [1. 2. 3.]


HopSkipJump:   0%|          | 0/20 [00:00<?, ?it/s]

[INFO] === Attack Results ===
[INFO] Original accuracy on attack samples: 1.000
[INFO] Adversarial accuracy on attack samples: 0.650
[INFO] Attack success rate (attacks -> benign): 0.000
[INFO] Average L2 perturbation: 0.533575
[INFO] Constraints applied: bounds + 5 integer features
[INFO] 
Summary: Attack succeeded 0.0% of the time


In [35]:
# Compare the decision tree on the original test set and on the adversarial
# samples produced by the HopSkipJump attack.
logger = SimpleLogger()

# NOTE(review): in the recorded run this first call raised
# "AxisError: axis 1 is out of bounds for array of dimension 1" right after
# the "Running analysis" log line. The cause is inside perform_model_analysis
# (not visible here) - TODO investigate how it handles 1-D labels/predictions.
cm_orig, report_orig = perform_model_analysis(
    model=model_dt,
    X_test=X_test,
    y_test=y_test,
    logger=logger,
    title="Decision Tree - Dataset Original",
    plot=True,
    save_fig=False
)

# The attack output for this model is stored in `results_dt`; the previous
# code read from an undefined `results` variable.
cm_adv, report_adv = perform_model_analysis(
    model=model_dt,
    X_test=results_dt['X_adv'],
    y_test=results_dt['y_attacks'],
    logger=logger,
    title="Decision Tree - Dataset Adverserial",
    plot=True,
    save_fig=False
)


[INFO] Running analysis for scikit-learn model: Decision Tree - Dataset Original


AxisError: axis 1 is out of bounds for array of dimension 1

## 2. Random Forest

In [37]:
# Train the random forest; the second value returned by the trainer is unused.
model_rf, _ = train_random_forest(X_train, y_train, max_depth=10, n_estimators=50)

# Run the generalized HopSkipJump attack against the random forest, with the
# per-sample report enabled.
# (Pass dataset="UNSWNB15" instead when working on the other dataset.)
results_rf = hsj_attack_generalized(
    model=model_rf,
    dataset="CICIDS2017",
    X_test=X_test,
    y_test=y_test,
    nb_samples=10,
    apply_constraints=True,
    modifiable_indices=modifiable_indices,
    integer_indices=integer_indices,
    per_sample_visualization=True,
)

[INFO] Starting Generalized HopSkipJump attack on CICIDS2017
[INFO] Using targeted class: 0
[INFO] Applying constraints with 20 modifiable features
[INFO] Integer constraints on 5 features
[INFO] Initial accuracy: 0.997
[INFO] Attacking 10 attack samples to make them appear benign...
[INFO] Target classes being attacked: [1. 2. 3.]


HopSkipJump:   0%|          | 0/10 [00:00<?, ?it/s]

[INFO] === Attack Results ===
[INFO] Original accuracy on attack samples: 1.000
[INFO] Adversarial accuracy on attack samples: 1.000
[INFO] Attack success rate (attacks -> benign): 0.000
[INFO] Average L2 perturbation: 22.698642
[INFO] Constraints applied: bounds + 5 integer features
[INFO] === Per-Sample Analysis ===
[INFO] Sample 1: Class 1.0 -> Original pred: 1.0 -> Adversarial pred: 1.0
[INFO] Sample 2: Class 1.0 -> Original pred: 1.0 -> Adversarial pred: 1.0
[INFO] Sample 3: Class 1.0 -> Original pred: 1.0 -> Adversarial pred: 1.0
[INFO] Sample 4: Class 3.0 -> Original pred: 3.0 -> Adversarial pred: 3.0
[INFO] Sample 5: Class 2.0 -> Original pred: 2.0 -> Adversarial pred: 2.0
[INFO] Sample 6: Class 1.0 -> Original pred: 1.0 -> Adversarial pred: 1.0
[INFO] Sample 7: Class 2.0 -> Original pred: 2.0 -> Adversarial pred: 2.0
[INFO] Sample 8: Class 1.0 -> Original pred: 1.0 -> Adversarial pred: 1.0
[INFO] Sample 9: Class 3.0 -> Original pred: 3.0 -> Adversarial pred: 3.0
[INFO] Sample 

In [38]:
# Compare the random forest on the original test set and on the adversarial
# samples stored in `results_rf`.
logger = SimpleLogger()

# NOTE(review): the recorded run shows this first call failing with
# "AxisError: axis 1 is out of bounds for array of dimension 1".
cm_orig, report_orig = perform_model_analysis(
    title="Random Forest - Dataset Original",
    model=model_rf,
    X_test=X_test,
    y_test=y_test,
    logger=logger,
    save_fig=False,
    plot=True,
)

# Same analysis on the adversarial inputs and their labels.
cm_adv, report_adv = perform_model_analysis(
    title="Random Forest - Dataset Adverserial",
    model=model_rf,
    X_test=results_rf['X_adv'],
    y_test=results_rf['y_attacks'],
    logger=logger,
    save_fig=False,
    plot=True,
)

[INFO] Running analysis for scikit-learn model: Random Forest - Dataset Original


AxisError: axis 1 is out of bounds for array of dimension 1

## 3. KNN

In [None]:
# Train the k-nearest-neighbours model; the trainer's second return value is unused.
model_knn, _ = train_knn(X_train, y_train, n_neighbors=5)

# Run the generalized HopSkipJump attack against the KNN model, with the
# per-sample report enabled.
# (Pass dataset="UNSWNB15" instead when working on the other dataset.)
results_knn = hsj_attack_generalized(
    model=model_knn,
    dataset="CICIDS2017",
    X_test=X_test,
    y_test=y_test,
    nb_samples=10,
    apply_constraints=True,
    modifiable_indices=modifiable_indices,
    integer_indices=integer_indices,
    per_sample_visualization=True,
)

[INFO] Starting Generalized HopSkipJump attack on CICIDS2017
[INFO] Using targeted class: 0
[INFO] Applying constraints with 20 modifiable features
[INFO] Integer constraints on 5 features
[INFO] Initial accuracy: 0.947
[INFO] Attacking 10 attack samples to make them appear benign...
[INFO] Target classes being attacked: [1. 2. 3.]


HopSkipJump:   0%|          | 0/10 [00:00<?, ?it/s]

In [None]:
# Compare the KNN model on the original test set and on the adversarial
# samples stored in `results_knn`.
logger = SimpleLogger()

# NOTE(review): the equivalent call in the decision-tree and random-forest
# cells raised "AxisError: axis 1 is out of bounds for array of dimension 1"
# in the recorded runs - presumably this one fails the same way; TODO confirm.
cm_orig, report_orig = perform_model_analysis(
    model=model_knn,
    X_test=X_test,
    y_test=y_test,
    logger=logger,
    title="K-Nearest Neighbors - Dataset Original",
    plot=True,
    save_fig=False
)

# Evaluate the KNN model itself on its adversarial samples; the previous code
# passed the random forest (`model_rf`) here, which mislabelled the results.
cm_adv, report_adv = perform_model_analysis(
    model=model_knn,
    X_test=results_knn['X_adv'],
    y_test=results_knn['y_attacks'],
    logger=logger,
    title="K-Nearest Neighbors - Dataset Adverserial",
    plot=True,
    save_fig=False
)