In [None]:
import os
import sys
from datetime import datetime

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset, random_split

import matplotlib.pyplot as plt

# Derive the repository root from the current working directory: keep everything
# up to the first occurrence of "AdversarialNIDS" and re-append that folder name.
root_dir = os.getcwd().split("AdversarialNIDS")[0] + "AdversarialNIDS"
# Only register the path once, so re-running this cell does not stack duplicate
# entries on sys.path.
if root_dir not in sys.path:
    sys.path.append(root_dir)

from scripts.logger import LoggerManager
from scripts.analysis.model_analysis import perform_model_analysis

from CICIDS2017.dataset import CICIDS2017

from scripts.models.pytorch.MLP import NetworkIntrusionMLP
from scripts.models.pytorch.visualization import display_loss
from scripts.models.pytorch.train import train

from scripts.models.random_forest.random_forest import train_random_forest

from attacks.substitut import attack_substitut
from attacks.fgsm import attack

# Use the first CUDA device when PyTorch reports one as available, else the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Create the notebook's logger manager and keep handles to its logger and title.
lm = LoggerManager(log_name="test_substitut", root_dir=root_dir)
logger = lm.get_logger()
title = lm.get_title()
logger.info(f"Logger initialized for '{title}'")

In [None]:
# Build the CICIDS2017 dataset object, then chain its memory-optimisation and
# encoding steps. dataset_size accepts "full" or "small".
dataset = (
    CICIDS2017(logger=logger, dataset_size="full")
    .optimize_memory()
    .encode()
)

In [None]:
# Multi-class labelling is switched off for every subset drawn in this notebook.
multi_class = False

# Take a subset (size=800_000) and split it with test_size=0.3, one-hot labels
# and SMOTE requested through apply_smote.
X_train, X_test, y_train, y_test = (
    dataset
    .subset(multi_class=multi_class, size=800_000)
    .split(apply_smote=True, test_size=0.3, one_hot=True)
)

# Labels are requested one-hot, so the second axis of y_train is read as the class count.
num_classes = y_train.shape[1]

Initialisation des données


In [None]:
# Train the baseline Random Forest through the standalone helper (cv=5) and,
# when cross-validation scores are returned, print and plot them.
logger.info("Training Random Forest with cross-validation using train_random_forest...")
rf_model, cv_scores = train_random_forest(
    X_train,
    y_train,
    n_estimators=10,
    max_depth=10,
    min_samples_split=5,
    min_samples_leaf=2,
    max_features=None,
    random_state=0,
    cv=5,
    class_weight=None,
    logger=None
)

# Identity check on purpose: `cv_scores != None` on an array is evaluated
# element-wise, and using that result in `if` raises
# "The truth value of an array with more than one element is ambiguous".
if cv_scores is not None:
    # cv_scores is used as an array-like exposing .mean() / .std().
    cv_mean = cv_scores.mean()
    cv_std = cv_scores.std()

    print("\n" + "="*50)
    print("CROSS-VALIDATION RESULTS")
    print("="*50)
    print(f"CV Scores: {cv_scores}")
    print(f"Mean CV Score: {cv_mean:.4f} (+/- {cv_std:.4f})")

    # Plot one point per fold plus a dashed line at the mean score.
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(range(1, len(cv_scores)+1), cv_scores, marker='o', markersize=10, linewidth=2, color='green')
    ax.axhline(y=cv_mean, color='r', linestyle='--', label=f'Mean: {cv_mean:.4f}')
    ax.set_xlabel('Fold', fontsize=12)
    ax.set_ylabel('Accuracy', fontsize=12)
    # The model trained above is a Random Forest, so the title says so.
    ax.set_title('Random Forest Cross-Validation Scores', fontsize=14, fontweight='bold')
    ax.legend()
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    plt.show()

Evaluation of the Random Forest before the attack

In [None]:
# Run the shared analysis helper on the baseline Random Forest using the
# untouched test split; the returned pair is kept as (cm_rf, cr).
cm_rf, cr = perform_model_analysis(
    title="RandomForest before adversarial attack",
    model=rf_model,
    X_test=X_test,
    y_test=y_test,
    num_classes=num_classes,
    root_dir=root_dir,
    logger=logger,
    plot=True,
)

Generation of the substitute model for the attack

In [None]:
# Build the substitute for the baseline Random Forest from the test split.
# The call returns the substitute plus two analysis objects (cm, cr).
# The notebook-wide `logger` (obtained from lm.get_logger()) is passed here,
# like in every other cell, instead of reaching into the manager's attribute.
sub, cm, cr = attack_substitut(
    model=rf_model,
    X_test=X_test,
    y_test=y_test,
    model_name="RandomForest Substitut",
    root_dir=root_dir,
    plot_analysis=True,
    plot_loss=True,
    save_fig=True,
    logger=logger,
    device=device
)

* * *
# Attack
* * *

In [None]:
# Copy the test features into a float tensor, move it to the target device and
# only THEN enable gradients. Calling .to(device) after requires_grad_(True)
# returns a non-leaf copy when the device is CUDA, and autograd does not
# populate .grad on non-leaf tensors.
x_adv = torch.FloatTensor(X_test).clone().detach().to(device).requires_grad_(True)
# NOTE(review): criterion is not passed to attack() in this cell; kept in case
# other cells rely on the name.
criterion = nn.CrossEntropyLoss()

# Targeting the 'benign' class
target = torch.LongTensor([0]).repeat(x_adv.shape[0]).to(device)

# Run the attack against the substitute model; x_adv is rebound to the result.
x_adv = attack(
    model=sub,
    x_adv=x_adv,
    target=target,
    X_train=torch.FloatTensor(X_train),
    dataset=dataset,
    logger=logger,
    device=device
)

In [None]:
# Analyse the substitute model on the adversarial samples produced by the attack,
# scored against the original test labels.
cm_rf, cr = perform_model_analysis(
    title="RandomForest Substitut after adversarial attack",
    model=sub,
    X_test=x_adv,
    y_test=y_test,
    num_classes=num_classes,
    root_dir=root_dir,
    logger=logger,
    plot=True,
)

In [None]:
# Analyse the baseline Random Forest on the same adversarial samples; the tensor
# is moved to the CPU and converted to a NumPy array before being passed in.
cm_rf, cr = perform_model_analysis(
    title="RandomForest after adversarial attack",
    model=rf_model,
    X_test=x_adv.cpu().numpy(),
    y_test=y_test,
    num_classes=num_classes,
    root_dir=root_dir,
    logger=logger,
    plot=True,
)

In [None]:
# Draw a second subset/split (test_size=0.2) for the adversarial-training run.
# The training part is bound to dedicated *_base names so that X_train / y_train
# from the first split are not overwritten; rebinding them would make the
# earlier attack cell use different data when re-run.
# NOTE(review): whether this second subset can overlap the first split's test
# rows depends on dataset.subset() — confirm.
X_train_base, X_test_rob, y_train_base, y_test_rob = dataset.subset(size=800*1000, multi_class=multi_class).split(
    one_hot=True,
    test_size=0.2,
    apply_smote=True
)

# Augment the new training data with the adversarial samples, labelled with the
# labels of the test rows they were generated from.
X_train_rob = np.concatenate([X_train_base, x_adv.cpu().numpy()], axis=0)
y_train_rob = np.concatenate([y_train_base, y_test], axis=0)

# The un-augmented arrays are no longer needed once concatenated.
del X_train_base, y_train_base

In [None]:
# Train the adversarially-augmented Random Forest through the standalone helper
# (cv=5) and, when cross-validation scores are returned, print and plot them.
logger.info("Training Random Forest with cross-validation using train_random_forest...")
rf_model_robuste, cv_scores = train_random_forest(
    X_train_rob,
    y_train_rob,
    n_estimators=10,
    max_depth=10,
    min_samples_split=5,
    min_samples_leaf=2,
    max_features=None,
    random_state=0,
    cv=5,
    class_weight=None,
    logger=None
)

# Identity check on purpose: `cv_scores != None` on an array is evaluated
# element-wise, and using that result in `if` raises
# "The truth value of an array with more than one element is ambiguous".
if cv_scores is not None:
    # cv_scores is used as an array-like exposing .mean() / .std().
    cv_mean = cv_scores.mean()
    cv_std = cv_scores.std()

    print("\n" + "="*50)
    print("CROSS-VALIDATION RESULTS")
    print("="*50)
    print(f"CV Scores: {cv_scores}")
    print(f"Mean CV Score: {cv_mean:.4f} (+/- {cv_std:.4f})")

    # Plot one point per fold plus a dashed line at the mean score.
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(range(1, len(cv_scores)+1), cv_scores, marker='o', markersize=10, linewidth=2, color='green')
    ax.axhline(y=cv_mean, color='r', linestyle='--', label=f'Mean: {cv_mean:.4f}')
    ax.set_xlabel('Fold', fontsize=12)
    ax.set_ylabel('Accuracy', fontsize=12)
    # The model trained above is a Random Forest, so the title says so.
    ax.set_title('Random Forest Cross-Validation Scores', fontsize=14, fontweight='bold')
    ax.legend()
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    plt.show()

In [None]:
# Analyse the adversarially-trained Random Forest on the clean test part of the
# second split.
cm_rf, cr = perform_model_analysis(
    title="RandomForest Robust after adversarial training",
    model=rf_model_robuste,
    X_test=X_test_rob,
    y_test=y_test_rob,
    num_classes=num_classes,
    root_dir=root_dir,
    logger=logger,
    plot=True,
)

In [None]:
# Build the substitute for the ROBUST Random Forest. The model handed over must
# be rf_model_robuste: passing the baseline rf_model would merely rebuild the
# first substitute under the "Robust" name.
# NOTE(review): the substitute is still built from X_test / y_test of the first
# split, while the following attack uses X_test_rob — confirm this is intended.
# The notebook-wide `logger` is passed, consistent with the other cells.
sub, cm, cr = attack_substitut(
    model=rf_model_robuste,
    X_test=X_test,
    y_test=y_test,
    root_dir=root_dir,
    logger=logger,
    model_name="RandomForest Robust Substitut",
    plot_analysis=True,
    plot_loss=True,
    save_fig=True,
    device=device
)

In [None]:
# Copy the second split's test features into a float tensor, move it to the
# target device and only THEN enable gradients. Calling .to(device) after
# requires_grad_(True) returns a non-leaf copy when the device is CUDA, and
# autograd does not populate .grad on non-leaf tensors.
x_adv = torch.FloatTensor(X_test_rob).clone().detach().to(device).requires_grad_(True)
# NOTE(review): criterion is not passed to attack() in this cell; kept in case
# other cells rely on the name.
criterion = nn.CrossEntropyLoss()

# Targeting the 'benign' class
target = torch.LongTensor([0]).repeat(x_adv.shape[0]).to(device)

# Run the attack against the current substitute; x_adv is rebound to the result.
x_adv = attack(
    model=sub,
    x_adv=x_adv,
    target=target,
    X_train=torch.FloatTensor(X_train_rob),
    dataset=dataset,
    logger=logger,
    device=device
)

In [None]:
# Analyse the current substitute model on the adversarial samples generated from
# the second split's test set.
cm_rf, cr = perform_model_analysis(
    title="RandomForest Robust Substitut after adversarial attack",
    model=sub,
    X_test=x_adv,
    y_test=y_test_rob,
    num_classes=num_classes,
    root_dir=root_dir,
    logger=logger,
    plot=True,
)

In [None]:
# Analyse the ROBUST Random Forest on the adversarial samples. The analysis is
# titled "RandomForest Robust ...", so the model evaluated must be
# rf_model_robuste; evaluating the baseline rf_model here would report the
# undefended model's numbers under the robust model's name.
cm_rf, cr = perform_model_analysis(
    model=rf_model_robuste,
    X_test=x_adv.cpu().numpy(),
    y_test=y_test_rob,
    num_classes=num_classes,
    logger=logger,
    title="RandomForest Robust after adversarial attack",
    root_dir=root_dir,
    plot=True
)