In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import copy
from sklearn.cluster import KMeans
from sklearn.metrics import classification_report, accuracy_score, recall_score, f1_score, roc_auc_score
from shared.utils import load_data
from datasets import preprocess_dataset
from sklearn.preprocessing import StandardScaler
from intrusion_detection_systems.models import cnn_model, mlp_model, rnn_model
from intrusion_detection_systems import train_dl_model, evaluate_dl_model
from agent.MAB import MultiArmedBanditDLThompsonSampling

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)

In [2]:
# Name passed to preprocess_dataset as both name_save and name_load.
name = "CIC-IDS_2017_2"

# CIC-IDS 2017 capture files handed to load_data. Only the DDoS capture is
# active; uncomment further entries to include them as well.
csv_files = [
    "./shared/data/CIC_2017/Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv",
    # "./shared/data/CIC_2017/Friday-WorkingHours-Afternoon-PortScan.pcap_ISCX.csv",
    # "./shared/data/CIC_2017/Friday-WorkingHours-Morning.pcap_ISCX.csv",
    # "./shared/data/CIC_2017/Monday-WorkingHours.pcap_ISCX.csv",
    # "./shared/data/CIC_2017/Thursday-WorkingHours-Afternoon-Infilteration.pcap_ISCX.csv",
    # "./shared/data/CIC_2017/Thursday-WorkingHours-Morning-WebAttacks.pcap_ISCX.csv",
    # "./shared/data/CIC_2017/Tuesday-WorkingHours.pcap_ISCX.csv"
]
df = load_data(csv_files, 42)
print("Dataset loaded")

df_preprocessed = preprocess_dataset(
    df,
    save=True,
    dataset_type="CIC_2017",
    seed=42,
    load=False,
    name_save=name,
    name_load=name,
)
print("Dataset preprocessed")

# Pull the train / test splits off the preprocessed dataset object.
X_train_full = df_preprocessed.x_train
y_train_full = df_preprocessed.y_train
X_test_full = df_preprocessed.x_test
y_test_full = df_preprocessed.y_test

# The summaries are printed before the labels are parsed, so they show the
# labels exactly as preprocess_dataset returned them.
print(f"Train full shape: {X_train_full.shape}, labels distribution: {np.unique(y_train_full, return_counts=True)}")
print(f"Test full shape: {X_test_full.shape}, labels distribution: {np.unique(y_test_full, return_counts=True)}")


def _as_int_labels(raw_labels):
    """Parse each label from its whitespace-stripped string form into an int array."""
    return np.array([int(str(raw_label).strip()) for raw_label in raw_labels])


y_train_full = _as_int_labels(y_train_full)
y_test_full = _as_int_labels(y_test_full)


Dataset loaded
Loading new data
labels: {'DDoS'}
Dataset preprocessed
Train full shape: (158021, 68), labels distribution: (array(['0', '1'], dtype=object), array([68402, 89619], dtype=int64))
Test full shape: (67724, 68), labels distribution: (array(['0', '1'], dtype=object), array([29316, 38408], dtype=int64))


In [3]:
# Number of input features, taken from the column count of the training data.
input_dim = X_train_full.shape[1]


def _print_setup(title, model, criterion, optimizer):
    """Print a titled summary of a model together with its loss and optimizer."""
    print(title)
    print(model)
    print(f"Criterion: {criterion}")
    print(f"Optimizer: {optimizer}")


# MLP model with cross-entropy loss and Adam (lr=0.001).
mlp_model_pt = mlp_model(input_dim)
criterion_mlp_model_pt = nn.CrossEntropyLoss()
optimizer_mlp_model_pt = optim.Adam(mlp_model_pt.parameters(), lr=0.001)
_print_setup("--- MLP Model ---", mlp_model_pt, criterion_mlp_model_pt, optimizer_mlp_model_pt)

# CNN model, same loss / optimizer configuration.
cnn_model_pt = cnn_model(input_dim)
criterion_cnn_model_pt = nn.CrossEntropyLoss()
optimizer_cnn_model_pt = optim.Adam(cnn_model_pt.parameters(), lr=0.001)
_print_setup("--- CNN Model ---", cnn_model_pt, criterion_cnn_model_pt, optimizer_cnn_model_pt)

# RNN model, same loss / optimizer configuration.
rnn_model_pt = rnn_model(input_dim)
criterion_rnn_model_pt = nn.CrossEntropyLoss()
optimizer_rnn_model_pt = optim.Adam(rnn_model_pt.parameters(), lr=0.001)
_print_setup("--- RNN Model (LSTM) ---", rnn_model_pt, criterion_rnn_model_pt, optimizer_rnn_model_pt)

--- MLP Model ---
MLPModel(
  (fc1): Linear(in_features=68, out_features=128, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (relu2): ReLU()
  (fc3): Linear(in_features=64, out_features=2, bias=True)
)
Criterion: CrossEntropyLoss()
Optimizer: Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    decoupled_weight_decay: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    weight_decay: 0
)
--- CNN Model ---
CNNModel(
  (conv1): Conv1d(1, 64, kernel_size=(5,), stride=(1,))
  (relu1): ReLU()
  (pool1): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv1d(64, 128, kernel_size=(5,), stride=(1,))
  (relu2): ReLU()
  (pool2): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc): Linear(in_features=1792, out_features=2, bias=True)
)
Criterion: CrossEntropyLoss()
Optimizer: Adam

In [4]:
# Cap the working subset at 1000 training rows and 2000 test rows
# (or fewer when the full split is smaller than that).
SUB_TRAIN = min(len(X_train_full), 1000)
SUB_TEST = min(len(X_test_full), 2000)

# Work on the leading slice of each split.
X_train = X_train_full[:SUB_TRAIN]
y_train = y_train_full[:SUB_TRAIN]
X_test = X_test_full[:SUB_TEST]
y_test = y_test_full[:SUB_TEST]

# To use the full dataset instead, replace the four slices above with:
# X_train, y_train = X_train_full, y_train_full
# X_test, y_test = X_test_full, y_test_full

# Report the label counts of each subset.
for _caption, _subset_labels in (
    ("📌 Nhãn của tập huấn luyện (y_train):", y_train),
    ("📌 Nhãn của tập kiểm thử (y_test):", y_test),
):
    print(_caption, np.unique(_subset_labels, return_counts=True))

📌 Nhãn của tập huấn luyện (y_train): (array([0, 1]), array([423, 577], dtype=int64))
📌 Nhãn của tập kiểm thử (y_test): (array([0, 1]), array([ 855, 1145], dtype=int64))


In [5]:
# =====================
# MAB Training and Evaluation
# =====================
# Seed the global NumPy and PyTorch RNGs before the arms are built so that
# weight initialisation (and any other draws from these global generators
# during training) repeat across "Restart & Run All". 42 is the same seed
# value passed to load_data / preprocess_dataset earlier in the notebook.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# One freshly constructed model per bandit arm: MLP, CNN and RNN.
arms = [mlp_model(input_dim), cnn_model(input_dim), rnn_model(input_dim)]
mab = MultiArmedBanditDLThompsonSampling(
    arms=arms,
    n_clusters=3,
    train_fn=train_dl_model,
    eval_fn=evaluate_dl_model,
    device=device
)

# Train the bandit on the training subset.
mab.train(X_train, y_train)

# predict returns the predicted labels plus a second value that is unpacked
# as arm_selected (presumably the arm used per sample; confirm in agent.MAB).
y_pred, arm_selected = mab.predict(X_test)

print(classification_report(y_test, y_pred))
accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
# NOTE: y_pred holds hard class labels, not scores or probabilities, so for
# these binary labels the value below equals balanced accuracy
# ((TPR + TNR) / 2) rather than a ranking-based AUC.
roc_auc = roc_auc_score(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")
print(f"ROC AUC Score: {roc_auc}")



Cluster 0: 85 samples
Training Arm 0 (MLPModel) on Cluster 0

--- Training MLPModel ---
Epoch [1/50], Train Loss: 0.6987, Train Acc: 0.3765, Val Loss: 0.6957, Val Acc: 0.3765
Epoch [2/50], Train Loss: 0.6957, Train Acc: 0.3765, Val Loss: 0.6928, Val Acc: 0.3765
Epoch [3/50], Train Loss: 0.6928, Train Acc: 0.3765, Val Loss: 0.6899, Val Acc: 0.7412
Epoch [4/50], Train Loss: 0.6899, Train Acc: 0.7412, Val Loss: 0.6869, Val Acc: 0.7176
Epoch [5/50], Train Loss: 0.6869, Train Acc: 0.7176, Val Loss: 0.6840, Val Acc: 0.6941
Epoch [6/50], Train Loss: 0.6840, Train Acc: 0.6941, Val Loss: 0.6811, Val Acc: 0.6941
Epoch [7/50], Train Loss: 0.6811, Train Acc: 0.6941, Val Loss: 0.6782, Val Acc: 0.6824
Epoch [8/50], Train Loss: 0.6782, Train Acc: 0.6824, Val Loss: 0.6752, Val Acc: 0.6588
Epoch [9/50], Train Loss: 0.6752, Train Acc: 0.6588, Val Loss: 0.6724, Val Acc: 0.6235
Epoch [10/50], Train Loss: 0.6724, Train Acc: 0.6235, Val Loss: 0.6695, Val Acc: 0.6235
Epoch [11/50], Train Loss: 0.6695, Train 