In [33]:
import warnings
warnings.filterwarnings('ignore')
import tensorflow as tf
# tf.compat.v1.disable_eager_execution()
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from art.estimators.classification import KerasClassifier, SklearnClassifier
from art.attacks.evasion import HopSkipJump, ZooAttack, ProjectedGradientDescent, CarliniL2Method
from art.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, LSTM, Input
from shared.utils import load_data
from datasets import preprocess_dataset, datasets_types
from tensorflow import keras


seed = 42


In [None]:
name = "CIC-IDS_2017_2"
df = load_data(
            [
                "./shared/data/CIC_2017/Thursday-WorkingHours-Afternoon-Infilteration.pcap_ISCX.csv",
                "./shared/data/CIC_2017/Thursday-WorkingHours-Morning-WebAttacks.pcap_ISCX.csv",
                # "./shared/data/CIC_2017/Tuesday-WorkingHours.pcap_ISCX.csv"
            ],
            seed
        )
print("Dataset loaded")
df_preprocessed = preprocess_dataset(
    df, save=True, dataset_type="CIC_2017", seed=seed, load=False, name_save=name, name_load=name)
print("Dataset preprocessed")

Dataset cargado
Loading new data
labels: {'Web Attack � XSS', 'Infiltration', 'Web Attack � Brute Force', 'Web Attack � Sql Injection'}
Dataset Preprocesado


In [54]:
x_train = df_preprocessed.x_train
y_train = df_preprocessed.y_train
x_test = df_preprocessed.x_test
y_test = df_preprocessed.y_test

y_train = y_train.apply(lambda x: int(x))
y_test = y_test.apply(lambda x: int(x))

print(y_train.unique())
print(y_test.unique())

print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

[0 1]
[0 1]
(321277, 70)
(321277,)
(137691, 70)
(137691,)


In [55]:
attacks_data = x_test[y_test == 1]
normal_data = x_test[y_test == 0]
print(attacks_data.shape, normal_data.shape)

(665, 70) (137026, 70)


In [56]:
# MLP Model
mlp_model = Sequential()
mlp_model.add(Dense(128, input_dim=x_train.shape[1], activation='relu'))
mlp_model.add(Dense(64, activation='relu'))
mlp_model.add(Dense(2, activation='softmax'))  # Update to have 2 output units for binary classification

mlp_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])  # Use categorical_crossentropy for multi-class

# Convert y_train and y_test to one-hot encoding
y_train_one_hot = to_categorical(y_train)
y_test_one_hot = to_categorical(y_test)

# Train the MLP model
mlp_model.fit(x_train, y_train_one_hot, epochs=3, batch_size=32, verbose=1)


Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x22c46169e48>

In [57]:
# CNN Model
cnn_model = Sequential()
cnn_model.add(Conv1D(64, 5, activation='relu', input_shape=(x_train.shape[1], 1)))  # Assuming 1D data
cnn_model.add(MaxPooling1D(2))
cnn_model.add(Conv1D(128, 5, activation='relu'))
cnn_model.add(MaxPooling1D(2))
cnn_model.add(Flatten())
cnn_model.add(Dense(64, activation='relu'))
cnn_model.add(Dense(2, activation='softmax'))  # Update to have 2 output units for binary classification

cnn_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])  # Use categorical_crossentropy for multi-class

# Reshaping data for CNN (as CNN expects 3D input)
x_train_cnn = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
x_test_cnn = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))

# Convert y_train and y_test to one-hot encoding
y_train_one_hot = to_categorical(y_train)
y_test_one_hot = to_categorical(y_test)

# Train the CNN model
cnn_model.fit(x_train_cnn, y_train_one_hot, epochs=1, batch_size=32, verbose=1)

Epoch 1/1


<keras.callbacks.History at 0x22c462035c8>

In [65]:
# RNN Model using LSTM
rnn_model = Sequential()
rnn_model.add(LSTM(64, input_shape=(x_train.shape[1], 1), return_sequences=False))
rnn_model.add(Dense(32, activation='relu'))
rnn_model.add(Dense(2, activation='softmax'))  # Update to have 2 output units for binary classification

rnn_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])  # Use categorical_crossentropy for multi-class

# Reshaping data for RNN
x_train_rnn = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
x_test_rnn = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))

# Train the RNN model
rnn_model.fit(x_train_rnn, y_train_one_hot, epochs=1, batch_size=32, verbose=1)


Epoch 1/1


<keras.callbacks.History at 0x22c46d7cfc8>

In [58]:
FUNCTIONAL_FEATURES = [
 ' min_seg_size_forward',' Bwd Header Length',' Destination Port'
 'Init_Win_bytes_forward',' Init_Win_bytes_backward',' Bwd Packets/s'
 'Total Length of Fwd Packets',' Subflow Fwd Bytes',' Max Packet Length'
 'Bwd Packet Length Max',' Avg Bwd Segment Size',' Bwd Packet Length Mean'
 ' Fwd Packet Length Max',' Average Packet Size',' Packet Length Std'
 ' Packet Length Mean',' Bwd Packet Length Std',' Bwd Packet Length Min'
 ' Fwd Packet Length Std',' Fwd Packet Length Min',' Min Packet Length'
 ' Fwd Packet Length Mean',' Avg Fwd Segment Size',' act_data_pkt_fwd'
 ' Total Fwd Packets','Subflow Fwd Packets',' Total Backward Packets']
print(len(FUNCTIONAL_FEATURES))
FUNCTIONAL_FEATURES_IDEXES = [df.columns.get_loc(c) for c in df.columns if c not in FUNCTIONAL_FEATURES][:-1]
print(FUNCTIONAL_FEATURES_IDEXES, len(FUNCTIONAL_FEATURES_IDEXES))
NON_FUNCTIONAL_FEATURES_IDEXES = [df.columns.get_loc(c) for c in df.columns if c in FUNCTIONAL_FEATURES]
print(NON_FUNCTIONAL_FEATURES_IDEXES, len(NON_FUNCTIONAL_FEATURES_IDEXES))
print(len(df.columns))

19
[0, 1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 53, 56, 57, 58, 60, 62, 63, 64, 65, 66, 67, 68] 58
[3, 7, 13, 33, 50, 51, 52, 54, 55, 59, 61] 11
70


In [59]:
class Classifier(KerasClassifier):
    
    def __init__(self, model, clip_values=None, preprocessing=(0, 1), attacks=[]):
        # Wrap model into ART classifier
        super(Classifier, self).__init__(model=model, clip_values=clip_values, preprocessing=preprocessing)
        self._attacks = attacks

    def predict(self, x, **kwargs):
        # Set attacks features to X
        for i in FUNCTIONAL_FEATURES_IDEXES:
            for j in range(len(x)):
                x[j][i] = self._attacks[j][i]
        predictions = self._model.predict(x)
        return to_categorical(predictions, num_classes=self._get_nb_classes())

In [68]:
def prepare_input_for_model(model, x_data):
    from tensorflow import keras  # hoặc `import keras` nếu bạn dùng keras thường
    first_layer = type(model.layers[0]).__name__.lower()

    if 'dense' in first_layer:
        return x_data.reshape((x_data.shape[0], -1))  # MLP
    elif 'conv1d' in first_layer:
        return x_data.reshape((x_data.shape[0], x_data.shape[1], 1))  # CNN
    elif 'lstm' in first_layer:
        return x_data.reshape((x_data.shape[0], x_data.shape[1], 1)) 
    else:
        raise ValueError(f"Không rõ loại mô hình với lớp đầu tiên: {first_layer}")


In [None]:
def create_art_classifier(model):
    # Wrap model into ART classifier
    classifier = KerasClassifier(model=model, clip_values=(0, 1), use_logits=False)
    return classifier

# Create Carlini-Wagner L2 Attack
def create_carlini_wagner_attack(classifier):
    # Initialize Carlini L2 attack
    cw_attack = CarliniL2Method(classifier, targeted=False, max_iter=300, confidence=0, learning_rate=0.01, binary_search_steps=3)
    return cw_attack

# Train and test the model with Carlini-Wagner attack
def train_and_test_with_carlini_wagner(model, model_name, classifier, attack_data, normal_data):
    # Create Carlini-Wagner L2 attack
    cw_attack = create_carlini_wagner_attack(classifier)
    
    x_input = prepare_input_for_model(model, attack_data[:100])
    x_test_adv = cw_attack.generate(x_input)
    # Generate adversarial data using Carlini-Wagner attack
    # x_test_adv = cw_attack.generate(x_test[:10])  # Apply attack on the first 100 samples
    
    # Concatenate adversarial examples with normal data
    non_adv_x_test = np.concatenate((attack_data[:100], normal_data[:100]))
    non_adv_y_test = np.concatenate((np.ones((attack_data[:100].shape[0], 1)), np.zeros((normal_data[:100].shape[0], 1))))
    adv_x_test = np.concatenate((x_test_adv, normal_data[:100]))
    adv_y_test = np.concatenate((np.ones((x_test_adv.shape[0], 1)), np.zeros((normal_data[:100].shape[0], 1))))
    
    # Evaluate the model with the adversarial data
    print("==========> Model: ", model_name)
   # Predict for non-adversarial data
    non_adv_predictions = model.predict(non_adv_x_test)
    non_adv_pred_labels = np.argmax(non_adv_predictions, axis=1)  # If the output is probabilities
    
    # Predict for adversarial data
    adv_predictions = model.predict(adv_x_test)
    adv_pred_labels = np.argmax(adv_predictions, axis=1)

    # Convert true labels to integer type
    true_non_adv_labels = non_adv_y_test.flatten().astype(int)
    true_adv_labels = adv_y_test.flatten().astype(int)

   # Evaluate the model on non-adversarial data
    print("---------- Non adversarial data")
    print(classification_report(true_non_adv_labels, non_adv_pred_labels))
    print("Accuracy: ", accuracy_score(true_non_adv_labels, non_adv_pred_labels))
    print("Detection Rate: ", recall_score(true_non_adv_labels, non_adv_pred_labels))
    print("F1 Score: ", f1_score(true_non_adv_labels, non_adv_pred_labels))
    print("ROC AUC Score: ", roc_auc_score(true_non_adv_labels, non_adv_pred_labels))

    # Evaluate the model on adversarial data
    print("---------- Adversarial data")
    print(classification_report(true_adv_labels, adv_pred_labels))
    print("Accuracy: ", accuracy_score(true_adv_labels, adv_pred_labels))
    print("Detection Rate (Recall): ", recall_score(true_adv_labels, adv_pred_labels))
    print("F1 Score: ", f1_score(true_adv_labels, adv_pred_labels))
    print("ROC AUC Score: ", roc_auc_score(true_adv_labels, adv_pred_labels))



In [98]:
models = [mlp_model,cnn_model, rnn_model]
model_names = ['MLP', 'CNN', 'RNN']

for model, model_name in zip(models, model_names):
    # Wrap model into ART classifier
    classifier = KerasClassifier(model, clip_values=(0, 1))
    
    # Determine the data to use for each model
    if model_name == "CNN" or model_name == "RNN":
        # If the model is CNN or RNN, reshape normal_data to be 3D
        normal_data_reshaped = normal_data.reshape((normal_data.shape[0], normal_data.shape[1], 1))
        attack_data_reshaped = attacks_data.reshape((attacks_data.shape[0], attacks_data.shape[1], 1))
    else:
        # For MLP (or other models), keep normal_data as 2D
        normal_data_reshaped = normal_data
        attack_data_reshaped = attacks_data

    # Test the model with Carlini-Wagner adversarial data
    print(f"Evaluating {model_name} model with Carlini-Wagner attack...")
    train_and_test_with_carlini_wagner(model, model_name, classifier, attack_data=attack_data_reshaped, normal_data=normal_data_reshaped)
    

Evaluating MLP model with Carlini-Wagner attack...


C&W L_2: 100%|██████████| 100/100 [07:26<00:00,  4.46s/it]


---------- Non adversarial data
              precision    recall  f1-score   support

           0       0.89      1.00      0.94       100
           1       1.00      0.88      0.94       100

    accuracy                           0.94       200
   macro avg       0.95      0.94      0.94       200
weighted avg       0.95      0.94      0.94       200

Accuracy:  0.94
Detection Rate:  0.88
F1 Score:  0.9361702127659575
ROC AUC Score:  0.94
---------- Adversarial data
              precision    recall  f1-score   support

           0       0.60      1.00      0.75       100
           1       1.00      0.32      0.48       100

    accuracy                           0.66       200
   macro avg       0.80      0.66      0.62       200
weighted avg       0.80      0.66      0.62       200

Accuracy:  0.66
Detection Rate (Recall):  0.32
F1 Score:  0.48484848484848486
ROC AUC Score:  0.66
Evaluating CNN model with Carlini-Wagner attack...


C&W L_2: 100%|██████████| 100/100 [19:00<00:00, 11.40s/it]


---------- Non adversarial data
              precision    recall  f1-score   support

           0       0.92      1.00      0.96       100
           1       1.00      0.91      0.95       100

    accuracy                           0.95       200
   macro avg       0.96      0.96      0.95       200
weighted avg       0.96      0.95      0.95       200

Accuracy:  0.955
Detection Rate:  0.91
F1 Score:  0.9528795811518325
ROC AUC Score:  0.9550000000000001
---------- Adversarial data
              precision    recall  f1-score   support

           0       0.86      1.00      0.93       100
           1       1.00      0.84      0.91       100

    accuracy                           0.92       200
   macro avg       0.93      0.92      0.92       200
weighted avg       0.93      0.92      0.92       200

Accuracy:  0.92
Detection Rate (Recall):  0.84
F1 Score:  0.9130434782608696
ROC AUC Score:  0.9199999999999999
Evaluating RNN model with Carlini-Wagner attack...


C&W L_2:  35%|███▌      | 35/100 [40:46<1:15:43, 69.90s/it]


KeyboardInterrupt: 