In [7]:
# Imports
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import shuffle
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
tf.random.set_seed(42) # Used to always train de model in a same way to make assumptions
import numpy as np
from alive_progress import alive_bar
import time
from keras import backend as K
import pickle as pkl

# Choice of the dataset & attack

In [8]:
# Dataset Loading

# Select here the Dataset and the botnet attack
'''ds = "CICIDS2018"
botnet = ["Zeus_Ares"]'''

ds = "CTU"
botnet = ["Neris", "Rbot", "Virut"]

attackindex=0 # index of botnet attack in the previous list

# Dataset loading & Pre-processing

## If not yet generated the datasets

In [11]:
# Get benign samples from the selected dataset ds
ben = pd.read_csv("DReLAB/{}/benign.csv".format(ds), index_col = 0)

n_ben = len(ben)
print(n_ben)

# load target attack traffic (for testing & adversarial instances generation)
# Get malicious samples belonging to the chosen botnet from the selected dataset
mal = pd.read_csv("DReLAB/{}/malicious/{}.csv".format(ds, botnet[attackindex]), index_col = 0)
n_mal = len(mal)
print(n_mal)

# Load dataset containing all attacks (for training)
'''mal = pd.DataFrame()
for bot in botnet:
    mal_temp = pd.read_csv("DReLAB/{}/malicious/{}.csv".format(ds, bot), index_col = 0)
    mal = pd.concat([mal, mal_temp], ignore_index = True)
    
n_mal = len(mal)
print(n_mal)'''

2582434
60632


'mal = pd.DataFrame()\nfor bot in botnet:\n    mal_temp = pd.read_csv("DReLAB/{}/malicious/{}.csv".format(ds, bot), index_col = 0)\n    mal = pd.concat([mal, mal_temp], ignore_index = True)\n    \nn_mal = len(mal)\nprint(n_mal)'

In [12]:
# Pre-processing 
# Remove the infinity value in ratio in out when in and out bytes = 0, replaced by 0 because 0/0

ben.loc[(ben["OutBytes"] == 0) & (ben["InBytes"] == 0), "RatioOutIn"] = 0
mal.loc[(mal["OutBytes"] == 0) & (mal["InBytes"] == 0), "RatioOutIn"] = 0

# Obtain the dataset in a 1:1 benign/malicious ratio
if (n_ben // 1) >= n_mal:
    dataset_ben = ben.sample(n_mal * 1)
    dataset = pd.concat([dataset_ben, mal], ignore_index = True)
    print("Benign in dataset: {}".format(len(dataset_ben)))
    print("Malicious in dataset: {}".format(len(mal)))
    print("Ben / Mal Ratio: {}".format(len(dataset_ben) / len(mal)))
    
else:
    dataset_mal = mal.sample(n_ben // 1)
    dataset = pd.concat([ben, dataset_mal], ignore_index = True)
    print("Benign in dataset: {}".format(len(ben)))
    print("Malicious in dataset: {}".format(len(dataset_mal)))
    print("Ben / Mal Ratio: {}".format(len(ben) / len(dataset_mal)))
    
# Data Splitting, No cross validation because too slow due to the big dataset and model complexity 
# Split general dataset to provide separate dataset for attacker and defender
defender_dataset, attacker_dataset = train_test_split(dataset, random_state=42, stratify=dataset.Label, shuffle=True, test_size=0.5)

# Datasets saving 
pkl.dump(defender_dataset, open(ds + '/datasets/<attack_name> or <training>/defender_dataset.pkl', 'wb'))
pkl.dump(attacker_dataset, open(ds + '/datasets/<attack_name> or <training>/attacker_dataset.pkl', 'wb'))

Benign in dataset: 60632
Malicious in dataset: 60632
Ben / Mal Ratio: 1.0


OSError: [Errno 22] Invalid argument: 'CTU/datasets/<attack_name> or <training>/defender_dataset.pkl'

## If datasets already saved

In [13]:
# Datasets Loading
defender_dataset = pkl.load(open(ds + '/datasets/training/defender_dataset.pkl', 'rb'))
attacker_dataset = pkl.load(open(ds + '/datasets/training/attacker_dataset.pkl', 'rb'))

# Substitute = Attacker
# Split x_train and x_test to train and evaluate substitute and defender models 
X_train_defender, X_test_defender, y_train_defender, y_test_defender = train_test_split(defender_dataset.drop(columns = ["Label"]), defender_dataset.Label, test_size=0.25)
X_train_substitute, X_test_substitute, y_train_substitute, y_test_substitute = train_test_split(attacker_dataset.drop(columns = ["Label"]), attacker_dataset.Label, test_size=0.25)

# Recover the complete test datasets with labels for the aversarial instances generation and evaluation
test_defender = pd.concat([X_test_defender, y_test_defender], axis=1)
test_substitute = pd.concat([X_test_substitute, y_test_substitute], axis=1)
train_defender = pd.concat([X_train_defender, y_train_defender], axis=1)

In [14]:
# Pre-processing

## Data Normalization
# https://stackoverflow.com/questions/49444262/normalize-data-before-or-after-split-of-training-and-testing-data
# Data normalized on the corresponding training set

# For attacker DNN
scaler = MinMaxScaler()
scaler.fit(X_train_substitute.to_numpy()) # to numpy to avoid the warning later when we predict with a numpy instead of dataframe
X_train_substitute_scaled = scaler.transform(X_train_substitute)
X_test_substitute_scaled = scaler.transform(X_test_substitute)  # normalize test set on training set

# For defender DNN
scaler2 = MinMaxScaler()
scaler2.fit(X_train_defender.to_numpy())
X_train_defender_scaled = scaler2.transform(X_train_defender)
X_test_defender_scaled = scaler2.transform(X_test_defender) # normalize test set on training set



# Models Initialization & Training

In [17]:
# Model 1 initialization and training - DNN (DEFENDER)

# One-hot encoding and convertion into tensors because TensorFlow DNN
y_train_defender_ohe = pd.get_dummies(y_train_defender)
y_train_defender_ohe_tf = tf.convert_to_tensor(y_train_defender_ohe, np.float32)

y_test_defender_ohe = pd.get_dummies(y_test_defender)
y_test_defender_ohe_tf = tf.convert_to_tensor(y_test_defender_ohe, np.float32)

# Calculate the weights for each class so that we can balance the data. Safe umbalanced data
# new loss function
# https://stackoverflow.com/questions/43390162/class-weights-in-binary-classification-model-with-keras
def weighted_binary_crossentropy( y_true, y_pred, weight=1. ) :
    y_true = K.clip(y_true, K.epsilon(), 1-K.epsilon())
    y_pred = K.clip(y_pred, K.epsilon(), 1-K.epsilon())
    logloss = -(y_true * K.log(y_pred) * weight + (1 - y_true) * K.log(1 - y_pred))
    return K.mean( logloss, axis=-1)

output_number = 2
eval_metric = 'categorical_accuracy'
activ_out = 'softmax'
neurons_number = 256
lr = 0.01
features_number = X_train_defender_scaled.shape[1]

model = tf.keras.Sequential([
    tf.keras.layers.Dense(neurons_number, input_shape=(features_number,), activation="relu"),
    tf.keras.layers.Dense(neurons_number, activation="relu"),
    tf.keras.layers.Dense(output_number, activation=activ_out)
])
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
    loss = weighted_binary_crossentropy,
    metrics=[eval_metric]
)

model.fit(x=X_train_defender_scaled, y=y_train_defender_ohe_tf, epochs=10, batch_size=100, verbose=1)
model.evaluate(x=X_test_defender_scaled, y=y_test_defender_ohe_tf, verbose=1)

# Model saving 
tf.keras.models.save_model(model, ds + '/models/dnndefender')

# Uncomment all before if you already trained the model
# Model Loading
# model = tf.keras.models.load_model(ds + '/models/dnndefender', custom_objects={'weighted_binary_crossentropy': weighted_binary_crossentropy})

# Evaluation
y_pred = model.predict(X_test_defender_scaled)
y_pred_vect = np.argmax(y_pred,1)
print(classification_report(y_true=y_test_defender, y_pred=y_pred_vect))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: CTU/models/dnndefender\assets


ValueError: Classification metrics can't handle a mix of multilabel-indicator and binary targets

In [None]:
# Model Initialization and training - Random Forest (DEFENDER)

# Bootstrap = True by default (split dataset to train sub model with sub dataset)
# Max feature = sqrt(nb feature), means that when it can, the best split is sqrt(f)
model2 = RandomForestClassifier(n_estimators = 200, n_jobs = -1, random_state=0)

# No need to scale data as in Neural Network
model2.fit(X_train_defender, y_train_defender)

# Model saving 
pkl.dump(model2, open(ds + '/models/rfdefender.pkl', 'wb'))

# Uncomment all before if you already trained the model
# Model Loading
# model2 = pkl.load(open(ds + '/models/rfdefender.pkl', 'rb'))

# Evaluation
pred = model2.predict(X_test_defender)

# Compute f1, precision and recall score.
matrix = classification_report(y_true=y_test_defender, y_pred=pred)
print(matrix)

In [None]:
# Model 3 - ADABoost (DEFENDER)
model3 = AdaBoostClassifier(n_estimators=200, learning_rate=0.5, random_state=0)
model3.fit(X_train_defender, y_train_defender)

# Model saving 
pkl.dump(model3, open(ds + '/models/adaboostdefender.pkl', 'wb'))

# Uncomment all before if you already trained the model
# Model Loading
# model3 = pkl.load(open(ds + '/models/adaboostdefender.pkl', 'rb'))

# Evaluation
pred = model3.predict(X_test_defender)
matrix = classification_report(y_true=y_test_defender, y_pred=pred)
print(matrix)

In [None]:
# Model 4 - KNN (DEFENDER)

model4 = KNeighborsClassifier(n_neighbors=3)
model4.fit(X_train_defender, y_train_defender)

# Model saving 
pkl.dump(model4, open(ds + '/models/knndefender.pkl', 'wb'))

# Uncomment all before if you already trained the model
# Model Loading
# model4 = pkl.load(open(ds + '/models/knndefender.pkl', 'rb'))

# Evaluation
pred = model4.predict(X_test_defender)
matrix = classification_report(y_true=y_test_defender, y_pred=pred)
print(matrix)

In [None]:
# Model 5 - Logistic regression (DEFENDER)
model5 = LogisticRegression(max_iter=1000, penalty="l1", solver="saga", random_state=0) # solver lbfgs dans paramètres
model5.fit(X_train_defender_scaled, y_train_defender)

# Model saving 
pkl.dump(model5, open(ds + '/models/lrdefender.pkl', 'wb'))

# Uncomment all before if you already trained the model
# Model Loading
# model5 = pkl.load(open(ds + '/models/lrdefender.pkl', 'rb'))

# Evaluation
pred = model5.predict(X_test_defender_scaled)
matrix = classification_report(y_true=y_test_defender, y_pred=pred)
print(matrix)

In [None]:
# Model 6 -  Decision Tree (DEFENDER)
model6 = DecisionTreeClassifier(criterion="entropy", random_state=0)
model6.fit(X_train_defender, y_train_defender)

# Model saving 
pkl.dump(model6, open(ds + '/models/dtdefender.pkl', 'wb'))

# Uncomment all before if you already trained the model
# Model Loading
# model6 = pkl.load(open(ds + '/models/dtdefender.pkl', 'rb'))

# Evaluation
pred = model6.predict(X_test_defender)
matrix = classification_report(y_true=y_test_defender, y_pred=pred)
print(matrix)

## Important - Do the same with different meta-parameters for the attacker with their corresponding dataset

In [19]:
# Model 7 initialization and training - DNN (ATTACKER)

# Pre-processing for the training
y_train_substitute_ohe = pd.get_dummies(y_train_substitute)
y_train_substitute_ohe_tf = tf.convert_to_tensor(y_train_substitute_ohe, np.float32)

y_test_substitute_ohe = pd.get_dummies(y_test_substitute)
y_test_substitute_ohe_tf = tf.convert_to_tensor(y_test_substitute_ohe, np.float32)

# Calculate the weights for each class so that we can balance the data. Safe umbalanced data
# new loss function
# https://stackoverflow.com/questions/43390162/class-weights-in-binary-classification-model-with-keras
def weighted_binary_crossentropy( y_true, y_pred, weight=1. ) :
    y_true = K.clip(y_true, K.epsilon(), 1-K.epsilon())
    y_pred = K.clip(y_pred, K.epsilon(), 1-K.epsilon())
    logloss = -(y_true * K.log(y_pred) * weight + (1 - y_true) * K.log(1 - y_pred))
    return K.mean( logloss, axis=-1)

output_number = 2
eval_metric = 'categorical_accuracy'
activ_out = 'softmax'
neurons_number = 128
lr = 0.01
features_number = X_train_substitute_scaled.shape[1]

model7 = tf.keras.Sequential([
    tf.keras.layers.Dense(neurons_number, input_shape=(features_number,), activation="relu"),
    tf.keras.layers.Dense(neurons_number, activation="relu"),
    tf.keras.layers.Dense(neurons_number, activation="relu"),
    tf.keras.layers.Dense(output_number, activation=activ_out)
])
model7.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
    loss = weighted_binary_crossentropy,
    metrics=[eval_metric]
)


model7.fit(x=X_train_substitute_scaled, y=y_train_substitute_ohe_tf, epochs=10, batch_size=100, verbose=1)
model7.evaluate(x=X_test_substitute_scaled, y=y_test_substitute_ohe_tf, verbose=1)

# Model saving 
tf.keras.models.save_model(model7, ds + '/models/dnnattacker')

# Model Loading
# model7 = tf.keras.models.load_model(ds + '/models/dnnattacker', custom_objects={'weighted_binary_crossentropy': weighted_binary_crossentropy})

y_pred = model7.predict(X_test_substitute_scaled)
y_pred_vect = np.argmax(y_pred,1)
print(classification_report(y_true=y_test_substitute, y_pred=y_pred_vect))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: CTU/models/dnnattacker\assets
              precision    recall  f1-score   support

           0       0.99      0.93      0.96     15135
           1       0.93      0.99      0.96     14974

    accuracy                           0.96     30109
   macro avg       0.96      0.96      0.96     30109
weighted avg       0.96      0.96      0.96     30109



# Evasion attack - Adversarial algorithm execution

## Configuration of the different settings for the generation

In [20]:
# Give the dataset where perform adversarial generation. Defender dataset is used for the defense
dataset_input = test_substitute # From the attacker

# Choose the model to generate adv example as parameter (depends of your chosen dataset)
model_input = model7

# The mask combinaisons that can be adapted in function of the studied domain. Depends of the manipulable factors
combinaisons = [ # Don't take [0000] because not relevant, so 7 combinaisons. [0001] = duration, [0010] = totpkt et [1100] in/outbytes (most difficult to modify)
    [0,0,0,1],
    [0,0,1,0],
    [0,0,1,1],
    [0,1,0,0],
    [0,1,0,1],
    [0,1,1,0],
    [0,1,1,1],
    [1,0,0,0],
    [1,0,0,1],
    [1,0,1,0],
    [1,0,1,1],
    [1,1,0,0],
    [1,1,0,1],
    [1,1,1,0],
    [1,1,1,1]
]

ratio_mean_out_in = []
dif_mean_out_in = []

vect = np.array([1.,1.])

# Define max value of each modified feature in the general dataset to project too big values on these max
# It's the max value of the attacker or defender dataset, specified in parameter
max_dur = dataset_input['Dur'].max()
max_pkts = dataset_input['TotPkts'].max()
max_out = dataset_input['OutBytes'].max()
max_in = dataset_input['InBytes'].max()

# Used to generate Adv Ex. Don't take the label to generate adv ex
ben_dataset = dataset_input.loc[dataset_input['Label'] == 0]
mal_dataset = dataset_input.loc[dataset_input['Label'] == 1]

# For the first method - Mean ratio used to have the distance between benign and malicious traffic
ratio_mean_dur = mal_dataset[['Dur']].mean() / ben_dataset[['Dur']].mean()
ratio_mean_dur = ratio_mean_dur[0]
ratio_mean_pkts = mal_dataset[['TotPkts']].mean() / ben_dataset[['TotPkts']].mean()
ratio_mean_pkts = ratio_mean_pkts[0]
ratio_mean_out = mal_dataset[['OutBytes']].mean() / ben_dataset[['OutBytes']].mean()
ratio_mean_out_in.append(ratio_mean_out[0])
ratio_mean_in = mal_dataset[['InBytes']].mean() / ben_dataset[['InBytes']].mean()
ratio_mean_out_in.append(ratio_mean_in[0])

# Mean determination to determine during peturbation generation the direction of the perturbation (negative or positive)
ben_mean_dur = ben_dataset['Dur'].mean()
ben_mean_pkts = ben_dataset['TotPkts'].mean()
ben_mean_out = ben_dataset['OutBytes'].mean()
ben_mean_in = ben_dataset['InBytes'].mean()

# For the second method, Mean difference used to have the Euclidian distance between benign and malicious traffic
# We take the absolute value to avoid apposite perturbation
dif_mean_dur = ben_dataset[['Dur']].mean() - mal_dataset[['Dur']].mean()
dif_mean_dur = abs(dif_mean_dur[0])
dif_mean_pkts = ben_dataset[['TotPkts']].mean() - mal_dataset[['TotPkts']].mean()
dif_mean_pkts = abs(dif_mean_pkts[0])
dif_mean_out = ben_dataset[['OutBytes']].mean() - mal_dataset[['OutBytes']].mean()
dif_mean_out_in.append(abs(dif_mean_out[0]))
dif_mean_in = ben_dataset[['InBytes']].mean() - mal_dataset[['InBytes']].mean()
dif_mean_out_in.append(abs(dif_mean_in[0]))

# Reduce the dataset for the tests to speed up the generation. Just take 10K instances here
mal_dataset_reduced, not_used = train_test_split(mal_dataset.drop(columns = ["Label"]), shuffle=True, train_size=(10000/mal_dataset.shape[0]), random_state=42)

## Adversarial algorithms

In [None]:
# For CICIDS

adv_ex = []
total_ex = []

tot_nb_of_steps = [] # Used to know what the needed mean steps to create an adversarial example
nb_of_needed_step = 0

tot_masks = []# Used to know what the most used mask to create an adversarial example
index_of_mask = 0

max_ratio = dataset_input['RatioOutIn'].max() # Max value in RatioOutIn for the semantic constraints

# compute the time taken
start = time.process_time()
with alive_bar(len(mal_dataset_reduced)) as bar:
    # For each malicious instance
    for index, row in mal_dataset_reduced.iterrows():
        breaked = False
        perturb_direction = []
        
        # Check the direction of perturbation for the 4 instance features
        if(row[1] <= ben_mean_out):
            perturb_direction.append(1)
        else:
            perturb_direction.append(-1)
            
        if(row[2] <= ben_mean_in):
            perturb_direction.append(1)
        else:
            perturb_direction.append(-1)
            
        if(row[3] <= ben_mean_pkts):
            perturb_direction.append(1)
        else:
            perturb_direction.append(-1)    
            
        if(row[0] <= ben_mean_dur): 
            perturb_direction.append(1)
        else:
            perturb_direction.append(-1)
        
        # Max 6 iterations of iterative perturbation to try to get benign instance. Can be changed (ex: 10 to have 10 - 100%) for the second method
        for i in range(1, 7):
            nb_of_needed_step += 1 # start directly at the round 1
            # Iterate while not benign 
            if(breaked==False):
                # For each 7 combinations of perturbations
                for combi in combinaisons:
                    index_of_mask += 1 # check which mask is used
                    # add perturbation to the autorized features
                    
                    adv = np.array(row)
                    
                    # First method taking the mean ratio between attack and benign datasets to determine perturbation
                    # Factor C here must be really more important               
                    '''perturb1 = np.array(combi[0:2]) * (vect * ratio_mean_out_in * i * perturb_direction[0:2]) # Verify for CICIDS
                    perturb2 = np.array(combi[2]) * (ratio_mean_pkts * i * perturb_direction[2])
                    perturb3 = np.array(combi[3]) * (ratio_mean_dur * i * perturb_direction[3])'''
                    
                    # Second method taking the mean difference
                    
                    perturb1 = np.array(combi[0:2]) * (vect * dif_mean_out_in * (i*0.2) * perturb_direction[0:2])  # dunno how to avoid the error without vect
                    perturb2 = np.array(combi[2]) * (dif_mean_pkts * (i*0.05) * perturb_direction[2])
                    perturb3 = np.array(combi[3]) * (dif_mean_dur * (i*0.003) * perturb_direction[3])
                    
                    adv[1:3] = adv[1:3] + perturb1
                    adv[3] = adv[3] + perturb2
                    adv[0] = adv[0] + perturb3
                    
                    # Syntactic Constraints
                    # Add projection on the max value present in the dataset to keep the physical limitation
                    if(adv[0] > max_dur):
                        adv[0] = max_dur
                    if(adv[2] > max_out):
                        adv[2] = max_out
                    if(adv[1] > max_in):
                        adv[1] = max_in
                    if(adv[3] > max_pkts):
                        adv[3] = max_pkts
                                
                    # Add the semantic contraints
                    # Total number of Bytes in the communication. Sum of OutBytes and InBytes feature values.
                    adv[4] = adv[1]+adv[2]
                    # Average number of bytes exchanged per packet. Ratio between TotBytes and TotPkts.
                    adv[5] = adv[4]/adv[3]
                    # Average number of bytes exchanged per second. Ratio between TotBytes and Duration.
                    adv[6] = adv[4]/adv[0]
                    # Average number of packets exchanged per second. Ratio between TotPkts and Duration.
                    adv[7] = adv[3]/adv[0]
                    
                    # Ratio between OutBytes and InBytes
                    if(adv[1] == 0 and adv[2] != 0):
                        adv[8] = max_ratio # It's the maximum value in the dataset to replace the infinity value
                    # If In and Out = 0, ratio is 0. 0/0 (Maybe not necessary)
                    if(adv[1] == 0 and adv[2] == 0):
                        adv[8] = 0
                    # Ratio by default when inbytes has a value x/y
                    if(adv[1] != 0):
                        adv[8] = adv[2]/adv[1]
                    # if there is new bytes, normaly there is also at least 1 packet 
                    if(adv[3] == 0 and adv[4] > 0):
                        adv[3] = 1 
                    
                    adv2 = [] # used to fit with the input of the model because normaly take a matrix, so need the matrix notation, even for a vector
                    adv2.append(adv)
                    
                    adv2_scaled = scaler.transform(adv2) # For DNN, if not the case, must be commented
                    test = model_input.predict(adv2_scaled) # For DNN
                    test = np.argmax(test,1) # For DNN
                    #test = model_input.predict(adv2) # For other model than DNN
                    if (test == 0): # benign break
                        adv_ex.append(adv) # adv_ex contains all adversarial examples that fool the classifier
                        breaked = True
                        tot_masks.append(index_of_mask)
                        tot_nb_of_steps.append(nb_of_needed_step)
                        break
            index_of_mask = 0
        nb_of_needed_step = 0            
        total_ex.append(adv) # Total adversarial examples. append the final created adv ex that fool or not 
        bar()

end = time.process_time()-start
print("Time taken to generate: " + str(end) + " seconds")

In [21]:
# For CTU

adv_ex = []
total_ex = []

tot_nb_of_steps = [] # Used to know what the needed mean steps to create an adversarial example
nb_of_needed_step = 0

tot_masks = []# Used to know what the most used mask to create an adversarial example
index_of_mask = 0

max_ratio = dataset_input['RatioOutIn'].max() # Max value in RatioOutIn for the semantic constraints
start = time.process_time()

with alive_bar(len(mal_dataset_reduced)) as bar:
    # For each malicious instance
    for index, row in mal_dataset_reduced.iterrows():
        breaked = False
        perturb_direction = []
        
        # Check the direction of perturbation for the 4 instance features
        if(row[6] <= ben_mean_out): # Out
            perturb_direction.append(1)
        else:
            perturb_direction.append(-1)
            
        if(row[7] <= ben_mean_in): # In
            perturb_direction.append(1)
        else:
            perturb_direction.append(-1)
            
        if(row[8] <= ben_mean_pkts): # TotPkts
            perturb_direction.append(1)
        else:
            perturb_direction.append(-1)    
            
        if(row[2] <= ben_mean_dur): # Dur
            perturb_direction.append(1)
        else:
            perturb_direction.append(-1)
        
        # Max 6 iterations of iterative perturbation to try to get benign instance
        for i in range(1, 7):
            nb_of_needed_step += 1 # start directly at the round 1
            # Iterate while not benign 
            if(breaked==False):
                # For each 7 combinations of perturbations
                for combi in combinaisons:
                    index_of_mask += 1 # check which mask is used
                    # add perturbation to the autorized features
                    
                    adv = np.array(row)
                    
                    # First method taking the mean ratio between attack and benign datasets to determine perturbation
                    '''perturb1 = np.array(combi[0:2]) * (vect * ratio_mean_out_in * (i*5) * perturb_direction[0:2])
                    perturb2 = np.array(combi[2]) * (ratio_mean_pkts * (i*5) * perturb_direction[1])
                    perturb3 = np.array(combi[3]) * (ratio_mean_dur * (i*5) * perturb_direction[2])'''
                    
                    # Second method taking the mean difference
                    perturb1 = np.array(combi[0:2]) * (vect * dif_mean_out_in * (i*0.06) * perturb_direction[0:2])
                    perturb2 = np.array(combi[2]) * (dif_mean_pkts * (i*0.25) * perturb_direction[1])
                    perturb3 = np.array(combi[3]) * (dif_mean_dur * (i*0.01) * perturb_direction[2])
                    
                    # Addition of crafted perturbation
                    adv[7] = adv[7] + perturb1[1] # InBytes
                    adv[6] = adv[6] + perturb1[0] # OutBytes
                    adv[2] = adv[2] + perturb3 # Duration
                    adv[8] = adv[8] + perturb2 # Tot Packets # cast in INT to keep only the integer value
                    
                    # Syntactic Constraints
                    # Add projection on the max value present in the dataset to keep the physical limitation
                    if(adv[2] > max_dur):
                        adv[2] = max_dur
                    if(adv[6] > max_out):
                        adv[6] = max_out
                    if(adv[7] > max_in):
                        adv[7] = max_in
                    if(adv[8] > max_pkts):
                        adv[8] = max_pkts
                    
                    # Add the Semantic Contraints
                    # Total number of Bytes in the communication. Sum of OutBytes and InBytes feature values.
                    adv[9] = adv[6]+adv[7] # TotBytes
                    # Average number of bytes exchanged per packet. Ratio between TotBytes and TotPkts.
                    adv[11] = adv[9]/adv[8] # BytesPerPkt
                    # Average number of bytes exchanged per second. Ratio between TotBytes and Duration.
                    adv[10] = adv[9]/adv[2] # BytesPerSec
                    # Average number of packets exchanged per second. Ratio between TotPkts and Duration.
                    adv[12] = adv[8]/adv[2]
                    
                    # Ratio between OutBytes and InBytes
                    if(adv[7] == 0 and adv[6] != 0):
                        adv[13] = max_ratio # It's the maximum value in the dataset to replace the undefined value when x/0
                    # If In and Out = 0, ratio is 0. 0/0 (Maybe not necessary)
                    if(adv[7] == 0 and adv[6] == 0):
                        adv[13] = 0
                    # Ratio by default when inbytes has a value x/y
                    if(adv[7] != 0):
                        adv[13] = adv[6]/adv[7] # Ratio Out/In
                    # if there is new bytes, normaly there is also at least 1 packet
                    if(adv[8] == 0 and adv[9] > 0):
                        adv[8] = 1 # Maybe change this part
                        
                    adv2 = [] # used to fit with the input of the model because normaly take a matrix, so need the matrix notation, even for a vector
                    adv2.append(adv)
                    
                    adv2_scaled = scaler.transform(adv2) # For DNN
                    test = model_input.predict(adv2_scaled)
                    test = np.argmax(test,1) # For DNN
                    #test = model_input.predict(adv2) # For other model than DNN
                    if (test == 0): # benign break
                        adv_ex.append(adv) # adv_ex contains all adversarial examples that fool the classifier
                        breaked = True
                        tot_masks.append(index_of_mask)
                        tot_nb_of_steps.append(nb_of_needed_step)
                        break
            index_of_mask = 0
        nb_of_needed_step = 0            
        total_ex.append(adv) # Total adversarial examples. append the final created adv ex that fool or not 
        bar()
        
end = time.process_time()-start
print("Time taken to generate: " + str(end) + " seconds")

|▎⚠︎                                      | (!) 61/10000 [1%] in 46.5s (1.31/s) 


KeyboardInterrupt: 

In [None]:
# Evaluation 

# Adversarial prediction on DNN Defender

mean_steps = np.mean(tot_nb_of_steps)
print("The mean steps needed to generate adv examples is: " + str(round(mean_steps,1)))

# total_ex_pd = pd.DataFrame(total_ex, columns = mal_dataset.columns) # To see distributions
truevalue = np.ones(len(total_ex)) # Recover the true value (all 1 because all malicious)
total_ex_scaled = scaler.transform(total_ex) # For DNN
pred = model.predict(total_ex_scaled) 
# pred = model.predict(total_ex)
pred = np.argmax(pred,1) # For DNN

matrix = classification_report(y_true=truevalue, y_pred=pred)
print(matrix)

# Distribution of the steps to generate adv ex

plt.rcParams.update({'figure.figsize':(7,5), 'figure.dpi':100})

# Plot Histogram
plt.hist(tot_nb_of_steps, bins=30)
plt.gca().set(title='Step distribution', ylabel='Frequency');

# Distribution of masks used to generate adv ex

# Plot Histogram
plt.hist(tot_masks, bins=30)
plt.gca().set(title='Mask distribution', ylabel='Frequency');

# Compute the average perturbation rate
perturb_diff = total_ex - mal_dataset_reduced.to_numpy()
perturb_diff_mean = np.mean(perturb_diff,axis=0)
perturb_diff_max = np.max(perturb_diff,axis=0)

In [22]:
# Post processing + Exports adversarial and clean sets (for attacker)
# Convert to Pandas dataframe
adv_instances = pd.DataFrame(adv_ex, columns = mal_dataset_reduced.columns)
# Relabelisation of adversarial and non-adversarial instances
labelised_adv_instances = adv_instances.assign(Label=1)
# Save Adversarial dataset        
pkl.dump(labelised_adv_instances, open(ds + '/defense/defender_test_adv_instances.pkl', 'wb'))

# Concat clean (malicious + benign dataset) dataset
clean_instances = pd.concat([ben_dataset.drop(columns = ["Label"])[:1250], mal_dataset_reduced[:1250]], ignore_index = True)
# Relabelisation of adversarial and non-adversarial instances
labelised_clean_instances = clean_instances.assign(Label=0)
# Save dataset
pkl.dump(labelised_clean_instances, open(ds + '/defense/defender_test_clean_instances.pkl', 'wb'))

# Exports distributions of attack
pkl.dump(tot_nb_of_steps, open(ds + '/defense/attacker_nb_steps.pkl', 'wb'))
pkl.dump(tot_masks, open(ds + '/defense/attacker_masks.pkl', 'wb'))

# Adversarial instances recovering - ONLY USE TO RECOVER ADV INSTANCES
adv_ex = pkl.load(open(ds + '/defense/mlp/attacker_adv_instances.pkl', 'rb'))
adv_ex = adv_ex.drop(columns = ["Label"])
total_ex = adv_ex

FileNotFoundError: [Errno 2] No such file or directory: 'CTU/defense/mlp/attacker_adv_instances.pkl'

In [None]:
# Exports instances for the defense

# Exports benign sets (for defender)
# TRAIN
# pkl.dump(ben_dataset.drop(columns = ["Label"])[:4000], open(ds + '/defense/benign_train_instances.pkl', 'wb'))

# TEST
# pkl.dump(ben_dataset.drop(columns = ["Label"])[:1000], open(ds + '/defense/benign_test_instances.pkl', 'wb'))

# Exports adversarial and malicious sets (for defender)

# Labelised after, when all adversarial examples are generated

# export malicious set (TRAIN)
pkl.dump(mal_dataset_reduced[:2000], open(ds + '/defense/'+botnet[attackindex]+'/train_mal_instances.pkl', 'wb'))
 
# export adversarial set (TRAIN)
pkl.dump(adv_ex, open(ds + '/defense/'+botnet[attackindex]+'/train_adv_instances.pkl', 'wb'))      

# export malicious set (TEST)
#pkl.dump(mal_dataset_reduced[:500], open(ds + '/defense/'+botnet[attackindex]+'/test_mal_instances.pkl', 'wb'))

# export adversarial set (TEST)
#pkl.dump(adv_ex, open(ds + '/defense/'+botnet[attackindex]+'/test_adv_instances.pkl', 'wb'))  

# Post processing, replace train by test or the reverse
mal_instances_all = pd.DataFrame()

# malicious concat
for bot in botnet:
    mal_instances = pkl.load(open(ds + '/defense/'+bot+'/test_mal_instances.pkl', 'rb'))
    mal_instances_all = pd.concat([mal_instances_all, mal_instances], ignore_index = True)
    
# benign concat + clean_instances label
ben_instances = pkl.load(open(ds + '/defense/test_benign_instances.pkl', 'rb'))

clean_instances = pd.concat([mal_instances_all, ben_instances], ignore_index = True)

labelised_clean_instances = clean_instances.assign(Label=0)

pkl.dump(labelised_clean_instances, open(ds + '/defense/defender_test_clean_instances.pkl', 'wb'))


adv_instances_all = pd.DataFrame()

# adv instances concat
for bot in botnet:
    adv_instances = pkl.load(open(ds + '/defense/'+bot+'/test_adv_instances.pkl', 'rb'))
    adv_instances_pd = pd.DataFrame(adv_instances, columns = mal_instances_all.columns)
    adv_instances_all = pd.concat([adv_instances_all, adv_instances_pd], ignore_index=True)
    
labelised_adv_instances = adv_instances_all.assign(Label=1)

pkl.dump(labelised_adv_instances, open(ds + '/defense/defender_test_adv_instances.pkl', 'wb'))

# Defense - Adversarial detection

In [None]:
# Initialization

# For CTU and BOTNET
'''modifiablefeature = [2,6,7,8]
dependantfeature = [9,10,11,12,13]
nonmodifiablefeature = [0,1,3,4,5,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37]'''

# For CICIDS2018
modifiablefeature = [0,1,2,3]
dependantfeature = [4,5,6,7,8]
nonmodifiablefeature = [9,10,11,12]

feature_map = []
feature_map.append(modifiablefeature)
feature_map.append(dependantfeature)
feature_map.append(nonmodifiablefeature)

In [None]:
# Preprocessing for training and testing (Defender)
# Recover train and test set containing adversarial and non-adversarial instances
labelised_adv_instances_train = pkl.load(open(ds + '/defense/defender_train_adv_instances.pkl', 'rb'))
labelised_clean_instances_train = pkl.load(open(ds + '/defense/defender_train_clean_instances.pkl', 'rb'))

labelised_adv_instances_test = pkl.load(open(ds + '/defense/defender_test_adv_instances.pkl', 'rb'))
labelised_clean_instances_test = pkl.load(open(ds + '/defense/defender_test_clean_instances.pkl', 'rb'))

# Concatenation of adversarial and non-adversarial instances
labelised_instances_train = pd.concat([labelised_adv_instances_train, labelised_clean_instances_train], ignore_index = True)
labelised_instances_train = shuffle(labelised_instances_train)

labelised_instances_test = pd.concat([labelised_adv_instances_test, labelised_clean_instances_test], ignore_index = True)
labelised_instances_test = shuffle(labelised_instances_test)

# Adversarial and non-adversarial X_train of the defender to train
X_train_adv = labelised_instances_train.drop(columns = ["Label"])
y_train_adv = labelised_instances_train.Label

# Adversarial and non-adversarial X_test of the defender to evaluate
X_test_adv = labelised_instances_test.drop(columns = ["Label"])
y_test_adv = labelised_instances_test.Label

# Normalization
scaler3 = MinMaxScaler()
scaler3.fit(X_train_adv.to_numpy())
X_train_adv_scaled = scaler3.transform(X_train_adv)
X_test_adv_scaled = scaler3.transform(X_test_adv) 

In [None]:
# Preprocessing for testing (Attacker)
labelised_adv_instances_attacker_test = pkl.load(open(ds + '/attack/' + botnet[attackindex] + '/rf/attacker_adv_instances.pkl', 'rb'))
labelised_clean_instances_attacker_test = pkl.load(open(ds + '/attack/' + botnet[attackindex] + '/rf/attacker_clean_instances.pkl', 'rb'))

#labelised_instances_attacker_test = pd.concat([labelised_adv_instances_attacker_test, labelised_clean_instances_attacker_test], ignore_index = True)
#labelised_instances_attacker_test = shuffle(labelised_instances_attacker_test)

X_test_adv_attacker = labelised_adv_instances_attacker_test.drop(columns = ["Label"])
y_test_adv_attacker = labelised_adv_instances_attacker_test.Label

X_test_adv_attacker_scaled = scaler2.transform(X_test_adv_attacker)

In [None]:
#%% Clusters Training - MLP

all_cluster = []

y_train_adv_ohe = pd.get_dummies(y_train_adv)
y_train_adv_ohe_tf = tf.convert_to_tensor(y_train_adv_ohe, np.float32)

y_test_adv_ohe = pd.get_dummies(y_test_adv)
y_test_adv_ohe_tf = tf.convert_to_tensor(y_test_adv_ohe, np.float32)

for features in feature_map:
    # Take feature for each cluster
    X_train_defender_scaled_cluster = X_train_adv_scaled[:, features]
    X_test_defender_scaled_cluster = X_test_adv_scaled[:, features] 
        
    output_number = 2
    eval_metric = 'categorical_accuracy'
    loss_fn = tf.keras.losses.BinaryCrossentropy()
    activ_out = 'softmax'
    neurons_number = 256
    lr = 0.01
    features_number = X_train_defender_scaled_cluster.shape[1]
    
    cluster = tf.keras.Sequential([
        tf.keras.layers.Dense(neurons_number, input_shape=(features_number,), activation="relu"),
        tf.keras.layers.Dense(neurons_number, activation="relu"),
        # tf.keras.layers.Dense(neurons_number, activation="relu"),
        tf.keras.layers.Dense(output_number, activation=activ_out)
    ])
    cluster.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
        loss = weighted_binary_crossentropy,
        metrics=[eval_metric]
    )
    
    
    cluster.fit(x=X_train_defender_scaled_cluster, y=y_train_adv_ohe_tf, epochs=10, batch_size=100, verbose=1)
    cluster.evaluate(x=X_test_defender_scaled_cluster, y=y_test_adv_ohe_tf, verbose=1)
    
    all_cluster.append(cluster)
    
# Model saving 
# pkl.dump(all_cluster, open('detectordefender.pkl', 'wb'))

# Clusters evaluation

# Model Loading
# all_cluster = pkl.load(open('detectordefender.pkl', 'rb'))

recalls = []

i = 0
# Evaluation
for cluster in all_cluster:
    X_test_defender_scaled_cluster = X_test_adv_scaled[:, feature_map[i]] 
    y_pred = cluster.predict(X_test_defender_scaled_cluster)
    y_pred_vect = np.argmax(y_pred,1)
    print(classification_report(y_true=y_test_adv, y_pred=y_pred_vect))
    metric_matrix = classification_report(output_dict = True, y_true=y_test_adv, y_pred=y_pred_vect)
    # extract recall and round to 2 after ,
    recalls.append(round(metric_matrix['weighted avg']['recall'],2))
    
    i+=1
    
# Recall extraction to set defensive wieghts (as importance of recall)
defensive_weights = recalls
   
    
#%% Attack part - Contextual Discounting (Cluster - Output decision)

# set must be chosen (defender adv test to evaluate or attacker adv)
X_adv_scaled = X_test_adv_attacker_scaled
y_adv = y_test_adv_attacker

'''X_adv_scaled = X_test_adv_scaled
y_adv = y_test_adv'''

cluster_pred = []

i = 0
for cluster in all_cluster:
    # Multiplication of probs by their defensive weights
    X_defender_scaled_cluster = X_adv_scaled[:, feature_map[i]] 
    y_pred = cluster.predict(X_defender_scaled_cluster)
    y_pred = y_pred * defensive_weights[i]
    
    cluster_pred.append(y_pred)
    
    i+=1
    
    
#%% Scaling to have 100% using Bayes theorem for the fusion 

all_fusion = []

# Bayesian fusion, sum of clusters probabilities 
preds = sum(cluster_pred)

# Normalization, for each prediction summed before, normalization in function of the class importance
for pred in preds:
    fusion = pred/sum(pred)
    all_fusion.append(fusion)

# Take only the class number (transform logits in decision)
detector_predicted_class = np.argmax(all_fusion, axis=1)
    
# General evaluation of the detector after fusion

print(classification_report(y_true=y_adv, y_pred=detector_predicted_class)) 

In [None]:
#%% Evaluation on the defender IDS with detector

# Extract all adversarial instances that passed the detector

passed_adv_ex = y_adv == detector_predicted_class
passed_adv_ex = passed_adv_ex[passed_adv_ex==False]

# Recover instances from dataset
joined_passed_adv_ex = X_test_adv_attacker.join(passed_adv_ex, how='inner')

# Relabel as malicious (normaly IDS will classify this instance as benign, but not sure with transerability)
joined_passed_adv_ex = joined_passed_adv_ex.assign(Label=1)

# Concat with original test set
new_X_test_defender = pd.concat([joined_passed_adv_ex, test_defender], ignore_index = True)
new_X_test_defender = shuffle(new_X_test_defender)

# Extract Label
new_y_test_defender = new_X_test_defender.Label

# Evaluate on the Defender MLP (IDS)
model = tf.keras.models.load_model(ds + '/models/dnndefender', custom_objects={'weighted_binary_crossentropy': weighted_binary_crossentropy})

# Evaluation
new_X_test_defender_scaled = scaler2.transform(new_X_test_defender.drop(columns = ["Label"])) 

y_pred = model.predict(new_X_test_defender_scaled)
y_pred_vect = np.argmax(y_pred,1)
print(classification_report(y_true=new_y_test_defender, y_pred=y_pred_vect))