In [36]:
# The wildcard import stays first so that every explicit import below wins
# over any same-named symbol it drags into the namespace.
from scapy.all import *

# Standard library
import codecs
import os
import re
import string
import sys
import time
from itertools import islice

# Third-party
import numpy as np
from numpy import newaxis
from numpy import savetxt
from nltk.corpus import stopwords 
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
import tensorflow as tf
tf.compat.v1.experimental.output_all_intermediates(True)

from joblib import dump, load

from sklearn.metrics import classification_report
from art.attacks.evasion import ProjectedGradientDescent
from art.attacks.evasion import ZooAttack
from art.attacks.evasion import FastGradientMethod
from art.attacks.evasion import CarliniL2Method
from art.attacks.evasion import SaliencyMapMethod
from art.attacks.evasion import DecisionTreeAttack

import sklearn
from art.estimators import BaseEstimator
from art.estimators.classification import SklearnClassifier
from art.estimators.classification import XGBoostClassifier
from art.estimators.classification import KerasClassifier
from art.estimators.classification import EnsembleClassifier
from art.estimators.classification.scikitlearn import ScikitlearnRandomForestClassifier

from tqdm import tqdm
import pandas as pd
import numpy as np
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
tf.compat.v1.disable_eager_execution()


In [10]:
# Import normal Dataset
def import_dataset(path='./Dataset/csv/Original/Attack_merge.csv', label_col=10,
                   test_size=0.2, random_state=None):
    """Load the header-less CSV dataset and split it into train and test sets.

    Parameters
    ----------
    path : str
        CSV file to read; it is parsed without a header row.
    label_col : int
        Column holding the class label; every other column is a feature.
    test_size : float
        Fraction of the rows placed in the test split.
    random_state : int or None
        Seed forwarded to ``train_test_split``. The default ``None`` keeps the
        original unseeded split (different on every call); pass an integer to
        make the split, and every file derived from it, reproducible.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test, X_train_ex, X_test_ex,
        y_train_ex, y_test_ex)`` as NumPy arrays. Each ``*_ex`` array is the
        matching plain array with one trailing axis of length 1 appended.
    """
    # Only the read needs the file handle; close it before the heavy work.
    with open(path, newline='') as csvfile:
        rows = pd.read_csv(csvfile, header=None)

    y = rows[label_col]
    x = rows.drop([label_col], axis=1)
    X_train, X_test, y_train, y_test = train_test_split(
        x, y, test_size=test_size, random_state=random_state)

    X_train = np.array(X_train)
    X_test = np.array(X_test)
    y_train = np.array(y_train)
    y_test = np.array(y_test)

    # Views with an extra trailing axis, passed to the CNN/LSTM attack paths.
    X_train_ex = X_train[:, :, newaxis]
    X_test_ex = X_test[:, :, newaxis]
    y_train_ex = y_train[:, newaxis]
    y_test_ex = y_test[:, newaxis]
    return X_train, X_test, y_train, y_test, X_train_ex, X_test_ex, y_train_ex, y_test_ex

In [3]:
# Import Model
def import_model():
    """Load every pre-trained model from ``./models``.

    Returns
    -------
    tuple
        ``(DT, RF, LR, XGB, SVM, KNN, DNN, CNN, LSTM)`` — the six joblib
        artefacts first, followed by the three Keras ``.h5`` models.
    """
    # joblib files are pickle-based: only load artefacts from a trusted source.
    joblib_files = (
        "./models/DT.joblib",
        "./models/RF.joblib",
        "./models/LR.joblib",
        "./models/XGB.joblib",
        "./models/SVM.joblib",
        "./models/KNN.joblib",
    )
    keras_files = (
        "./models/DNN4.h5",
        "./models/CNN.h5",
        "./models/LSTM.h5",
    )
    classical = [load(model_path) for model_path in joblib_files]
    deep = [tf.keras.models.load_model(model_path) for model_path in keras_files]
    return (*classical, *deep)

In [None]:
def DTA(model, model_, X_test, X_test_ex=None):
    """Run ART's decision-tree attack on ``X_test`` and save the result as CSV.

    Parameters
    ----------
    model : fitted scikit-learn decision tree
        Wrapped with ``SklearnClassifier`` before being attacked.
    model_ : str
        Label used in the banner and in the output file name
        (``DTA_<model_>_X_Test.csv``).
    X_test : numpy.ndarray
        2-D feature matrix to perturb.
    X_test_ex : optional
        Unused. Accepted (and now optional) so that both the three-argument
        and the four-argument call styles work.

    Returns
    -------
    numpy.ndarray
        The raw, un-rounded adversarial samples.
    """
    print("######## DTA_", model_," ########")
    out_dir = "./Dataset/csv/Attacked"
    # Create the folder (and any missing parents) before the attack runs so a
    # filesystem problem shows up immediately rather than after the work.
    os.makedirs(out_dir, exist_ok=True)

    DT_adv = SklearnClassifier(model)
    attack_DT = DecisionTreeAttack(DT_adv)
    adv = attack_DT.generate(X_test)

    # Round every column except the first, which is stored unrounded.
    # NOTE(review): presumably column 0 is a continuous feature — confirm.
    adv_int = np.insert(np.rint(adv[:, 1:]), 0, adv[:, 0], axis=1)
    np.savetxt(out_dir + "/DTA_" + model_ + "_X_Test.csv", adv_int, delimiter=",")
    return adv

In [None]:
def CW(model, model_, X_test, X_test_ex):
    """Run the Carlini & Wagner L2 attack and save the adversarial samples.

    Parameters
    ----------
    model : fitted estimator
        Model to attack; the ART wrapper is chosen from ``model_``.
    model_ : str
        Model label. ``'SVM'``, ``'LR'``, ``'DT'``, ``'RF'`` and ``'XGB'`` get
        dedicated wrappers; any other label is wrapped as a Keras model.
        ``'CNN'`` and ``'LSTM'`` are attacked on ``X_test_ex``.
    X_test : numpy.ndarray
        2-D feature matrix.
    X_test_ex : numpy.ndarray
        ``X_test`` with a trailing channel axis, used for CNN/LSTM.

    Returns
    -------
    numpy.ndarray
        The raw, un-rounded adversarial samples. A rounded copy is written to
        ``./Dataset/csv/Attacked/CW_<model_>_X_Test.csv``.
    """
    print("######## CW_", model_," ########")
    out_dir = "./Dataset/csv/Attacked"
    uses_channel_axis = model_ in ("CNN", "LSTM")

    if model_ == 'SVM':
        model_adv = SklearnClassifier(model, clip_values=(0, 10))
    elif model_ in ('LR', 'DT'):
        model_adv = SklearnClassifier(model)
    elif model_ == 'RF':
        model_adv = ScikitlearnRandomForestClassifier(model)
    elif model_ == 'XGB':
        model_adv = XGBoostClassifier(model=model, nb_features=X_test.shape[1], nb_classes=10)
    else:
        # NOTE(review): inputs are clipped to (0, 1) here, while the recorded
        # outputs show feature values in the hundreds — confirm the scaling.
        model_adv = KerasClassifier(model, clip_values=(0, 1))

    attack_CW = CarliniL2Method(model_adv, max_iter=20, verbose=True)

    # Create the folder (and missing parents) before the long-running attack so
    # a filesystem problem cannot discard hours of computation.
    os.makedirs(out_dir, exist_ok=True)

    adv = attack_CW.generate(X_test_ex if uses_channel_axis else X_test)

    # Round every feature except the first, which is stored unrounded.
    adv_int = np.insert(np.rint(adv[:, 1:]), 0, adv[:, 0], axis=1)
    if uses_channel_axis:
        # Drop the trailing channel axis so the array can be written as 2-D CSV.
        adv_int = adv_int[:, :, 0]
    np.savetxt(out_dir + "/CW_" + model_ + "_X_Test.csv", adv_int, delimiter=",")
    return adv

In [16]:
def JSMA(model, model_, X_test, X_test_ex):
    """Run the saliency-map (JSMA) attack and save the adversarial samples.

    Parameters
    ----------
    model : fitted estimator
        Model to attack; the ART wrapper is chosen from ``model_``.
    model_ : str
        Model label. ``'SVM'``, ``'LR'``, ``'DT'``, ``'RF'`` and ``'XGB'`` get
        dedicated wrappers; any other label is wrapped as a Keras model.
        ``'CNN'`` and ``'LSTM'`` are attacked on ``X_test_ex``.
    X_test : numpy.ndarray
        2-D feature matrix.
    X_test_ex : numpy.ndarray
        ``X_test`` with a trailing channel axis, used for CNN/LSTM.

    Returns
    -------
    numpy.ndarray
        The raw, un-rounded adversarial samples. A rounded copy is written to
        ``./Dataset/csv/Attacked/JSMA_<model_>_X_Test.csv``.
    """
    print("######## JSMA_",model_," ########")
    out_dir = "./Dataset/csv/Attacked"
    uses_channel_axis = model_ in ("CNN", "LSTM")

    if model_ == 'SVM':
        model_adv = SklearnClassifier(model, clip_values=(0, 10))
    elif model_ in ('LR', 'DT'):
        model_adv = SklearnClassifier(model)
    elif model_ == 'RF':
        model_adv = ScikitlearnRandomForestClassifier(model)
    elif model_ == 'XGB':
        model_adv = XGBoostClassifier(model=model, nb_features=X_test.shape[1], nb_classes=10)
    else:
        # NOTE(review): inputs are clipped to (0, 1) here, while the recorded
        # outputs show feature values in the hundreds — confirm the scaling.
        model_adv = KerasClassifier(model, clip_values=(0, 1))

    attack_JSMA = SaliencyMapMethod(classifier=model_adv)

    # Create the folder (and missing parents) before the long-running attack so
    # a filesystem problem cannot discard hours of computation.
    os.makedirs(out_dir, exist_ok=True)

    adv = attack_JSMA.generate(X_test_ex if uses_channel_axis else X_test)

    # Round every feature except the first, which is stored unrounded.
    adv_int = np.insert(np.rint(adv[:, 1:]), 0, adv[:, 0], axis=1)
    if uses_channel_axis:
        # Drop the trailing channel axis so the array can be written as 2-D CSV.
        adv_int = adv_int[:, :, 0]
    np.savetxt(out_dir + "/JSMA_" + model_ + "_X_Test.csv", adv_int, delimiter=",")
    return adv

In [15]:
def PGD(model, model_, X_test, X_test_ex):
    """Run projected gradient descent (L-inf) and save the adversarial samples.

    Parameters
    ----------
    model : fitted estimator
        Model to attack; the ART wrapper is chosen from ``model_``.
    model_ : str
        Model label. ``'SVM'``, ``'LR'``, ``'DT'``, ``'RF'`` and ``'XGB'`` get
        dedicated wrappers; any other label is wrapped as a Keras model.
        ``'CNN'`` and ``'LSTM'`` are attacked on ``X_test_ex``.
    X_test : numpy.ndarray
        2-D feature matrix.
    X_test_ex : numpy.ndarray
        ``X_test`` with a trailing channel axis, used for CNN/LSTM.

    Returns
    -------
    numpy.ndarray
        The raw, un-rounded adversarial samples. A rounded copy is written to
        ``./Dataset/csv/Attacked/PGD_<model_>_X_Test.csv``.
    """
    print("######## PGD_",model_," ########")
    out_dir = "./Dataset/csv/Attacked"
    uses_channel_axis = model_ in ("CNN", "LSTM")

    if model_ == 'SVM':
        model_adv = SklearnClassifier(model, clip_values=(0, 10))
    elif model_ in ('LR', 'DT'):
        model_adv = SklearnClassifier(model)
    elif model_ == 'RF':
        model_adv = ScikitlearnRandomForestClassifier(model)
    elif model_ == 'XGB':
        model_adv = XGBoostClassifier(model=model, nb_features=X_test.shape[1], nb_classes=10)
    else:
        # NOTE(review): inputs are clipped to (0, 1) here, while the recorded
        # outputs show feature values in the hundreds — confirm the scaling.
        model_adv = KerasClassifier(model, clip_values=(0, 1))

    attack_PGD = ProjectedGradientDescent(estimator=model_adv, norm=np.inf, eps=.3, eps_step=0.1, max_iter=20,
                               targeted=False, num_random_init=0, batch_size=128, verbose=False)

    # Create the folder (and missing parents) before the attack so a
    # filesystem problem surfaces before any computation is spent.
    os.makedirs(out_dir, exist_ok=True)

    adv = attack_PGD.generate(X_test_ex if uses_channel_axis else X_test)

    # Round every feature except the first, which is stored unrounded.
    adv_int = np.insert(np.rint(adv[:, 1:]), 0, adv[:, 0], axis=1)
    if uses_channel_axis:
        # Drop the trailing channel axis so the array can be written as 2-D CSV.
        adv_int = adv_int[:, :, 0]
    np.savetxt(out_dir + "/PGD_" + model_ + "_X_Test.csv", adv_int, delimiter=",")
    return adv

In [31]:
def ZOO(model, model_, X_test, X_test_ex):
    """Run the zeroth-order optimisation (ZOO) attack and save the samples.

    Parameters
    ----------
    model : fitted estimator
        Model to attack; the ART wrapper is chosen from ``model_``.
    model_ : str
        Model label. ``'SVM'``, ``'LR'``, ``'DT'``, ``'RF'`` and ``'XGB'`` get
        dedicated wrappers; any other label is wrapped as a Keras model.
        ``'CNN'`` and ``'LSTM'`` are attacked on ``X_test_ex``.
    X_test : numpy.ndarray
        2-D feature matrix.
    X_test_ex : numpy.ndarray
        ``X_test`` with a trailing channel axis, used for CNN/LSTM.

    Returns
    -------
    numpy.ndarray
        The raw, un-rounded adversarial samples. A rounded copy is written to
        ``./Dataset/csv/Attacked/ZOO_<model_>_X_Test.csv``.
    """
    print("######## ZOO_",model_," ########")
    out_dir = "./Dataset/csv/Attacked"
    uses_channel_axis = model_ in ("CNN", "LSTM")

    if model_ == 'SVM':
        model_adv = SklearnClassifier(model, clip_values=(0, 10))
    elif model_ in ('LR', 'DT'):
        model_adv = SklearnClassifier(model)
    elif model_ == 'RF':
        model_adv = ScikitlearnRandomForestClassifier(model)
    elif model_ == 'XGB':
        model_adv = XGBoostClassifier(model=model, nb_features=X_test.shape[1], nb_classes=10)
    else:
        # NOTE(review): inputs are clipped to (0, 1) here, while the recorded
        # outputs show feature values in the hundreds — confirm the scaling.
        model_adv = KerasClassifier(model, clip_values=(0, 1))

    attack_ZOO = ZooAttack(classifier=model_adv, confidence=0.0, targeted=False, learning_rate=1e-1, max_iter=20,
                    binary_search_steps=10, initial_const=1e-3, abort_early=True, use_resize=False,
                    use_importance=False, nb_parallel=1, batch_size=1, variable_h=0.2)

    # Create the folder (and missing parents) before the long-running attack so
    # a filesystem problem cannot discard hours of computation.
    os.makedirs(out_dir, exist_ok=True)

    adv = attack_ZOO.generate(X_test_ex if uses_channel_axis else X_test)

    # Round every feature except the first, which is stored unrounded.
    adv_int = np.insert(np.rint(adv[:, 1:]), 0, adv[:, 0], axis=1)
    if uses_channel_axis:
        # Drop the trailing channel axis so the array can be written as 2-D CSV.
        adv_int = adv_int[:, :, 0]
    np.savetxt(out_dir + "/ZOO_" + model_ + "_X_Test.csv", adv_int, delimiter=",")
    return adv

In [18]:
def FGSM(model, model_, X_test, X_test_ex):
    """Run the fast gradient sign method (eps=0.3) and save the samples.

    Parameters
    ----------
    model : fitted estimator
        Model to attack; the ART wrapper is chosen from ``model_``.
    model_ : str
        Model label. ``'SVM'`` and ``'RF'`` are wrapped with clip values
        (0, 10), ``'LR'`` and ``'XGB'`` get their own wrappers, ``'KNN'`` is
        rejected, and any other label is wrapped as a Keras model.
        ``'CNN'`` and ``'LSTM'`` are attacked on ``X_test_ex``.
    X_test : numpy.ndarray
        2-D feature matrix.
    X_test_ex : numpy.ndarray
        ``X_test`` with a trailing channel axis, used for CNN/LSTM.

    Returns
    -------
    numpy.ndarray
        The raw, un-rounded adversarial samples. A rounded copy is written to
        ``./Dataset/csv/Attacked/FGSM_<model_>_X_Test.csv``.

    Raises
    ------
    TypeError
        If ``model_`` is ``'KNN'``: the previous code tried to instantiate the
        abstract ``BaseEstimator`` and failed with the same exception type.
    """
    print("######## FGSM_",model_," ########")
    out_dir = "./Dataset/csv/Attacked"
    uses_channel_axis = model_ in ("CNN", "LSTM")

    # 'RF' used to have a second, unreachable branch further down; this is the
    # branch that actually ran for it, so behaviour is unchanged.
    if model_ in ('SVM', 'RF'):
        model_adv = SklearnClassifier(model, clip_values=(0, 10))
    elif model_ == 'LR':
        model_adv = SklearnClassifier(model)
    elif model_ == 'XGB':
        model_adv = XGBoostClassifier(model)
    elif model_ == 'KNN':
        raise TypeError(
            "FGSM is not available for KNN: art.estimators.BaseEstimator is "
            "abstract and cannot be used to wrap the model"
        )
    else:
        # NOTE(review): inputs are clipped to (0, 1) here, while the recorded
        # outputs show feature values in the hundreds — confirm the scaling.
        model_adv = KerasClassifier(model, clip_values=(0, 1))

    attack_FGSM = FastGradientMethod(estimator=model_adv, eps=0.3)

    # Create the folder (and missing parents) before generating samples so a
    # filesystem problem surfaces before any computation is spent.
    os.makedirs(out_dir, exist_ok=True)

    adv = attack_FGSM.generate(X_test_ex if uses_channel_axis else X_test)

    # Round every feature except the first, which is stored unrounded.
    adv_int = np.insert(np.rint(adv[:, 1:]), 0, adv[:, 0], axis=1)
    if uses_channel_axis:
        # Drop the trailing channel axis so the array can be written as 2-D CSV.
        adv_int = adv_int[:, :, 0]
    np.savetxt(out_dir + "/FGSM_" + model_ + "_X_Test.csv", adv_int, delimiter=",")
    return adv

In [35]:
# Build the train/test split (plain and channel-expanded views) and load every
# pre-trained model; all later cells rely on these global names.
X_train, X_test, y_train, y_test, X_train_ex, X_test_ex, y_train_ex, y_test_ex = import_dataset()
DT, RF, LR, XGB, SVM, KNN, DNN, CNN, LSTM = import_model()
# Attack status notes: DTA; FGSM (done, fast); CW (very slow); JSMA (progress bar never shows up); PGD (done); ZOO (very slow, still running)



In [39]:
# Persist the clean test split next to the adversarial CSVs so both can be
# compared row by row. The folder is created here because this cell runs
# before any attack function has had a chance to create it.
os.makedirs("./Dataset/csv/Attacked", exist_ok=True)
savetxt("./Dataset/csv/Attacked/Original_X_Test.csv", X_test, delimiter=",")
savetxt("./Dataset/csv/Attacked/Original_y_Test.csv", y_test, delimiter=",")

In [41]:
# Decision-tree attack against DT. All four arguments are passed so the call
# matches the DTA(model, model_, X_test, X_test_ex) signature.
DTA(DT, "DT", X_test, X_test_ex)

######## DTA_ DT  ########


Decision tree attack: 100%|██████████| 44730/44730 [00:16<00:00, 2791.89it/s]


array([[ 5.15089962e-05,  3.30000000e+02,  1.30000000e+01, ...,
         1.30000000e+01,  1.71000000e+02,  0.00000000e+00],
       [-9.98948000e-04,  4.87000000e+02,  0.00000000e+00, ...,
         0.00000000e+00,  1.69501000e+02,  0.00000000e+00],
       [ 5.15089962e-05,  3.30000000e+02,  1.30000000e+01, ...,
         1.30000000e+01,  1.71000000e+02,  0.00000000e+00],
       ...,
       [-9.98948000e-04,  4.87000000e+02,  0.00000000e+00, ...,
         0.00000000e+00,  1.69501000e+02,  0.00000000e+00],
       [ 5.15089962e-05,  1.65001000e+02,  0.00000000e+00, ...,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [ 5.15089962e-05,  3.30000000e+02,  1.30000000e+01, ...,
         1.30000000e+01,  1.71000000e+02,  0.00000000e+00]])

In [42]:
# FGSM against the linear models and the three neural networks. Each call
# writes its own FGSM_<label>_X_Test.csv; only the last call's return value
# (the LSTM samples) is displayed by the notebook.
FGSM(LR, "LR", X_test, X_test_ex) 
FGSM(SVM, "SVM", X_test, X_test_ex) 
# TODO: KNN is left out — FGSM() has no usable ART wrapper for it.
# FGSM(KNN, "KNN", X_test, X_test_ex)
FGSM(DNN, "DNN", X_test, X_test_ex) 
FGSM(CNN, "CNN", X_test, X_test_ex) 
FGSM(LSTM, "LSTM", X_test, X_test_ex) 

######## FGSM_ LR  ########
######## FGSM_ SVM  ########
######## FGSM_ DNN  ########




######## FGSM_ CNN  ########
######## FGSM_ LSTM  ########


array([[[0.000e+00],
        [3.297e+02],
        [1.270e+01],
        ...,
        [1.270e+01],
        [1.707e+02],
        [0.000e+00]],

       [[0.000e+00],
        [4.867e+02],
        [0.000e+00],
        ...,
        [0.000e+00],
        [0.000e+00],
        [0.000e+00]],

       [[0.000e+00],
        [3.297e+02],
        [1.270e+01],
        ...,
        [1.270e+01],
        [1.707e+02],
        [0.000e+00]],

       ...,

       [[0.000e+00],
        [4.867e+02],
        [0.000e+00],
        ...,
        [0.000e+00],
        [0.000e+00],
        [0.000e+00]],

       [[0.000e+00],
        [0.000e+00],
        [0.000e+00],
        ...,
        [3.000e-01],
        [3.000e-01],
        [3.000e-01]],

       [[0.000e+00],
        [3.297e+02],
        [1.270e+01],
        ...,
        [1.270e+01],
        [1.707e+02],
        [0.000e+00]]], dtype=float32)

In [43]:
# Carlini & Wagner L2 against the two linear models. In the recorded run each
# call needed close to two hours for the 44,730 test rows.
CW(LR, "LR", X_test, X_test_ex) 
CW(SVM, "SVM", X_test, X_test_ex)  
# TODO: KNN is left out — CW() would fall through to the Keras wrapper branch.
# CW(KNN, "KNN", X_test, X_test_ex)

######## CW_ LR  ########


C&W L_2: 100%|██████████| 44730/44730 [1:56:53<00:00,  6.38it/s]  


######## CW_ SVM  ########


C&W L_2: 100%|██████████| 44730/44730 [1:46:40<00:00,  6.99it/s]


array([[1.491070e-03, 3.300000e+02, 1.300000e+01, ..., 1.300000e+01,
        1.710000e+02, 0.000000e+00],
       [4.080000e-05, 4.870000e+02, 0.000000e+00, ..., 0.000000e+00,
        0.000000e+00, 0.000000e+00],
       [5.171150e-03, 3.300000e+02, 1.300000e+01, ..., 1.300000e+01,
        1.710000e+02, 0.000000e+00],
       ...,
       [1.410000e-05, 4.870000e+02, 0.000000e+00, ..., 0.000000e+00,
        0.000000e+00, 0.000000e+00],
       [1.366854e-03, 0.000000e+00, 0.000000e+00, ..., 0.000000e+00,
        0.000000e+00, 0.000000e+00],
       [4.191011e-03, 3.300000e+02, 1.300000e+01, ..., 1.300000e+01,
        1.710000e+02, 0.000000e+00]], dtype=float32)

In [44]:
# ZOO against the tree-based models, the linear models and the DNN. In the
# recorded run the DNN call alone took almost five hours.
ZOO(DT, "DT", X_test, X_test_ex) 
ZOO(RF, "RF", X_test, X_test_ex) 
ZOO(LR, "LR", X_test, X_test_ex) 
ZOO(XGB, "XGB", X_test, X_test_ex) 
ZOO(SVM, "SVM", X_test, X_test_ex) 
# TODO: KNN is left out — ZOO() would fall through to the Keras wrapper branch.
# ZOO(KNN, "KNN", X_test, X_test_ex)
ZOO(DNN, "DNN", X_test, X_test_ex) 

######## ZOO_ DT  ########


ZOO: 100%|██████████| 44730/44730 [07:27<00:00, 100.02it/s]


######## ZOO_ RF  ########


ZOO: 100%|██████████| 44730/44730 [1:00:03<00:00, 12.41it/s]


######## ZOO_ LR  ########


ZOO: 100%|██████████| 44730/44730 [12:38<00:00, 58.94it/s]


######## ZOO_ XGB  ########


ZOO: 100%|██████████| 44730/44730 [1:07:18<00:00, 11.08it/s]


######## ZOO_ SVM  ########


ZOO: 100%|██████████| 44730/44730 [07:50<00:00, 95.15it/s]


######## ZOO_ DNN  ########


ZOO: 100%|██████████| 44730/44730 [4:54:40<00:00,  2.53it/s]  


array([[1.49107000e-03, 1.00000000e+00, 1.00000000e+00, ...,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00],
       [4.08000000e-05, 1.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [5.17115000e-03, 1.00000000e+00, 1.00000000e+00, ...,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00],
       ...,
       [1.41000000e-05, 1.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [2.01366857e-01, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [4.19101100e-03, 1.00000000e+00, 1.00000000e+00, ...,
        1.00000000e+00, 1.00000000e+00, 0.00000000e+00]])

In [45]:
# JSMA against the two linear models and the DNN. In the recorded run the DNN
# call took a little over three hours.
JSMA(LR, "LR", X_test, X_test_ex) 
JSMA(SVM, "SVM", X_test, X_test_ex) 
# TODO: KNN is left out — JSMA() would fall through to the Keras wrapper branch.
# JSMA(KNN, "KNN", X_test, X_test_ex)
JSMA(DNN, "DNN", X_test, X_test_ex)

######## JSMA_ LR  ########


JSMA: 100%|██████████| 44730/44730 [01:06<00:00, 670.65it/s]


######## JSMA_ SVM  ########


JSMA: 100%|██████████| 44730/44730 [17:44<00:00, 42.03it/s]


######## JSMA_ DNN  ########


JSMA: 100%|██████████| 44730/44730 [3:09:33<00:00,  3.93it/s]  


array([[1.00000000e+00, 3.30000000e+02, 1.00000000e+00, ...,
        1.30000000e+01, 1.71000000e+02, 1.00000000e+00],
       [1.00000000e+00, 4.87000000e+02, 1.00000000e+00, ...,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00],
       [1.00000000e+00, 3.30000000e+02, 1.00000000e+00, ...,
        1.30000000e+01, 1.71000000e+02, 1.00000000e+00],
       ...,
       [1.00000000e+00, 4.87000000e+02, 1.00000000e+00, ...,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00],
       [1.01366855e-01, 2.00000003e-01, 8.00000072e-01, ...,
        4.00000006e-01, 7.00000048e-01, 1.00000000e+00],
       [1.00000000e+00, 3.30000000e+02, 1.00000000e+00, ...,
        1.30000000e+01, 1.71000000e+02, 1.00000000e+00]], dtype=float32)

In [46]:
# PGD against the linear models and the three neural networks. Each call
# writes its own PGD_<label>_X_Test.csv; only the last call's return value
# (the LSTM samples) is displayed by the notebook.
PGD(LR, "LR", X_test, X_test_ex) 
PGD(SVM, "SVM", X_test, X_test_ex) 
# TODO: KNN is left out — PGD() would fall through to the Keras wrapper branch.
# PGD(KNN, "KNN", X_test, X_test_ex)
PGD(DNN, "DNN", X_test, X_test_ex)
PGD(CNN, "CNN", X_test, X_test_ex)
PGD(LSTM, "LSTM", X_test, X_test_ex)

######## PGD_ LR  ########
######## PGD_ SVM  ########
######## PGD_ DNN  ########
######## PGD_ CNN  ########
######## PGD_ LSTM  ########


array([[[0.000e+00],
        [3.297e+02],
        [1.270e+01],
        ...,
        [1.270e+01],
        [1.707e+02],
        [0.000e+00]],

       [[0.000e+00],
        [4.867e+02],
        [0.000e+00],
        ...,
        [0.000e+00],
        [0.000e+00],
        [0.000e+00]],

       [[0.000e+00],
        [3.297e+02],
        [1.270e+01],
        ...,
        [1.270e+01],
        [1.707e+02],
        [0.000e+00]],

       ...,

       [[0.000e+00],
        [4.867e+02],
        [0.000e+00],
        ...,
        [0.000e+00],
        [0.000e+00],
        [0.000e+00]],

       [[0.000e+00],
        [0.000e+00],
        [0.000e+00],
        ...,
        [3.000e-01],
        [3.000e-01],
        [3.000e-01]],

       [[0.000e+00],
        [3.297e+02],
        [1.270e+01],
        ...,
        [1.270e+01],
        [1.707e+02],
        [0.000e+00]]], dtype=float32)

In [None]:
# Keep the raw decision-tree adversarial samples for the evaluation cells
# below. All four arguments are passed so the call matches the
# DTA(model, model_, X_test, X_test_ex) signature.
adv = DTA(DT, "DT", X_test, X_test_ex)

Decision tree attack: 100%|██████████| 44730/44730 [00:17<00:00, 2615.59it/s]


In [None]:
# Round every column except the first to the nearest integer, put the
# unrounded first column back in front, and save the result as test.csv.
# `arr` is the adversarial test set scored in the evaluation cell further down.
adv_i = np.rint(adv[:,1:])
arr = np.insert(adv_i, 0, adv[:,0], axis=1)
np.savetxt("./Dataset/csv/Attacked/test.csv", arr, delimiter=",")

In [None]:
# Baseline: accuracy and per-class report of the decision tree on the clean
# test split.
# NOTE(review): `model` is the same object as `DT`, so fit() retrains the
# loaded tree in place; the adversarial samples above were generated before
# this refit — confirm that is intended.
model = DT
model.fit(X_train,y_train)
print(model.score(X_test, y_test))
prediction = model.predict(X_test)
print(classification_report(y_test,prediction))

0.971070869662419
              precision    recall  f1-score   support

           0       0.97      0.98      0.97     23042
           1       0.97      0.97      0.97     21688

    accuracy                           0.97     44730
   macro avg       0.97      0.97      0.97     44730
weighted avg       0.97      0.97      0.97     44730



In [None]:
# Same evaluation on the rounded adversarial samples `arr`; the recorded run
# dropped from about 0.97 accuracy on clean data to about 0.45.
# NOTE(review): the tree is fitted again here, so it may differ from the one
# scored on clean data unless its random_state is fixed — confirm.
model = DT
model.fit(X_train,y_train)
print(model.score(arr, y_test))
prediction = model.predict(arr)
print(classification_report(y_test,prediction))

0.44804381846635366
              precision    recall  f1-score   support

           0       0.33      0.07      0.12     23042
           1       0.46      0.85      0.60     21688

    accuracy                           0.45     44730
   macro avg       0.40      0.46      0.36     44730
weighted avg       0.39      0.45      0.35     44730

