In [1]:
from sklearn.ensemble import GradientBoostingClassifier
import numpy as np
from matplotlib import pyplot as plt
import art#fully initialise module
from art.estimators.classification import SklearnClassifier
from art.attacks.evasion import ZooAttack
import warnings
warnings.filterwarnings('ignore')

In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import tree

from sklearn.metrics import accuracy_score,confusion_matrix,classification_report

# Read the raw dataset from the notebook's working directory.
data = pd.read_csv('dataset.csv')

# Tidy the headers: trim surrounding whitespace, then lower-case every name.
data.columns = data.columns.str.strip().str.lower()

# Discard every column that is not numeric.
data = data.select_dtypes(['number'])

# Separate the dependent variable ('type') from the independent variables.
y = data['type']
x = data.drop(columns=['type'])

# Replace NaN with zero and +/-inf with finite numbers in the feature matrix.
x = np.nan_to_num(x)

In [3]:
#______Partition features and labels into training and test sets
from sklearn.model_selection import train_test_split

# One third of the rows is held out for testing; the fixed seed makes the
# partition repeatable between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=1/3,
    random_state=44,
)

In [4]:
from sklearn.ensemble import RandomForestClassifier

# Baseline model: a 100-tree random forest trained on the benign training split.
# random_state is pinned (same seed as the train/test split) because the forest
# is built from random bootstrap samples and feature subsets; without a seed the
# metrics reported below change on every re-run.
rfc = RandomForestClassifier(n_estimators=100, random_state=44)
rfc.fit(x_train, y_train)

# Predicted labels for the held-out test split, consumed by the metrics cell.
pred1 = rfc.predict(x_test)

In [5]:
#accuracy for random forest
# Build the confusion matrix once and reuse it for both the printout and the
# false-positive-rate calculation instead of recomputing it for every term.
rf_confusion = confusion_matrix(y_test, pred1)

print('For Random Forest accuracy score is ',accuracy_score(y_test,pred1))
print('For Random Forest confusion_matrix is: \n\n',rf_confusion)
print ('For Random Forest Classification Report: \n\n',classification_report(y_test,pred1))

# Row 0 holds the samples whose true label sorts first (label 0 in the recorded
# output): column 0 are the ones predicted correctly (TN), column 1 the ones
# predicted as the other class (FP).  FPR = FP / (TN + FP).
true_negatives = rf_confusion[0][0]
false_positives = rf_confusion[0][1]
false_positive_rate = false_positives / (true_negatives + false_positives)
print("False Positive Rate: "+str(false_positive_rate))

For Random Forest accuracy score is  0.9612794612794613
For Random Forest confusion_matrix is: 

 [[507   7]
 [ 16  64]]
For Random Forest Classification Report: 

               precision    recall  f1-score   support

           0       0.97      0.99      0.98       514
           1       0.90      0.80      0.85        80

    accuracy                           0.96       594
   macro avg       0.94      0.89      0.91       594
weighted avg       0.96      0.96      0.96       594

False Positive Rate: 0.013618677042801557


In [6]:
# Victim model for the attack: a gradient boosting classifier fitted on the
# benign training split.  random_state is pinned so the fitted model (and
# therefore everything derived from it below) is repeatable across runs.
model = GradientBoostingClassifier(random_state=44)
model.fit(X=x_train, y=y_train)

# Create ART classifier for scikit-learn GradientBoostingClassifier
art_classifier = SklearnClassifier(model=model)

# Create ART Zeroth Order Optimization attack (untargeted, one sample per batch).
# NOTE(review): the recorded scores further down are identical for benign and
# adversarial inputs (0.9916 train / 0.9512 test), which suggests the generated
# samples did not change any prediction.  Presumably the finite-difference step
# (variable_h=0.2) is too small relative to the unscaled feature values to move
# a sample across a tree split threshold; consider scaling the features and/or
# passing clip_values to SklearnClassifier - TODO confirm.
zoo = ZooAttack(classifier=art_classifier, confidence=0.0, targeted=False, learning_rate=1e-1, max_iter=20,
                binary_search_steps=10, initial_const=1e-3, abort_early=True, use_resize=False, 
                use_importance=False, nb_parallel=1, batch_size=1, variable_h=0.2)

# Generate adversarial samples with ART Zeroth Order Optimization attack
x_train_adv = zoo.generate(x_train)

ZOO: 100%|█████████████████████████████████████████████████████████████████████████| 1187/1187 [00:28<00:00, 41.86it/s]


In [7]:
# Display the samples returned by the ZOO attack for the training split
# (NumPy abbreviates the repr of a large array with "...").
x_train_adv

array([[4.9000e+01, 9.0000e+00, 3.2400e+02, ..., 0.0000e+00, 0.0000e+00,
        0.0000e+00],
       [4.8000e+01, 1.1000e+01, 6.4000e+02, ..., 3.4940e+03, 3.1000e+01,
        8.0000e+00],
       [6.1000e+01, 1.2000e+01, 3.9198e+04, ..., 0.0000e+00, 0.0000e+00,
        0.0000e+00],
       ...,
       [6.0000e+01, 1.1000e+01, 0.0000e+00, ..., 5.3920e+03, 3.5000e+01,
        6.0000e+00],
       [4.6000e+01, 9.0000e+00, 0.0000e+00, ..., 5.6400e+02, 6.0000e+00,
        0.0000e+00],
       [7.5000e+01, 1.6000e+01, 0.0000e+00, ..., 4.2470e+03, 4.1000e+01,
        8.0000e+00]])

In [9]:
# Run the same ZOO attack object on the held-out test features.
x_test_adv = zoo.generate(x_test)

ZOO: 100%|███████████████████████████████████████████████████████████████████████████| 594/594 [00:14<00:00, 41.71it/s]


In [10]:
# Display the samples returned by the ZOO attack for the test split
# (NumPy abbreviates the repr of a large array with "...").
x_test_adv

array([[5.1000e+01, 1.0000e+01, 0.0000e+00, ..., 5.0930e+03, 4.1000e+01,
        6.0000e+00],
       [5.6000e+01, 9.0000e+00, 0.0000e+00, ..., 2.4160e+03, 2.1000e+01,
        6.0000e+00],
       [1.2800e+02, 2.5000e+01, 2.9840e+04, ..., 4.3200e+02, 4.0000e+00,
        0.0000e+00],
       ...,
       [4.9000e+01, 9.0000e+00, 1.6200e+02, ..., 2.1380e+03, 2.5000e+01,
        6.0000e+00],
       [4.8000e+01, 1.0000e+01, 3.4500e+02, ..., 1.4740e+03, 1.9000e+01,
        2.0000e+00],
       [6.5000e+01, 1.1000e+01, 1.2339e+04, ..., 1.9220e+03, 2.2000e+01,
        4.0000e+00]])

In [12]:
# Mean accuracy of the gradient boosting model on the unmodified training split.
score = model.score(X=x_train, y=y_train)
print("Benign Training Score: %.4f" % score)

Benign Training Score: 0.9916


In [13]:
# Mean accuracy of the same model on the ZOO-generated training samples,
# scored against the original training labels.
score = model.score(X=x_train_adv, y=y_train)
print("Adversarial Training Score: %.4f" % score)

Adversarial Training Score: 0.9916


In [14]:
# Mean accuracy of the gradient boosting model on the unmodified test split.
score = model.score(X=x_test, y=y_test)
print("Benign Test Score: %.4f" % score)

Benign Test Score: 0.9512


In [15]:
# Mean accuracy of the same model on the ZOO-generated test samples,
# scored against the original test labels.
score = model.score(X=x_test_adv, y=y_test)
print("Adversarial Test Score: %.4f" % score)

Adversarial Test Score: 0.9512


In [16]:
#GradientBoostingClassifier retrained on the ZOO-generated training samples
from sklearn.ensemble import GradientBoostingClassifier

# NOTE(review): `rfc` and `pred1` were first bound to the random forest and its
# predictions; this cell overwrites both with a gradient boosting model.  The
# names are kept so that any later cell reading them keeps working.
# random_state is pinned so the reported metrics are repeatable across runs.
rfc=GradientBoostingClassifier(n_estimators=100, random_state=44)
rfc.fit(x_train_adv,y_train)

# Predictions are made on the benign (unperturbed) test features.
pred1=rfc.predict(x_test)

#accuracy for GradientBoostingClassifier
# Build the confusion matrix once and reuse it for both the printout and the
# false-positive-rate calculation instead of recomputing it for every term.
gbc_confusion = confusion_matrix(y_test, pred1)

print('For GradientBoostingClassifier accuracy score is ',accuracy_score(y_test,pred1))
print('For GradientBoostingClassifier confusion_matrix is: \n\n',gbc_confusion)
print ('For GradientBoostingClassifier Classification Report: \n\n',classification_report(y_test,pred1))

# Row 0 holds the samples whose true label sorts first (label 0 in the recorded
# output): column 0 are the ones predicted correctly (TN), column 1 the ones
# predicted as the other class (FP).  FPR = FP / (TN + FP).
true_negatives = gbc_confusion[0][0]
false_positives = gbc_confusion[0][1]
false_positive_rate = false_positives / (true_negatives + false_positives)
print("False Positive Rate: "+str(false_positive_rate))

For GradientBoostingClassifier accuracy score is  0.9511784511784511
For GradientBoostingClassifier confusion_matrix is: 

 [[503  11]
 [ 18  62]]
For GradientBoostingClassifier Classification Report: 

               precision    recall  f1-score   support

           0       0.97      0.98      0.97       514
           1       0.85      0.78      0.81        80

    accuracy                           0.95       594
   macro avg       0.91      0.88      0.89       594
weighted avg       0.95      0.95      0.95       594

False Positive Rate: 0.021400778210116732


In [18]:
# Inspect the container type of the test features.
type(x_test)

numpy.ndarray