In [2]:
import setGPU

setGPU: Setting GPU to: 3


In [5]:
random_state=999

# Load data

In [3]:
n_ts = 1000  # number of testing set samples

from secml.data.loader import CDataLoaderMNIST
digits = (1, 5, 9)
loader = CDataLoaderMNIST()
tr = loader.load('training', digits=digits)
ts = loader.load('testing', digits=digits, num_samples=n_ts)

# Normalize the data
tr.X /= 255
ts.X /= 255

Split data into train-validation set (Stacked Generalization alike)

In [4]:
from secml.data.splitter import CTrainTestSplit

In [6]:
tr, vl = CTrainTestSplit(test_size=0.5, random_state=random_state).split(tr)

In [7]:
tr.X.shape[0], vl.X.shape[0]

(9056, 9056)

# NN classifier

In [8]:
import torch
from torch import nn

class MNIST3cCNN(nn.Module):
    """Model with input size (-1, 28, 28) for MNIST 3-classes dataset."""
    def __init__(self):
        super(MNIST3cCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 3)

    def forward(self, x):
        x = torch.relu(torch.max_pool2d(self.conv1(x), 2))
        x = torch.relu(torch.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = torch.relu(self.fc1(x))
        return self.fc2(x)

### Wrap it

In [9]:
from torch import optim

# Random seed
torch.manual_seed(0)

net = MNIST3cCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(),
                      lr=0.01, momentum=0.9)

from secml.ml.classifiers import CClassifierPyTorch
dnn = CClassifierPyTorch(model=net,
                         loss=criterion,
                         optimizer=optimizer,
                         epochs=20,
                         batch_size=20,
                         input_shape=(1, 28, 28),
                         random_state=0)

In [10]:
# clf.verbose = 1  # Can be used to display training process output
print("Training started...")
dnn.fit(tr)
print("Training completed!")

Training started...
Training completed!


In [11]:
print(dnn)

CClassifierPyTorch{'classes': CArray(3,)(dense: [0 1 2]), 'n_features': 784, 'preprocess': None, 'model': MNIST3cCNN(
  (conv1): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
  (conv2_drop): Dropout2d(p=0.5, inplace=False)
  (fc1): Linear(in_features=320, out_features=50, bias=True)
  (fc2): Linear(in_features=50, out_features=3, bias=True)
), 'trained': True, 'input_shape': (1, 28, 28), 'softmax_outputs': False, 'batch_size': 20, 'loss': CrossEntropyLoss(), 'optimizer': SGD (
Parameter Group 0
    dampening: 0
    lr: 0.01
    momentum: 0.9
    nesterov: False
    weight_decay: 0
), 'optimizer_scheduler': None, 'epochs': 20}


In [12]:
label_torch = dnn.predict(ts.X, return_decision_function=False)

from secml.ml.peval.metrics import CMetric
acc_torch = CMetric.create('accuracy').performance_score(ts.Y, label_torch)

print("Model Accuracy: {}".format(acc_torch))

Model Accuracy: 0.997


# Generate Adversarial Samples

In [16]:
ts.X.shape

(1000, 784)

In [17]:
# For simplicity, let's attack a subset of the test set
attack_ds = ts

noise_type = 'l2'  # Type of perturbation 'l1' or 'l2'
dmax = 3.0  # Maximum perturbation
lb, ub = 0., 1.  # Bounds of the attack space. Can be set to `None` for unbounded
y_target = None  # None if `error-generic` or a class label for `error-specific`

# Should be chosen depending on the optimization problem
solver_params = {
    'eta': 0.5,
    'eta_min': 2.0,
    'eta_max': None,
    'max_iter': 100,
    'eps': 1e-6
}

from secml.adv.attacks import CAttackEvasionPGDLS
pgd_ls_attack = CAttackEvasionPGDLS(classifier=dnn,
                                    surrogate_classifier=dnn,
                                    surrogate_data=tr,
                                    distance=noise_type,
                                    dmax=dmax,
                                    solver_params=solver_params,
                                    y_target=y_target)

print("Attack started...")
eva_y_pred, _, eva_adv_ds, _ = pgd_ls_attack.run(
    attack_ds.X, attack_ds.Y, double_init=True)
print("Attack complete!")

Attack started...
Attack complete!


In [18]:
attack_ds

CDataset{'X': CArray(1000, 784)(dense: [[0. 0. 0. ... 0. 0. 0.] [0. 0. 0. ... 0. 0. 0.] [0. 0. 0. ... 0. 0. 0.] ... [0. 0. 0. ... 0. 0. 0.] [0. 0. 0. ... 0. 0. 0.] [0. 0. 0. ... 0. 0. 0.]]), 'Y': CArray(1000,)(dense: [0 0 2 ... 1 1 1]), 'header': CDatasetHeader{'num_samples': None, 'img_w': 28, 'img_h': 28, 'y_original': (1, 5, 9)}}

In [19]:
from secml.ml.peval.metrics import CMetric
metric = CMetric.create('accuracy')

acc = metric.performance_score(
    y_true=attack_ds.Y, y_pred=dnn.predict(attack_ds.X))
acc_attack = metric.performance_score(
    y_true=attack_ds.Y, y_pred=eva_y_pred)

print("Accuracy on reduced test set before attack: {:.2%}".format(acc))
print("Accuracy on reduced test set after attack: {:.2%}".format(acc_attack))

Accuracy on reduced test set before attack: 99.70%
Accuracy on reduced test set after attack: 10.70%


# Security evaluation of a classifier

In [None]:
attack.verbose = 0

In [None]:
# Only required for visualization in notebooks
%matplotlib inline

In [None]:
# Perturbation levels to test
from secml.array import CArray
e_vals = CArray.arange(start=0, step=0.1, stop=1.1)

from secml.adv.seceval import CSecEval
sec_eval = CSecEval(
    attack=pgd_ls_attack, param_name='dmax', param_values=e_vals)

# Run the security evaluation using the test set
print("Running security evaluation...")
sec_eval.run_sec_eval(ts, double_init=False)

from secml.figure import CFigure
fig = CFigure(height=5, width=5)

# Convenience function for plotting the Security Evaluation Curve
fig.sp.plot_sec_eval(
    sec_eval.sec_eval_data, marker='o', label='DNN', show_average=True)

---

# Get DNN features

In [20]:
from secml.ml.features import CNormalizerDNN

In [21]:
feat_extr = CNormalizerDNN(dnn, out_layer='fc1')
feat_extr

CNormalizerDNN{'net': CClassifierPyTorch{'classes': CArray(3,)(dense: [0 1 2]), 'n_features': 784, 'preprocess': None, 'model': MNIST3cCNN(
  (conv1): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
  (conv2_drop): Dropout2d(p=0.5, inplace=False)
  (fc1): Linear(in_features=320, out_features=50, bias=True)
  (fc2): Linear(in_features=50, out_features=3, bias=True)
), 'trained': True, 'input_shape': (1, 28, 28), 'softmax_outputs': False, 'batch_size': 20, 'loss': CrossEntropyLoss(), 'optimizer': SGD (
Parameter Group 0
    dampening: 0
    lr: 0.01
    momentum: 0.9
    nesterov: False
    weight_decay: 0
), 'optimizer_scheduler': None, 'epochs': 20}, 'out_layer': 'fc1', 'preprocess': None}

## ACHTUNG!
<mark> Generating further training points using validation set to avoid overfitting!</mark>

In [99]:
feats_vl = feat_extr.forward(vl.X)   # <-- see?
feats_ts = feat_extr.forward(ts.X)
feats_vl.shape, feats_ts.shape

((9056, 50), (1000, 50))

# Train an SVM-RBF ontop

In [100]:
from secml.data.c_dataset import CDataset

In [101]:
feat_vl_ds = CDataset(feats_vl, vl.Y)
feat_ts_ds = CDataset(feats_ts, ts.Y)

In [104]:
# Normalize the data
from secml.ml.features import CNormalizerMinMax
nmz = CNormalizerMinMax()
feat_vl_ds.X = nmz.fit_transform(feat_vl_ds.X)
feat_ts_ds.X = nmz.transform(feat_ts_ds.X)

In [105]:
# Creation of the multiclass classifier
from secml.ml.classifiers import CClassifierSVM
from secml.ml.classifiers.multiclass import CClassifierMulticlassOVA
from secml.ml.kernels import CKernelRBF
clf = CClassifierMulticlassOVA(CClassifierSVM, kernel=CKernelRBF())

# Parameters for the Cross-Validation procedure
# xval_params = {'C': [1e-3, 1e-2, 1e-1, 1, 100, 1000],
#                'kernel.gamma': [0.01, 0.1, 1, 10, 100, 1e3]}
xval_params = {
    'C': [1e-2, 1, 100],
    'kernel.gamma': [1e-2, 10, 100]
}

# Let's create a 3-Fold data splitter
from secml.data.splitter import CDataSplitterKFold
xval_splitter = CDataSplitterKFold(num_folds=3, random_state=random_state)

# Select and set the best training parameters for the classifier

clf.verbose = 1

print("Estimating the best training parameters...")
best_params = clf.estimate_parameters(
    dataset=feat_vl_ds,
    parameters=xval_params,
    splitter=xval_splitter,
    metric='accuracy',
    perf_evaluator='xval'
)

print("The best training parameters are: ",
      [(k, best_params[k]) for k in sorted(best_params)])

Estimating the best training parameters...
2020-04-21 12:24:25,965 - CPerfEvaluatorXVal.0x7f83888ec978 - INFO - Parameters to evaluate: {'C': [0.01, 1, 100], 'kernel.gamma': [0.01, 10, 100]}
2020-04-21 12:24:25,971 - CClassifierMulticlassOVA.0x7f83888ec0f0 - INFO - Training against class: 0
2020-04-21 12:24:26,231 - CClassifierSVM.0x7f83888ec048 - INFO - Training SVM with parameters: {'preprocess': None, 'C': 0.01, 'class_weight': None, 'grad_sampling': 1.0, 'store_dual_vars': None, 'kernel': CKernelRBF{'gamma': 0.01, 'rv': None, 'preprocess': None}}
2020-04-21 12:24:27,889 - CClassifierMulticlassOVA.0x7f83888ec0f0 - INFO - Training against class: 1
2020-04-21 12:24:28,137 - CClassifierSVM.0x7f839002f908 - INFO - Training SVM with parameters: {'preprocess': None, 'C': 0.01, 'class_weight': None, 'grad_sampling': 1.0, 'store_dual_vars': None, 'kernel': CKernelRBF{'gamma': 0.01, 'rv': None, 'preprocess': None}}
2020-04-21 12:24:29,629 - CClassifierMulticlassOVA.0x7f83888ec0f0 - INFO - Tr

2020-04-21 12:24:52,497 - CClassifierMulticlassOVA.0x7f83888ec0f0 - INFO - Training against class: 0
2020-04-21 12:24:52,845 - CClassifierSVM.0x7f83888ec048 - INFO - Training SVM with parameters: {'preprocess': None, 'C': 0.01, 'class_weight': None, 'grad_sampling': 1.0, 'store_dual_vars': None, 'kernel': CKernelRBF{'gamma': 10.0, 'rv': CArray(3348, 50)(dense: [[0.319539 0.564277 0.295036 ... 0.211639 0.294304 0.414772] [0.741261 0.53255  0.762431 ... 0.691528 0.550085 0.356605] [0.776435 0.809834 0.835342 ... 0.734293 0.522598 0.20544 ] ... [0.798788 0.10595  0.138171 ... 0.183795 0.765937 0.887765] [0.844642 0.152119 0.173919 ... 0.28547  0.78265  0.806754] [0.88371  0.113048 0.168824 ... 0.269053 0.836361 0.820962]]), 'preprocess': None}}
2020-04-21 12:24:54,574 - CClassifierMulticlassOVA.0x7f83888ec0f0 - INFO - Training against class: 1
2020-04-21 12:24:54,842 - CClassifierSVM.0x7f839002f908 - INFO - Training SVM with parameters: {'preprocess': None, 'C': 0.01, 'class_weight': None

2020-04-21 12:25:20,301 - CClassifierMulticlassOVA.0x7f83888ec0f0 - INFO - Training against class: 2
2020-04-21 12:25:20,577 - CClassifierSVM.0x7f8390075f60 - INFO - Training SVM with parameters: {'preprocess': None, 'C': 0.01, 'class_weight': None, 'grad_sampling': 1.0, 'store_dual_vars': None, 'kernel': CKernelRBF{'gamma': 100.0, 'rv': CArray(5372, 50)(dense: [[0.752742 0.068505 0.204044 ... 0.277333 0.711262 0.852773] [0.793809 0.061198 0.189204 ... 0.233018 0.798059 0.89644 ] [0.319539 0.564277 0.295036 ... 0.211639 0.294304 0.414772] ... [0.538573 0.742933 0.869151 ... 0.752485 0.497865 0.236443] [0.617783 0.757032 0.876123 ... 0.885706 0.447493 0.23082 ] [0.546089 0.894925 0.781912 ... 0.622607 0.488415 0.233982]]), 'preprocess': None}}
2020-04-21 12:25:24,311 - CClassifierMulticlassOVA.0x7f83888ec0f0 - INFO - Training against class: 0
2020-04-21 12:25:24,664 - CClassifierSVM.0x7f83888ec048 - INFO - Training SVM with parameters: {'preprocess': None, 'C': 0.01, 'class_weight': Non

2020-04-21 12:25:44,854 - CClassifierMulticlassOVA.0x7f83888ec0f0 - INFO - Training against class: 1
2020-04-21 12:25:45,126 - CClassifierSVM.0x7f839002f908 - INFO - Training SVM with parameters: {'preprocess': None, 'C': 1, 'class_weight': None, 'grad_sampling': 1.0, 'store_dual_vars': None, 'kernel': CKernelRBF{'gamma': 0.01, 'rv': CArray(276, 50)(dense: [[0.446855 0.663387 0.606221 ... 0.71087  0.332356 0.321569] [0.558022 0.539468 0.578252 ... 0.537235 0.440879 0.389965] [0.547089 0.6114   0.591295 ... 0.466219 0.787309 0.434095] ... [0.587711 0.195569 0.082981 ... 0.218329 0.531542 0.834464] [0.516069 0.369524 0.217363 ... 0.277316 0.403617 0.674785] [0.442718 0.548783 0.378541 ... 0.347898 0.388608 0.405302]]), 'preprocess': None}}
2020-04-21 12:25:46,341 - CClassifierMulticlassOVA.0x7f83888ec0f0 - INFO - Training against class: 2
2020-04-21 12:25:46,606 - CClassifierSVM.0x7f8390075f60 - INFO - Training SVM with parameters: {'preprocess': None, 'C': 1, 'class_weight': None, 'grad

2020-04-21 12:26:07,983 - CPerfEvaluatorXVal.0x7f83888ec978 - INFO - Params: {'C': 1, 'kernel.gamma': 10} - Score: 0.9916077126728423
2020-04-21 12:26:07,995 - CClassifierMulticlassOVA.0x7f83888ec0f0 - INFO - Training against class: 0
2020-04-21 12:26:08,360 - CClassifierSVM.0x7f83888ec048 - INFO - Training SVM with parameters: {'preprocess': None, 'C': 1, 'class_weight': None, 'grad_sampling': 1.0, 'store_dual_vars': None, 'kernel': CKernelRBF{'gamma': 100.0, 'rv': CArray(2101, 50)(dense: [[0.631801 0.609851 0.681576 ... 0.599143 0.45128  0.331064] [0.319539 0.564277 0.295036 ... 0.211639 0.294304 0.414772] [0.215432 0.830215 0.317806 ... 0.14048  0.182788 0.272299] ... [0.737836 0.149923 0.184834 ... 0.257052 0.659998 0.83195 ] [0.846979 0.032272 0.130714 ... 0.193807 0.851597 0.939976] [0.876803 0.06557  0.086884 ... 0.142622 0.822703 0.91911 ]]), 'preprocess': None}}
2020-04-21 12:26:10,918 - CClassifierMulticlassOVA.0x7f83888ec0f0 - INFO - Training against class: 1
2020-04-21 12:2

2020-04-21 12:26:40,925 - CClassifierMulticlassOVA.0x7f83888ec0f0 - INFO - Training against class: 2
2020-04-21 12:26:41,196 - CClassifierSVM.0x7f8390075f60 - INFO - Training SVM with parameters: {'preprocess': None, 'C': 100, 'class_weight': None, 'grad_sampling': 1.0, 'store_dual_vars': None, 'kernel': CKernelRBF{'gamma': 0.01, 'rv': CArray(5555, 50)(dense: [[0.752742 0.068505 0.204044 ... 0.277333 0.711262 0.852773] [0.250139 0.755322 0.264548 ... 0.379916 0.208167 0.454044] [0.319539 0.564277 0.295036 ... 0.211639 0.294304 0.414772] ... [0.679432 0.387356 0.569345 ... 0.595423 0.77214  0.520467] [0.618286 0.494869 0.717315 ... 0.634201 0.664745 0.410195] [0.546089 0.894925 0.781912 ... 0.622607 0.488415 0.233982]]), 'preprocess': None}}
2020-04-21 12:26:42,559 - CClassifierMulticlassOVA.0x7f83888ec0f0 - INFO - Training against class: 0
2020-04-21 12:26:42,903 - CClassifierSVM.0x7f83888ec048 - INFO - Training SVM with parameters: {'preprocess': None, 'C': 100, 'class_weight': None, 

2020-04-21 12:27:00,950 - CClassifierMulticlassOVA.0x7f83888ec0f0 - INFO - Training against class: 1
2020-04-21 12:27:01,221 - CClassifierSVM.0x7f839002f908 - INFO - Training SVM with parameters: {'preprocess': None, 'C': 100, 'class_weight': None, 'grad_sampling': 1.0, 'store_dual_vars': None, 'kernel': CKernelRBF{'gamma': 10.0, 'rv': CArray(2143, 50)(dense: [[0.741261 0.53255  0.762431 ... 0.691528 0.550085 0.356605] [0.776435 0.809834 0.835342 ... 0.734293 0.522598 0.20544 ] [0.688999 0.299158 0.462572 ... 0.493556 0.831171 0.691465] ... [0.238485 0.655429 0.426532 ... 0.49161  0.313357 0.328109] [0.349617 0.6748   0.427632 ... 0.342974 0.362027 0.343559] [0.375594 0.633238 0.413938 ... 0.339545 0.401927 0.378512]]), 'preprocess': None}}
2020-04-21 12:27:02,945 - CClassifierMulticlassOVA.0x7f83888ec0f0 - INFO - Training against class: 2
2020-04-21 12:27:03,215 - CClassifierSVM.0x7f8390075f60 - INFO - Training SVM with parameters: {'preprocess': None, 'C': 100, 'class_weight': None, 

2020-04-21 12:27:31,982 - CClassifierMulticlassOVA.0x7f83888ec0f0 - INFO - Training against class: 0
2020-04-21 12:27:32,308 - CClassifierSVM.0x7f83888ec048 - INFO - Training SVM with parameters: {'preprocess': None, 'C': 100, 'class_weight': None, 'grad_sampling': 1.0, 'store_dual_vars': None, 'kernel': CKernelRBF{'gamma': 100.0, 'rv': CArray(5516, 50)(dense: [[0.250139 0.755322 0.264548 ... 0.379916 0.208167 0.454044] [0.215432 0.830215 0.317806 ... 0.14048  0.182788 0.272299] [0.320129 0.681291 0.356225 ... 0.302626 0.27539  0.41397 ] ... [0.823533 0.080374 0.171164 ... 0.264324 0.803994 0.896422] [0.844642 0.152119 0.173919 ... 0.28547  0.78265  0.806754] [0.902194 0.095051 0.176878 ... 0.313168 0.845025 0.825834]]), 'preprocess': None}}
2020-04-21 12:27:34,617 - CClassifierMulticlassOVA.0x7f83888ec0f0 - INFO - Training against class: 1
2020-04-21 12:27:34,869 - CClassifierSVM.0x7f839002f908 - INFO - Training SVM with parameters: {'preprocess': None, 'C': 100, 'class_weight': None,

In [106]:
# Setting properties 
clf.classifier.C = 0.01
clf.classifier.kernel = CKernelRBF(gamma=10)

In [108]:
clf.verbose = 0

In [109]:
# We can now fit the classifier
clf.fit(feat_vl_ds)

# Compute predictions on a test set
y_pred = clf.predict(feat_ts_ds.X)

# Evaluate the accuracy of the classifier
acc = metric.performance_score(y_true=feat_ts_ds.Y, y_pred=y_pred)

print("Accuracy on test set: {:.2%}".format(acc))

Accuracy on test set: 99.60%


# Wrap it with reject

In [110]:
from secml.ml.classifiers.reject import CClassifierRejectThreshold

clf_rej = CClassifierRejectThreshold(clf, 0.)
clf_rej.threshold

0.0

Set the `threshold` to have around 10% of false positives

In [140]:
clf_rej.threshold = 2.5

In [141]:
# Compute predictions on a test set
y_pred = clf_rej.predict(feat_vl_ds.X)

# Evaluate the accuracy of the classifier
acc = metric.performance_score(y_true=feat_vl_ds.Y, y_pred=y_pred)

print("Accuracy on training set: {:.2%}".format(acc))

Accuracy on training set: 90.82%


In [142]:
y_pred

CArray(9056,)(dense: [ 2 -1  0 ...  0  2  0])

In [143]:
fpr = (y_pred < 0).sum() / y_pred.shape[0]
fpr

0.09154151943462897

In [92]:
# Compute predictions on a test set
y_pred = clf_rej.predict(feat_ts_ds.X)

# Evaluate the accuracy of the classifier
acc = metric.performance_score(y_true=feat_ts_ds.Y, y_pred=y_pred)

print("Accuracy on test set: {:.2%}".format(acc))

Accuracy on test set: 34.80%


# Test it against BB adversarial samples

In [93]:
eva_adv_ds.Y, eva_y_pred

(CArray(1000,)(dense: [0 0 2 ... 1 1 1]),
 CArray(1000,)(dense: [2 2 0 ... 1 2 2]))

In [96]:
adv_feats = feat_extr.forward(eva_adv_ds.X)
adv_feats.shape

(1000, 50)

In [97]:
clf_rej.forward(adv_feats)

CArray(1000, 4)(dense: [[-0.973061 -0.695527 -0.791362  0.      ] [-0.973061 -0.695527 -0.791362  0.      ] [-0.973061 -0.695527 -0.791362  0.      ] ... [-0.973061 -0.695527 -0.791362  0.      ] [-0.973061 -0.695527 -0.791362  0.      ] [-0.973061 -0.695527 -0.791362  0.      ]])

In [98]:
# Compute predictions on a test set
y_pred = clf_rej.predict(adv_feats)

# Evaluate the accuracy of the classifier
acc = metric.performance_score(y_true=eva_adv_ds.Y, y_pred=y_pred)

print("Accuracy on test set: {:.2%}".format(acc))

Accuracy on test set: 0.00%


# HOW TO WRAP IN A UNIQUE MODEL? ASK ANYONE!