In [1]:
import numpy as np
from numpy.linalg import norm
import pandas as pd
from sklearn.model_selection import train_test_split
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt

from matplotlib.colors import ListedColormap
%matplotlib inline
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder
import sklearn.linear_model
import os.path

import time

from NeuralNet import NeuralNet
from utils.json import read_from_json
from utils.nnsetup import *
from utils.attackperformance import *
from attacks.attacks import Attack

np.random.seed(1)

In [2]:
# Load every dataset split through the project helper get_datasets()
# (it is not defined in this notebook, so it must come from one of the star imports above).
# NOTE(review): Y_train_e is presumably an encoded (e.g. one-hot) copy of Y_train — confirm in get_datasets.
X_train, Y_train, Y_train_e, X_test, Y_test = get_datasets()

In [3]:
# Take (at most) the first 10000 test samples and labels as separate copies,
# so the attack cells below operate on X_test_c / Y_test_c instead of on
# X_test / Y_test themselves.
X_test_c, Y_test_c = (split[:10000].copy() for split in (X_test, Y_test))

In [None]:
# Epsilon values handed to get_insights below; only a single value is active.
# NOTE(review): 1/255 presumably equals one 8-bit intensity step for inputs scaled to [0, 1] — confirm.
test_epsilon = [1/255]
# Forwarded to get_insights as the keyword argument of the same name.
# NOTE(review): presumably excludes attempts that never became adversarial — confirm in get_insights.
ignore_not_adversarial = True
# Wider epsilon sweep, currently disabled:
#test_epsilon = [1/255, 3/255, 5/255, 10/255, 15/255, 30/255, 50/255, 80/255, 120/255]

In [None]:
# Undefended network (its results are labelled 'without defence' further down).
cls = getNN("nets/main_nn_300", 1, None, None)

# Run the attack evaluation on the test subset for every epsilon in test_epsilon.
# get_insights returns four objects: a summary frame (df), a frame with
# 'iterations' and 'L2 norm' columns (insights), and the two values later
# passed to robustness() (initial, adversarial).
df, insights, initial, adversarial = get_insights(
    cls,
    test_epsilon,
    X_test_c,
    Y_test_c,
    ignore_not_adversarial=ignore_not_adversarial,
)
df

In [None]:
# Preview the first rows of `insights` for the undefended model
# (later cells read its 'iterations' and 'L2 norm' columns).
insights.head()

In [None]:
# Distilled network (its results are labelled 'distilled' further down).
cls_dist = getNN("nets/distilled_nn_100_T_7", 1, None, None)
# Alternative distilled checkpoints, currently disabled:
#cls_dist_T_5 = getNN("nets/distilled_nn_300_T_5", 1, None, None)
#cls_dist_T_10 = getNN("nets/distilled_nn_300_T_5_f", 1, None, None)

# Same attack evaluation as for the undefended network, with the same
# epsilons and test subset, so the two sets of results are comparable.
df_dist, insights_dist, initial_dist, adversarial_dist = get_insights(
    cls_dist,
    test_epsilon,
    X_test_c,
    Y_test_c,
    ignore_not_adversarial=ignore_not_adversarial,
)
df_dist

In [None]:
# Preview the first rows of `insights_dist` for the distilled model
# (later cells read its 'iterations' and 'L2 norm' columns).
insights_dist.head()

In [None]:
# Largest iteration count recorded for either model; Series.max() keeps the
# reduction inside pandas instead of iterating the Series with builtin max().
n_iter = max(insights['iterations'].max(), insights_dist['iterations'].max())
# Thresholds 0..n_iter inclusive. np.arange excludes its stop value, so the
# "+ 1" is needed for the curves to include attacks that took exactly
# n_iter iterations.
iterations = np.arange(n_iter + 1)
n_iter

In [None]:
# For every iteration threshold i: the share of rows whose 'iterations' value
# is greater than i, i.e. 1 - fraction of rows with iterations <= i.
# The boolean mask is reduced with the vectorized .mean() rather than the
# builtin sum(), which walks the Series element by element for each threshold.
accuracy = [1 - (insights['iterations'] <= i).mean() for i in iterations]
accuracy_dist = [1 - (insights_dist['iterations'] <= i).mean() for i in iterations]

In [None]:
# 100 evenly spaced L2 thresholds covering the perturbation norms of BOTH
# models. Using only the undefended model's range would clip the distilled
# curve whenever its norms fall outside that range.
ticks = np.linspace(
    min(insights['L2 norm'].min(), insights_dist['L2 norm'].min()),
    max(insights['L2 norm'].max(), insights_dist['L2 norm'].max()),
    100,
)
# For every threshold t: the share of rows whose 'L2 norm' is greater than t,
# i.e. 1 - fraction of rows with L2 norm <= t (vectorized boolean mean).
distance = [1 - (insights['L2 norm'] <= t).mean() for t in ticks]
distance_dist = [1 - (insights_dist['L2 norm'] <= t).mean() for t in ticks]

In [None]:
def set_axis(ax):
  """Apply the shared axis styling: grid drawn below the data, plus a legend.

  The legend is only created when the axes already holds labelled artists,
  so calling this helper before anything has been plotted does not trigger
  matplotlib's "no artists with labels" warning or leave an empty legend.

  Parameters:
    ax: a matplotlib Axes (any object offering set_axisbelow, grid,
      get_legend_handles_labels and legend).
  """
  ax.set_axisbelow(True)
  ax.grid()
  handles, _ = ax.get_legend_handles_labels()
  if handles:
    ax.legend()


# Two side-by-side panels comparing the undefended model (label 'Прообраз')
# with the distilled one (label 'Дистильована модель').
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(15, 6))
# Optional figure title, currently disabled:
#fig.suptitle('Дистиляція при T=7')

# Left panel: `accuracy` / `accuracy_dist` against the iteration thresholds.
ax1.plot(iterations, accuracy, label='Прообраз')
ax1.plot(iterations, accuracy_dist, label='Дистильована модель')
ax1.set(xlabel='Кількість ітерацій атаки', ylabel='Точність')
set_axis(ax1)

# Right panel: `distance` / `distance_dist` against the L2 thresholds.
ax2.plot(ticks, distance, label='Прообраз')
ax2.plot(ticks, distance_dist, label='Дистильована модель')
ax2.set(xlabel='Розмір шуму, $L_2$')
set_axis(ax2)

# `fig` is the current figure, so this writes the same file plt.savefig would.
fig.savefig("images/distAdvT7.pdf", bbox_inches='tight', pad_inches=0)

In [None]:
# Robustness score of each model, computed by the project helper robustness()
# from the (initial, adversarial) pair that get_insights returned for it.
# NOTE(review): presumably this is the r_p(M) metric named on the axis label of
# the robustness figure further down — confirm in the helper's implementation.
r_o =  robustness(initial, adversarial)
r_d_7 = robustness(initial_dist, adversarial_dist)
# 'original' is the undefended network, 'defended' the distilled one.
print('original', r_o)
print('defended', r_d_7)

In [None]:
# Relative change of the distilled model's robustness with respect to the
# original model's (positive means the distilled score is higher).
(r_d_7 - r_o)/r_o

In [None]:
# Hard-coded results, one entry per distillation temperature in x_T.
# NOTE(review): presumably copied from earlier runs of the robustness cell
# with different distilled checkpoints — confirm before reusing.
x_T = [1, 2, 7, 10, 20, 30, 40]

# Robustness of the original model, repeated so it plots as a flat reference line.
y_o = [0.5394695727113946] * len(x_T)

# Robustness of the distilled model at each temperature.
y_r_d = [
    0.6125116365761678,
    0.6150069675061154,
    0.6383067947416767,
    0.6408441519464926,
    0.638219608438047,
    0.6046881289549533,
    0.5591443923632143,
]

# The values appear to equal (y_r_d - y_o) / y_o, i.e. the relative gain;
# not used by any other cell visible in this notebook.
y_r_d_p = [
    0.13539607710896648,
    0.14002160384146772,
    0.18321185666417195,
    0.18791528635356664,
    0.18305024179645754,
    0.12089385489485108,
    0.036470675357895214,
]

In [None]:
# Robustness of the distilled model as a function of distillation temperature,
# with the original model's robustness drawn as a flat reference line.
fig = plt.figure(figsize=(10,5))
ax1 = fig.add_subplot(111)

ax1.plot(x_T, y_r_d, 'o-.', label='Стійкість')
ax1.plot(x_T, y_o, 'r--', label='Оригінальна стійкість')

ax1.set_ylabel('Метрика стійкості, $r_p(M)$')
ax1.set_xlabel('Температура дистиляції, $T$')

# Style the axes only after the labelled lines exist: set_axis adds the
# legend, and building a legend on an empty axes makes matplotlib warn about
# missing labelled artists. This also makes a second ax1.legend() unnecessary.
set_axis(ax1)

plt.savefig("images/robustness.pdf", bbox_inches = 'tight', pad_inches = 0)
#plt.title('Accuracy score breakdown')

In [None]:
#plt.savefig('images/accuracy_T_10.png')

In [None]:
# accuracy = []
# accuracy_dist = []
# for e in epsilons:
#   accuracy.append(1 - sum(insights['epsilon']==e)/Y_test_c.shape[0])
#   accuracy_dist.append(1 - sum(insights_dist['epsilon']==e)/Y_test_c.shape[0])
  
# Denominator used to turn the 'successful attempts' counts into a share.
# NOTE(review): 300 is a hard-coded value that does not match the 10000-sample
# slice used for X_test_c / Y_test_c; the disabled code above divides by
# Y_test_c.shape[0] instead. Confirm which denominator is intended.
n_attempts = 300

# Share of attempts that did NOT succeed, per epsilon, for both models.
plt.plot(test_epsilon, 1 - df['successful attempts'] / n_attempts, label='without defence')
plt.plot(test_epsilon, 1 - df_dist['successful attempts'] / n_attempts, label='distilled')
plt.legend()
plt.show()

In [None]:
# Clean (un-attacked) predictions of the undefended network `cls` on the full
# training and test splits (all of X_test, not the X_test_c subset).
Y_train_hat = cls.predict(X_train)
Y_test_hat = cls.predict(X_test)

# Printed in order: training accuracy, then test accuracy.
print(accuracy_score(Y_train, Y_train_hat))
print(accuracy_score(Y_test, Y_test_hat))

In [7]:
# NOTE(review): this rebinds `cls_dist`, which an earlier cell set to the
# "nets/distilled_nn_100_T_7" network; any cell executed after this one sees
# the T_2 checkpoint under the same name.
cls_dist = getNN("nets/distilled_nn_100_T_2", 1, None, None)

# Clean predictions of the distilled network on the full training and test
# splits; these overwrite the Y_train_hat / Y_test_hat of the undefended model.
Y_train_hat = cls_dist.predict(X_train)
Y_test_hat = cls_dist.predict(X_test)

# Printed in order: training accuracy, then test accuracy.
print(accuracy_score(Y_train, Y_train_hat))
print(accuracy_score(Y_test, Y_test_hat))

0.922172619047619
0.919047619047619


In [None]:
# pretrub_by_iter = non_targeted.groupby('iterations', as_index=False)['L2 norm'].mean()
# pretrub_by_iter = pretrub_by_iter.sort_values("iterations")

# plt.plot(pretrub_by_iter['iterations'], pretrub_by_iter['L2 norm'])

In [None]:
# accuracy_t = []
# for iter in pretrub_by_iter["iterations"].values:
#     accuracy_t.append(1 - non_targeted[non_targeted["iterations"]<=iter].shape[0]/total_attempts)
    
# iterations_t = pretrub_by_iter["iterations"].values

In [None]:
# FGSM_t, = plt.plot(iterations_t, accuracy_t, 'g-', label='TFGSM')

In [None]:
def L2_norm(x, axis=0):
  """Return the Euclidean (L2) norm of `x` taken along `axis`.

  Parameters:
    x: array-like of numbers.
    axis: axis (or axes) passed to the sum reduction; defaults to 0.

  Returns:
    The square root of the sum of squared entries along `axis`.
  """
  squared = np.square(x)
  summed = squared.sum(axis=axis)
  return np.sqrt(summed)