In [None]:
import os
import time
import copy
import random
from tqdm import tqdm

import numpy as np

import sklearn
from sklearn import datasets
from sklearn import metrics
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt

from aijack.attack import Poison_attack_sklearn

In [None]:
# Load MNIST from OpenML; with return_X_y=True the call returns a (data, target) pair.
# The download is cached under data_home (here: the current directory).
mnist = datasets.fetch_openml("mnist_784", version=1, data_home=".", return_X_y=True)
# np.asarray accepts both pandas objects and plain ndarrays, so this no longer
# depends on fetch_openml returning something with a `.values` attribute.
imagedata, labeldata = np.asarray(mnist[0]), np.asarray(mnist[1])

# Keep only the digits "3" and "7" (labels are compared as strings).
# The selection keeps the original ordering: every 3 first, then every 7.
index_3 = np.flatnonzero(labeldata == "3").tolist()
index_7 = np.flatnonzero(labeldata == "7").tolist()
image_data_3_7 = imagedata[index_3 + index_7]
label_data_3_7 = labeldata[index_3 + index_7]

# Fixed seed so that Restart & Run All reproduces the same split (and therefore
# the same initial attack point and the same reported accuracies).
SPLIT_SEED = 42
(
    imagedata_training,
    imagedata_validation,
    labeldata_training,
    labeldata_validation,
) = train_test_split(
    image_data_3_7,
    label_data_3_7,
    test_size=0.2,
    shuffle=True,
    random_state=SPLIT_SEED,
)

# Rescale pixel intensities by 1/255.
imagedata_training = imagedata_training / 255.0
imagedata_validation = imagedata_validation / 255.0

# Work on a subset (first 5000 training / 500 validation rows of the shuffled split).
X_train = imagedata_training[:5000]
y_train = labeldata_training[:5000]
X_valid = imagedata_validation[:500]
y_valid = labeldata_validation[:500]

# Alternative (disabled): load pre-saved arrays from data/ instead of downloading.
# X_train = np.load("data/X_train.npy", allow_pickle=True)
# y_train = np.load("data/y_train.npy", allow_pickle=True)
# X_valid = np.load("data/X_valid.npy", allow_pickle=True)
# y_valid = np.load("data/y_valid.npy", allow_pickle=True)

## Train target model (Before Attack)

In [None]:
# Fit the target linear SVM on the clean training data and time the fit.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# durations; time.time() is wall-clock time and can jump if the system clock changes.
start = time.perf_counter()  # start of the timed section
clf = SVC(kernel="linear")
clf.fit(X_train, y_train)  # training
elapsed_time = time.perf_counter() - start  # duration of the fit in seconds
print("elapsed_time:{0}".format(elapsed_time) + "[sec]")

# Time prediction on the validation data.
start = time.perf_counter()
predict = clf.predict(X_valid)  # predictions for the validation set
elapsed_time = time.perf_counter() - start
print("elapsed_time:{0}".format(elapsed_time) + "[sec]")

# Report accuracy and the per-class classification report.
print("結果")  # header line: "Results"
ac_score = metrics.accuracy_score(y_valid, predict)  # compare predictions with the true labels
cl_report = metrics.classification_report(y_valid, predict)
print("正解率 = ", ac_score)  # "accuracy = "
print(cl_report)

## Attack

In [None]:
# Initial point: the training sample that the attack starts from.
initial_idx = 0
# Defensive copy: basic indexing returns a view, so without .copy() any in-place
# update of xc would also overwrite the corresponding row of X_train.
xc = X_train[initial_idx, :].copy()
yc = y_train[initial_idx]

# Show the starting image (the flat pixel vector is reshaped to 28x28) with its label.
plt.imshow(xc.reshape(28, 28), cmap="gray")
plt.title(f"initial point : {yc}")
plt.show()

In [None]:
# Draw the small training subset used by the attacker.
SUBSET_SEED = 0  # fixed seed so the same subset is drawn on every run
N_ATTACK_SAMPLES = 100  # number of clean training samples given to the attacker

# Exclude the initial attack point explicitly instead of assuming it is row 0.
candidate_idx = [i for i in range(X_train.shape[0]) if i != initial_idx]
# A private Random instance keeps the draw reproducible without reseeding the
# global `random` module state.
train_idx = random.Random(SUBSET_SEED).sample(candidate_idx, N_ATTACK_SAMPLES)

# Indexing with a list of indices already yields new arrays, so no extra copy is needed.
X_train_ = X_train[train_idx, :]
y_train_ = y_train[train_idx]

# Re-encode the string labels as +1 / -1: "7" -> +1, everything else (here "3") -> -1.
y_train_ = np.where(y_train_ == "7", 1, -1)
y_valid_ = np.where(y_valid == "7", 1, -1)

In [None]:
# Set up the poisoning attack from the target model and the attacker's +1/-1 subset.
# NOTE(review): `t` is presumably the attack's step size — confirm against the aijack docs.
attack_t = 0.5
attacker = Poison_attack_sklearn(clf, X_train_, y_train_, t=attack_t)

# Run the attack starting from xc; the call returns the modified point and a log.
# NOTE(review): the second argument is presumably the label of the poison point in the
# same +1/-1 encoding as y_train_ — confirm how aijack interprets it.
poison_label = 1
n_attack_iterations = 200
attack_result = attacker.attack(
    xc,
    poison_label,
    X_valid,
    y_valid_,
    num_iterations=n_attack_iterations,
)
xc_attacked, log = attack_result

In [None]:
# Plot the values recorded in `log` during the attack, one point per entry.
fig, ax = plt.subplots()
ax.plot(log)
ax.set_title("poisoning attack against SVM")
ax.set_xlabel("num of iterations")
ax.set_ylabel("accuracy on validation data")
# fig.savefig("poison_loss.png")
plt.show()

In [None]:
# Display the poisoned sample returned by the attack as a 28x28 grayscale image.
fig, ax = plt.subplots()
poisoned_image = xc_attacked.reshape(28, 28)
ax.imshow(poisoned_image, cmap="gray")
ax.set_title("After Attack")
# fig.savefig("poison_example.png")
plt.show()

In [None]:
# Baseline: linear SVM trained on the clean attacker subset only.
clf = SVC(kernel="linear", C=1).fit(X_train_, y_train_)
accuracy_clean = clf.score(X_valid, y_valid_)
print("before attack: ", accuracy_clean)

# Add the poisoned data: append the attacked point as one extra row, labelled -1.
X_poisoned = np.vstack([X_train_, xc_attacked.reshape(1, -1)])
y_poisoned = np.append(y_train_, -1)

# Retrain the same model configuration on the poisoned set and score it again.
clf = SVC(kernel="linear", C=1).fit(X_poisoned, y_poisoned)
accuracy_poisoned = clf.score(X_valid, y_valid_)
print("after attack: ", accuracy_poisoned)

You can see that adding only one poisoned example dramatically decreases the accuracy of the model.