<a href="https://colab.research.google.com/github/Re14m/training/blob/master/2022-0509_recipie268.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# [【AIセキュリティ入門】Adversarial Examplesを理解しAIモデルを頑健にするレシピ](https://axross-recipe.com/recipes/268)

In [None]:
# Install the Adversarial Robustness Toolbox (ART) package.
# NOTE(review): the version is not pinned, so a fresh run may pull a newer
# release than the one this notebook was written against — consider pinning.
!pip install adversarial-robustness-toolbox

In [None]:
# パッケージのインポート
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Flatten, Conv2D
from tensorflow.keras.layers import MaxPooling2D, GlobalAveragePooling2D, Dropout
# Switch TensorFlow to graph (non-eager) mode before any model is built.
# NOTE(review): presumably needed because ART's KerasClassifier does not
# support TF2 eager execution — confirm against the ART documentation.
tf.compat.v1.disable_eager_execution()

import art
from art.defences.trainer import AdversarialTrainer
from art.attacks.evasion import FastGradientMethod
from art.estimators.classification import KerasClassifier

In [None]:
# Prepare the dataset: load CIFAR-10 as (train, test) splits.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()
classes = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

# Preview nine training images, taking one sample every 100 entries.
show_images = [X_train[i * 100] for i in range(9)]

# Lay the previews out on a 3x3 grid (subplot indices are 1-based).
for idx, image in enumerate(show_images):
    plt.subplot(3, 3, idx + 1)
    plt.imshow(image)

# Show the shapes of the training and test image arrays.
# Fix: the second value used to be y_train.shape (the training labels), which
# does not match the "test data" label in the message; report X_test instead.
print("学習データ数: {}, テストデータ数: {}".format(X_train.shape, X_test.shape))

In [None]:
# Preprocessing: rescale the images by 1/255 as float32 and one-hot encode the labels.
# Caution: this cell overwrites its own inputs, so running it a second time
# without reloading the dataset would rescale/encode the data again.
X_train, X_test = (images.astype('float32') / 255 for images in (X_train, X_test))

# One output column per entry in `classes`.
num_classes = len(classes)
y_train, y_test = (
    tf.keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)

In [None]:
# Model definition (the attack target: a small CNN).
def gen_model(input_shape=(32, 32, 3), num_classes=10):
    """Build the uncompiled CNN image classifier used as the attack target.

    The network is three 3x3 convolution stages (64, 128, 256 filters; the
    first two followed by dropout and max-pooling), global average pooling,
    two dense layers (1024 and 512 units, each followed by dropout) and a
    softmax output layer.

    Args:
        input_shape: Shape of a single input image. Defaults to (32, 32, 3),
            the value that was previously hard-coded.
        num_classes: Number of units in the softmax output layer. Defaults to
            10, the value that was previously hard-coded.

    Returns:
        A `tensorflow.keras.models.Model` mapping an image batch to class
        probabilities. The caller is responsible for compiling it.
    """
    inputs = Input(shape=input_shape)

    # Convolution stage 1.
    x = Conv2D(64, (3, 3), padding='SAME', activation='relu')(inputs)
    x = Dropout(0.25)(x)
    x = MaxPooling2D()(x)

    # Convolution stage 2.
    x = Conv2D(128, (3, 3), padding='SAME', activation='relu')(x)
    x = Dropout(0.25)(x)
    x = MaxPooling2D()(x)

    # Convolution stage 3, reduced to one feature vector per image.
    x = Conv2D(256, (3, 3), padding='SAME', activation='relu')(x)
    x = GlobalAveragePooling2D()(x)

    # Fully connected classification head.
    x = Dense(1024, activation='relu')(x)
    x = Dropout(0.25)(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.25)(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    return Model(inputs, outputs)

# Instantiate the CNN, configure training (Adam optimizer, categorical
# cross-entropy loss, accuracy metric) and print the layer summary.
model = gen_model()
model.compile(
    loss='categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(),
    metrics=['accuracy'],
)
model.summary()

In [None]:
# Train the baseline model. The test split is passed as validation data, so
# the per-epoch validation metrics are measured on the test set.
model.fit(
    x=X_train,
    y=y_train,
    epochs=30,
    batch_size=1024,
    shuffle=True,
    validation_data=(X_test, y_test),
)

In [None]:
# Set up ART for crafting adversarial examples.

# The features were normalised to the 0.0-1.0 range, so the minimum is 0.0
# and the maximum is 1.0.
min_pixel_value, max_pixel_value = 0.0, 1.0

# Wrap the image classifier in ART's KerasClassifier.
# use_logits=False because the model's last layer applies softmax.
classifier = KerasClassifier(
    model=model,
    clip_values=(min_pixel_value, max_pixel_value),
    use_logits=False,
)

In [None]:
# Craft adversarial examples from the test set with the Fast Gradient Method.

# eps is the amount of noise added to each image.
attack_generic = FastGradientMethod(classifier, eps=0.05)
X_test_adv = attack_generic.generate(X_test)

In [None]:
# Measure the baseline classifier's accuracy on the adversarial examples.
preds = classifier.predict(X_test_adv)
# Fraction of samples whose arg-max prediction equals the one-hot true class.
accuracy = np.mean(np.argmax(preds, axis=1) == np.argmax(y_test, axis=1))
# Fix: the message used to read "Exmaples"; spell it "Examples", matching the
# robust-model evaluation message later in the notebook.
print('Adversarial Examplesに対する推論精度: {}%'.format(accuracy * 100))

In [None]:
# Show one unmodified test image (before any noise is added) together with
# its true label and the classifier's prediction.

# Index of the test sample to inspect.
target_index = 1

# Clean sample.
print('=== 正常なデータ ===')
plt.imshow(X_test[target_index])

# predict() is called with a leading batch axis of size 1.
pred = classifier.predict(np.expand_dims(X_test[target_index], axis=0))
true_name = classes[np.argmax(y_test[target_index])]
pred_name = classes[np.argmax(pred)]
print('正解ラベル: "{}"、推論結果: "{}"'.format(true_name, pred_name))

In [None]:
# Show the adversarial version of the same test sample together with its
# true label and the classifier's prediction.

print('=== Adversarial Examples ===')
plt.imshow(X_test_adv[target_index])

# predict() is called with a leading batch axis of size 1.
pred = classifier.predict(np.expand_dims(X_test_adv[target_index], axis=0))
true_name = classes[np.argmax(y_test[target_index])]
pred_name = classes[np.argmax(pred)]
print('正解ラベル: "{}"、推論結果: "{}"'.format(true_name, pred_name))

In [None]:
# Create the adversarial-training wrapper.
# attacks: method used to craft adversarial examples;
# ratio: share of adversarial examples mixed into the training data.
# NOTE(review): the trainer wraps the existing `classifier` object, so it
# presumably keeps training that same model rather than a copy — confirm.
new_model = AdversarialTrainer(classifier, attacks=attack_generic, ratio=0.5)

# Run adversarial training on the training split.
new_model.fit(
    X_train,
    y_train,
    batch_size=512,
    nb_epochs=30,
    validation_data=(X_test, y_test),
    shuffle=True,
)

In [None]:
# Evaluate the adversarially trained model, first on the clean test set and
# then on the adversarial examples generated earlier.
for message, inputs in (
    ('頑健なモデルの通常のデータに対する精度: {}%', X_test),
    ('頑健なモデルのAdversarial Examplesに対する精度: {}%', X_test_adv),
):
    preds = new_model.predict(inputs)
    # Fraction of samples whose arg-max prediction equals the one-hot true class.
    accuracy = np.mean(np.argmax(preds, axis=1) == np.argmax(y_test, axis=1))
    print(message.format(accuracy * 100))