Загрузка данных MNIST, создание и обучение двух различных моделей.

In [2]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.utils import to_categorical
# Fix the Python, NumPy and TensorFlow seeds so that weight initialisation and
# batch shuffling are repeatable under "Restart Kernel -> Run All".
# NOTE(review): some GPU kernels may still be nondeterministic.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Load the MNIST dataset
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
# Scale pixel values to [0, 1]. Casting to float32 first keeps the arrays at
# half the size of the float64 result that `uint8 / 255.0` would produce.
train_images = train_images.astype("float32") / 255.0
test_images = test_images.astype("float32") / 255.0
# One-hot encode the labels. The class count is given explicitly so both
# splits always get 10 columns instead of a width inferred from the data.
train_labels = to_categorical(train_labels, num_classes=10)
test_labels = to_categorical(test_labels, num_classes=10)
# Model 1: a simple fully connected network
model1 = Sequential([
    # Declare the input with an explicit Input object; passing `input_shape=`
    # to the first layer is deprecated in Keras 3 and triggers a warning.
    tf.keras.Input(shape=(28, 28)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(10, activation='softmax'),
])

# Compile the model
model1.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
# Train the model
model1.fit(train_images, train_labels, epochs=5)
# Save the model
model1.save('mnist_model1.h5')
# Model 2: a convolutional neural network (CNN)
model2 = Sequential([
    # Declare the input with an explicit Input object; passing `input_shape=`
    # to the first layer is deprecated in Keras 3 and triggers a warning.
    tf.keras.Input(shape=(28, 28, 1)),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(10, activation='softmax'),
])
# Compile the model
model2.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
# Train the model; the images get a trailing channel axis for the Conv2D input
model2.fit(train_images.reshape(-1, 28, 28, 1), train_labels, epochs=5)
# Save the model
model2.save('mnist_model2.h5')

Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 5ms/step - accuracy: 0.8730 - loss: 0.4420
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.9647 - loss: 0.1224
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9763 - loss: 0.0829
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9816 - loss: 0.0623
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9862 - loss: 0.0457




Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 18ms/step - accuracy: 0.9144 - loss: 0.2929
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 18ms/step - accuracy: 0.9835 - loss: 0.0554
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 19ms/step - accuracy: 0.9905 - loss: 0.0308
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 18ms/step - accuracy: 0.9936 - loss: 0.0204
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 17ms/step - accuracy: 0.9960 - loss: 0.0128




Применим атаку FGSM (Fast Gradient Sign Method) к первой модели, чтобы создать состязательные (adversarial) примеры.

In [3]:
import numpy as np
# Single FGSM perturbation step
def fgsm_attack(image, epsilon, gradient):
    """Apply one Fast Gradient Sign Method step to an image.

    Every element of ``image`` is shifted by ``epsilon`` in the direction of
    the sign of the matching element of ``gradient``; the result is then
    clamped to the range [0, 1].

    Args:
        image: Array of pixel values to perturb.
        epsilon: Magnitude of the per-pixel shift.
        gradient: Array of loss gradients, broadcastable against ``image``.

    Returns:
        The perturbed image with all values inside [0, 1].
    """
    step = epsilon * np.sign(gradient)
    # Keep the result inside the valid pixel range
    return np.clip(image + step, 0, 1)

# Adversarial example generation
def generate_fgsm_adversarial(model, images, labels, epsilon, batch_size=256):
    """Craft FGSM adversarial examples for a set of 28x28 images.

    The gradient of the categorical cross-entropy loss with respect to the
    input pixels is computed for whole mini-batches rather than one image at a
    time, which avoids running one eager forward/backward pass per sample.

    Args:
        model: Callable model that accepts a float32 batch of shape
            (batch, 28, 28, 1) and returns class probabilities.
        images: Array-like holding N images of 28*28 pixels each, e.g. shape
            (N, 28, 28) or (N, 28, 28, 1).
        labels: One-hot labels, one row per image (at least N rows).
        epsilon: Perturbation magnitude passed to ``fgsm_attack``.
        batch_size: Number of images processed per gradient computation.

    Returns:
        float32 NumPy array of shape (N, 28, 28) with the perturbed images.

    Raises:
        ValueError: If ``batch_size`` is not positive or fewer labels than
            images are supplied.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    # Bring the images to the (N, 28, 28, 1) float32 layout fed to the model
    images = np.asarray(images, dtype=np.float32).reshape(-1, 28, 28, 1)
    num_images = len(images)
    if num_images == 0:
        return np.empty((0, 28, 28), dtype=np.float32)

    # Labels stay one-hot; they are only flattened to one row per sample
    labels = np.asarray(labels, dtype=np.float32)
    labels = labels.reshape(len(labels), -1)
    if len(labels) < num_images:
        raise ValueError(
            f"got {len(labels)} labels for {num_images} images"
        )

    adversarial_batches = []
    for start in range(0, num_images, batch_size):
        stop = start + batch_size
        image_batch = tf.convert_to_tensor(images[start:stop])
        label_batch = tf.convert_to_tensor(labels[start:stop])

        # Compute the loss gradient with respect to the input pixels
        with tf.GradientTape() as tape:
            tape.watch(image_batch)
            # Inference mode keeps every sample's prediction independent of
            # the other samples in the batch.
            predictions = model(image_batch, training=False)
            loss = tf.keras.losses.categorical_crossentropy(label_batch, predictions)

        # `loss` holds one value per sample; the tape sums them, and because
        # each value depends only on its own image, the slice of the gradient
        # belonging to an image equals that image's individual gradient.
        gradient = tape.gradient(loss, image_batch)
        adversarial_batches.append(
            fgsm_attack(image_batch.numpy(), epsilon, gradient.numpy())
        )

    # Drop the channel axis so the result has shape (N, 28, 28)
    return np.concatenate(adversarial_batches, axis=0).reshape(-1, 28, 28)

# Craft adversarial examples against the first model
epsilon = 0.1  # FGSM perturbation magnitude
adversarial_images_model1 = generate_fgsm_adversarial(
    model=model1,
    images=test_images,
    labels=test_labels,
    epsilon=epsilon,
)

Оценка обеих моделей на состязательных (adversarial) примерах, созданных для первой модели.

In [4]:
# Integer class ids recovered from the one-hot labels
# (not used by the evaluations in this cell)
test_labels_argmax = test_labels.argmax(axis=1)
# Add the trailing channel axis matching the (28, 28, 1) input of the CNN
adversarial_images_model1_reshaped = adversarial_images_model1.reshape(-1, 28, 28, 1)

# Evaluate the first model on the adversarial examples crafted against it
loss1, acc1 = model1.evaluate(x=adversarial_images_model1, y=test_labels)
print(f'Accuracy of model1 on adversarial examples: {acc1}')

# Evaluate the second model on the same examples (attack transfer)
loss2, acc2 = model2.evaluate(x=adversarial_images_model1_reshaped, y=test_labels)
print(f'Accuracy of model2 on adversarial examples from model1: {acc2}')

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.0898 - loss: 6.8015
Accuracy of model1 on adversarial examples: 0.12280000001192093
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9591 - loss: 0.1445
Accuracy of model2 on adversarial examples from model1: 0.9648000001907349


Анализ переносимости атак.

In [5]:
# Craft adversarial examples against the second model
adversarial_images_model2 = generate_fgsm_adversarial(
    model=model2,
    images=test_images.reshape(-1, 28, 28, 1),
    labels=test_labels,
    epsilon=epsilon,
)
# Evaluate the first model on the examples crafted against the second model
loss3, acc3 = model1.evaluate(
    x=adversarial_images_model2.reshape(-1, 28, 28),
    y=test_labels,
)
print(f'Accuracy of model1 on adversarial examples from model2: {acc3}')

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9519 - loss: 0.1567
Accuracy of model1 on adversarial examples from model2: 0.9585000276565552
