In [None]:
import numpy as np
import tensorflow as tf

from tensorflow.keras.layers import Conv2D, Dense, Flatten, MaxPooling2D, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.optimizers import Adam
from art.estimators.classification import TensorFlowV2Classifier
from art.utils import load_mnist
import matplotlib.pyplot as plt
from art.attacks.extraction.copycat_cnn import CopycatCNN

In [None]:
# Fetch MNIST via ART's helper. The call yields four items, unpacked here as
# the train pair, the test pair, and the min / max pixel values.
train_split, test_split, min_pixel_value, max_pixel_value = load_mnist()
X_train, y_train = train_split
X_test, y_test = test_split

# Number of output classes shared by the victim and the thieved classifier.
nb_classes = 10

In [None]:
# Victim model: a small CNN (a single 7x7 conv filter, 4x4 max-pooling and a
# softmax output layer) that the extraction attack later tries to copy.
model = Sequential()
# Declare the input shape on the first layer so the model is built immediately.
# Without it the model is still unbuilt when `model.summary()` runs below:
# older Keras versions raise "This model has not yet been built", newer ones
# print a summary with no output shapes or parameter counts.
model.add(Conv2D(1, kernel_size=(7, 7), activation='relu', input_shape=(28, 28, 1)))
model.add(MaxPooling2D(pool_size=(4, 4)))
model.add(Flatten())
model.add(Dense(nb_classes, activation='softmax'))
# No loss/optimizer is given here; both are passed to the ART classifier wrapper.
model.compile()

model.summary()

In [None]:
# Wrap the Keras victim model in an ART classifier, then train it on the
# MNIST training split.
victim_loss = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
victim_optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
victim_classifier = TensorFlowV2Classifier(
    model=model,
    loss_object=victim_loss,
    optimizer=victim_optimizer,
    clip_values=(0, 1),
    nb_classes=nb_classes,
    input_shape=(28, 28, 1),
)
victim_classifier.fit(X_train, y_train, batch_size=128, nb_epochs=5)

In [None]:
# Victim accuracy on the test split: the fraction of samples whose arg-max
# prediction equals the arg-max of the corresponding label row.
predictions = victim_classifier.predict(X_test)
predicted_classes = np.argmax(predictions, axis=1)
true_classes = np.argmax(y_test, axis=1)
accuracy = np.mean(predicted_classes == true_classes)
print("Accuracy on benign test examples: {} %".format(accuracy * 100))


In [None]:
# Inspect one test sample: show the image and compare the victim's predicted
# class with the ground-truth class.
target_index = 98
target_X = X_test[target_index][np.newaxis, ...]  # prepend a batch axis of size 1
victim_scores = victim_classifier.predict(target_X)
target_pred = np.argmax(victim_scores, axis=1)[0]
plt.imshow(X_test[target_index].reshape(28, 28), cmap='gray')
print(f'Prediction {target_pred}, Ground Truth {np.argmax(y_test[target_index])}')

In [None]:
# Substitute ("thieved") model: a deeper CNN than the victim, declared as one
# layer list. The CopycatCNN attack trains it from the victim's answers.
thieved_model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.5),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(nb_classes, activation='softmax'),
])
thieved_model.compile()

# Wrap it in an ART classifier with the same loss, optimizer settings, clip
# range and input shape that the victim wrapper uses.
thieved_loss = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
thieved_optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
thieved_classifier = TensorFlowV2Classifier(
    model=thieved_model,
    loss_object=thieved_loss,
    optimizer=thieved_optimizer,
    clip_values=(0, 1),
    nb_classes=nb_classes,
    input_shape=(28, 28, 1),
)

In [None]:
# Configure the CopycatCNN extraction attack against the victim classifier and
# run it; `extract` is given the training images and the untrained substitute,
# and its return value replaces `thieved_classifier`.
attack = CopycatCNN(
    classifier=victim_classifier,
    batch_size_fit=16,
    batch_size_query=16,
    nb_epochs=10,
    nb_stolen=1000,
)
thieved_classifier = attack.extract(
    x=X_train,
    thieved_classifier=thieved_classifier,
)

In [None]:
# Test-set accuracy of the extracted classifier, computed the same way as for
# the victim: arg-max of the prediction versus arg-max of the label row.
predictions = thieved_classifier.predict(X_test)
is_correct = np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)
accuracy = np.mean(is_correct)
print("Accuracy on benign test examples: {} %".format(accuracy * 100))

In [None]:
# Repeat the single-sample check with the extracted classifier: show the image
# and print its predicted class next to the ground-truth class.
target_index = 98
target_X = X_test[target_index][np.newaxis, ...]  # prepend a batch axis of size 1
thieved_scores = thieved_classifier.predict(target_X)
target_pred = np.argmax(thieved_scores, axis=1)[0]
plt.imshow(X_test[target_index].reshape(28, 28), cmap='gray')
print(f'Prediction {target_pred}, Ground Truth {np.argmax(y_test[target_index])}')

In [None]:
# Fidelity check: how often the victim and the extracted classifier predict the
# same class on the test split, regardless of the true label.
victim_outputs = victim_classifier.predict(x=X_test)
victim_preds = np.argmax(victim_outputs, axis=1)
thieved_outputs = thieved_classifier.predict(x=X_test)
thieved_preds = np.argmax(thieved_outputs, axis=1)

agreement_rate = np.mean(victim_preds == thieved_preds)
print(f'agreement rate between victim_preds and thieved_preds: {agreement_rate * 100}%')
