In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from keras.models import load_model, Sequential
import tensorflow as tf
from keras.layers import Convolution2D, MaxPooling2D, Flatten, Dense

tf.compat.v1.disable_eager_execution()

DIMENSION = 256
loaded_model = load_model('./target_model')
x_stealed_loaded = np.load('x_val.npy')
y_stealed_loaded = np.loadtxt('y_val.txt', delimiter=',')

In [2]:
X_train_s, X_test_s, y_train_s, y_test_s = train_test_split(x_stealed_loaded, y_stealed_loaded, test_size=0.30, random_state=1)

# Ataque de extraccion

In [3]:
from art.estimators.classification import KerasClassifier
classifier = KerasClassifier(loaded_model)

In [4]:
from art.attacks.extraction import CopycatCNN
copycat_cnn = CopycatCNN(
  batch_size_fit=16,
  batch_size_query=16,
  nb_epochs=5,
  nb_stolen=len(X_train_s),
  classifier=classifier
)

In [5]:
def createBlankModel():
  stolen_m = Sequential()
  stolen_m.add(Convolution2D(32, (3, 3), strides=(1, 1), input_shape=(DIMENSION, DIMENSION, 3), activation='relu'))
  stolen_m.add(MaxPooling2D(pool_size=(2, 2)))
  stolen_m.add(Convolution2D(64, (3, 3), strides=(1, 1), activation='relu'))
  stolen_m.add(MaxPooling2D(pool_size=(2, 2)))
  stolen_m.add(Flatten())
  stolen_m.add(Dense(512, activation='softplus'))
  stolen_m.add(Dense(25, activation='sigmoid'))

  stolen_m.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"]
  )
  return stolen_m

In [6]:
model_stolen = KerasClassifier(model=createBlankModel())

In [7]:
stolen_classifier = copycat_cnn.extract(
  x=X_train_s, 
  y=y_train_s, 
  thieved_classifier=model_stolen
)

  updates=self.state_updates,


Train on 1718 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [8]:
score_original = classifier._model.evaluate(
  x=X_test_s,
  y=y_test_s
)

score_stolen = stolen_classifier._model.evaluate(
  x=X_test_s,
  y=y_test_s
)

# Comparing test losses
print(f"Original test loss: {score_original[0]:.2f} "  f"vs stolen test loss: {score_stolen[0]:.2f}")

# Comparing test accuracies
print(f"Original test accuracy: {score_original[1]:.2f} " f"vs stolen test accuracy: {score_stolen[1]:.2f}")

  updates = self.state_updates


Original test loss: 0.92 vs stolen test loss: 14.11
Original test accuracy: 0.89 vs stolen test accuracy: 0.51


In [9]:
from art.defences.postprocessor import ReverseSigmoid

In [10]:
post_processor = ReverseSigmoid(
  beta=1.0,
  gamma=0.2
)

classifier_def = KerasClassifier(
  model=loaded_model,
  postprocessing_defences=post_processor,
)

In [11]:
model_stolen_def = KerasClassifier(model=createBlankModel())

copycat_cnn_def = CopycatCNN(
  batch_size_fit=16,
  batch_size_query=16,
  nb_epochs=5,
  nb_stolen=len(X_train_s),
  classifier=classifier_def
)

stolen_classifier_def = copycat_cnn_def.extract(
  x=X_train_s, 
  y=y_train_s, 
  thieved_classifier=model_stolen_def
)

  perturbation_r = self.beta * (sigmoid(-self.gamma * np.log((1.0 - preds_clipped) / preds_clipped)) - 0.5)


Train on 1718 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [12]:
score_original = classifier_def._model.evaluate(
  x=X_test_s,
  y=y_test_s
)

score_stolen = stolen_classifier_def._model.evaluate(
  x=X_test_s,
  y=y_test_s
)

# Comparing test losses
print(f"Original test loss: {score_original[0]:.2f} "  f"vs stolen test loss: {score_stolen[0]:.2f}")

# Comparing test accuracies
print(f"Original test accuracy: {score_original[1]:.2f} " f"vs stolen test accuracy: {score_stolen[1]:.2f}")

Original test loss: 0.92 vs stolen test loss: 12.01
Original test accuracy: 0.89 vs stolen test accuracy: 0.05


## Descripcion de resultados
Como se observan en los datos, se puede concluir que la defensa del modelo fue un exito. Debido a que al no tener defensa se pudo tener una precision de 0.51, mientras que al tener la capa de seguridad el robado tiene una precision de 0.05. Concluyendo asi que el ReverseSigmoid tuvo exito en deter el ataque de extraccion de modelo.