# Membership inference attack with images
## Target a CNN
Authors : Johan Jublanc

We use this article to simulate a membership inference attack : https://arxiv.org/pdf/1807.09173.pdf

Useful reference : https://medium.com/disaitek/demystifying-the-membership-inference-attack-e33e510a0c39

### Imports

In [None]:
import os
import glob

# basics
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

# tensorflow privacy
from tensorflow_privacy.privacy.analysis import compute_dp_sgd_privacy
from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPAdamGaussianOptimizer

# tensorflow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam

# sklearn and xgb
from sklearn import metrics
from sklearn.model_selection import train_test_split
import xgboost as xgb

In [None]:
# Display the installed TensorFlow version; the notebook expects a 2.x release
tf.__version__

# Data

We use the CIFAR10 data, which is a dataset of 32x32 color images. For more information, see:
- https://www.cs.toronto.edu/~kriz/cifar.html

CIFAR10 data are split into batches. For this example, two batches are used to build and evaluate the target classifier, and two other batches are used to build up the attack.

In [None]:
def unpickle(file):
    """Load and return the object stored in the pickle file ``file``.

    The file is read in binary mode and decoded with ``encoding='bytes'``.
    NOTE: unpickling can execute arbitrary code, so only trusted files
    should be passed to this function.
    """
    import pickle

    with open(file, 'rb') as handle:
        content = pickle.load(handle, encoding='bytes')
    return content

def get_data():
    """Download CIFAR-10 (if needed) and return its training batches.

    Returns:
        list: one unpickled object per ``data_batch*`` file, ordered by file
        name so that indices into the list are stable across runs.

    Raises:
        FileNotFoundError: if the extracted ``cifar-10-batches-py`` folder
            cannot be found beside or inside the path returned by Keras.
    """
    url = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
    data_dir = tf.keras.utils.get_file(origin=url, fname='cifar10', untar=True)

    # Locate the extracted folder relative to the path returned by get_file
    # rather than assuming a fixed number of path components.
    # NOTE(review): whether the archive is extracted beside or inside
    # ``data_dir`` presumably depends on the Keras version - both are tried.
    candidates = [
        os.path.join(os.path.dirname(data_dir), "cifar-10-batches-py"),
        os.path.join(data_dir, "cifar-10-batches-py"),
    ]
    cifar_data_path = next((path for path in candidates if os.path.isdir(path)), None)
    if cifar_data_path is None:
        raise FileNotFoundError(
            "Could not find 'cifar-10-batches-py' in any of : {}".format(candidates))

    # os.listdir gives no ordering guarantee: sort so that data[0], data[1], ...
    # always refer to the same batch files.
    data_batches_names = sorted(
        item for item in os.listdir(cifar_data_path) if item.startswith("data_batch")
    )

    print("Files used to build the data list : ")

    data = []
    for data_batches_name in data_batches_names:
        data.append(unpickle(os.path.join(cifar_data_path, data_batches_name)))
        print(data_batches_name)

    return data

# List of unpickled CIFAR-10 batches, indexed by position in the rest of the notebook
data = get_data()

def process_data(data):
    """Turn flat CIFAR rows into normalised channel-last images.

    Args:
        data: array whose values are divided by 255 and whose total size is a
            multiple of 3 * 32 * 32 (one flattened image per row).

    Returns:
        numpy.ndarray of shape ``(n_images, 32, 32, 3)``.
    """
    flat_array_normalized = data / 255.
    # -1 lets numpy infer the number of images instead of requiring exactly 10000.
    img_reshaped = np.reshape(flat_array_normalized, (-1, 3, 32, 32))
    # Move the channel axis last: (n, 3, 32, 32) -> (n, 32, 32, 3).
    return np.transpose(img_reshaped, (0, 2, 3, 1))

#############################
# Data for the target model #
#############################

# Batches 0 and 1: images go through process_data, labels are one-hot encoded.
x_a = process_data(data[0][b"data"])
y_a = tf.keras.utils.to_categorical(data[0][b"labels"], num_classes=10)

x_b = process_data(data[1][b"data"])
y_b = tf.keras.utils.to_categorical(data[1][b"labels"], num_classes=10)

#######################
# Data for the attack #
#######################

# Batches 3 and 4 get the same treatment and feed the shadow model.
x_prim_in = process_data(data[3][b"data"])
y_prim_in = tf.keras.utils.to_categorical(data[3][b"labels"], num_classes=10)

x_prim_out = process_data(data[4][b'data'])
y_prim_out = tf.keras.utils.to_categorical(data[4][b"labels"], num_classes=10)

# Parameters

In [None]:
# Training schedule shared by the target and the shadow models
epochs = 15
batch_size = 250

# Shape of a single image, read from the first sample
input_shape = x_a[0].shape

# Optimizer settings; a strictly positive noise_multiplier (e.g. 1.5)
# selects the differentially private optimizer
l2_norm_clip = 1.5
noise_multiplier = 0
num_microbatches = 250
learning_rate = 0.001

if batch_size % num_microbatches:
    raise ValueError('Batch size should be an integer multiple of the number of microbatches')

dropout = True

train_target = True
# The shadow model is only trained when no DP noise is requested
train_shadow = not noise_multiplier > 0

In [None]:
def get_model_name_patterns(dropout, noise_multiplier):
    """Build, print and return the file name patterns used by the notebook.

    Returns a 4-tuple: glob pattern of the target models, glob pattern of the
    shadow models, path of the loss graph and path of the accuracy graph.
    """
    # Multiplying by a boolean yields either the tag or an empty string.
    dropout_tag = dropout * "WithDropOut"
    dp_tag = "DP" + str(noise_multiplier)

    patterns = (
        f"models/target{dropout_tag}{dp_tag}_*.h5",
        f"models/shadow{dropout_tag}_*.h5",
        f"graphs/graphLoss{dropout_tag}{dp_tag}.png",
        f"graphs/graphAccuracy{dropout_tag}{dp_tag}.png",
    )
    for entry in patterns:
        print(entry)
    return patterns

In [None]:
# File name patterns for the current dropout / noise_multiplier configuration
pattern_target, pattern_shadow, pattern_graphs_loss, pattern_graphs_accuracy = get_model_name_patterns(dropout, noise_multiplier)

# Plot examples

In [None]:
def plt_img_labels(x, y):
    """Show up to the first 25 images of ``x`` on a 5x5 grid.

    Args:
        x: indexable collection of images accepted by ``plt.imshow``.
        y: indexable collection of per-image score / one-hot vectors; each
            image is titled with ``np.argmax`` of the matching entry.
    """
    plt.figure(figsize=(10,10))

    # Never index past the end when fewer than 25 samples are supplied.
    for n in range(min(25, len(x), len(y))):
        plt.subplot(5,5,n+1)
        plt.imshow(x[n])
        plt.title(np.argmax(y[n]))
        plt.axis('off')

In [None]:
# Images and labels must come from the same batch so the titles match the pictures
plt_img_labels(x_a, y_a)

# Train and save functions

In [None]:
def create_model(input_shape, dropout):
    """Build the (uncompiled) convolutional classifier.

    Args:
        input_shape: shape of one input image, passed to the first layer.
        dropout: when truthy, a ``Dropout(0.2)`` layer is added after each of
            the two max-pooling layers.

    Returns:
        A Keras ``Sequential`` model ending with a 10-unit softmax layer.
    """
    model = Sequential()
    # Honour the caller-supplied shape instead of a hard-coded (32, 32, 3).
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=tuple(input_shape)))
    model.add(MaxPooling2D((2, 2)))
    if dropout:
        model.add(Dropout(0.2))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    if dropout:
        model.add(Dropout(0.2))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dense(10, activation='softmax'))
    return model

__Load/Save model__

In [None]:
# Create the folder that stores the trained models; no error if it already exists
os.makedirs("models", exist_ok=True)

In [None]:
def return_model_num(pattern):
    """Return the highest numeric suffix among the files matching ``pattern``.

    Matching files are expected to be named ``<prefix>_<number>.<extension>``.

    Args:
        pattern: glob pattern of the model files.

    Returns:
        int: the largest ``<number>`` found.

    Raises:
        ValueError: if no file matches ``pattern`` or a suffix is not an integer.
    """
    matches = glob.glob(pattern)
    if not matches:
        raise ValueError("No file matches the pattern : {}".format(pattern))

    numbers = []
    for path in matches:
        # Work on the file name only, so nested, absolute or Windows-style
        # directories do not disturb the parsing.
        stem = os.path.splitext(os.path.basename(path))[0]
        numbers.append(int(stem.rsplit("_", 1)[1]))
    return max(numbers)

In [None]:
def save_new_model(pattern, model):
    """Save ``model`` under the next free index of ``pattern``.

    Args:
        pattern: glob pattern containing a ``*`` where the index goes.
        model: object exposing ``save(filepath)``.

    The first model is saved with index 0; afterwards the index is one more
    than the highest index already on disk.
    """
    existing = glob.glob(pattern)
    num = return_model_num(pattern) + 1 if existing else 0
    # Substitute the wildcard rather than cutting the pattern at its first
    # underscore, which would truncate paths containing "_" elsewhere.
    model.save(pattern.replace("*", str(num)))

In [None]:
def get_optimizer(noise_multiplier, 
                  l2_norm_clip,
                  num_microbatches,
                  learning_rate):
    """Return the optimizer matching the requested noise level.

    A plain ``Adam`` is returned unless ``noise_multiplier`` is strictly
    positive, in which case a ``DPAdamGaussianOptimizer`` is built from all
    four arguments.
    """
    if not noise_multiplier > 0:
        return Adam(learning_rate=learning_rate)

    return DPAdamGaussianOptimizer(
        l2_norm_clip=l2_norm_clip,
        noise_multiplier=noise_multiplier,
        num_microbatches=num_microbatches,
        learning_rate=learning_rate,
    )

In [None]:
def model_compile(model, optimizer):
    """Compile ``model`` with a per-example categorical cross-entropy.

    Args:
        model: Keras model to compile.
        optimizer: optimizer instance handed to ``model.compile``.

    Returns:
        The same model, compiled.
    """
    print("Manual compilation of the model")
    print("Optimizer : {}".format(optimizer))
    # The network built in this notebook ends with a softmax layer, so its
    # outputs are probabilities, not logits: from_logits must be False.
    # Reduction.NONE keeps one loss value per example.
    loss = tf.keras.losses.CategoricalCrossentropy(from_logits=False,
                                                   reduction=tf.losses.Reduction.NONE)
    model.compile(loss=loss,
                  optimizer=optimizer,
                  metrics=['categorical_accuracy'])
    return model

In [None]:
def get_new_model(input_shape, 
                  noise_multiplier, 
                  l2_norm_clip,
                  num_microbatches,
                  learning_rate,
                  dropout):
    """Create a fresh network and compile it with the matching optimizer.

    ``input_shape`` and ``dropout`` are forwarded to ``create_model``; the
    other arguments are forwarded to ``get_optimizer``.
    """
    # The network is built first, then the optimizer, then both are compiled.
    return model_compile(
        create_model(input_shape, dropout),
        get_optimizer(noise_multiplier, l2_norm_clip, num_microbatches, learning_rate),
    )

In [None]:
def load_model(pattern, 
               noise_multiplier, 
               l2_norm_clip,
               num_microbatches,
               learning_rate):
    """Load the most recent model matching ``pattern`` and recompile it.

    Args:
        pattern: glob pattern of the saved models (``*`` stands for the index).
        noise_multiplier, l2_norm_clip, num_microbatches, learning_rate:
            forwarded to ``get_optimizer`` to build the new optimizer.

    Returns:
        The loaded Keras model, compiled by ``model_compile``.
    """
    num = return_model_num(pattern)
    filepath = pattern.replace("*", str(num))

    # The model is recompiled right below, so the training configuration
    # stored in the file is not needed: skip restoring it.
    model = tf.keras.models.load_model(filepath, compile=False)
    print("\nModel retrieved from the file : {}\n".format(filepath))
    optimizer = get_optimizer(noise_multiplier, 
                              l2_norm_clip,
                              num_microbatches,
                              learning_rate)
    model = model_compile(model, optimizer)

    return model

## Train target model

__Load pretrained or create a new model__

In [None]:
def train_and_save_model(data_train,
                         data_test,
                         num_epochs,
                         batch_size,
                         pattern, 
                         input_shape, 
                         noise_multiplier, 
                         l2_norm_clip,
                         num_microbatches,
                         learning_rate,
                         dropout
                        ):
    """Build a new model, train it, save it and return the training history.

    ``data_train`` and ``data_test`` are ``(inputs, labels)`` pairs;
    ``data_test`` is also used as validation data during ``fit``. The model is
    saved through ``save_new_model`` under ``pattern``.
    """
    # Freshly initialised and compiled network
    model = get_new_model(input_shape,
                          noise_multiplier,
                          l2_norm_clip,
                          num_microbatches,
                          learning_rate,
                          dropout)

    # Baseline measured on at most the first 1000 evaluation samples
    eval_inputs = data_test[0][:1000]
    eval_labels = data_test[1][:1000]
    score = model.evaluate(eval_inputs, eval_labels, batch_size=500, verbose=0)

    print("\n\nPERFORMANCES BEFORE TRAINNING: ")
    print('Test loss:', np.mean(score[0]))
    print('Test accuracy:', score[1])

    # Training
    history = model.fit(
        data_train[0],
        data_train[1],
        epochs=num_epochs,
        batch_size=batch_size,
        verbose=1,
        validation_data=data_test,
    )

    # Persist the trained network
    save_new_model(pattern, model)

    return history

In [None]:
if train_target:
    # Target model: trained on batch "b", validated on batch "a"
    history_target = train_and_save_model(
        data_train=(x_b, y_b),
        data_test=(x_a, y_a),
        num_epochs=epochs,
        batch_size=batch_size,
        pattern=pattern_target,
        input_shape=input_shape,
        noise_multiplier=noise_multiplier,
        l2_norm_clip=l2_norm_clip,
        num_microbatches=num_microbatches,
        learning_rate=learning_rate,
        dropout=dropout,
    )

In [None]:
# Reload the latest saved target model and recompile it
target_model = load_model(
    pattern=pattern_target,
    noise_multiplier=noise_multiplier,
    l2_norm_clip=l2_norm_clip,
    num_microbatches=num_microbatches,
    learning_rate=learning_rate,
)

In [None]:
# Evaluate the reloaded target model on batch "a" (not used for its training)
score = target_model.evaluate(x_a, y_a, batch_size=500, verbose=0)

print("\n\nPERFORMANCES AFTER TRAINNING: ")
# The loss is averaged with np.mean before printing
print('Test loss:', np.mean(score[0]))
print('Test accuracy:', score[1])
print("\n")

## Train the shadow model

In [None]:
if train_shadow:
    # Shadow model: trained on the "in" batch, validated on the "out" batch,
    # always without DP noise
    history_shadow = train_and_save_model(
        data_train=(x_prim_in, y_prim_in),
        data_test=(x_prim_out, y_prim_out),
        num_epochs=epochs,
        batch_size=batch_size,
        pattern=pattern_shadow,
        input_shape=input_shape,
        noise_multiplier=0,
        l2_norm_clip=l2_norm_clip,
        num_microbatches=num_microbatches,
        learning_rate=learning_rate,
        dropout=dropout,
    )

In [None]:
# Reload the latest saved shadow model (no DP noise) and recompile it
model_shadow = load_model(pattern_shadow, 
                          noise_multiplier=0,
                          l2_norm_clip=l2_norm_clip,
                          num_microbatches=num_microbatches,
                          learning_rate=learning_rate)
model_shadow.summary()
score = model_shadow.evaluate(x_prim_out, y_prim_out, batch_size=500, verbose=0)

# The model loaded above is a saved, already trained one, so these figures
# are post-training performances.
print("\n\nPERFORMANCES AFTER TRAINING: ")
print('Test loss:', np.mean(score[0]))
print('Test accuracy:', score[1])

## Compare learning process

In [None]:
# Create the folder that stores the figures; no error if it already exists
os.makedirs("graphs", exist_ok=True)

In [None]:
def reduce_loss(val_loss):
    """Return a list holding the mean of every entry of ``val_loss``."""
    return [np.mean(entry) for entry in val_loss]

In [None]:
# Validation loss per epoch; a curve is drawn only for models trained in
# this run, since their history objects do not exist otherwise.
if train_shadow:
    plt.plot(reduce_loss(history_shadow.history["val_loss"]), label="shadow model")
if train_target:
    plt.plot(reduce_loss(history_target.history["val_loss"]), label="target model")
plt.legend()
plt.title("LOSS (val)")
plt.savefig(pattern_graphs_loss)

plt.show()

In [None]:
# Validation accuracy per epoch. The "val_" key is used so that the curve
# matches the figure title and the validation loss plot.
if train_shadow:
    plt.plot(history_shadow.history["val_categorical_accuracy"], label="shadow model")
if train_target:
    plt.plot(history_target.history["val_categorical_accuracy"], label="target model")
plt.legend()
plt.xlabel("Epochs")
plt.title("ACCURACY (VAL)")
plt.savefig(pattern_graphs_accuracy)
plt.show()

## Build up the attack

__Build a dataset $D^*$ to train the attack__

Now that we have trained our shadow model on the "in" part of the data, we can make predictions on both parts of the dataset ("in" and "out") and label the results accordingly. The new dataset is named $D^*$.

In [None]:
def input_fn_pred(x):
    """Wrap ``x`` in a ``tf.data.Dataset`` delivered as one single batch.

    Each element is mapped through ``reshape_images`` before batching.
    NOTE(review): ``reshape_images`` is not defined in the visible part of
    this notebook - confirm it exists before calling this function.
    """
    dataset = tf.data.Dataset.from_tensor_slices(x)
    dataset = dataset.map(reshape_images)
    # One batch containing every sample
    return dataset.batch(x.shape[0])

In [None]:
# Shadow model outputs for samples it was trained on -> label 1 ("in")
x_star_in = model_shadow.predict(x_prim_in)
y_star_in = [1] * len(x_star_in)

# Shadow model outputs for samples it never saw -> label 0 ("out")
x_star_out = model_shadow.predict(x_prim_out)
y_star_out = [0] * len(x_star_out)

In [None]:
# Stack the "in" and "out" parts along the first axis (numpy's default axis)
x_star = np.concatenate((x_star_in, x_star_out))
y_star = np.concatenate((y_star_in, y_star_out))

In [None]:
from sklearn.model_selection import train_test_split

# Keep 20% of D* aside to evaluate the attack model
(x_star_train,
 x_star_test,
 y_star_train,
 y_star_test) = train_test_split(x_star, y_star, test_size=0.2)

__Create XGBOOST attack model__

ref : https://www.datacamp.com/community/tutorials/xgboost-in-python#apply

In [None]:
# Attack model: a binary classifier ("in" vs "out"), so a classification
# objective is used instead of a regression one.
clf_attack  = xgb.XGBClassifier(objective ='binary:logistic',
                                colsample_bytree = 0.8,
                                learning_rate = 0.01,
                                max_depth = 5,
                                alpha = 10,
                                n_estimators = 20)

clf_attack.fit(x_star_train, y_star_train)
print("Accuracy:", metrics.accuracy_score(y_star_test, clf_attack.predict(x_star_test)))

In [None]:
# Final attack model, fitted on the whole of D*. Membership is a binary
# label, hence the classification objective.
clf_attack  = xgb.XGBClassifier(objective ='binary:logistic',
                                colsample_bytree = 0.3,
                                learning_rate = 0.1,
                                max_depth = 5,
                                alpha = 10,
                                n_estimators = 20)
clf_attack.fit(x_star,y_star)

# Test the attack against the true data set D

In [None]:
def get_predictions_and_labels(target_model, attack_model, data, label):
    """Run the attack on ``data`` and pair its predictions with ``label``.

    Args:
        target_model: object whose ``predict(data)`` returns the outputs an
            attacker would obtain from the attacked model.
        attack_model: object whose ``predict`` maps those outputs to a
            membership prediction.
        data: inputs passed to ``target_model.predict``.
        label: true membership value, repeated for every row.

    Returns:
        pandas.DataFrame with the columns ``y_pred`` and ``y``.
    """
    # Information we have thanks to the API (original model)
    probas = target_model.predict(data)

    # Use the attack model given as argument, not a global variable
    prediction = attack_model.predict(probas)

    # Pair every prediction with the true label
    rows = list(zip(prediction, [label] * len(probas)))
    return pd.DataFrame(rows, columns=["y_pred", "y"])

Results for images out of the training dataset

In [None]:
# x_a was not used to train the target model: true membership label is 0
results_a = get_predictions_and_labels(
    target_model=target_model,
    attack_model=clf_attack,
    data=x_a,
    label=0,
)

Results for images in the training dataset

In [None]:
# x_b is the target model's training batch: true membership label is 1
results_b = get_predictions_and_labels(
    target_model=target_model,
    attack_model=clf_attack,
    data=x_b,
    label=1,
)

Measure the accuracy of the attack

In [None]:
# Stack both result frames and renumber the rows from 0
attack_results = pd.concat([results_a, results_b]).reset_index(drop=True)

In [None]:
# Share of samples whose membership (in / out of the training set) was guessed correctly
print("Accuracy:", metrics.accuracy_score(attack_results["y"], attack_results["y_pred"]))

## save results

In [None]:
# One-row summary of this run; every value is wrapped in a list so the dict
# can be turned into a single-row DataFrame. Keys are kept exactly as they
# are stored in the results file.
result_dict = {"dataset" : ["cifar10"], "attack_model" : ["XGBoost"]}

# Accuracies are measured on the first 1000 samples of the held-out batches
# (index 1 of the evaluate() output).
result_dict["accuracy_target"] = [
    target_model.evaluate(x_a[:1000], y_a[:1000], batch_size=1000, verbose=0)[1]
]
result_dict["accuracy_shadow"] = [
    model_shadow.evaluate(x_prim_out[:1000], y_prim_out[:1000], batch_size=1000, verbose=0)[1]
]
result_dict["accurracy_attack"] = [
    metrics.accuracy_score(attack_results["y"], attack_results["y_pred"])
]

# Hyper-parameters of the run
result_dict["DP_multiplicator"] = [noise_multiplier]
result_dict["droupout"] = [dropout]
result_dict["learning_rate"] = [learning_rate]
result_dict["num_epochs"] = [epochs]

In [None]:
# Single-row DataFrame describing the current run
result_pd = pd.DataFrame(result_dict)
result_pd

In [None]:
# Append the current run to the results of previous runs, when there are any.
# isfile() also copes with a missing "models" folder.
if os.path.isfile("models/results.csv"):
    main_pd = pd.read_csv("models/results.csv", index_col=0)
    result_pd = pd.concat([main_pd, result_pd], ignore_index=True, sort=False)

In [None]:
result_pd

In [None]:
# Ask for confirmation before overwriting the results file
go = input("do you want to save the results ?")

In [None]:
# Any answer starting with "y" (case-insensitive, surrounding blanks ignored)
# confirms the save.
if go.strip().lower().startswith("y"):
    result_pd.to_csv("models/results.csv")
    # Report success only once the file has actually been written
    print("Results saved")

# Retrieve and interpret the results

In [None]:
# Reload every saved run; the first CSV column holds the index
results_pd = pd.read_csv("models/results.csv", index_col=0)

In [None]:
results_pd