# Membership inference attack with images
## Target a CNN
Authors : Johan Jublanc

We use this article to simulate a membership inference attack : https://arxiv.org/pdf/1807.09173.pdf

Useful reference: https://medium.com/disaitek/demystifying-the-membership-inference-attack-e33e510a0c39

### Imports

In [None]:
import numpy as np
import pandas as pd
from sklearn import datasets

from os import listdir
from os.path import isfile, join

import urllib.request

import tarfile

from sklearn import metrics
import xgboost as xgb

from scipy.ndimage import rotate

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout
from tensorflow.keras.optimizers import SGD

import matplotlib.pyplot as plt

from dp_optimizer_fn import make_gaussian_optimizer_class
from dp_optimizer_fn import make_optimizer_class
import tensorflow as tf

import tensorflow as tf
import IPython.display as display
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import os
import glob
import pathlib

In [None]:
# Display the installed TensorFlow version.
tf.__version__

In [None]:
from tensorflow.keras.optimizers import RMSprop
import tensorflow_privacy

In [295]:
# List the names exposed by the tensorflow_privacy package.
dir(tensorflow_privacy)

['DPAdagradGaussianOptimizer',
 'DPAdagradOptimizer',
 'DPAdamGaussianOptimizer',
 'DPAdamOptimizer',
 'DPGradientDescentGaussianOptimizer',
 'DPGradientDescentOptimizer',
 'DPQuery',
 'GaussianAverageQuery',
 'GaussianSumQuery',
 'GaussianSumQueryEntry',
 'NestedQuery',
 'NoPrivacyAverageQuery',
 'NoPrivacySumQuery',
 'NormalizedQuery',
 'PrivacyLedger',
 'QuantileAdaptiveClipAverageQuery',
 'QuantileAdaptiveClipSumQuery',
 'QueryWithLedger',
 'SampleEntry',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 'absolute_import',
 'division',
 'print_function',
 'privacy',
 'sys']

In [None]:
#opt = tensorflow_privacy.DPGradientDescentGaussianOptimizer

In [None]:
#opt(learning_rate=0.1, l2_norm_clip=1, noise_multiplier=10)

In [None]:
# The notebook expects TensorFlow 2.x; display the installed version to check.
tf.__version__

In [None]:
# Fix TensorFlow's global random seed.
# NOTE(review): no NumPy / scikit-learn / XGBoost seed is set in this notebook — confirm
# whether full reproducibility is expected.
tf.random.set_seed(1717)

## Get the data from cifar10

We use CIFAR-10, a dataset of 32x32 color images. For more information, see:
- https://www.cs.toronto.edu/~kriz/cifar.html

The CIFAR-10 training data is split into batches. In this example, some batches are used to build the classifier (the target model) and others are used to build the attack.

In [None]:
def unpickle(file):
    """Load and return the object stored in a pickle file.

    Args:
        file: Path of the pickle file to read.

    Returns:
        The unpickled object. The file is read with ``encoding='bytes'``, so
        8-bit strings pickled by Python 2 come back as ``bytes`` (hence keys
        such as ``b"data"`` elsewhere in this notebook).

    Note:
        ``pickle.load`` can execute arbitrary code; only call this on files
        coming from a trusted source.
    """
    import pickle

    with open(file, 'rb') as fo:
        # Local name chosen so that the builtin ``dict`` is not shadowed.
        content = pickle.load(fo, encoding='bytes')
    return content

In [None]:
def get_data():
    """Download CIFAR-10 (if needed) and load its training batches.

    The archive is fetched with ``tf.keras.utils.get_file`` and every file of
    the ``cifar-10-batches-py`` directory whose name starts with
    ``data_batch`` is unpickled.

    Returns:
        list: one unpickled object per ``data_batch*`` file, ordered by file
        name so that a given index always refers to the same batch.
    """
    url = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
    data_dir = tf.keras.utils.get_file(origin=url, fname='cifar10', untar=True)

    # The extracted "cifar-10-batches-py" directory is looked up next to the
    # path returned by get_file. Taking the parent directory (instead of
    # keeping the first five "/"-separated components) no longer assumes a
    # "/home/<user>/..." layout.
    # NOTE(review): assumes get_file extracts the archive beside the returned
    # path — confirm for the installed TensorFlow/Keras version.
    root_keras_data_path = os.path.dirname(data_dir)
    cifar_data_path = os.path.join(root_keras_data_path, "cifar-10-batches-py")

    # os.listdir gives no ordering guarantee; sort so data[i] is stable
    # from one run / machine to the next.
    data_batches_names = sorted(
        item for item in os.listdir(cifar_data_path)
        if item.startswith("data_batch")
    )

    print("Files used to build the data list : ")

    data = []
    for data_batches_name in data_batches_names:
        data.append(unpickle(os.path.join(cifar_data_path, data_batches_name)))
        print(data_batches_name)

    return data

In [None]:
# Load the CIFAR-10 training batches: one list entry per batch file.
data = get_data()

__Split data__

We first build a model that is trained on the dataset $data_b$; the dataset $data_a$ is used to evaluate the attack.

In [None]:
# Batch "a": never used to train the target model; serves as its test data
# and as the "out" samples when the attack is evaluated.
x_a = data[0][b"data"]
y_a = data[0][b"labels"]

# Batch "b": training data of the target model ("in" samples for the attack).
x_b = data[1][b"data"]
y_b = data[1][b"labels"]

__Get a shadow dataset__

Here the attacker knows another dataset that is similar to $D$. We use the batch stored at `data[3]`.

In [None]:
# Shadow "in" data: used further down to train the shadow model.
x_prim_in = data[3][b"data"]
y_prim_in = data[3][b"labels"]

__The batch stored at `data[4]` provides the "out" inputs, i.e. inputs that are not used to train the shadow model__

In [None]:
# Shadow "out" data: only used to validate the shadow model and as "out" samples.
x_prim_out = data[4][b'data']
y_prim_out = data[4][b'labels']

__Define training parameters__

In [None]:
BATCH_SIZE = 32  # default batch size of input_fn; also used to derive steps_per_epoch
IMG_HEIGHT = 224  # NOTE(review): not referenced elsewhere in this notebook (images are 32x32) — confirm it can be dropped
IMG_WIDTH = 224  # NOTE(review): same remark as IMG_HEIGHT
SHUFFLE_SIZE = 200  # default shuffle buffer size of input_fn
NUM_EPOCHS = 100  # number of epochs passed to model.fit

In [None]:
input_shape = (32, 32, 3)  # shape of one image as fed to the model
num_classes = 10  # NOTE(review): create_model hard-codes 10 output units and does not read this value
len_train = 10000  # number of training samples; reassigned to len(y_b) further down

In [None]:
dropout = False  # when True, create_model inserts Dropout layers
noise_multiplier = 2  # a value > 0 makes get_optimizer build the differentially private optimizer

In [None]:
# The target model is always trained.
train_target = True
# The shadow model is trained only when no DP noise is requested; otherwise
# a shadow model saved by an earlier run is loaded further down.
train_shadow = not (noise_multiplier > 0)

In [None]:
def get_model_name_patterns(dropout, noise_multiplier):
    """Build the file-name patterns used for saved models and graphs.

    Args:
        dropout: Truthy when the models use dropout; adds "WithDropOut" to
            every name.
        noise_multiplier: DP noise multiplier; appears as "DP<value>" in the
            target-model and graph names (not in the shadow-model name).

    Returns:
        tuple: (target model glob pattern, shadow model glob pattern,
        loss graph path, accuracy graph path). Each one is also printed.
    """
    dropout_tag = "WithDropOut" * dropout
    dp_tag = "DP" + str(noise_multiplier)

    patterns = (
        "models/target{}{}_*.h5".format(dropout_tag, dp_tag),
        "models/shadow{}_*.h5".format(dropout_tag),
        "graphs/graphLoss{}{}.png".format(dropout_tag, dp_tag),
        "graphs/graphAccuracy{}{}.png".format(dropout_tag, dp_tag),
    )
    for name_pattern in patterns:
        print(name_pattern)
    return patterns

In [None]:
# File-name patterns for the current (dropout, noise_multiplier) configuration.
pattern_target, pattern_shadow, pattern_graphs_loss, pattern_graphs_accuracy = get_model_name_patterns(dropout, noise_multiplier)

## Functions making up the whole pipeline:
* reshape images
* create the decay callback
* create a MapDataset
* plot example images
* create a target
* load and save trained models

In [None]:
def reshape_images(flat_array):
    """Convert one flat image row into a float tensor of shape (32, 32, 3).

    Args:
        flat_array: Flat array of 3 * 32 * 32 pixel values.

    Returns:
        A float32 tensor with values divided by 255, reshaped to (3, 32, 32)
        and then with all three axes reversed.
    """
    scaled = tf.cast(flat_array, tf.float32) / 255.
    channels_first = tf.reshape(scaled, (3, 32, 32))
    # Reverse the axes, (3, 32, 32) -> (32, 32, 3); this is what tf.transpose
    # does when no permutation is given. The two spatial axes end up swapped
    # as well, which is presumably why plt_img_labels rotates the images.
    return tf.transpose(channels_first, perm=[2, 1, 0])

In [None]:
def scheduler(epoch):
    """Return the learning rate to use at the given epoch.

    Args:
        epoch: Epoch index.

    Returns:
        float: 0.005 below epoch 10, 0.001 below epoch 50, 0.0001 below
        epoch 150 and 0.00005 from epoch 150 on.
    """
    # (exclusive upper bound on the epoch, learning rate), in increasing order.
    steps = ((10, 0.005), (50, 0.001), (150, 0.0001))
    for upper_bound, rate in steps:
        if epoch < upper_bound:
            return rate
    return 0.00005

In [None]:
# Learning-rate callback built from `scheduler`.
# NOTE(review): the `callbacks=[callback]` argument is commented out in
# train_and_save_model, so this callback is currently not used for training.
from dp_optimizer_fn import LearningRateScheduler_Perso
callback = LearningRateScheduler_Perso(scheduler)

We define a function to create a TensorFlow dataset.

In [None]:
def input_fn(flat_arrays, labels,
             BATCH_SIZE=BATCH_SIZE,
             SHUFFLE_SIZE=SHUFFLE_SIZE,
             NUM_EPOCHS=NUM_EPOCHS):
    """Build a shuffled, endlessly repeating, batched (image, label) dataset.

    Args:
        flat_arrays: Flat image rows; each one goes through reshape_images.
        labels: Labels aligned with ``flat_arrays``.
        BATCH_SIZE: Number of samples per batch.
        SHUFFLE_SIZE: Size of the shuffle buffer.
        NUM_EPOCHS: Not used by this function; the dataset repeats
            indefinitely, so the caller bounds iteration with step counts.

    Returns:
        A ``tf.data.Dataset`` yielding (images, labels) batches.
    """
    images = tf.data.Dataset.from_tensor_slices(flat_arrays).map(reshape_images)
    targets = tf.data.Dataset.from_tensor_slices(labels)

    pairs = tf.data.Dataset.zip((images, targets))
    pairs = pairs.shuffle(SHUFFLE_SIZE)
    pairs = pairs.repeat()
    pairs = pairs.batch(BATCH_SIZE)
    return pairs.prefetch(1)

In [None]:
def plt_img_labels(img_batch, label_batch):
    """Draw the first 25 images of a batch on a 5x5 grid, titled by label.

    Args:
        img_batch: Batch of images; must hold at least 25 of them.
        label_batch: Matching labels; each element must expose ``.numpy()``.
    """
    n_rows, n_cols = 5, 5
    plt.figure(figsize=(10, 10))

    for idx in range(n_rows * n_cols):
        plt.subplot(n_rows, n_cols, idx + 1)
        # Each image is rotated by -90 degrees before being displayed.
        plt.imshow(rotate(img_batch[idx], -90))
        plt.title(str(label_batch[idx].numpy()))
        plt.axis('off')

A first model is trained on $data_b$ and tested on $data_a$.
We use this article to build a reasonably good model: https://machinelearningmastery.com/how-to-develop-a-cnn-from-scratch-for-cifar-10-photo-classification/

In [None]:
def create_model(input_shape, dropout=dropout):
    """Build the (uncompiled) CNN used for both target and shadow models.

    The network stacks three stages of two 3x3 convolutions (32, 64 and 128
    filters) each followed by 2x2 max pooling, then a 128-unit dense layer
    and a 10-unit softmax output.

    Args:
        input_shape: Shape of one input image, given to the first layer.
        dropout: When truthy, a Dropout(0.2) layer follows every pooling
            layer and the 128-unit dense layer.

    Returns:
        The Keras ``Sequential`` model.
    """
    conv_options = dict(activation='relu',
                        kernel_initializer='he_uniform',
                        padding='same')

    model = Sequential()
    for stage, n_filters in enumerate((32, 64, 128)):
        if stage == 0:
            # Only the very first layer declares the input shape.
            model.add(Conv2D(n_filters, (3, 3),
                             input_shape=input_shape,
                             **conv_options))
        else:
            model.add(Conv2D(n_filters, (3, 3), **conv_options))
        model.add(Conv2D(n_filters, (3, 3), **conv_options))
        model.add(MaxPooling2D((2, 2)))
        if dropout:
            model.add(Dropout(0.2))

    model.add(Flatten())
    model.add(Dense(128, activation='relu', kernel_initializer='he_uniform'))
    if dropout:
        model.add(Dropout(0.2))
    model.add(Dense(10, activation='softmax'))

    return model

__Load/Save model__

In [None]:
# Make sure the directory that receives the saved models exists. makedirs
# with exist_ok=True replaces the "list the directory, then mkdir" check and
# is a no-op when the directory is already there.
os.makedirs("models", exist_ok=True)

In [None]:
def return_model_num(pattern):
    """Return the highest index among the saved files matching ``pattern``.

    File names are expected to end with ``_<integer>.<extension>``, e.g.
    ``models/targetDP2_3.h5``. The index is parsed from the base name of each
    match, so the directory part of the pattern can have any depth or
    separator.

    Args:
        pattern: Glob pattern with ``*`` in place of the index.

    Returns:
        int: the largest index found.

    Raises:
        ValueError: if no file matches the pattern, or if a matching file
            name does not end with an integer index.
    """
    indices = []
    for path in glob.glob(pattern):
        stem = os.path.splitext(os.path.basename(path))[0]
        # The index is whatever follows the last underscore of the stem.
        indices.append(int(stem.rsplit("_", 1)[-1]))

    if not indices:
        # Explicit message instead of NumPy's "zero-size array" error.
        raise ValueError("No saved model matches the pattern: {}".format(pattern))
    return max(indices)

In [None]:
def save_new_model(pattern, model):
    """Save ``model`` under the next free index of ``pattern``.

    The ``*`` of the pattern is replaced by 0 when no file matches yet,
    otherwise by the highest existing index plus one.

    Args:
        pattern: Glob pattern with ``*`` in place of the index.
        model: Object exposing ``save(filepath)``.
    """
    if glob.glob(pattern):
        num = return_model_num(pattern) + 1
    else:
        num = 0
    # Both cases build the name from the full pattern, so a pattern holding
    # other underscores (or another extension) is no longer truncated.
    model.save(pattern.replace("*", str(num)))

In [None]:
def get_optimizer(noise_multiplier, l2_norm_clip=1):
    """Return the optimizer used to compile the models.

    Args:
        noise_multiplier: When > 0, a differentially private optimizer is
            built from ``make_gaussian_optimizer_class``; otherwise a plain
            momentum optimizer is returned.
        l2_norm_clip: Clipping norm given to the DP optimizer. Ignored when
            ``noise_multiplier`` is not positive.

    Returns:
        The optimizer instance (learning rate 0.001, momentum 0.9).
    """
    if noise_multiplier > 0:
        # Use differential privacy: wrap the momentum optimizer class.
        base_optimizer_class = tf.compat.v1.train.MomentumOptimizer
        dp_optimizer_class = make_gaussian_optimizer_class(base_optimizer_class)
        # Forward the caller's l2_norm_clip (it used to be hard-coded to 1,
        # which silently ignored the argument).
        optimizer = dp_optimizer_class(l2_norm_clip=l2_norm_clip,
                                       noise_multiplier=noise_multiplier,
                                       learning_rate=0.001,
                                       momentum=0.9)
    else:
        optimizer = tf.compat.v1.train.MomentumOptimizer(learning_rate=0.001, momentum=0.9)
    return optimizer

In [None]:
def model_compile(model, optimizer,
                  metric='sparse_categorical_accuracy',
                  loss=None):
    """Compile ``model`` with the given optimizer, loss and metric.

    Args:
        model: Keras model to compile.
        optimizer: Optimizer instance.
        metric: Metric reported during training and evaluation.
        loss: Loss to use. When None, a new sparse categorical cross-entropy
            working on probabilities (``from_logits=False``) is created,
            because the network built by create_model ends with a softmax
            layer. Pass a loss explicitly to override this.

    Returns:
        The same model, compiled.
    """
    if loss is None:
        # Created per call rather than once in the signature, so that models
        # do not share one loss object. from_logits=False matches the softmax
        # output; from_logits=True would apply a second softmax to it.
        loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)

    print("Manual compilation of the model")
    print("Optimizer : {}".format(optimizer))
    model.compile(loss=loss,
                  optimizer=optimizer,
                  metrics=[metric])
    return model

In [None]:
def get_new_model(input_shape, noise_multiplier, l2_norm_clip=1, dropout=dropout):
    """Create a new CNN and compile it with the configured optimizer.

    Args:
        input_shape: Shape of one input image.
        noise_multiplier: Forwarded to get_optimizer.
        l2_norm_clip: Forwarded to get_optimizer.
        dropout: Forwarded to create_model.

    Returns:
        The compiled model.
    """
    network = create_model(input_shape, dropout=dropout)
    return model_compile(network, get_optimizer(noise_multiplier, l2_norm_clip))

In [None]:
def load_model(pattern, noise_multiplier):
    """Load the most recent saved model matching ``pattern`` and compile it.

    Args:
        pattern: Glob pattern with ``*`` in place of the model index.
        noise_multiplier: Forwarded to get_optimizer for the compilation.

    Returns:
        The loaded model, compiled by model_compile.
    """
    num = return_model_num(pattern)
    filepath = pattern.replace("*", str(num))

    # compile=False: the model is compiled just below with a fresh optimizer,
    # so the compile state stored in the file does not need to be restored.
    model = tf.keras.models.load_model(filepath, compile=False)
    print("\nModel retrieved from the file : {}\n".format(filepath))

    optimizer = get_optimizer(noise_multiplier)
    model = model_compile(model, optimizer)

    return model

## Train target model

__Create dataset objects__

In [None]:
# Dataset "b" trains the target model; dataset "a" is used to validate / test it.
ds_xy_b = input_fn(x_b, y_b)
ds_xy_a = input_fn(x_a, y_a)

__Plot some examples__

In [None]:
# Number of training samples, read by train_and_save_model to compute steps_per_epoch.
len_train = len(y_b)
# Take one batch from the training dataset and display its first images.
img_batch, label_batch = next(iter(ds_xy_b))
plt_img_labels(img_batch, label_batch)

__Load pretrained or create a new model__

In [None]:
def train_and_save_model(data_train,
                         data_test,
                         pattern,
                         input_shape=input_shape,
                         noise_multiplier=noise_multiplier,
                        ):
    """Create a model, train it and save it under the next free index.

    Reads the module-level NUM_EPOCHS, len_train and BATCH_SIZE values.

    Args:
        data_train: Dataset used for training.
        data_test: Dataset used for the initial evaluation and validation.
        pattern: File-name pattern handed to save_new_model.
        input_shape: Shape of one input image.
        noise_multiplier: Forwarded to get_new_model.

    Returns:
        The history object returned by ``model.fit``.
    """
    # --- create the model and report its untrained performance ---
    model = get_new_model(input_shape, noise_multiplier=noise_multiplier)
    baseline = model.evaluate(data_test, steps=200, verbose=0)

    print("\n\nPERFORMANCES BEFORE TRAINNING: ")
    print('Test loss:', baseline[0])
    print('Test accuracy:', baseline[1])

    # --- train ---
    # The learning-rate callback is deliberately left out, as before:
    # callbacks=[callback]
    fit_options = dict(epochs=NUM_EPOCHS,
                       steps_per_epoch=len_train // BATCH_SIZE,
                       verbose=1,
                       validation_data=data_test,
                       validation_steps=500)
    history = model.fit(data_train, **fit_options)

    # --- save ---
    save_new_model(pattern, model)

    return history

In [None]:
# Train the target model on batch "b", validating on batch "a".
# `history` is only defined when train_target is True; the plotting cells
# further down read it.
if train_target:
    history = train_and_save_model(data_train=ds_xy_b, 
                                   data_test=ds_xy_a,
                                   pattern=pattern_target,
                                   noise_multiplier=noise_multiplier)

In [None]:
# Reload the saved target model with the highest index for this configuration.
target_model = load_model(pattern_target, noise_multiplier)

In [None]:
# Evaluate the trained target model on batch "a" (never used for its training).
score = target_model.evaluate(ds_xy_a, steps=2000, verbose=0)
    
print("\n\nPERFORMANCES AFTER TRAINNING: ")
print('Test loss:', score[0])
print('Test accuracy:', score[1])
print("\n")

## Train the shadow model

In [None]:
# Shadow datasets: "in" trains the shadow model, "out" validates it.
ds_prim_in = input_fn(x_prim_in, y_prim_in)
ds_prim_out = input_fn(x_prim_out, y_prim_out)

In [None]:
# Train the shadow model when requested. train_shadow is True only when
# noise_multiplier is not positive, so this training uses the non-DP optimizer.
if train_shadow:
    history_shadow = train_and_save_model(data_train=ds_prim_in, 
                                   data_test=ds_prim_out,
                                   pattern=pattern_shadow,
                                   noise_multiplier=noise_multiplier)

In [None]:
# Display the file-name pattern used for the shadow models.
pattern_shadow

In [None]:
# Load the most recent saved shadow model (compiled with the non-DP
# optimizer) and evaluate it on the shadow "out" data.
model_shadow = load_model(pattern_shadow, noise_multiplier=0)
model_shadow.summary()
score = model_shadow.evaluate(ds_prim_out, steps=200, verbose=0)

# The model comes from a saved, trained file, so the header no longer says
# "BEFORE TRAINING".
print("\n\nPERFORMANCES OF THE TRAINED SHADOW MODEL: ")
print('Test loss:', score[0])
print('Test accuracy:', score[1])

## Compare learning process

In [None]:
# Make sure the directory that receives the saved figures exists. makedirs
# with exist_ok=True replaces the "list the directory, then mkdir" check and
# is a no-op when the directory is already there.
os.makedirs("graphs", exist_ok=True)

In [None]:
# Training loss per epoch. The shadow curve is drawn only when the shadow
# model was trained in this run.
# NOTE(review): `history` exists only if train_target was True above.
if train_shadow:
    plt.plot(history_shadow.history["loss"], label="shadow model")
plt.plot(history.history["loss"], label="target model")
plt.legend()
plt.title("LOSS")
# Save the figure before showing it.
plt.savefig(pattern_graphs_loss)

plt.show()

In [None]:
# Validation accuracy per epoch. The shadow curve is drawn only when the
# shadow model was trained in this run.
# NOTE(review): `history` exists only if train_target was True above.
if train_shadow:
    plt.plot(history_shadow.history["val_sparse_categorical_accuracy"], label="shadow model")
plt.plot(history.history["val_sparse_categorical_accuracy"], label="target model")
plt.legend()
# Axis label spelling fixed ("Epcohs" ended up in the saved figure).
plt.xlabel("Epochs")
plt.title("ACCURACY (VAL)")
plt.savefig(pattern_graphs_accuracy)
plt.show()

## Build up the attack

__Build a dataset $D^*$ to train the attack__

Now that we have trained our model on the "in" part of the data, we can make predictions on both parts of the dataset ("in" and "out") and label the results. The new dataset is named $D^*$.

In [None]:
def input_fn_pred(x):
    """Build a prediction dataset holding all of ``x`` in a single batch.

    Args:
        x: Array of flat image rows; ``x.shape[0]`` is used as batch size.

    Returns:
        A ``tf.data.Dataset`` of images processed by reshape_images.
    """
    images = tf.data.Dataset.from_tensor_slices(x)
    images = images.map(reshape_images)
    return images.batch(x.shape[0])

In [None]:
# Unlabelled, single-batch versions of the shadow "in" and "out" data, for prediction.
ds_x_prim_in = input_fn_pred(x_prim_in)
ds_x_prim_out = input_fn_pred(x_prim_out)

In [None]:
# Shadow-model outputs on the shadow "in" data: labelled 1 (member).
x_star_in = model_shadow.predict(ds_x_prim_in)
y_star_in = [1] * len(x_star_in)

# Shadow-model outputs on the shadow "out" data: labelled 0 (non-member).
x_star_out = model_shadow.predict(ds_x_prim_out)
y_star_out = [0] * len(x_star_out)

In [None]:
# Stack the "in" and "out" parts into the attack dataset D* (features and membership labels).
x_star = np.concatenate([x_star_in, x_star_out], axis=0)
y_star = np.concatenate([y_star_in, y_star_out], axis=0)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of D* to measure the attack model's accuracy.
(x_star_train, x_star_test,
 y_star_train, y_star_test) = train_test_split(x_star, y_star, test_size=.2)

__Create XGBOOST attack model__

ref : https://www.datacamp.com/community/tutorials/xgboost-in-python#apply

In [None]:
# Attack model evaluated on a held-out part of D*.
# Membership is a binary target, so the classifier uses the binary logistic
# objective instead of the regression objective 'reg:squarederror'.
clf_attack = xgb.XGBClassifier(objective='binary:logistic',
                               colsample_bytree=0.8,
                               learning_rate=0.01,
                               max_depth=5,
                               alpha=10,
                               n_estimators=20)

clf_attack.fit(x_star_train, y_star_train)
print("Accuracy:", metrics.accuracy_score(y_star_test, clf_attack.predict(x_star_test)))

In [None]:
# Final attack model, refit on the whole of D* (this replaces the classifier
# fitted in the previous cell).
# Membership is a binary target, so the classifier uses the binary logistic
# objective instead of the regression objective 'reg:squarederror'.
clf_attack = xgb.XGBClassifier(objective='binary:logistic',
                               colsample_bytree=0.3,
                               learning_rate=0.1,
                               max_depth=5,
                               alpha=10,
                               n_estimators=20)
clf_attack.fit(x_star, y_star)

# Test the attack against the true data set D

In [None]:
def get_predictions_and_labels(target_model, attack_model, data, label):
    """Run the attack on ``data`` and pair its predictions with the truth.

    Args:
        target_model: Model under attack; its ``predict`` output is what the
            attacker observes.
        attack_model: Fitted attack classifier applied to those outputs.
        data: Array of flat image rows to query the target model with.
        label: True membership value shared by every row of ``data``
            (1 for training members, 0 otherwise).

    Returns:
        pandas.DataFrame with columns ``y_pred`` (attack prediction) and
        ``y`` (true membership).
    """
    ds_data = input_fn_pred(data)

    # Information we have thanks to the API (original model)
    probas = target_model.predict(ds_data)

    # Use the attack model given by the caller (the global clf_attack used
    # to be called here, which ignored the attack_model argument).
    prediction = attack_model.predict(probas)

    # Pair every prediction with the true label.
    result = pd.DataFrame({"y_pred": prediction,
                           "y": [label] * len(probas)},
                          columns=("y_pred", "y"))

    return result

Results for images out of the training dataset

In [None]:
# Batch "a" was not used to train the target model: true membership label 0.
results_a = get_predictions_and_labels(target_model = target_model, 
                                       attack_model=clf_attack, 
                                       data=x_a, label=0)

Results for images in the training dataset

In [None]:
# Batch "b" is the target model's training data: true membership label 1.
results_b = get_predictions_and_labels(target_model = target_model, 
                                       attack_model=clf_attack,
                                       data=x_b, label=1)

Measure the accuracy of the attack

In [None]:
# Stack the "out" and "in" results and renumber the rows from 0.
attack_results = pd.concat([results_a, results_b]).reset_index(drop=True)

In [None]:
# Share of samples whose membership the attack guessed correctly.
print("Accuracy:", metrics.accuracy_score(attack_results["y"], attack_results["y_pred"]))

## save results

In [None]:
# One-row summary of this run; every value is wrapped in a list so the dict
# converts directly to a single-row DataFrame.
# NOTE(review): the keys "accurracy_attack" and "droupout" are misspelled,
# but they become the column names of models/results.csv, so renaming them
# would not match rows already stored there.
result_dict = {
    "dataset" : ["cifar10"],
    "attack_model" : ["XGBoost"],
    "accuracy_target" : [target_model.evaluate(ds_xy_a, steps=200, verbose=0)[1]],
    "accuracy_shadow" : [model_shadow.evaluate(ds_prim_out, steps=200, verbose=0)[1]],
    "accurracy_attack" : [metrics.accuracy_score(attack_results["y"], attack_results["y_pred"])],
    "DP_multiplicator" : [noise_multiplier],
    "droupout" : [dropout],
    "num_epochs" : [NUM_EPOCHS]
}

In [None]:
# Turn the summary into a one-row DataFrame and display it.
result_pd = pd.DataFrame(result_dict)
result_pd

TODO: find a way to save the learning rates

In [None]:
# If a results file already exists, put its rows in front of the new one.
if "results.csv" in os.listdir("models"):
    main_pd = pd.read_csv("models/results.csv", index_col=0)
    result_pd = pd.concat([main_pd, result_pd], ignore_index=True)

In [None]:
# Display the results table (previous rows, if any, plus this run).
result_pd

In [None]:
# Ask interactively whether the results table should be written to disk.
go = input("do you want to save the results ?")

In [None]:
# Any answer starting with "y" (case-insensitive) writes the table to models/results.csv.
if go.lower().startswith("y"):
    print("Results saved")
    result_pd.to_csv("models/results.csv")

# Retrieve and interpret the results

In [None]:
#def convert_string_list(string_list):
#    list_ = string_list.replace("[","")\
#                      .replace("]","")\
#                      .split(',')
#    list_ = [float(x) for x in list_]
#    return list_

In [None]:
# NOTE(review): this unconditionally overwrites models/results.csv with only
# the first row of result_pd, whatever was answered in `go` above — confirm
# that this is intended and not a leftover from debugging.
result_pd[:1].to_csv("models/results.csv")

In [None]:
# Read the stored results back (note the name: `results_pd`, not `result_pd`).
results_pd = pd.read_csv("models/results.csv", index_col=0)