# Inversion Attack   

**Goal:** Provide a controlled environment for performing a membership inference attack (referred to here as an inversion attack) that identifies which records were part of the target model's training set. The attack should be tested against models trained both with and without Differential Privacy. Based on: https://arxiv.org/pdf/1610.05820v2.pdf  
**TODO:** Refactor to be more generalized.

In [None]:
from __future__ import absolute_import, division, print_function

import sys
import logging
import math
import datetime
import random
import pandas as pd
import numpy as np
import sklearn
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import tensorflow_privacy
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from tensorflow_privacy.privacy.analysis.rdp_accountant import (
    compute_rdp,
    get_privacy_spent,
)
from tensorflow_privacy.privacy.optimizers import dp_optimizer
from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasSGDOptimizer

In [None]:
# Read the pre-generated, semicolon-delimited student dataset
df = pd.read_csv("student-mat.csv", sep=";")
# Show the names of the available columns
list(df.columns)

In [None]:
# Configure the columns to use; some can be omitted but all of them are used here
y_actual = df.G3
# Columns to remove before encoding. Other candidates that can be excluded:
# 'school', 'sex', 'address', 'famsize', 'Pstatus', 'Mjob', 'Fjob', 'reason',
# 'guardian', 'schoolsup', 'famsup', 'paid', 'activities', 'nursery', 'higher',
# 'internet', 'romantic', 'goout'
exclude_var = ['G3']
# Remove all specified columns
df = df.drop(columns=exclude_var)
# One-hot encode the categorical columns. A numeric dtype is requested
# explicitly because newer pandas versions default to bool dummy columns,
# which do not support the subtraction used by the min-max normalization.
df_encode = pd.get_dummies(df, dtype=float)
print(list(df_encode))
# Define the variable to be used later, can be refactored
X_actual = df_encode
# Bin the grades into a binary problem (0: G3 < 10, 1: G3 >= 10).
# A fresh array is built instead of overwriting np.asarray(y_actual) in place:
# that array can share memory with the G3 Series (silently destroying the raw
# grades) and is read-only when pandas copy-on-write is enabled.
ybin = (y_actual.to_numpy() >= 10).astype(int)
print(ybin)

In [None]:
# Preview the first rows of the encoded feature table
X_actual.head(n=5)

In [None]:
# Per-column extrema used for the min-max normalization.
# The reduction axis is given explicitly: np.min(DataFrame) forwards axis=None
# to DataFrame.min, which collapses the whole frame to a single scalar on
# pandas >= 2.0 instead of returning one value per column.
X_actual_min = X_actual.min(axis=0)
X_actual_max = X_actual.max(axis=0)

In [None]:
def normalize_data(data, actual_min, actual_max):
    """
    Min-max normalizes a table of data column by column.

    Args:
        data: The values to normalize (pandas DataFrame or numpy array).
        actual_min: Per-column minimum values (pandas Series or numpy array).
        actual_max: Per-column maximum values (pandas Series or numpy array).

    Returns:
        ``(data - actual_min) / (actual_max - actual_min)``, with the same
        container type as ``data - actual_min``.
    """
    # np.asarray strips any pandas index from the range (what `.values` did
    # before) while also accepting plain numpy arrays, which have no `.values`.
    value_range = np.asarray(actual_max - actual_min)
    return (data - actual_min) / value_range

def normalize_individual_data(data, actual_min, actual_max):
    """
    Min-max normalizes a single value (no array/DataFrame handling).

    Returns ``(data - actual_min) / (actual_max - actual_min)``.
    """
    offset = data - actual_min
    span = actual_max - actual_min
    return offset / span

In [None]:
# Rescale every feature column with the dataset's own minimum and maximum
X_actual = normalize_data(
    data=X_actual,
    actual_min=X_actual_min,
    actual_max=X_actual_max,
)

In [None]:
# Hold out 20% of the samples as the test set
x_train, x_test, y_train, y_test = train_test_split(
    X_actual,
    ybin,
    test_size=0.2,
    random_state=0,
)

# Carve a validation set (20% of what is left) out of the training data
x_train, x_valid, y_train, y_valid = train_test_split(x_train, y_train, test_size=0.2, random_state=0)

# Number of samples that remain for training
training_size = len(x_train)

In [None]:
# Adapted from https://github.com/VectorInstitute/PETs-Bootcamp/blob/main/DP_TensorFlowPrivacy/TFP_HeartDisease_KerasMLP_GridSearch.ipynb
def train(
    noise_multiplier,
    l2_norm_clip,
    batch_size,
    microbatches,
    x_train,
    y_train,
    dpsgd=True,
    learning_rate=0.1,
    epochs=150,
    model_dir=None,
    print_outputs=True,
    verbose=1,
):
    """
    Builds, trains and evaluates the MLP classifier, optionally with DP-SGD.

    Validation and test metrics are always computed on the module-level
    ``x_valid``/``y_valid`` and ``x_test``/``y_test`` sets, regardless of the
    ``x_train`` that is passed in.

    Args:
        noise_multiplier: Noise multiplier handed to the DP optimizer.
        l2_norm_clip: Gradient clipping norm handed to the DP optimizer.
        batch_size: Mini-batch size used for fitting.
        microbatches: Only validated against ``batch_size`` and reported; it is
            not passed to the optimizer.
        x_train: Training samples, shaped (n_samples, n_features).
        y_train: Integer class labels of the training samples.
        dpsgd: Use ``DPKerasSGDOptimizer`` when True, plain SGD otherwise.
        learning_rate: Optimizer learning rate.
        epochs: Number of training epochs.
        model_dir: Unused.
        print_outputs: Print hyperparameters, scores and a classification report.
        verbose: Keras verbosity for fitting and evaluation.

    Returns:
        Tuple ``(score_train, score_valid, score_test, eps, weights, Y_pred,
        model)``. Each score is the ``[loss, accuracy]`` result of
        ``model.evaluate``; ``eps`` is the epsilon spent (or the string
        "non-private SGD"); ``Y_pred`` holds the predicted test-set classes.

    Raises:
        ValueError: If ``dpsgd`` is set and ``microbatches`` does not divide
            ``batch_size`` evenly.
    """

    if dpsgd and batch_size % microbatches != 0:
        raise ValueError("Number of microbatches should divide evenly batch_size")

    # Take the sizes from the data that is actually trained on, so the function
    # also works for datasets other than the module-level training split.
    n_samples, n_features = np.shape(x_train)

    # Define a sequential Keras model
    model = tf.keras.Sequential(
        [
            tf.keras.layers.Dense(40, input_dim=n_features, activation="relu"),
            tf.keras.layers.Dense(60, activation="relu"),
            tf.keras.layers.Dense(20, activation="relu"),
            # Two outputs (one per class) so the attack can read a confidence
            # value for each class.
            # NOTE(review): the activation is sigmoid, so the two outputs are
            # not guaranteed to sum to one - confirm this is intended.
            tf.keras.layers.Dense(2, activation="sigmoid"),
        ]
    )

    if dpsgd:
        optimizer = DPKerasSGDOptimizer(
            l2_norm_clip=l2_norm_clip,
            noise_multiplier=noise_multiplier,
            learning_rate=learning_rate,
        )
        # The DP optimizer needs the vector of per-example losses rather than
        # their mean over a minibatch.
        loss = tf.keras.losses.SparseCategoricalCrossentropy(reduction=tf.losses.Reduction.NONE)
    else:
        optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
        loss = tf.keras.losses.SparseCategoricalCrossentropy()

    # Compile model with Keras
    model.compile(optimizer=optimizer, loss=loss, metrics=["accuracy"])

    # summary() prints the table itself, no notebook-only display() required
    model.summary()

    log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_callback = tf.keras.callbacks.TensorBoard(
        log_dir=log_dir, histogram_freq=1
    )

    # Train model with Keras
    history = model.fit(
        x_train,
        y_train,
        epochs=epochs,
        validation_data=(x_valid, y_valid),
        batch_size=batch_size,
        verbose=verbose,
        callbacks=[tensorboard_callback],
    )

    # Plot the training and validation accuracy per epoch
    plt.plot(history.history['accuracy'], label='acc')
    plt.plot(history.history['val_accuracy'], label='val_acc')
    plt.legend()
    plt.ylim(0, 1)

    score_train = model.evaluate(x_train, y_train, verbose=verbose)
    score_valid = model.evaluate(x_valid, y_valid, verbose=verbose)
    score_test = model.evaluate(x_test, y_test, verbose=verbose)

    weights = model.get_weights()

    # Predicted test classes are part of the return value, so they are computed
    # unconditionally (previously only when print_outputs was True, which made
    # the return statement fail otherwise).
    Y_pred = np.argmax(model.predict(x_test), axis=1)

    # Compute the privacy budget expended.
    # // is integer division
    if dpsgd:
        eps = compute_epsilon(
            epochs * n_samples // batch_size,
            training_size=n_samples,
            noise_multiplier=noise_multiplier,
            batch_size=batch_size,
        )
    else:
        eps = "non-private SGD"

    if print_outputs:
        print(
            "\nhyperparamters: learning rate = "
            + str(learning_rate)
            + ", noise_multiplier = "
            + str(noise_multiplier)
            + ", l2_norm_clip = "
            + str(l2_norm_clip)
            + ", epochs = "
            + str(epochs)
            + ", batch_size = "
            + str(batch_size)
            + ", microbatches = "
            + str(microbatches)
        )

        print("  training loss: %.2f" % score_train[0])
        print("  training accuracy: %.2f" % score_train[1])

        print("  validation loss: %.2f" % score_valid[0])
        print("  validation accuracy: %.2f" % score_valid[1])

        print("  test loss: %.2f" % score_test[0])
        print("  test accuracy: %.2f" % score_test[1])

        print(classification_report(y_test, Y_pred))

        if dpsgd:
            # Same delta that compute_epsilon derives from the training size;
            # reported here instead of a hard-coded constant.
            delta = 1 / (n_samples * 1.1)
            print("For delta=%.8f, the current epsilon is: %.2f" % (delta, eps))
        else:
            print("Trained with vanilla non-private SGD optimizer")

    return score_train, score_valid, score_test, eps, weights, Y_pred, model

In [None]:
def compute_epsilon(steps, training_size, noise_multiplier, batch_size):
    """
    Computes the epsilon spent for the given DP-SGD hyperparameters.

    Args:
        steps: Number of optimizer steps taken over the training data
            (epochs * training_size // batch_size).
        training_size: Number of training samples.
        noise_multiplier: Amount of noise sampled and added to the gradients
            during training.
        batch_size: Mini-batch size used during training.

    Returns:
        The epsilon of the (epsilon, delta)-DP guarantee, or infinity when no
        noise is added.
    """
    # No noise means no privacy guarantee
    if noise_multiplier == 0.0:
        return float("inf")

    # Delta bounds the probability of the privacy guarantee not holding.
    # Rule of thumb: keep it below the inverse of the training data size,
    # here 1 / (1.1 * training_size).
    training_delta = 1 / (training_size * 1.1)

    # Orders at which the Renyi divergence is computed. For an epsilon between
    # 1 and 10 at a fixed delta, the orders must cover the range from
    # 1 + ln(1/delta)/10 to 1 + ln(1/delta)/1.
    log_inverse_delta = math.log(1.0 / training_delta)
    orders = np.linspace(
        1 + log_inverse_delta / 10,
        1 + log_inverse_delta / 1,
        num=100,
    )

    # Sampling ratio q: the probability of an individual training point being
    # included in a minibatch.
    sampling_probability = batch_size / training_size

    # Renyi Differential Privacy (RDP) is well suited to analyze sampling
    # followed by Gaussian noise addition, which is how the DP-SGD optimizer
    # randomizes gradients.
    rdp = compute_rdp(
        q=sampling_probability,
        noise_multiplier=noise_multiplier,
        steps=steps,
        orders=orders,
    )

    privacy_spent = get_privacy_spent(orders, rdp, target_delta=training_delta)
    return privacy_spent[0]

In [None]:
# Hyperparameters of the target model, i.e. the model that will be attacked
target_train_config = dict(
    noise_multiplier=1,
    l2_norm_clip=1,
    batch_size=22,
    microbatches=11,
    x_train=x_train,
    y_train=y_train,
    dpsgd=False,
    learning_rate=0.01,
    epochs=200,
    model_dir=None,
    print_outputs=True,
)
# Train the target model and keep everything train() returns
training_outputs = train(**target_train_config)

In [None]:
# The trained target model is the last element returned by train()
*_, model = training_outputs
# Class confidences of the target model on its own training data
preds = model.predict(x_train)
# Predicted class per training sample
preds_full = preds.argmax(axis=1)

# SYNTHETIC

In [None]:
class StudentRecord:
    """
    A synthetic student record matching the encoded dataset layout.

    The record holds 58 features: 15 numeric ones (min-max normalized) followed
    by 17 one-hot encoded categorical groups. Every feature is stored as an
    attribute (``age``, ``m_edu``, ..., ``romantic_yes``) and the position of a
    feature in ``output_to_list()`` is the index used by ``specific_update``.
    """

    # (attribute, low, high) of each numeric feature, in output order. Raw
    # values are drawn with np.random.randint(low, high), so high is exclusive.
    _NUMERIC_FIELDS = (
        ("age", 15, 23),
        ("m_edu", 0, 5),
        ("f_edu", 0, 5),
        ("travel_time", 1, 5),
        ("study_time", 1, 5),
        # Although failures should be [1, 4], only [1, 3] is in the dataset
        # NOTE(review): confirm the lower bound against the dataset's minimum.
        ("failures", 1, 4),
        ("fam_rel", 1, 6),
        ("free_time", 1, 6),
        ("go_out", 1, 6),
        ("d_alc", 1, 6),
        ("w_alc", 1, 6),
        ("health", 1, 6),
        ("absences", 0, 94),
        ("g1", 0, 21),
        ("g2", 0, 21),
    )

    # One-hot groups, in output order; exactly one member of a group is 1.
    _ONE_HOT_GROUPS = (
        ("school_GP", "school_MS"),
        ("sex_F", "sex_M"),
        ("address_R", "address_U"),
        ("fam_size_GT3", "fam_size_LE3"),
        ("p_status_a", "p_status_t"),
        ("m_job_at_home", "m_job_health", "m_job_other", "m_job_services", "m_job_teacher"),
        ("f_job_at_home", "f_job_health", "f_job_other", "f_job_services", "f_job_teacher"),
        ("reason_course", "reason_home", "reason_other", "reason_reputation"),
        ("guardian_father", "guardian_mother", "guardian_other"),
        ("school_sup_no", "school_sup_yes"),
        ("fam_sup_no", "fam_sup_yes"),
        ("paid_no", "paid_yes"),
        ("activities_no", "activities_yes"),
        ("nursery_no", "nursery_yes"),
        ("higher_no", "higher_yes"),
        ("internet_no", "internet_yes"),
        ("romantic_no", "romantic_yes"),
    )

    # Attribute names in the order expected by the model input
    _FEATURE_ORDER = tuple(name for name, _low, _high in _NUMERIC_FIELDS) + tuple(
        name for group in _ONE_HOT_GROUPS for name in group
    )

    def __init__(self, y_label, actual_min, actual_max):
        """
        Given a y label, and the min/max normalization values,
        generates a sample Student when initialized.

        Args:
            y_label: The class label assigned to the record.
            actual_min: Per-feature minimum values from the dataset.
            actual_max: Per-feature maximum values from the dataset.
        """
        # Remember the normalization bounds that were passed in so later
        # re-randomization does not depend on module-level globals.
        self._actual_min = np.asarray(actual_min)
        self._actual_max = np.asarray(actual_max)
        # Generate a default Student record
        self.generate_record(actual_min, actual_max)
        self.y_label = y_label

    def output_to_list(self):
        """
        Outputs the feature values as a list, in model input order, to be
        input to a Machine Learning algorithm.
        """
        return [getattr(self, name) for name in self._FEATURE_ORDER]

    def generate_record(self, actual_min, actual_max, specific_update=-1):
        '''
        Randomly instantiates a sample or randomly adjusts a specific feature.
        Numeric values are normalized; one-hot groups are re-drawn as a whole.

        Args:
            actual_min: Per-feature minimum values from the dataset
            actual_max: Per-feature maximum values from the dataset
            specific_update: If negative, randomizes all features. Otherwise
                the position (as in ``output_to_list()``) of the feature to
                randomize; a position inside a one-hot group re-draws the
                whole group.
        '''
        # Positional access: a pandas Series would otherwise treat the integer
        # as a label lookup.
        actual_min = np.asarray(actual_min)
        actual_max = np.asarray(actual_max)
        randomize_all = specific_update < 0

        for position, (name, low, high) in enumerate(self._NUMERIC_FIELDS):
            if randomize_all or specific_update == position:
                raw_value = np.random.randint(low, high)
                setattr(
                    self,
                    name,
                    self.normalize_arguement(raw_value, actual_min[position], actual_max[position]),
                )

        group_start = len(self._NUMERIC_FIELDS)
        for group in self._ONE_HOT_GROUPS:
            group_stop = group_start + len(group)
            if randomize_all or group_start <= specific_update < group_stop:
                hot_position = np.random.randint(0, len(group))
                for offset, name in enumerate(group):
                    setattr(self, name, 1 if offset == hot_position else 0)
            group_start = group_stop

    def normalize_arguement(self, value, actual_min, actual_max):
        """
        Normalizes an individual parameter.
        """
        return (value - actual_min) / (actual_max - actual_min)

    def randomize_k_features(self, k):
        """
        Out of the 58 features used, randomly update k of the features.

        Returns:
            The positions of the features that were selected.
        """
        total_elements = len(self._FEATURE_ORDER)
        random_choice = np.random.choice(total_elements, total_elements, replace=False)
        chosen = random_choice[:k]
        for index in chosen:
            self.generate_record(self._actual_min, self._actual_max, index)
        return chosen

    def __eq__(self, other):
        """
        Determines whether two student records are equivalent, i.e. share the
        same label and feature values.
        """
        if not isinstance(other, StudentRecord):
            return NotImplemented
        return (
            self.y_label == other.y_label
            and self.output_to_list() == other.output_to_list()
        )

In [None]:
# Create a synthetic student of class 1 and show its feature vector
temp = StudentRecord(y_label=1, actual_min=X_actual_min, actual_max=X_actual_max)
print(temp.output_to_list())
# Re-draw five randomly chosen features and show the updated vector
temp.randomize_k_features(k=5)
print(temp.output_to_list())

In [None]:
# Query the host model with the synthetic sample and report the class it
# predicts (just to view how it works)
temp_confidences = model.predict([temp.output_to_list()])
np.argmax(temp_confidences, axis=1)

In [None]:
# How long to proceed until stopping
MAX_ITERATIONS = 100000
# Maximum number of potential rejections before restarting
MAX_REJECTIONS = 50
# Minimum class confidence percentage needed from the target model when
# predicting the class of the synthetic sample
# The higher the value, the more similar the datapoints become
MIN_CONFIDENCE = 0.65
def synthesize(y_label, X_actual_min, X_actual_max, model):
    '''
    Generates a synthetic datapoint which the target model classifies as the
    requested class with sufficient confidence.

    The search is a hill climb: the best record found so far is kept, and every
    new proposal re-randomizes k features of that best record. Proposals that
    do not raise the confidence are discarded.

    Args:
        y_label: The class label to be assigned to the sample (0 or 1); it is
            also the index into the model's output vector
        X_actual_min: Per-feature minimum values from the dataset
        X_actual_max: Per-feature maximum values from the dataset
        model: The target model, queried through model.predict

    Returns:
        The accepted StudentRecord, or False if no record was accepted within
        MAX_ITERATIONS queries.
    '''
    import copy

    # Generate the initial randomized student record
    student = StudentRecord(y_label, X_actual_min, X_actual_max)
    # Best record accepted so far and the confidence it received
    best_student = student
    received_class_conf = 0
    # Track the number of consecutive rejections
    j = 0
    # How many features to randomize at a time
    k = 14
    for i in range(MAX_ITERATIONS):
        # Query the target model
        y_query = model.predict([student.output_to_list()], verbose=0)
        confidence = y_query[0][y_label]
        # Continue if the predicted confidence is higher than the best so far
        if confidence > received_class_conf:
            # Get the index from the query output vector that has max prob.
            c = np.argmax(y_query, axis=1)[0]
            # Verify that the confidence is higher than the threshold,
            # that the predicted label is the target label, and that the
            # confidence passes a random threshold
            if confidence > MIN_CONFIDENCE and c == y_label and np.random.random() < confidence:
                # Return the synthetic sample
                return student
            # Not accepted yet, but better than before: remember this record
            best_student = student
            received_class_conf = confidence
            # Reset the number of rejections
            j = 0
        else:
            # Update the number of rejections and adjust k
            j += 1
            if j > MAX_REJECTIONS:
                k = max(1, math.ceil(k / 2))
                j = 0
        # Propose a new record by randomizing k features of the best record.
        # Working on a copy keeps the best record intact, so a rejected
        # proposal is dropped instead of being mutated further.
        student = copy.deepcopy(best_student)
        student.randomize_k_features(k)
    return False

In [None]:
# Synthesize one sample that the target model assigns to class 0 and show it
s = synthesize(
    y_label=0,
    X_actual_min=X_actual_min,
    X_actual_max=X_actual_max,
    model=model,
)
print(s.output_to_list())

In [None]:
# TESTING CODE BLOCK (IGNORE)
# Scratch check: stack two lists of rows into one array, then shuffle the rows
b = [[1, 1], [2, 2], [3, 3]]
c = [[4, 4], [5, 5], [6, 6]]
a = [np.append(b, c, axis=0)]
np.random.shuffle(a[0])
print(a)

In [None]:
# Create k datasets for k shadow models
# The number of samples chosen will result in double that total number of
# samples being used when combined as a train/test set
NUM_SAMPLES_PER_CLASS = 250
NUM_SHADOW_MODELS = 1
# Ensure reproducibility
np.random.seed(1)
# Track the generated datasets
k_datasets = []
print("Starting to generate synthetic datasets...")
# Multiply by two since one will be for training and one will be for testing
# - i % 2 == 0 -> training set
# - i % 2 == 1 -> testing set
for i in range(NUM_SHADOW_MODELS * 2):
    print("CREATING DATASET:")
    # Generate the synthetic samples for class=0 and class=1
    students_0 = [synthesize(0, X_actual_min, X_actual_max, model) for _ in range(NUM_SAMPLES_PER_CLASS)]
    students_1 = [synthesize(1, X_actual_min, X_actual_max, model) for _ in range(NUM_SAMPLES_PER_CLASS)]
    k_datasets.append(np.append(students_0, students_1, axis=0))
    # Ensure that the test dataset does not contain datapoints from the train dataset
    if i % 2 == 1:
        for j in range(len(k_datasets[i])):
            # Keep re-drawing entry j until it matches no training record
            while any(k_datasets[i][j] == student for student in k_datasets[i - 1]):
                print("Redoing entry", j)
                # Replace the duplicate with a sample of the SAME class, so the
                # class balance of the test set is preserved
                k_datasets[i][j] = synthesize(
                    k_datasets[i][j].y_label, X_actual_min, X_actual_max, model
                )
    # Shuffle the generated data
    np.random.shuffle(k_datasets[i])

In [None]:
# Fit one shadow model per training dataset (the even-indexed entries of
# k_datasets), using the same setup as the target model
models = []
for i in range(0, len(k_datasets), 2):
    shadow_features = [record.output_to_list() for record in k_datasets[i]]
    shadow_labels = [record.y_label for record in k_datasets[i]]
    # TODO: ADD test sets into function input and change to be x,y
    shadow_outputs = train(
        noise_multiplier=1,
        l2_norm_clip=1,
        batch_size=22,
        microbatches=11,
        x_train=shadow_features,
        y_train=shadow_labels,
        dpsgd=False,
        learning_rate=0.01,
        epochs=1000,
        model_dir=None,
        print_outputs=True,
    )
    # The fitted model is the last element returned by train()
    trained_model = shadow_outputs[-1]
    models.append(trained_model)


# Remaining steps (placeholders):
# Then we can synthesize data likely in the dataset and extract it
...
# Formulate evaluation metrics
...
# Test on a DP model
...

In [None]:
# Collect, for every shadow model, its output vectors on its own training set
# followed by its output vectors on the matching test set. These vectors are
# the features the attack models are trained on.
k_prediction_sets = []
for i in range(0, len(k_datasets), 2):
    print("Generating probabilities...")
    shadow_model = models[i // 2]
    train_inputs = [elem.output_to_list() for elem in k_datasets[i]]
    test_inputs = [elem.output_to_list() for elem in k_datasets[i + 1]]
    predictions_train = shadow_model.predict(train_inputs)
    predictions_test = shadow_model.predict(test_inputs)
    k_prediction_sets.extend([predictions_train, predictions_test])

In [None]:
# One attack model is built per class label (0 and 1). It receives the output
# vector of a model and predicts 'no' or 'yes' on whether the corresponding
# sample was used to train that model.
# Requires: initial_class_label, output_vector, in_or_out
def generate_attack_model(learning_rate=0.001):
    """
    Builds and compiles a sequential Keras attack model.

    Args:
        learning_rate: Learning rate of the SGD optimizer.

    Returns:
        The compiled model: two inputs (an output vector), two outputs.
    """
    layer_stack = [
        tf.keras.layers.Dense(64, input_dim=2, activation="relu"),
        # A dropout rate of 0.1 was good
        tf.keras.layers.Dropout(rate=0.1, noise_shape=None, seed=1),
        tf.keras.layers.Dense(32, activation="relu"),
        tf.keras.layers.Dense(16, activation="relu"),
        tf.keras.layers.Dense(8, activation="relu"),
        # Two outputs, one per membership class (out / in)
        tf.keras.layers.Dense(2, activation="sigmoid"),
    ]
    attack_model = tf.keras.Sequential(layer_stack)

    # Compile model with Keras
    attack_model.compile(
        optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=["accuracy"],
    )
    return attack_model

In [None]:
# Specify the learning rate to use
LR = 0.1
# One attack model per class label
attack_models = [generate_attack_model(LR), generate_attack_model(LR)]
# Track the samples and labels for the attack model for a specific class
X_attacks = [[], []]
y_attacks = [[], []]
for i in range(len(k_prediction_sets)):
    # Even-indexed sets are shadow training data (label 1 = in the training
    # set), odd-indexed sets are shadow test data (label 0 = outside)
    membership_label = (i + 1) % 2
    # Split the output vectors by the class label of their sample
    attack_train_dataset_0 = [
        prediction
        for prediction, record in zip(k_prediction_sets[i], k_datasets[i])
        if record.y_label == 0
    ]
    attack_train_dataset_1 = [
        prediction
        for prediction, record in zip(k_prediction_sets[i], k_datasets[i])
        if record.y_label == 1
    ]
    X_attacks[0] += attack_train_dataset_0
    X_attacks[1] += attack_train_dataset_1
    y_attacks[0] += [membership_label] * len(attack_train_dataset_0)
    y_attacks[1] += [membership_label] * len(attack_train_dataset_1)
# Convert the lists to numpy arrays
X_attacks[0], X_attacks[1] = np.array(X_attacks[0]), np.array(X_attacks[1])
# Each class keeps its own labels (class 1 previously received the labels of
# class 0)
y_attacks[0], y_attacks[1] = np.array(y_attacks[0]), np.array(y_attacks[1])
print(attack_models[0].summary())

In [None]:
# Every attack sample (an output vector) must come with exactly one label
for class_label in (0, 1):
    assert len(y_attacks[class_label]) == len(X_attacks[class_label])

In [None]:
# Fit the attack model of each class on that class's output vectors and keep
# the Keras training histories
train_histories = []
for attack_model, attack_inputs, attack_labels in zip(attack_models, X_attacks, y_attacks):
    history = attack_model.fit(
        attack_inputs,
        attack_labels,
        epochs=2000,
        batch_size=64,
    )
    train_histories.append(history)
# Notes from earlier runs:
#64: 0.58 max
#16: 0.59

In [None]:
# Report how well each attack model separates members from non-members on the
# shadow-model output vectors it was trained on
print("Getting statistics for class = 0")
y_pred_0 = attack_models[0].predict(x=X_attacks[0])
predicted_membership_0 = np.argmax(y_pred_0, axis=1)
print(classification_report(y_attacks[0], predicted_membership_0))
print("\nGetting statistics for class = 1")
y_pred_1 = attack_models[1].predict(x=X_attacks[1])
predicted_membership_1 = np.argmax(y_pred_1, axis=1)
print(classification_report(y_attacks[1], predicted_membership_1))

In [None]:
# Generate the probabilities for the real train/test sets.
# The attack is evaluated against the TARGET model: x_train holds its members
# and x_test its non-members, so the target model itself has to be queried.
# The shadow models never saw this data, so their outputs carry no membership
# signal for it.
# Exactly two sets are produced (train first, then test) regardless of the
# number of shadow models.
print("Generating probabilities...")
predictions_train = model.predict(x_train)
predictions_test = model.predict(x_test)
actual_prediction_sets = [predictions_train, predictions_test]

In [None]:
# Arrange the output vectors obtained for the actual dataset as attack-model
# input, split by the class label of each sample
actual_X_attacks = [[], []]
actual_y_attacks = [[], []]
y_combined = [y_train, y_test]
for i, prediction_set in enumerate(actual_prediction_sets):
    class_labels = y_combined[i]
    # 1 marks the training set (i even), 0 marks data outside of it (i odd)
    membership_label = (i + 1) % 2
    attack_train_dataset_0 = [
        prediction for prediction, label in zip(prediction_set, class_labels) if label == 0
    ]
    attack_train_dataset_1 = [
        prediction for prediction, label in zip(prediction_set, class_labels) if label == 1
    ]
    actual_X_attacks[0] += attack_train_dataset_0
    actual_X_attacks[1] += attack_train_dataset_1
    actual_y_attacks[0] += [membership_label] * len(attack_train_dataset_0)
    actual_y_attacks[1] += [membership_label] * len(attack_train_dataset_1)
# Convert the collected lists to numpy arrays
actual_X_attacks[0], actual_X_attacks[1] = np.array(actual_X_attacks[0]), np.array(actual_X_attacks[1])
actual_y_attacks[0], actual_y_attacks[1] = np.array(actual_y_attacks[0]), np.array(actual_y_attacks[1])

In [None]:
# Evaluate each attack model on the output vectors obtained for the original
# data (loss and accuracy as reported by Keras)
print("Testing accuracy for class = 0")
attack_models[0].evaluate(actual_X_attacks[0], actual_y_attacks[0])
print("Testing accuracy for class = 1")
attack_models[1].evaluate(actual_X_attacks[1], actual_y_attacks[1])

In [None]:
# Per-class membership statistics of the attack models on the output vectors
# obtained for the original data
print("Getting statistics for class = 0")
y_pred_0 = attack_models[0].predict(x=actual_X_attacks[0])
actual_membership_0 = np.argmax(y_pred_0, axis=1)
print(classification_report(actual_y_attacks[0], actual_membership_0))
print("\nGetting statistics for class = 1")
y_pred_1 = attack_models[1].predict(x=actual_X_attacks[1])
actual_membership_1 = np.argmax(y_pred_1, axis=1)
print(classification_report(actual_y_attacks[1], actual_membership_1))