### General imports

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
import glob
import matplotlib.pyplot as plt
import numpy as np
import os
import PIL
from tensorflow.keras import layers, models
import tensorflow.keras.backend as K
import time
from IPython import display

from tensorflow_privacy.privacy.analysis import compute_dp_sgd_privacy
from tensorflow_privacy.privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer

In [None]:
# Method obtained from https://stackoverflow.com/questions/41123879/numpy-random-choice-in-tensorflow
def _random_choice(inputs, n_samples):
    """
    Draw rows of `inputs` uniformly at random, with replacement.
    Params:
      inputs (Tensor): Shape [n_states, n_features]
      n_samples (int): The number of random samples to take.
    Returns:
      sampled_inputs (Tensor): Shape [n_samples, n_features]
    """
    # All-zero logits give every row the same probability.
    # Shape (1, n_states) since the sampler requires 2D logits.
    uniform_log_prob = tf.expand_dims(tf.zeros(tf.shape(inputs)[0]), 0)

    # tf.random.categorical is the supported replacement for the deprecated
    # tf.compat.v1.multinomial (same arguments, same result shape).
    ind = tf.random.categorical(uniform_log_prob, n_samples)
    ind = tf.squeeze(ind, 0, name="random_choice_ind")  # (n_samples,)

    return tf.gather(inputs, ind, name="random_choice")

In [None]:
from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.exceptions import ConvergenceWarning
from sklearn.metrics import roc_curve, auc
# Instantiating ConvergenceWarning('ignore') only builds (and discards) an
# exception object; a warnings filter is what actually silences the warning.
import warnings
warnings.filterwarnings('ignore', category=ConvergenceWarning)

In [None]:
# Method obtained from https://github.com/reihaneh-torkzadehmahani/DP-CGAN
def compute_fpr_tpr_roc(Y_test, Y_score):
    """Compute per-class and micro-averaged ROC curves together with their AUC.

    Args:
        Y_test: 2-D array of true labels, one column per class (presumably
            binary indicators, as required by `roc_curve`).
        Y_score: 2-D array of scores with the same column layout as `Y_test`.

    Returns:
        Tuple `(false_positive_rate, true_positive_rate, roc_auc)` of dicts.
        Each dict is keyed by the class column index and additionally by
        "micro" for the curve computed over all flattened entries.
    """
    false_positive_rate, true_positive_rate, roc_auc = {}, {}, {}

    def _store(key, truth, scores):
        # Compute one ROC curve and file its pieces under `key`.
        fpr, tpr, _ = roc_curve(truth, scores)
        false_positive_rate[key] = fpr
        true_positive_rate[key] = tpr
        roc_auc[key] = auc(fpr, tpr)

    # One curve per class column.
    for col in range(Y_score.shape[1]):
        _store(col, Y_test[:, col], Y_score[:, col])

    # Micro-average: pool every (sample, class) entry into a single curve.
    _store("micro", Y_test.ravel(), Y_score.ravel())

    return false_positive_rate, true_positive_rate, roc_auc

## Modified Optimizer for DP

The optimizer below is a modification of the original from TF Privacy, [available here](https://github.com/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/optimizers/dp_optimizer.py) to allow setting different values of noise multipliers and clipping factor on different steps of the optimization.

The main modification lies in the `compute_gradients` method, which now accepts two additional arguments:
- *curr_noise_mult*: Current noise_multiplier
- *curr_norm_clip*: Current L2 norm clipping factor

On every step of the optimization we now additionally pass these parameters to control the privacy effects.

In [None]:
from absl import logging
import collections

from tensorflow_privacy.privacy.analysis import privacy_ledger
from tensorflow_privacy.privacy.dp_query import gaussian_query

def make_optimizer_class(cls):
  """Constructs a DP optimizer class from an existing one.

  Compared with the TF Privacy original, the returned class takes the noise
  multiplier and the clipping norm as arguments of `compute_gradients`, so
  they can differ from one call to the next.

  Args:
    cls: Optimizer class to subclass. A warning is logged when it overrides
      `compute_gradients`, because the subclass below replaces that method.

  Returns:
    The `DPOptimizerClass` subclass of `cls`.
  """
  parent_code = tf.compat.v1.train.Optimizer.compute_gradients.__code__
  child_code = cls.compute_gradients.__code__
  GATE_OP = tf.compat.v1.train.Optimizer.GATE_OP  # pylint: disable=invalid-name
  if child_code is not parent_code:
    logging.warning(
        'WARNING: Calling make_optimizer_class() on class %s that overrides '
        'method compute_gradients(). Check to ensure that '
        'make_optimizer_class() does not interfere with overridden version.',
        cls.__name__)

  class DPOptimizerClass(cls):
    """Differentially private subclass of given class cls."""

    # NOTE(review): not referenced anywhere in the visible code.
    _GlobalState = collections.namedtuple(
      '_GlobalState', ['l2_norm_clip', 'stddev'])
    
    def __init__(
        self,
        dp_sum_query,
        num_microbatches=None,
        unroll_microbatches=False,
        *args,  # pylint: disable=keyword-arg-before-vararg, g-doc-args
        **kwargs):
      """Initialize the DPOptimizerClass.

      Args:
        dp_sum_query: DPQuery object, specifying differential privacy
          mechanism to use. Note that `compute_gradients` replaces it with a
          fresh GaussianSumQuery on every call.
        num_microbatches: How many microbatches into which the minibatch is
          split. If None, `compute_gradients` sets it from the size of the
          loss vector.
        unroll_microbatches: Stored on the instance; the `compute_gradients`
          below always uses a Python loop and does not read this flag.
        *args: Forwarded to the constructor of `cls`.
        **kwargs: Forwarded to the constructor of `cls`.
      """
      super(DPOptimizerClass, self).__init__(*args, **kwargs)
      self._dp_sum_query = dp_sum_query
      self._num_microbatches = num_microbatches
      self._global_state = self._dp_sum_query.initial_global_state()
      # TODO(b/122613513): Set unroll_microbatches=True to avoid this bug.
      # Beware: When num_microbatches is large (>100), enabling this parameter
      # may cause an OOM error.
      self._unroll_microbatches = unroll_microbatches

    def compute_gradients(self,
                          loss,
                          var_list,
                          gate_gradients=GATE_OP,
                          aggregation_method=None,
                          colocate_gradients_with_ops=False,
                          grad_loss=None,
                          gradient_tape=None,
                          curr_noise_mult=0,
                          curr_norm_clip=1):
      """Compute microbatch-averaged gradients with per-call DP settings.

      Args:
        loss: Zero-argument callable returning the vector of per-example
          losses.
        var_list: Variables to differentiate with respect to.
        gate_gradients: Unused here; kept for signature compatibility.
        aggregation_method: Unused here; kept for signature compatibility.
        colocate_gradients_with_ops: Unused here; kept for signature
          compatibility.
        grad_loss: Unused here; kept for signature compatibility.
        gradient_tape: Gradient tape used to differentiate each microbatch
          loss. Required. Since `gradient()` is called once per microbatch,
          it presumably has to be a persistent tape.
        curr_noise_mult: Noise multiplier for this call. Noise is only added
          when it is greater than 0.
        curr_norm_clip: L2 clipping norm for this call.

      Returns:
        The gradient sums divided by the number of microbatches, in the
        structure produced by the query. These are gradients only: they are
        NOT paired with `var_list`, so the caller has to zip them itself.

      Raises:
        ValueError: If no `gradient_tape` is given.
      """

      # Rebuild the query for this call: clipping norm `curr_norm_clip` and
      # noise stddev `curr_norm_clip * curr_noise_mult`.
      # NOTE(review): this overwrites the query passed to __init__, so a
      # ledger-wrapped query would be lost after the first call - confirm.
      self._dp_sum_query = gaussian_query.GaussianSumQuery(curr_norm_clip, 
                                                           curr_norm_clip*curr_noise_mult)
      self._global_state = self._dp_sum_query.make_global_state(curr_norm_clip, 
                                                                curr_norm_clip*curr_noise_mult)
      

      # TF is running in Eager mode, check we received a vanilla tape.
      if not gradient_tape:
        raise ValueError('When in Eager mode, a tape needs to be passed.')

      vector_loss = loss()
      if self._num_microbatches is None:
        # NOTE(review): this stores a tensor, which the Python `range()` loop
        # below presumably cannot iterate - confirm before relying on None.
        self._num_microbatches = tf.shape(input=vector_loss)[0]
      sample_state = self._dp_sum_query.initial_sample_state(var_list)
      # One row per microbatch; each row holds that microbatch's losses.
      microbatches_losses = tf.reshape(vector_loss, [self._num_microbatches, -1])
      sample_params = (self._dp_sum_query.derive_sample_params(self._global_state))

      def process_microbatch(i, sample_state):
        """Process one microbatch (record) with privacy helper."""
        microbatch_loss = tf.reduce_mean(input_tensor=tf.gather(microbatches_losses, [i]))
        grads = gradient_tape.gradient(microbatch_loss, var_list)
        # The query accumulates the record (presumably clipping it to the
        # configured L2 norm first).
        sample_state = self._dp_sum_query.accumulate_record(sample_params, sample_state, grads)
        return sample_state
    
      # Plain Python loop over the microbatches.
      for idx in range(self._num_microbatches):
        sample_state = process_microbatch(idx, sample_state)

      # With a zero noise multiplier the accumulated sums are used as-is.
      if curr_noise_mult > 0:
        grad_sums, self._global_state = (self._dp_sum_query.get_noised_result(sample_state, self._global_state))
      else:
        grad_sums = sample_state

      def normalize(v):
        """Turn a sum over microbatches into an average."""
        return v / tf.cast(self._num_microbatches, tf.float32)

      final_grads = tf.nest.map_structure(normalize, grad_sums)
      # Despite the name, only the gradients are returned (no variables).
      grads_and_vars = final_grads#list(zip(final_grads, var_list))
    
      return grads_and_vars

  return DPOptimizerClass


def make_gaussian_optimizer_class(cls):
  """Builds a DP optimizer class for `cls` backed by a Gaussian sum query.

  Args:
    cls: Optimizer class handed to `make_optimizer_class`.

  Returns:
    A subclass whose constructor takes the clipping norm and the noise
    multiplier instead of a ready-made query object.
  """
  dp_base_class = make_optimizer_class(cls)

  class DPGaussianOptimizerClass(dp_base_class):
    """DP subclass of given class cls using Gaussian averaging."""

    def __init__(
        self,
        l2_norm_clip,
        noise_multiplier,
        num_microbatches=None,
        ledger=None,
        unroll_microbatches=False,
        *args,  # pylint: disable=keyword-arg-before-vararg
        **kwargs):
      """Creates the Gaussian query and forwards everything to the base class.

      Args:
        l2_norm_clip: Clipping norm given to the Gaussian sum query.
        noise_multiplier: Multiplied by `l2_norm_clip` to obtain the noise
          standard deviation of the query.
        num_microbatches: Forwarded to the base class.
        ledger: Optional privacy ledger; when truthy the query is wrapped in
          a `QueryWithLedger`.
        unroll_microbatches: Forwarded to the base class.
        *args: Forwarded to the base class.
        **kwargs: Forwarded to the base class.
      """
      noise_stddev = l2_norm_clip * noise_multiplier
      query = gaussian_query.GaussianSumQuery(l2_norm_clip, noise_stddev)
      if ledger:
        query = privacy_ledger.QueryWithLedger(query, ledger=ledger)

      super(DPGaussianOptimizerClass, self).__init__(
          query, num_microbatches, unroll_microbatches, *args, **kwargs)

    @property
    def ledger(self):
      """The `ledger` attribute of the optimizer's current query object."""
      return self._dp_sum_query.ledger

  return DPGaussianOptimizerClass

In [None]:
# Wrap TF1's plain gradient-descent optimizer with the modified DP factory
# defined in this notebook (per-call noise multiplier and clipping norm).
GradientDescentOptimizer = tf.compat.v1.train.GradientDescentOptimizer
DPGradientDescentGaussianOptimizer_NEW = make_gaussian_optimizer_class(GradientDescentOptimizer)

## Dataset

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as io

# Load the data cube. With its default options scipy.io.loadmat returns
# MATLAB scalars as arrays (typically of shape (1, 1)), and calling int() on
# an array with ndim > 0 is deprecated in recent NumPy releases, so the
# values are squeezed to 0-d first.
dataset = io.loadmat('indianpines_dataset.mat')
number_of_bands = int(np.squeeze(dataset['number_of_bands']))
number_of_rows = int(np.squeeze(dataset['number_of_rows']))
number_of_columns = int(np.squeeze(dataset['number_of_columns']))
# Transposed after loading; presumably this yields one row per pixel - verify
# against the .mat layout.
pixels = np.transpose(dataset['pixels'])

# Ground-truth class ids, transposed the same way (indexed below as gt[k, 0]).
groundtruth = io.loadmat('indianpines_gt.mat')
gt = np.transpose(groundtruth['pixels'])

In [None]:
# Normalize the dataset (standard procedure): fit a StandardScaler on `pixels`
# and overwrite `pixels` with the transformed values.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
pixels = sc.fit_transform(pixels)

# Display color (RGB, 0-255) for each category in the dataset. Row k is looked
# up with class id k; row 0 (white) corresponds to 'background'.
indianpines_colors = np.array([[255, 255, 255],
                               [255, 254, 137], [3,  28,  241], [255, 89,    1], [5,   255, 133],
                               [255,   2, 251], [89,  1,  255], [3,   171, 255], [12,  255,   7],
                               [172, 175,  84], [160, 78, 158], [101, 173, 255], [60,   91, 112],
                               [104, 192,  63], [139, 69,  46], [119, 255, 172], [254, 255,   3]])

# Rescale into the range [0, 1] for displaying.
# NOTE(review): `pixels_normalized` is not referenced by any later cell of
# this notebook - confirm whether it is still needed.
import sklearn.preprocessing
indianpines_colors = sklearn.preprocessing.minmax_scale(indianpines_colors, feature_range=(0, 1))
pixels_normalized = sklearn.preprocessing.minmax_scale(pixels, feature_range=(0, 1))

# Build the RGB image: look up the color of every pixel's class id with one
# vectorized indexing operation (row-major order, as in the former double loop)
# instead of assigning pixel by pixel in Python.
n_pixels = number_of_rows * number_of_columns
gt_thematic_map = indianpines_colors[gt[:n_pixels, 0]].reshape(
    number_of_rows, number_of_columns, 3).astype(float)

# names of the categories in the dataset
indianpines_class_names = ['background',
                           'alfalfa',           'corn-notill',               'corn-min',               'corn',
                           'grass/pasture',     'grass/trees',    'grass/pasture-mowed',      'hay-windrowed',
                           'oats',          'soybeans-notill',           'soybeans-min',      'soybean-clean',
                           'wheat',                   'woods', 'bldg-grass-tree-drives', 'stone-steel towers']

fig = plt.figure(figsize=(10, 10))
plt.imshow(gt_thematic_map)

# Number of classes the GAN is conditioned on (17: background + 16 land covers).
COND_num_classes = len(indianpines_class_names)

# NOTE(review): the features `x` are the RGB colors of the ground-truth map
# (3 values per pixel), not the spectral bands in `pixels` - confirm this is
# the intended input.
y = gt
x = gt_thematic_map.reshape(gt_thematic_map.shape[0]*gt_thematic_map.shape[1], 3)
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.75, random_state=0)
# Add a singleton axis so every sample has shape (1, 3), as the models expect.
train_dataset = np.expand_dims(x_train, axis = 1)
test_dataset = np.expand_dims(x_test, axis = 1)
print (train_dataset.shape)
train_labels = y_train
test_labels = y_test


def _one_hot(labels, num_classes):
    """Return a float32 (n_samples, num_classes) one-hot matrix for `labels`.

    `labels` may be a flat vector or a column of class ids. It is flattened
    before use, which also avoids calling int() on 1-element arrays
    (deprecated in recent NumPy releases).
    """
    class_ids = np.asarray(labels).reshape(-1).astype(int)
    encoded = np.zeros((len(class_ids), num_classes), dtype='float32')
    encoded[np.arange(len(class_ids)), class_ids] = 1.0
    return encoded


train_labels_vec = _one_hot(train_labels, COND_num_classes)
test_labels_vec = _one_hot(test_labels, COND_num_classes)

## C-GAN Models

Both Generator and Discriminator follow simple architectures, with fully connected neural networks.

We emphasize the use of C-GAN, therefore conditioning the models to the label information - notice the additional input on both networks below for labels.

In [None]:
# Dimension of the latent (noise) vector fed to the generator.
# It does NOT affect DP-EPSILON.
Z_DIM = 10

In [None]:
def make_generator_model_FCC():
    """Build the fully connected conditional generator.

    Inputs are `[latent, label]`: a latent vector of size `Z_DIM` and a label
    vector of size `COND_num_classes`. The output has shape `(1, 3)` per
    sample. Because a ReLU follows the tanh-activated output layer, every
    generated value lies in [0, 1).

    Returns:
        The (uncompiled) Keras model.
    """
    # Conditioning input: the label vector.
    label_in = layers.Input(shape=(COND_num_classes,))

    # Noise input: the latent vector.
    latent_in = layers.Input(shape=(Z_DIM,))

    # Concatenate noise and label along the feature axis.
    joined = layers.concatenate([latent_in, label_in], axis=1)

    hidden = layers.Dense(128, use_bias=True)(joined)
    hidden = layers.ReLU()(hidden)

    squashed = layers.Dense(3, use_bias=True, activation="tanh")(hidden)
    squashed = layers.ReLU()(squashed)
    sample = layers.Reshape((1, 3))(squashed)

    return models.Model([latent_in, label_in], sample)

def make_discriminator_model_FCC():
    """Build the fully connected conditional discriminator.

    Inputs are `[image, label]`: a sample of shape `(1, 3)` and a label vector
    of size `COND_num_classes`. The output has shape `(1, 3)` per sample and
    holds raw logits.

    The output layer is deliberately left without an activation. Both losses
    in this notebook are built with `from_logits=True`, and a ReLU on the
    logits would stop the discriminator from ever producing a negative logit,
    i.e. from ever scoring a sample below probability 0.5.

    Returns:
        The (uncompiled) Keras model.
    """
    # INPUT: Label
    in_label = layers.Input(shape=(COND_num_classes,))

    # INPUT: Image
    in_image = layers.Input(shape=(1,3))
    in_image_b = layers.Flatten()(in_image)

    # MERGE
    merge = layers.concatenate([in_image_b, in_label], axis=1)

    hidden = layers.Dense(128, use_bias=True)(merge)
    hidden = layers.ReLU()(hidden)

    # Raw logits: no activation here (see docstring).
    logits = layers.Dense(3, use_bias=True)(hidden)
    out_layer = layers.Reshape((1,3))(logits)

    model = models.Model([in_image, in_label], out_layer)

    return model

### Initiate and test models

In [None]:
# Build the generator and print its layer summary.
generator = make_generator_model_FCC()
generator.summary()

In [None]:
# Build the discriminator and print its layer summary.
discriminator = make_discriminator_model_FCC()
discriminator.summary()

In [None]:
# Test GEN created: push one random latent vector and the one-hot label of
# class 9 through the (untrained) generator.
noise = tf.Variable(tf.random.normal([1, Z_DIM]))
noise_label = tf.Variable(np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,0,0,0,0,0,0], dtype='float32').reshape((1,17)))
print(noise.shape)
print(noise_label.shape)
# The generator output has shape (1, 1, 3), which is shown as a single RGB pixel.
generated_image = generator([noise, noise_label], training=False).numpy()
plt.imshow(generated_image)

# Test DISC created: score the generated sample with the same label.
decision = discriminator([generated_image, noise_label])

### Loss and Updates

- Please note that, during the training step of the Discriminator `train_step_DISC`, we **combine gradients** from both real and generated on a single update step into `sanitized_grads_and_vars`, following the approach from [Torkzadehmahani et al. 2019](http://openaccess.thecvf.com/content_CVPRW_2019/papers/CV-COPS/Torkzadehmahani_DP-CGAN_Differentially_Private_Synthetic_Data_and_Label_Generation_CVPRW_2019_paper.pdf).
- When learning from the **real/training dataset** we <u>clip and add noise</u> to the gradients of the Discriminator.
- When learning from the **generated data** we <u>only clip</u> the gradients of the Discriminator.

In [None]:
# Discriminator loss is kept per example (no reduction): the DP optimizer
# reshapes the loss vector into microbatches before differentiating.
cross_entropy_DISC = tf.keras.losses.BinaryCrossentropy(from_logits=True, reduction=tf.losses.Reduction.NONE)
# Generator loss uses the default reduction; the generator is not trained with DP.
cross_entropy_GEN = tf.keras.losses.BinaryCrossentropy(from_logits=True)

# Notice the use of `tf.function`: This annotation causes the function to be "compiled".
@tf.function
def train_step_DISC(images, labels, noise, labels_to_gen):    
    """Run one discriminator update from real and generated data.

    Gradients on the real batch are computed with noise multiplier NOISE_MULT,
    gradients on the generated batch with noise multiplier 0; both use the
    clipping norm NORM_CLIP. The two gradient lists are added element-wise and
    applied in a single optimizer step.

    Args:
        images: Batch of real samples.
        labels: Labels belonging to `images`.
        noise: Latent vectors used to generate the fake batch.
        labels_to_gen: Labels used to condition the fake batch.

    Returns:
        Tuple `(disc_loss_r, disc_loss_f)`: the unreduced discriminator losses
        on the real and on the generated batch, recomputed before the update
        is applied.
    """
    # The tape is persistent because the optimizer calls `gradient()` on it
    # once per microbatch and it is shared by the real and the fake pass.
    with tf.GradientTape(persistent=True) as disc_tape_real:
        # This dummy call is needed to obtain the var list.
        dummy = discriminator([images, labels], training=True)
        var_list = discriminator.trainable_variables
        
        # In Eager mode, the optimizer takes a function that returns the loss.
        def loss_fn_real():
            # Real samples are labelled 1.
            real_output = discriminator([images, labels], training=True)
            disc_real_loss = cross_entropy_DISC(tf.ones_like(real_output), real_output)
            return disc_real_loss
        
        # Real data: gradients are processed with the configured noise multiplier.
        grads_and_vars_real = discriminator_optimizer.compute_gradients(loss_fn_real, 
                                                                        var_list, 
                                                                        gradient_tape=disc_tape_real, 
                                                                        curr_noise_mult=NOISE_MULT,
                                                                        curr_norm_clip=NORM_CLIP)
        
        # In Eager mode, the optimizer takes a function that returns the loss.
        def loss_fn_fake():
            # Generated samples are labelled 0.
            generated_images = generator([noise, labels_to_gen], training=True)
            fake_output = discriminator([generated_images, labels_to_gen], training=True)
            disc_fake_loss = cross_entropy_DISC(tf.zeros_like(fake_output), fake_output)
            return disc_fake_loss
        
        # Generated data: noise multiplier 0, so no noise is added.
        grads_and_vars_fake = discriminator_optimizer.compute_gradients(loss_fn_fake,
                                                                        var_list, 
                                                                        gradient_tape=disc_tape_real,
                                                                        curr_noise_mult=0,
                                                                        curr_norm_clip=NORM_CLIP)
        # Losses recomputed for reporting only.
        # NOTE(review): these extra forward passes and the update below run
        # inside the tape context - presumably they could be moved outside.
        disc_loss_r = loss_fn_real()
        disc_loss_f = loss_fn_fake()
        
        # Despite their names, the two results above hold gradients only
        # (one per variable); add them and pair them with the variables here.
        s_grads_and_vars = [(grads_and_vars_real[idx] + grads_and_vars_fake[idx])
                            for idx in range(len(grads_and_vars_real))]
        sanitized_grads_and_vars = list(zip(s_grads_and_vars, var_list))
        
        discriminator_optimizer.apply_gradients(sanitized_grads_and_vars)
        
    return(disc_loss_r, disc_loss_f)

# Notice the use of `tf.function`: This annotation causes the function to be "compiled".
@tf.function
def train_step_GEN(labels, noise):
    """Run one (non-private) generator update and return its loss.

    Args:
        labels: Labels used to condition the generated batch.
        noise: Latent vectors fed to the generator.

    Returns:
        The generator loss computed before the update is applied.
    """
    with tf.GradientTape() as tape:
        fakes = generator([noise, labels], training=True)
        verdict = discriminator([fakes, labels], training=True)
        # The generator does well when the discriminator rates its samples as real (1).
        loss = cross_entropy_GEN(tf.ones_like(verdict), verdict)

    grads = tape.gradient(loss, generator.trainable_variables)
    generator_optimizer.apply_gradients(zip(grads, generator.trainable_variables))

    return loss

In [None]:
# Root folder for all outputs and the sub-folder holding training checkpoints.
result_dir = 'results'
checkpoint_dir = '/'.join([result_dir, 'training_checkpoints'])


def checkpoint_name(title):
    """Return the checkpoint path prefix inside `checkpoint_dir` for trial `title`."""
    return os.path.join(checkpoint_dir, "ckpt__{}".format(title))

In [None]:
images_dir = result_dir +'/images'

def generate_and_save_images(title, model, epoch, test_input, test_label):
  """Show one color swatch per generated sample, stacked vertically.

  Args:
    title: Trial name. Only used by the (disabled) savefig call.
    model: Generator called as `model([test_input, test_label])`.
    epoch: Epoch number. Only used by the (disabled) savefig call.
    test_input: Fixed latent vectors, one per sample to draw.
    test_label: Labels matching `test_input`.
  """
  # Notice `training` is set to False: This is so all layers run in inference mode (batchnorm).
  predictions = model([test_input, test_label], training=False).numpy()
  n_samples = predictions.shape[0]

  plt.figure(figsize=(2, n_samples))

  for i in range(n_samples):
      plt.subplot(n_samples, 1, i+1)
      # Draw sample i only. The former code drew the whole batch in every
      # subplot. Slicing with i:i+1 keeps a leading axis, so a (1, 3) sample
      # is rendered as a single RGB pixel rather than as three scalar cells.
      plt.imshow(predictions[i:i+1])
      plt.axis('off')

  #plt.savefig(images_dir + '/' + title + '___image_at_epoch_{:04d}.png'.format(epoch))
  plt.show()

### Train function definition

- The Generator receives labels as input, in addition to noise, but since the labels are considered sensitive, as part of the training data, the Generator will **NOT** see/receive them.
- In this sense, we get **uniform random samples** of the possible labels to pass to the Generator.
- Therefore, we do **NOT** use DP-SGD on the Generator, since only the Discriminator trains using the sensitive training data.

In [None]:
def train(dataset, title, verbose):
    """Train the C-GAN for `EPOCHS` epochs over `dataset`.

    Every batch triggers one discriminator step, and every `N_DISC`-th batch
    additionally triggers one generator step. The generator is conditioned on
    labels drawn at random from `labels_gen_vec`, never on the real labels.

    Args:
        dataset: Iterable yielding `(image_batch, label_batch)` pairs.
        title: Trial name, forwarded to the image helper.
        verbose: When truthy, print the iteration number and the losses.
    """
    for epoch_idx in range(EPOCHS):
        epoch_start = time.time()

        for step, (image_batch, label_batch) in enumerate(dataset, start=1):
            if verbose:
                print("Iteration: " + str(step))

            # Fresh noise and randomly chosen labels for the generated batch.
            noise = tf.random.normal([BATCH_SIZE, Z_DIM])
            labels_to_gen = _random_choice(labels_gen_vec, BATCH_SIZE)

            d_loss_r, d_loss_f = train_step_DISC(image_batch, label_batch, noise, labels_to_gen)
            if verbose:
                print("Loss DISC Real: " + str(tf.reduce_mean(d_loss_r)))
                print("Loss DISC Fake: " + str(tf.reduce_mean(d_loss_f)))

            # The generator is only updated on every N_DISC-th batch.
            if step % N_DISC != 0:
                continue

            g_loss_f = train_step_GEN(labels_to_gen, noise)
            if verbose:
                print("Loss GEN Fake:: " + str(g_loss_f))

        # Produce images for the GIF as we go
        display.clear_output(wait=True)
        generate_and_save_images(title, generator, epoch_idx + 1, seed, seed_labels)

        print ('Time for epoch {} is {} sec'.format(epoch_idx + 1, time.time()-epoch_start))

        # Save the model
        #checkpoint.save(file_prefix = checkpoint_name(title + "__epoch=" + str(epoch) + "__"))

---

## Parameters

Specific parameters due to DP-SGD:
- **NR_MICROBATCHES** (microbatches - int): Each batch of data (of size BATCH_SIZE) is split into smaller units called microbatches. So naturally NR_MICROBATCHES should evenly divide BATCH_SIZE. If NR_MICROBATCHES = BATCH_SIZE then every training example is a microbatch, clipped individually and with noise added to the average. As NR_MICROBATCHES decreases, we have more examples in a single microbatch, where *averaged* microbatches are clipped and noise is added to the *average* of averaged microbatches.
- **NORM_CLIP** (l2_norm_clip - float) - The maximum Euclidean (L2) norm of each individual (or microbatch) gradient. To enforce such maximum norm gradients are clipped, which bounds the optimizer's sensitivity to individual training data.
- **NOISE_MULT** (noise_multiplier - float) - The amount of noise sampled and added to gradients during training. Generally, more noise gives better privacy, which often, but not necessarily, lowers utility.
    - Please have in mind that the actual noise added in practice is sampled from a Gaussian distribution with mean zero and standard deviation NORM_CLIP * NOISE_MULT.
    - Therefore, a larger NORM_CLIP may pass more signal from the data via gradients, but it also increases the noise added to the gradients.
    - TF Privacy's authors [have already pointed out](http://www.cleverhans.io/privacy/2019/03/26/machine-learning-with-differential-privacy-in-tensorflow.html) that setting NR_MICROBATCHES trades off performance (e.g. NR_MICROBATCHES = 1) with utility (e.g. NR_MICROBATCHES = BATCH_SIZE).
- **DP_DELTA**: Delta from the DP definition. We emphasize that DP_DELTA needs to be smaller than 1/BUFFER_SIZE.

In [None]:
BUFFER_SIZE = len(train_dataset) # Total size of training data
BATCH_SIZE = 256
NR_MICROBATCHES = 64 # Each batch of data is split in smaller units called microbatches.


NORM_CLIP = 1.1 # Does NOT affect EPSILON, but increases NOISE on gradients
NOISE_MULT = 1.15


DP_DELTA = 1e-4 # Needs to be smaller than 1/BUFFER_SIZE
EPOCHS = 10


N_DISC = 1 # Number of times we train DISC before training GEN once


# Learning Rate for DISCRIMINATOR: linear (power=1) decay from 0.150 to 0.052
# over 10000 steps of the global step.
# NOTE(review): train_step_DISC calls apply_gradients without a global_step,
# so the global step may never advance and the rate may stay at 0.150 - confirm.
LR_DISC = tf.compat.v1.train.polynomial_decay(learning_rate=0.150,
                                              global_step=tf.compat.v1.train.get_or_create_global_step(),
                                              decay_steps=10000,
                                              end_learning_rate=0.052,
                                              power=1)

# The per-example loss vector is reshaped into NR_MICROBATCHES rows, so the
# batch size has to be divisible by the number of microbatches.
if BATCH_SIZE % NR_MICROBATCHES != 0:
    raise ValueError('Batch size should be an integer multiple of the number of microbatches')

### Get DP epsilon from parameters

- Instead of updating and consulting the moments accountant on each step of training, we compute beforehand the epsilon that results from the given parameters.
- Therefore, we can just quickly keep manually adjusting the parameters above to reach our desired epsilon below, and avoid extra computation during training.
- Moreover, this allows a better understanding of the privacy implications of each parameter above.

In [None]:
# Obtain DP_EPSILON for the parameters above, before any training takes place.
# Note that NORM_CLIP is not an input of this computation.
compute_dp_sgd_privacy.compute_dp_sgd_privacy(n = BUFFER_SIZE, 
                                              batch_size = BATCH_SIZE, 
                                              noise_multiplier = NOISE_MULT, 
                                              epochs = EPOCHS, 
                                              delta = DP_DELTA)

In [None]:
# Sanity check: standard deviation handed to the Gaussian sum query when the
# discriminator learns from real data (NORM_CLIP * NOISE_MULT).
NOISE_MULT*NORM_CLIP

### Optimizers

Instantiating optimizers

In [None]:
# Generator: plain Adam with default settings (no DP).
generator_optimizer = tf.keras.optimizers.Adam()

# Discriminator: DP gradient descent. The clip and noise values given here
# build the initial query; compute_gradients receives its own values per call.
discriminator_optimizer = DPGradientDescentGaussianOptimizer_NEW(
   learning_rate = LR_DISC,
   l2_norm_clip = NORM_CLIP,
   noise_multiplier = NOISE_MULT,
   num_microbatches = NR_MICROBATCHES)

---

## TRAINING

- We emphasize here that when batching our training dataset, DP requires random shuffling.
- To help track the progress of our GAN, we fix some seeds for labels and noise for the generator, and constantly plot the generated images. Below we create one seed for each of the 17 classes of the Indian Pines dataset.

In [None]:
# Create/reinitiate models so that training starts from freshly built networks.
generator = make_generator_model_FCC()
discriminator = make_discriminator_model_FCC()

In [None]:
# Create checkpoint structure tracking both models and both optimizers.
# The `checkpoint.save` call inside `train` is currently commented out.
checkpoint = tf.train.Checkpoint(generator_optimizer=generator_optimizer,
                                 discriminator_optimizer=discriminator_optimizer,
                                 generator=generator,
                                 discriminator=discriminator)

In [None]:
tf.random.set_seed(1)

# Batch and random shuffle training data
train_datasets = tf.data.Dataset.from_tensor_slices(
    (train_dataset, train_labels_vec)).shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

# Fix some seeds to help visualize progress: one latent vector and one one-hot
# label per class. Sizes follow COND_num_classes instead of a hard-coded 17.
seed = tf.random.normal([COND_num_classes, Z_DIM])
seed_labels = tf.Variable(np.eye(COND_num_classes), dtype='float32')

# To be used for sampling random labels to pass to generator:
# row i is the one-hot code of class i.
labels_gen_vec = np.eye(COND_num_classes, dtype='float32')

In [None]:
# Name of the current trial - suggestion: derive it from the parameters used
# (here: the epsilon obtained above).
training_title = 'eps9.6'

In [None]:
# Start training on the batched dataset; the last argument disables verbose logging.
train(train_datasets, training_title, False)

---

## VALIDATION

We consider that the GAN training is performed specifically with the goal of publicly sharing the generated data to allow others to train a ML model. 

For this reason, we validate the results by training models on the generated data, and finally, after deciding on a single final GAN properly validated, we test the results by applying on the real test data the models trained on the generated data.


### Choose model to use
Select one of the trials (a fixed GAN) to validate.

### Generate images

In [None]:
# Total number of samples to generate. It is split evenly across the classes
# below, so the actual count is rounded down to a multiple of the class count.
N_GEN = 5000

In [None]:
# Samples per class, rounded down. The builtin int() replaces np.int, which
# was removed in NumPy 1.24 and raises AttributeError there.
N_GEN_per_CLASS = int(N_GEN/COND_num_classes)

tf.random.set_seed(17)
COND_GEN = int(COND_num_classes * N_GEN_per_CLASS)
noise_GEN = tf.random.normal([COND_GEN, Z_DIM])

# One-hot labels grouped by class: the first N_GEN_per_CLASS rows encode
# class 0, the next N_GEN_per_CLASS rows class 1, and so on.
# Shape: (COND_GEN, COND_num_classes).
labels_GEN = tf.Variable(
    np.repeat(np.eye(COND_num_classes, dtype='float32'), N_GEN_per_CLASS, axis=0))

In [None]:
# Generate the synthetic samples and flatten each one into a feature vector
# for the scikit-learn classifiers.
images_GEN = generator([noise_GEN, labels_GEN], training=False)
images_flat = layers.Flatten()(images_GEN)

In [None]:
# Integer class id of every generated sample, as a float32 column of shape
# (COND_GEN, 1): each id is repeated N_GEN_per_CLASS times, in the same class
# order as the one-hot labels fed to the generator.
labels_flat = tf.Variable(
    np.repeat(np.arange(COND_num_classes, dtype='float32'),
              N_GEN_per_CLASS).reshape((COND_GEN, 1)))

In [None]:
# Synthetic training set: generated features and their class ids.
Y_train = labels_flat[:images_flat.shape[0]]
X_train = images_flat

# Binarized version of the labels.
# NOTE(review): the classifier cells in this notebook fit on `Y_train`;
# `Y_train_org` and `Y_train_vec` are not referenced by them - confirm
# whether they are still needed.
classes = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
Y_train_org = label_binarize(Y_train, classes=classes)
Y_train_vec = layers.Flatten()(Y_train_org)

### Get validation results

In [None]:
##### Vanilla Neural Network

# One-vs-rest MLP trained on the generated data only.
tf.random.set_seed(100)
classifier_NN = OneVsRestClassifier(MLPClassifier(random_state=2, alpha=1))
NN_model = classifier_NN.fit(X_train, Y_train)

In [None]:
# ROC per class: Validating on REAL training dataset.
# The singleton axis of `train_dataset` is dropped to obtain 2-D features.
Y_score = NN_model.predict_proba(np.squeeze(train_dataset, axis = 1))
false_positive_rate, true_positive_rate, roc_auc = compute_fpr_tpr_roc(np.array(train_labels_vec), Y_score)
# One "<key> = <AUC>" string per class column plus the "micro" average.
[str(au) + " = " + str(roc_auc[au]) for au in roc_auc]

In [None]:
# Color every pixel by its predicted class with one vectorized lookup instead
# of a Python double loop. `gt_thematic_map` is already built in the Dataset
# section, so it is reused here rather than rebuilt.
predicted_dataset = NN_model.predict(x).astype(int)
n_pixels = number_of_rows * number_of_columns
predicted_thematic_map = indianpines_colors[
    predicted_dataset.reshape(-1)[:n_pixels]
].reshape(number_of_rows, number_of_columns, 3)

# Ground truth (left) next to the prediction (right).
fig, (ax_gt, ax_pred) = plt.subplots(1, 2, figsize=(15, 15))
ax_gt.imshow(gt_thematic_map)
ax_gt.set_title('Ground truth')
ax_pred.imshow(predicted_thematic_map)
ax_pred.set_title('Predicted (NN trained on generated data)')
plt.show()

In [None]:
##### Logistic Regression

# One-vs-rest logistic regression trained on the generated data only.
tf.random.set_seed(100)
classifier_LR = OneVsRestClassifier(LogisticRegression(solver='lbfgs', 
                                                       multi_class='multinomial', 
                                                       random_state=2))
LR_model = classifier_LR.fit(X_train, Y_train)

In [None]:
# ROC per class: Validating on REAL training dataset.
# The singleton axis of `train_dataset` is dropped to obtain 2-D features.
Y_score = LR_model.predict_proba(np.squeeze(train_dataset, axis = 1))
false_positive_rate, true_positive_rate, roc_auc = compute_fpr_tpr_roc(np.array(train_labels_vec), Y_score)
# One "<key> = <AUC>" string per class column plus the "micro" average.
[str(au) + " = " + str(roc_auc[au]) for au in roc_auc]

---

## TESTING

The model trained on generated data is tested on the real Indian Pines test split to evaluate utility.

### Load test data

In [None]:
# The test features are used exactly as they were split off. They already lie
# in [0, 1], like the real training features and the generator's output. The
# former `(x - 127.5) / 127.5` rescaling was a leftover from an 8-bit image
# pipeline; it pushed every test feature to about -1, a range the classifiers
# never saw during training.
X_test_org = test_dataset

# Labels arrive as a column of class ids; flatten them before binarizing
# (this also avoids calling int() on 1-element arrays).
classes = list(range(COND_num_classes))
Y_test_org = label_binarize(np.asarray(test_labels).reshape(-1).astype(int), classes=classes)

X_test = layers.Flatten()(X_test_org)
Y_test = layers.Flatten()(Y_test_org)

### Get test results

In [None]:
##### Vanilla Neural Network

# One-vs-rest MLP trained on the generated data only.
# NOTE(review): same estimator settings and training data as `NN_model` in the
# validation section; only the TensorFlow seed differs - confirm whether a
# refit is needed.
tf.random.set_seed(1)
classifier_NN = OneVsRestClassifier(MLPClassifier(random_state=2, alpha=1))
NN_model2 = classifier_NN.fit(X_train, Y_train)

In [None]:
# ROC per class on the held-out real test split.
Y_score = NN_model2.predict_proba(X_test)
false_positive_rate, true_positive_rate, roc_auc = compute_fpr_tpr_roc(np.array(Y_test), Y_score)
# One "<key> = <AUC>" string per class column plus the "micro" average.
[str(au) + " = " + str(roc_auc[au]) for au in roc_auc]

In [None]:
##### Logistic Regression

# One-vs-rest logistic regression trained on the generated data only.
# NOTE(review): same estimator settings and training data as `LR_model` in the
# validation section; only the TensorFlow seed differs - confirm whether a
# refit is needed.
tf.random.set_seed(1)
classifier_LR = OneVsRestClassifier(LogisticRegression(solver='lbfgs', 
                                                       multi_class='multinomial', 
                                                       random_state=2))
LR_model2 = classifier_LR.fit(X_train, Y_train)

In [None]:
# ROC per class on the held-out real test split.
Y_score = LR_model2.predict_proba(X_test)
false_positive_rate, true_positive_rate, roc_auc = compute_fpr_tpr_roc(np.array(Y_test), Y_score)
# One "<key> = <AUC>" string per class column plus the "micro" average.
[str(au) + " = " + str(roc_auc[au]) for au in roc_auc]