## Using TensorFlow Keras

In [1]:
import numpy as np
import tensorflow as tf
from keras import optimizers

# Reference: https://github.com/tensorflow/privacy/blob/master/tutorials/mnist_dpsgd_tutorial_keras.py

Using TensorFlow backend.


In [0]:
def load_mnist():
  """Fetches MNIST through Keras and prepares it for a 28x28x1 conv net.

  Returns:
    A 4-tuple (train_data, train_labels, test_data, test_labels). Images are
    float32 arrays divided by 255 and reshaped to (N, 28, 28, 1); labels are
    one-hot encoded over 10 classes.

  Raises:
    AssertionError: if either image array does not span exactly [0, 1] after
      scaling.
  """
  (train_images, train_digits), (test_images, test_digits) = (
      tf.keras.datasets.mnist.load_data())

  def _prepare_images(raw_images):
    # Scale pixel values into [0, 1] and append a trailing channel axis.
    scaled = np.array(raw_images, dtype=np.float32) / 255
    return scaled.reshape(scaled.shape[0], 28, 28, 1)

  def _prepare_labels(raw_digits):
    # Integer class ids -> one-hot rows with 10 columns.
    as_int = np.array(raw_digits, dtype=np.int32)
    return tf.keras.utils.to_categorical(as_int, num_classes=10)

  train_data = _prepare_images(train_images)
  test_data = _prepare_images(test_images)
  train_labels = _prepare_labels(train_digits)
  test_labels = _prepare_labels(test_digits)

  # Sanity check: both splits must cover the full normalized range.
  for images in (train_data, test_data):
    assert images.min() == 0.
    assert images.max() == 1.

  return train_data, train_labels, test_data, test_labels

In [3]:
pip install tensorflow_privacy

Collecting tensorflow_privacy
[?25l  Downloading https://files.pythonhosted.org/packages/9a/a9/761c3b54b01041b6008a4dde837e7bbb89418c1a86d4213abddeefab4793/tensorflow_privacy-0.0.1-py3-none-any.whl (79kB)
[K     |████████████████████████████████| 81kB 22.9MB/s 
Installing collected packages: tensorflow-privacy
Successfully installed tensorflow-privacy-0.0.1


In [4]:
from privacy.analysis.rdp_accountant import compute_rdp
from privacy.analysis.rdp_accountant import get_privacy_spent
from privacy.optimizers.dp_optimizer import DPGradientDescentGaussianOptimizer

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.





module `bolt_on` was not found in this version of TF Privacy


## Without differential_privacy

訓練運行時間快, 準確率較高


In [6]:
##Without differential_privacy

def main():
  """Trains the MNIST CNN with plain (non-private) SGD as a baseline.

  Loads the data with load_mnist(), builds a small convolutional classifier
  whose last layer emits logits, trains it with tf.keras.optimizers.SGD and
  lets Keras report loss/accuracy on the test split after every epoch.

  NOTE: a later cell in this notebook defines another function called `main`;
  whichever cell was executed last is the one bound to the name.
  """
  epochs = 5
  batch_size = 250
  learning_rate = 0.15

  # Load training and test data.
  train_data, train_labels, test_data, test_labels = load_mnist()

  # Define a sequential Keras model
  model = tf.keras.Sequential([
      tf.keras.layers.Conv2D(16, 8,
                             strides=2,
                             padding='same',
                             activation='relu',
                             input_shape=(28, 28, 1)),
      tf.keras.layers.MaxPool2D(2, 1),
      tf.keras.layers.Conv2D(32, 4,
                             strides=2,
                             padding='valid',
                             activation='relu'),
      tf.keras.layers.MaxPool2D(2, 1),
      tf.keras.layers.Flatten(),
      tf.keras.layers.Dense(32, activation='relu'),
      tf.keras.layers.Dense(10)
  ])

  # The final Dense layer has no activation, so the loss consumes raw logits.
  optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
  loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

  # Compile model with Keras
  model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

  # Train model with Keras
  model.fit(train_data, train_labels,
            epochs=epochs,
            validation_data=(test_data, test_labels),
            batch_size=batch_size)


if __name__ == '__main__':
  main()

Train on 60000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


## With differential_privacy 


加入 differential_privacy, 運行時間較長, 準確率相較之下較低



In [0]:
# Gaussian noise multiplier assumed by the privacy accounting below.
noise_multiplier = 1.1


def compute_epsilon(steps):
  """Computes epsilon value for given hyperparameters.

  Uses the module-level `noise_multiplier` both for the "no noise" guard and
  for the RDP computation, so the two can never disagree.

  Args:
    steps: number of training steps (minibatch updates) to account for.

  Returns:
    float('inf') when `noise_multiplier` is 0.0; otherwise the first element
    of the result of get_privacy_spent() for target_delta=1e-5.
  """
  if noise_multiplier == 0.0:
    return float('inf')
  orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
  # Batch size (250) over training-set size (60000).
  sampling_probability = 250 / 60000
  rdp = compute_rdp(q=sampling_probability,
                    noise_multiplier=noise_multiplier,
                    steps=steps,
                    orders=orders)
  # Delta is set to 1e-5 because MNIST has 60000 training points.
  return get_privacy_spent(orders, rdp, target_delta=1e-5)[0]

In [8]:
##With differential_privacy

def main():
  """Trains the MNIST CNN with DP-SGD and prints the epsilon that was spent.

  Loads the data with load_mnist(), builds the same small convolutional
  classifier as the non-private baseline, trains it with
  DPGradientDescentGaussianOptimizer and finally reports epsilon for
  delta=1e-5 via compute_epsilon().

  The optimizer reads the module-level `noise_multiplier` (defined in the
  cell that defines compute_epsilon) so that training uses the same noise
  setting as the accounting knob.
  """
  epochs = 5
  batch_size = 250

  # Load training and test data.
  train_data, train_labels, test_data, test_labels = load_mnist()

  # Define a sequential Keras model
  model = tf.keras.Sequential([
      tf.keras.layers.Conv2D(16, 8,
                             strides=2,
                             padding='same',
                             activation='relu',
                             input_shape=(28, 28, 1)),
      tf.keras.layers.MaxPool2D(2, 1),
      tf.keras.layers.Conv2D(32, 4,
                             strides=2,
                             padding='valid',
                             activation='relu'),
      tf.keras.layers.MaxPool2D(2, 1),
      tf.keras.layers.Flatten(),
      tf.keras.layers.Dense(32, activation='relu'),
      tf.keras.layers.Dense(10)
  ])

  # One microbatch per example in the minibatch.
  optimizer = DPGradientDescentGaussianOptimizer(
      l2_norm_clip=1.0,
      noise_multiplier=noise_multiplier,
      num_microbatches=batch_size,
      learning_rate=0.15)
  # Compute vector of per-example loss rather than its mean over a minibatch.
  loss = tf.keras.losses.CategoricalCrossentropy(
      from_logits=True, reduction=tf.losses.Reduction.NONE)

  # Compile model with Keras
  model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

  # Train model with Keras
  model.fit(train_data, train_labels,
            epochs=epochs,
            validation_data=(test_data, test_labels),
            batch_size=batch_size)

  # Compute the privacy budget expended.
  # Number of minibatch updates actually performed during fit().
  steps = epochs * train_data.shape[0] // batch_size
  eps = compute_epsilon(steps)
  print('For delta=1e-5, the current epsilon is: %.2f' % eps)


if __name__ == '__main__':
  main()


Train on 60000 samples, validate on 10000 samples
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
For delta=1e-5, the current epsilon is: 1.22
