In [1]:
!pip install tensorflow_federated

Collecting tensorflow_federated
  Downloading tensorflow_federated-0.19.0-py2.py3-none-any.whl (602 kB)
[K     |████████████████████████████████| 602 kB 26.8 MB/s 
[?25hCollecting grpcio~=1.34.0
  Downloading grpcio-1.34.1-cp37-cp37m-manylinux2014_x86_64.whl (4.0 MB)
[K     |████████████████████████████████| 4.0 MB 48.6 MB/s 
[?25hCollecting cachetools~=3.1.1
  Downloading cachetools-3.1.1-py2.py3-none-any.whl (11 kB)
Collecting attrs~=19.3.0
  Downloading attrs-19.3.0-py2.py3-none-any.whl (39 kB)
Collecting tensorflow~=2.5.0
  Downloading tensorflow-2.5.2-cp37-cp37m-manylinux2010_x86_64.whl (454.4 MB)
[K     |████████████████████████████████| 454.4 MB 24 kB/s 
Collecting semantic-version~=2.8.5
  Downloading semantic_version-2.8.5-py2.py3-none-any.whl (15 kB)
Collecting tensorflow-model-optimization~=0.5.0
  Downloading tensorflow_model_optimization-0.5.0-py2.py3-none-any.whl (172 kB)
[K     |████████████████████████████████| 172 kB 68.8 MB/s 
Collecting tensorflow-privacy~=0.5.

In [2]:
!pip install git+https://github.com/adap/flower.git@release/0.17#egg=flwr["simulation"]  # For a specific branch (release/0.17) w/ extra ("simulation")

Collecting flwr[simulation]
  Cloning https://github.com/adap/flower.git (to revision release/0.17) to /tmp/pip-install-_pvr1qxl/flwr_db75f9946e5f41f1a9bd5db8cd0644c1
  Running command git clone -q https://github.com/adap/flower.git /tmp/pip-install-_pvr1qxl/flwr_db75f9946e5f41f1a9bd5db8cd0644c1
  Running command git checkout -b release/0.17 --track origin/release/0.17
  Switched to a new branch 'release/0.17'
  Branch 'release/0.17' set up to track remote branch 'release/0.17' from 'origin'.
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Collecting ray[default]==1.6.0
  Downloading ray-1.6.0-cp37-cp37m-manylinux2014_x86_64.whl (49.6 MB)
[K     |████████████████████████████████| 49.6 MB 38 kB/s 
Collecting redis>=3.5.0
  Downloading redis-4.0.2-py3-none-any.whl (119 kB)
[K     |████████████████████████████████| 119 kB 57.9 MB/s 
[?25hCollecting aiohttp-cors
  Downloadin

In [3]:
!pip install tensorflow_privacy
from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras_vectorized import (
    VectorizedDPKerasAdamOptimizer, VectorizedDPKerasSGDOptimizer
)

#RUNTIME ENVIRONMENT MUST BE NONE/CPU

#Dummy training parameters
#MICROBATCH_NUM and the noise multiplier are used for differential privacy
#Make sure MICROBATCH_NUM evenly divides BATCH_SIZE
#Higher noise values result in much slower training -- needs to be investigated
#The microbatch count has not been tuned yet. The text below is the TensorFlow Privacy description of the microbatches parameter

"""microbatches (int) - Each batch of data is split in smaller units called microbatches. 
By default, each microbatch should contain a single training example. 
This allows us to clip gradients on a per-example basis rather than after they have been averaged across the minibatch. 
This in turn decreases the (negative) effect of clipping on signal found in the gradient and typically maximizes utility.
 However, computational overhead can be reduced by increasing the size of microbatches to include more than one training examples. 
The average gradient across these multiple training examples is then clipped.
The total number of examples consumed in a batch, i.e., one step of gradient descent, remains the same. 
The number of microbatches should evenly divide the batch size."""


import tensorflow_privacy



In [4]:
#Mount Google Drive at the relative path 'drive' (only works inside Google Colab).
#NOTE(review): none of the visible cells read from or write to this mount -- confirm it is still needed.
from google.colab import drive
drive.mount('drive')

Mounted at drive


In [5]:
#imports
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

import tensorflow_federated as tff
from datetime import datetime
import collections
import tensorflow as tf
import math
import numpy as np
from typing import Any, Callable, Dict, List, Optional, Tuple
import pandas as pd
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense
import tensorflow_privacy
from time import time

import flwr as fl

In [None]:
#data processing
@tf.autograph.experimental.do_not_convert
def preprocess(dataset, batch_size):
  """Shuffle, batch, reformat and prefetch a client dataset.

  Args:
    dataset: dataset whose elements expose 'pixels' and 'label' entries.
    batch_size: number of examples per batch.

  Returns:
    A dataset of OrderedDicts with keys 'x' and 'y'. It is shuffled with
    the module-level SHUFFLE_BUFFER (fixed seed 1) and prefetched with
    the module-level PREFETCH_BUFFER.
  """

  def to_features_and_labels(batch):
    """Reshape a batch to 'x' of shape [-1, 28, 28] and 'y' of shape [-1, 1]."""
    pixels = tf.reshape(batch['pixels'], [-1, 28,28])
    labels = tf.reshape(batch['label'], [-1, 1])
    return collections.OrderedDict(x=pixels, y=labels)

  shuffled = dataset.shuffle(SHUFFLE_BUFFER, seed=1)
  batched = shuffled.batch(batch_size)
  formatted = batched.map(to_features_and_labels)
  return formatted.prefetch(PREFETCH_BUFFER)

In [None]:
#load in specific partition of dataset based on client id
#Cache for the EMNIST client datasets so they are loaded once rather than on every load_partition call
_EMNIST_CACHE = {}


def _load_emnist():
    """Return the (train, test) EMNIST digits client datasets, loading them only on first use."""
    if "data" not in _EMNIST_CACHE:
        _EMNIST_CACHE["data"] = tff.simulation.datasets.emnist.load_data(only_digits=True)
    return _EMNIST_CACHE["data"]


def _client_arrays(client_dataset):
    """Return one client's whole dataset as (pixels, labels) NumPy arrays taken from a single shuffled batch."""
    # The batch size equals the number of examples, so the first batch holds the entire client dataset
    num_examples = len(list(client_dataset))
    batch = next(iter(preprocess(client_dataset, num_examples)))
    batch = tf.nest.map_structure(lambda t: t.numpy(), batch)
    return batch['x'], batch['y']


def load_partition(idx: int):
    """Load the train and test data of a single EMNIST client as NumPy arrays.

    The client is picked by position `idx` in the train and test client id
    lists. Training examples are truncated to a multiple of the module-level
    BATCH_SIZE, because every training batch must later be split evenly
    into microbatches.

    Args:
        idx: position of the client in the EMNIST client id lists.

    Returns:
        (x_train, y_train), (x_test, y_test), where the images are reshaped
        to (N, 28, 28, 1) and the labels are one-hot encoded over 10 classes.
    """
    emnist_train, emnist_test = _load_emnist()
    client_train = emnist_train.create_tf_dataset_for_client(emnist_train.client_ids[idx])
    client_test = emnist_test.create_tf_dataset_for_client(emnist_test.client_ids[idx])

    x_train, y_train = _client_arrays(client_train)
    x_test, y_test = _client_arrays(client_test)

    # Reshape to proper dimensions (add the trailing channel axis)
    x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
    x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)

    # Keep only as many training samples as fill whole batches of BATCH_SIZE
    usable = (x_train.shape[0] // BATCH_SIZE) * BATCH_SIZE
    x_train = x_train[:usable]
    y_train = y_train[:usable]

    y_train = np.array(y_train, dtype=np.int32)
    y_test = np.array(y_test, dtype=np.int32)

    y_train = tf.keras.utils.to_categorical(y_train, num_classes=10)
    y_test = tf.keras.utils.to_categorical(y_test, num_classes=10)

    return (x_train,y_train),(x_test,y_test)

In [None]:
#Returns Model for Client and Server Side. SoftMax function at end removed for necessity
def create_model():
    """Build the CNN used on both the client and the server side.

    The final Dense layer has no activation, so the model outputs raw
    logits for 10 classes rather than softmax probabilities.
    """
    return tf.keras.Sequential([
        # 32 convolution filters, each 3x3, over 28x28 single-channel input
        Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)),
        # 64 convolution filters, each 3x3
        Conv2D(64, (3, 3), activation='relu'),
        # 2x2 max pooling
        MaxPooling2D(pool_size=(2, 2)),
        # dropout with rate 0.25
        Dropout(0.25),
        # flatten the feature maps ahead of the dense layers
        Flatten(),
        # fully connected layer
        Dense(128, activation='relu'),
        # dropout with rate 0.5
        Dropout(0.5),
        # 10 output units, no activation (logits)
        Dense(10),
    ])

In [None]:
#Centralized Model

#preprocessing
SHUFFLE_BUFFER = 100
PREFETCH_BUFFER = 10

#Noise multipliers swept by the differential-privacy experiment
NOISE_ARRAY = [.2, .4, .6, .8, 1, 1.2]

l2_norm_clip = 1
BATCH_SIZE = 32
MICROBATCH_NUM = 8
LEARNING_RATE = .1

#Collect each client's arrays and concatenate once at the end;
#calling np.append inside the loop copies the whole accumulated array on every iteration
x_train_parts, y_train_parts = [], []
x_test_parts, y_test_parts = [], []

#Pool the data of the first 100 clients into one centralized train set and one test set
for i in range(100):

  (x_train,y_train),(x_test,y_test) = load_partition(i)

  x_train_parts.append(x_train)
  y_train_parts.append(y_train)

  x_test_parts.append(x_test)
  y_test_parts.append(y_test)

#The leading empty float64 array keeps the result dtype that np.append onto np.empty(...) produced
x_t = np.concatenate([np.empty((0,28,28,1))] + x_train_parts, axis=0)
y_t = np.concatenate([np.empty((0,10))] + y_train_parts, axis=0)

x_v = np.concatenate([np.empty((0,28,28,1))] + x_test_parts, axis=0)
y_v = np.concatenate([np.empty((0,10))] + y_test_parts, axis=0)

In [None]:
#Results of the noise sweep: one list of epoch indices and one list of validation accuracies per noise level
epochs_for_noise, val_accuracy_noise = [], []

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

class MetricsCallback(tf.keras.callbacks.Callback):
  """Records the epochs started and the held-out accuracy after each epoch of one training run."""

  def __init__(self, test_examples, test_data):
    super().__init__()
    self.test_examples = test_examples
    self.test_data = test_data
    #Per-run history, kept on the instance so each run starts from empty lists
    self.epochs = []
    self.val_accuracy = []

  def on_epoch_begin(self, epoch, logs = None):
    self.epochs.append(epoch)

  def on_epoch_end(self, epoch, logs = None):
    loss, accuracy = self.model.evaluate(self.test_examples, self.test_data, verbose = 1)
    print("validation accuracy is")
    print(accuracy)
    self.val_accuracy.append(accuracy)


early_stop = EarlyStopping(monitor = 'val_accuracy', patience = 2)

#Train one differentially private model per noise multiplier and record its per-epoch accuracy
for noise in NOISE_ARRAY:
  model = create_model()

  my_callback = MetricsCallback(test_examples = x_v, test_data=y_v)

  print(noise)

  optimizer =  VectorizedDPKerasSGDOptimizer(
  l2_norm_clip=l2_norm_clip,
  noise_multiplier=noise,
  num_microbatches=MICROBATCH_NUM, learning_rate = LEARNING_RATE)

  #The loss is left unreduced (one value per example) for the DP optimizer
  loss = tf.keras.losses.CategoricalCrossentropy(
  from_logits=True, reduction=tf.losses.Reduction.NONE)

  model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

  model.fit(x_t, y_t, batch_size=BATCH_SIZE, epochs = 15, callbacks=[early_stop, my_callback], validation_data=(x_v, y_v))

  epochs = my_callback.epochs
  val_accuracy = my_callback.val_accuracy

  epochs_for_noise.append(epochs)

  val_accuracy_noise.append(val_accuracy)

  print(epochs_for_noise, val_accuracy_noise)


In [None]:
import numpy as np

SHUFFLE_BUFFER = 100
PREFETCH_BUFFER = 10

l2_norm_clip = 1
MICROBATCH_NUM = 8

#load_partition truncates each client's training data using the global BATCH_SIZE,
#which this cell does not set: it must already be defined by an earlier cell

#Collect each client's arrays and concatenate once at the end;
#calling np.append inside the loop copies the whole accumulated array on every iteration
x_train_parts, y_train_parts = [], []
x_test_parts, y_test_parts = [], []

for i in range(100):

  (x_train,y_train),(x_test,y_test) = load_partition(i)

  x_train_parts.append(x_train)
  y_train_parts.append(y_train)

  x_test_parts.append(x_test)
  y_test_parts.append(y_test)

#The leading empty float64 array keeps the result dtype that np.append onto np.empty(...) produced
x_t = np.concatenate([np.empty((0,28,28,1))] + x_train_parts, axis=0)
y_t = np.concatenate([np.empty((0,10))] + y_train_parts, axis=0)

x_v = np.concatenate([np.empty((0,28,28,1))] + x_test_parts, axis=0)
y_v = np.concatenate([np.empty((0,10))] + y_test_parts, axis=0)

#Hyper-parameter grid for the cross-validation search
batch_sizes = [16, 32, 64, 128]
learning_rates = [.1, .01, .001]

#Shuffle the pooled training set with a fixed seed so the cross-validation folds are the same on every run
p = np.random.default_rng(0).permutation(len(x_t))

x_t = x_t[p]
y_t = y_t[p]

In [None]:
def three_fold_splitter(split, x_t, y_t, b_size, fold_size=2688):
  """Return the train/validation arrays for one fold of a fixed three-fold split.

  The validation fold is:
    split 1 -> samples from 2*fold_size to the end,
    split 2 -> the first fold_size samples,
    split 3 -> samples fold_size .. 2*fold_size.
  The remaining samples form the training set, which is truncated so that
  its length is a multiple of `b_size`.

  Args:
    split: fold number, one of 1, 2 or 3.
    x_t: feature array, indexed along axis 0.
    y_t: label array aligned with x_t.
    b_size: batch size the training set length must be divisible by.
    fold_size: number of samples in the first two folds (default 2688).

  Returns:
    (x_train, y_train), (x_test, y_test)

  Raises:
    ValueError: if split is not 1, 2 or 3.
  """
  total = len(x_t)
  #Validation range [start, stop) for each fold
  bounds = {
      1: (2 * fold_size, total),
      2: (0, fold_size),
      3: (fold_size, 2 * fold_size),
  }
  if split not in bounds:
    raise ValueError("split must be 1, 2 or 3")
  start, stop = bounds[split]

  x_test = x_t[start:stop]
  y_test = y_t[start:stop]
  x_train = np.concatenate((x_t[:start], x_t[stop:]))
  y_train = np.concatenate((y_t[:start], y_t[stop:]))

  #Truncate using the batch size passed in, not the global BATCH_SIZE
  usable = len(x_train) - len(x_train) % b_size
  x_train = x_train[:usable]
  y_train = y_train[:usable]
  return (x_train, y_train), (x_test, y_test)

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor = 'val_loss', patience = 3, verbose = 1, restore_best_weights = True)

#Maps (batch size, learning rate) -> (stopped epoch per fold, mean validation loss over the folds)
validation_dict = {}

for batch in batch_sizes:
  for rate in learning_rates:
    print(batch, rate)

    pochs = []
    losses = []

    for split in [1, 2, 3]:

      print(split)

      (x_train, y_train), (x_val, y_val) = three_fold_splitter(split, x_t, y_t, batch)

      model = create_model()

      optimizer =  VectorizedDPKerasSGDOptimizer(
      l2_norm_clip=l2_norm_clip,
      noise_multiplier=.2,
      num_microbatches=MICROBATCH_NUM, learning_rate = rate)

      #Named loss_fn so the evaluation result below does not rebind the loss object
      loss_fn = tf.keras.losses.CategoricalCrossentropy(
      from_logits=True, reduction=tf.losses.Reduction.NONE)

      model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

      model.fit(x_train, y_train, batch_size=batch, epochs = 15, callbacks=[early_stop], validation_data=(x_val, y_val))

      #NOTE(review): per the Keras docs stopped_epoch stays 0 when training was not stopped early -- confirm 0 is read that way downstream
      epoch = early_stop.stopped_epoch

      print(epoch)

      fold_loss, fold_accuracy = model.evaluate(x_val, y_val, verbose = 1)

      print(fold_loss)

      pochs.append(epoch)
      losses.append(fold_loss)

    average_loss = sum(losses)/len(losses)

    validation_dict[(batch, rate)] = pochs, average_loss

    print(validation_dict)

In [None]:
#Final Centralized Model Train
import numpy as np

l2_norm_clip = 1
BATCH_SIZE = 32
MICROBATCH_NUM = 8
LEARNING_RATE = .1

SHUFFLE_BUFFER = 100
PREFETCH_BUFFER = 10

#Collect each client's arrays and concatenate once at the end;
#calling np.append inside the loop copies the whole accumulated array on every iteration
x_train_parts, y_train_parts = [], []
x_test_parts, y_test_parts = [], []

for i in range(100):

  (x_train,y_train),(x_test,y_test) = load_partition(i)

  x_train_parts.append(x_train)
  y_train_parts.append(y_train)

  x_test_parts.append(x_test)
  y_test_parts.append(y_test)

#The leading empty float64 array keeps the result dtype that np.append onto np.empty(...) produced
x_t = np.concatenate([np.empty((0,28,28,1))] + x_train_parts, axis=0)
y_t = np.concatenate([np.empty((0,10))] + y_train_parts, axis=0)

x_v = np.concatenate([np.empty((0,28,28,1))] + x_test_parts, axis=0)
y_v = np.concatenate([np.empty((0,10))] + y_test_parts, axis=0)

#Shuffle the pooled training set with a fixed seed so the sample order is the same on every run
p = np.random.default_rng(0).permutation(len(x_t))

x_t = x_t[p]
y_t = y_t[p]

In [None]:
#Filled in by the final training callback: batch index, test accuracy and test loss at each evaluation
batch_test, test_acc, test_loss = [], [], []

In [None]:

model = create_model()

#Evaluate on the held-out set whenever the batch index is a multiple of this value
EVAL_EVERY_N_BATCHES = 140

class MetricsCallback(tf.keras.callbacks.Callback):
  """Periodically evaluates the model on held-out data during training.

  Results are appended to the module-level lists batch_test, test_acc and test_loss.
  """

  def __init__(self, test_examples, test_data):
      super().__init__()
      self.test_examples = test_examples
      self.test_data = test_data

  def on_train_batch_end(self, batch, logs=None):
        #NOTE(review): `batch` looks like the index within the current epoch, so batch_test
        #would repeat the same indices every epoch -- confirm that is what the later analysis expects
        if batch % EVAL_EVERY_N_BATCHES == 0:
          batch_test.append(batch)
          loss, accuracy = self.model.evaluate(self.test_examples, self.test_data, verbose = 1)
          test_acc.append(accuracy)
          test_loss.append(loss)
          print(batch_test)
          print(test_acc)
          print(test_loss)

my_callback = MetricsCallback(test_examples = x_v, test_data=y_v)

optimizer =  VectorizedDPKerasSGDOptimizer(
l2_norm_clip=l2_norm_clip,
noise_multiplier=.2,
num_microbatches=MICROBATCH_NUM, learning_rate = LEARNING_RATE)


#The loss is left unreduced (one value per example) for the DP optimizer
loss = tf.keras.losses.CategoricalCrossentropy(
from_logits=True, reduction=tf.losses.Reduction.NONE)

model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

#Use the configured BATCH_SIZE rather than repeating the literal 32
model.fit(x_t, y_t, batch_size=BATCH_SIZE, epochs = 30, callbacks=[my_callback])



In [14]:

#Imported here because no earlier cell imports this module; without it the call raises NameError on a fresh kernel
from tensorflow_privacy.privacy.analysis import compute_dp_sgd_privacy

#Privacy budget of the final training run (batch size 32, noise multiplier 0.2, 30 epochs).
#NOTE(review): n=60000 may not match the pooled training set built from 100 clients -- confirm it equals len(x_t)
compute_dp_sgd_privacy.compute_dp_sgd_privacy(n=60000,
                                              batch_size = 32,
                                              noise_multiplier=.2,
                                              epochs=30,
                                              delta=.00001)

DP-SGD with sampling rate = 0.0533% and noise_multiplier = 0.2 iterated over 56250 steps satisfies differential privacy with eps = 818 and delta = 1e-05.
The optimal RDP order is 1.25.
The privacy estimate is likely to be improved by expanding the set of orders.


(817.7613584737297, 1.25)