In [35]:
import collections

import dp_accounting
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_federated as tff

## Download and preprocess the federated EMNIST dataset.

In [36]:
def get_emnist_dataset():
  emnist_train, emnist_test = tff.simulation.datasets.emnist.load_data(
      only_digits=True)

  def element_fn(element):
    return collections.OrderedDict(
        x=tf.expand_dims(element['pixels'], -1), y=element['label'])

  def preprocess_train_dataset(dataset):
    # Use buffer_size same as the maximum client dataset size,
    # 418 for Federated EMNIST
    return (dataset.map(element_fn)
                   .shuffle(buffer_size=418)
                   .repeat(1)
                   .batch(32, drop_remainder=False))

  def preprocess_test_dataset(dataset):
    return dataset.map(element_fn).batch(128, drop_remainder=False)

  emnist_train = emnist_train.preprocess(preprocess_train_dataset)
  emnist_test = preprocess_test_dataset(
      emnist_test.create_tf_dataset_from_all_clients())
  return emnist_train, emnist_test

train_data, test_data = get_emnist_dataset()

## Define our model.

In [38]:
def my_model_fn():
  model = tf.keras.models.Sequential([
      tf.keras.layers.Reshape(input_shape=(28, 28, 1), target_shape=(28 * 28,)),
      tf.keras.layers.Dense(200, activation=tf.nn.relu),
      tf.keras.layers.Dense(200, activation=tf.nn.relu),
      tf.keras.layers.Dense(10)])
  return tff.learning.models.from_keras_model(
      keras_model=model,
      loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
      input_spec=test_data.element_spec,
      metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

In [None]:
def train(rounds, noise_multiplier, clients_per_round, data_frame):
    # Increase clipped_count_stddev to a larger value for stability
    clipped_count_stddev = max(1.0, 0.5 * clients_per_round)  # increase multiplier here
    
    aggregation_factory = tff.learning.model_update_aggregator.dp_aggregator(
        noise_multiplier,
        clients_per_round,
        clipped_count_stddev=clipped_count_stddev
    )
    total_clients = len(train_data.client_ids)
    # Poisson subsampling probability
    sampling_prob = clients_per_round / total_clients

    # Build federated averaging process
    learning_process = tff.learning.algorithms.build_unweighted_fed_avg(
        my_model_fn,
        client_optimizer_fn=tff.learning.optimizers.build_sgdm(0.01),
        server_optimizer_fn=tff.learning.optimizers.build_sgdm(1.0, momentum=0.9),
        model_aggregator=aggregation_factory
    )

    eval_process = tff.learning.algorithms.build_fed_eval(my_model_fn)
    state = learning_process.initialize()
    eval_state = eval_process.initialize()

    records = []

    for round in range(rounds):
        if round % 5 == 0:
            model_weights = learning_process.get_model_weights(state)
            eval_state = eval_process.set_model_weights(eval_state, model_weights)
            eval_output = eval_process.next(eval_state, [test_data])

            metrics = eval_output.metrics
            print(f'Round {round:3d}: {metrics}')
            records.append({'Round': round, 'NoiseMultiplier': noise_multiplier, **metrics})

        x = np.random.uniform(size=total_clients)
        sampled_clients = [train_data.client_ids[i] for i in range(total_clients) if x[i] < sampling_prob]

        if not sampled_clients:
            sampled_clients = [train_data.client_ids[np.random.randint(total_clients)]]

        sampled_train_data = [train_data.create_tf_dataset_for_client(client) for client in sampled_clients]

        result = learning_process.next(state, sampled_train_data)
        state = result.state

    model_weights = learning_process.get_model_weights(state)
    eval_state = eval_process.set_model_weights(eval_state, model_weights)
    eval_output = eval_process.next(eval_state, [test_data])
    metrics = eval_output.metrics
    print(f'Round {rounds:3d}: {metrics}')
    records.append({'Round': rounds, 'NoiseMultiplier': noise_multiplier, **metrics})

    data_frame = pd.concat([data_frame, pd.DataFrame.from_records(records)], ignore_index=True)
    return data_frame


In [None]:
data_frame = pd.DataFrame()
rounds = 15
clients_per_round = 5

for noise_multiplier in [0.0, 0.5, 0.75, 1.0]:
  print(f'Starting training with noise multiplier: {noise_multiplier}')
  data_frame = train(rounds, noise_multiplier, clients_per_round, data_frame)
  print()

Starting training with noise multiplier: 0.0


TypeError: dp_aggregator() got an unexpected keyword argument 'clipped_count_stddev'

Now we can visualize the evaluation set accuracy and loss of those runs.

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns


def make_plot(data_frame):
  plt.figure(figsize=(15, 5))

  dff = data_frame.rename(
      columns={'sparse_categorical_accuracy': 'Accuracy', 'loss': 'Loss'}
  )

  plt.subplot(121)
  sns.lineplot(
      data=dff, x='Round', y='Accuracy', hue='NoiseMultiplier', palette='dark'
  )
  plt.subplot(122)
  sns.lineplot(
      data=dff, x='Round', y='Loss', hue='NoiseMultiplier', palette='dark'
  )

In [None]:
make_plot(data_frame)

It appears that with 50 expected clients per round, this model can tolerate a noise multiplier of up to 0.5 without degrading model quality. A noise multiplier of 0.75 seems to cause a bit of model degradation, and 1.0 makes the model diverge.

There is typically a tradeoff between model quality and privacy. The higher noise we use, the more privacy we can get for the same amount of training time and number of clients. Conversely, with less noise, we may have a more accurate model, but we'll have to train with more clients per round to reach our target privacy level.

With the experiment above, we might decide that the small amount of model deterioration at 0.75 is acceptable in order to train the final model faster, but let's assume we want to match the performance of the 0.5 noise-multiplier model.

Now we can use dp_accounting functions to determine how many expected clients per round we would need to get acceptable privacy. Standard practice is to choose delta somewhat smaller than one over the number of records in the dataset. This dataset has 3383 total training users, so let's aim for (2, 1e-5)-DP.

We use `dp_accounting.calibrate_dp_mechanism` to search over the number of clients per round. The privacy accountant (`RdpAccountant`) we use to estimate privacy given a `dp_accounting.DpEvent` is based on [Wang et al. (2018)](https://arxiv.org/abs/1808.00087) and [Mironov et al. (2019)](https://arxiv.org/pdf/1908.10530.pdf).

In [None]:
total_clients = 3383
noise_to_clients_ratio = 0.01
target_delta = 1e-5
target_eps = 2

# Initialize arguments to dp_accounting.calibrate_dp_mechanism.

# No-arg callable that returns a fresh accountant.
make_fresh_accountant = dp_accounting.rdp.RdpAccountant


# Create function that takes expected clients per round and returns a
# dp_accounting.DpEvent representing the full training process.
def make_event_from_param(clients_per_round):
  q = clients_per_round / total_clients
  noise_multiplier = clients_per_round * noise_to_clients_ratio
  gaussian_event = dp_accounting.GaussianDpEvent(noise_multiplier)
  sampled_event = dp_accounting.PoissonSampledDpEvent(q, gaussian_event)
  composed_event = dp_accounting.SelfComposedDpEvent(sampled_event, rounds)
  return composed_event


# Create object representing the search range [1, 3383].
bracket_interval = dp_accounting.ExplicitBracketInterval(1, total_clients)

# Perform search for smallest clients_per_round achieving the target privacy.
clients_per_round = dp_accounting.calibrate_dp_mechanism(
    make_fresh_accountant,
    make_event_from_param,
    target_eps,
    target_delta,
    bracket_interval,
    discrete=True,
)

noise_multiplier = clients_per_round * noise_to_clients_ratio
print(
    f'To get ({target_eps}, {target_delta})-DP, use {clients_per_round} '
    f'clients with noise multiplier {noise_multiplier}.'
)

Now we can train our final private model for release.


In [None]:
rounds = 100
noise_multiplier = 1.2
clients_per_round = 120

data_frame = pd.DataFrame()
data_frame = train(rounds, noise_multiplier, clients_per_round, data_frame)

make_plot(data_frame)

As we can see, the final model has similar loss and accuracy to the model trained without noise, but this one satisfies (2, 1e-5)-DP.