In [2]:
# Install (or upgrade to) the newest TensorFlow Federated and nest-asyncio
# releases in the notebook runtime. No versions are pinned here.
!pip install --quiet --upgrade tensorflow-federated
!pip install --quiet --upgrade nest-asyncio

# NOTE(review): presumably nest_asyncio.apply() is needed so TFF can drive its
# own event loop inside the notebook's already-running one -- confirm.
import nest_asyncio
nest_asyncio.apply()

# If this cell reports an error, run it a second time.

In [3]:
!pip uninstall numpy
!pip install numpy==1.25

## restart runtime

Found existing installation: numpy 1.25.2
Uninstalling numpy-1.25.2:
  Would remove:
    /usr/local/bin/f2py
    /usr/local/bin/f2py3
    /usr/local/bin/f2py3.10
    /usr/local/lib/python3.10/dist-packages/numpy-1.25.2.dist-info/*
    /usr/local/lib/python3.10/dist-packages/numpy.libs/libgfortran-040039e1.so.5.0.0
    /usr/local/lib/python3.10/dist-packages/numpy.libs/libopenblas64_p-r0-5007b62f.3.23.dev.so
    /usr/local/lib/python3.10/dist-packages/numpy.libs/libquadmath-96973f99.so.0.0.0
    /usr/local/lib/python3.10/dist-packages/numpy/*
Proceed (Y/n)? y
  Successfully uninstalled numpy-1.25.2
Collecting numpy==1.25
  Downloading numpy-1.25.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.6/17.6 MB[0m [31m71.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 

In [1]:
import collections
import numpy as np
import tensorflow as tf
import tensorflow_federated as tff

In [2]:
SEED = 200474710  # seed value: the student ID number
np.random.seed(SEED) # seed NumPy's global random number generator with SEED

In [3]:
# preprocess the input data
def preprocess(dataset, epoch, shuffle_buffer=100, batch_size=20,
               prefetch_buffer=10):
  """Repeat, shuffle, batch and flatten one client's dataset.

  The pipeline applied is, in order:
  repeat -> shuffle (seeded with the module-level SEED) -> batch ->
  map(batch_format_fn) -> prefetch.

  Args:
    dataset: Dataset whose elements expose 'pixels' and 'label' entries
      (assumed to be a `tf.data.Dataset` -- confirm against the caller).
    epoch: Number of times the dataset is repeated before shuffling, i.e. the
      number of local passes over the client's data.
    shuffle_buffer: Size of the shuffle buffer. Defaults to 100.
    batch_size: Number of examples per batch. Defaults to 20.
    prefetch_buffer: Number of batches to prefetch. Defaults to 10.

  Returns:
    The transformed dataset. Each element is an `OrderedDict` with `x`
    reshaped to `[-1, 784]` and `y` reshaped to `[-1, 1]`.
  """
  def batch_format_fn(element):
    """Flatten a batch `pixels` and return the features as an `OrderedDict`."""
    return collections.OrderedDict(
        x=tf.reshape(element['pixels'], [-1, 784]),
        y=tf.reshape(element['label'], [-1, 1]))

  repeated = dataset.repeat(epoch)
  # A fixed seed keeps the shuffle order reproducible between runs.
  shuffled = repeated.shuffle(shuffle_buffer, seed=SEED)
  batched = shuffled.batch(batch_size)
  return batched.map(batch_format_fn).prefetch(prefetch_buffer)

# combine data from multiple clients
def make_federated_data(client_data, client_ids, epoch):
  """Build one preprocessed dataset per requested client.

  Args:
    client_data: Object providing `create_tf_dataset_for_client(client_id)`.
    client_ids: Iterable of client identifiers to include.
    epoch: Passed through to `preprocess` for every client.

  Returns:
    A list with one preprocessed dataset per entry of `client_ids`, in the
    same order.
  """
  federated_datasets = []
  for client_id in client_ids:
    client_dataset = client_data.create_tf_dataset_for_client(client_id)
    federated_datasets.append(preprocess(client_dataset, epoch))
  return federated_datasets

In [4]:
# Load the federated EMNIST train and test splits and report how many
# clients the training split contains.
emnist_train, emnist_test = tff.simulation.datasets.emnist.load_data()
print("Total number of clients: ", len(emnist_train.client_ids))

Downloading emnist_all.sqlite.lzma: 100%|██████████| 170507172/170507172 [00:24<00:00, 7041006.47it/s]

Total number of clients:  3383





In [5]:
# Determine the input data structure (element spec) expected by the ML model
# from a single example client.
example_dataset = emnist_train.create_tf_dataset_for_client(emnist_train.client_ids[0])
# Use one epoch rather than zero: repeat(0) would make this dataset empty.
# Only its element_spec is read by model_fn, and that is the same either way.
preprocessed_example_dataset = preprocess(example_dataset, 1)

# Neural network model
def create_keras_model():
  """Return a new single-layer softmax classifier over 784 input features.

  The model is an input layer of shape (784,), a 10-unit dense layer whose
  kernel is initialised to zeros, and a softmax activation.
  """
  layers = [
      tf.keras.layers.InputLayer(input_shape=(784,)),
      tf.keras.layers.Dense(10, kernel_initializer='zeros'),
      tf.keras.layers.Softmax(),
  ]
  return tf.keras.models.Sequential(layers)


def model_fn():
  """Wrap a freshly built Keras model as a TFF learning model.

  Returns:
    The result of `tff.learning.models.from_keras_model` for a new Keras
    model, using the element spec of `preprocessed_example_dataset` as input
    spec, sparse categorical cross-entropy as loss and sparse categorical
    accuracy as the only metric.
  """
  # The Keras model has to be constructed here, on every call, and must not
  # be captured from an enclosing scope: TFF invokes this function within
  # different graph contexts.
  keras_model = create_keras_model()
  loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
  accuracy_metric = tf.keras.metrics.SparseCategoricalAccuracy()
  return tff.learning.models.from_keras_model(
      keras_model,
      input_spec=preprocessed_example_dataset.element_spec,
      loss=loss_fn,
      metrics=[accuracy_metric])

In [None]:
# PART A: keep the number of local training epochs fixed at 5 and vary the
# number of participating clients.
NUM_EPOCHS = 5

# total number of server and client interactions (training rounds)
NUM_ROUNDS = 10

# Neither process depends on the number of clients, so both are built once
# here instead of being rebuilt on every loop iteration.
iterative_process = tff.learning.algorithms.build_weighted_fed_avg(
    model_fn,
    client_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=0.01),
    server_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=1.0))
# Dedicated evaluation process: computes metrics without updating the model.
evaluation_process = tff.learning.algorithms.build_fed_eval(model_fn)

for NUM_CLIENTS in [5, 25, 50]:
    # replace=False draws distinct clients; with the default (replace=True)
    # the same client can be picked several times, so fewer than NUM_CLIENTS
    # different clients would actually take part.
    sample_clients = np.random.choice(
        emnist_train.client_ids, NUM_CLIENTS, replace=False)
    print("Number of clients selected: ", NUM_CLIENTS)

    # consider data from only the selected clients
    federated_train_data = make_federated_data(emnist_train, sample_clients, NUM_EPOCHS)
    print(f'Number of epochs considered: {NUM_EPOCHS}')

    # Re-initialise the model parameters for every configuration so each run
    # starts from the same untrained state.
    state = iterative_process.initialize()

    for round_num in range(1, NUM_ROUNDS + 1):
        # Read the round's output through its documented attributes instead
        # of unpacking it like a tuple.
        output = iterative_process.next(state, federated_train_data)
        state = output.state
        metrics = output.metrics
        print('round {:2d}, training accuracy= {}%'.format(round_num, metrics['client_work']['train']['sparse_categorical_accuracy']*100))

    # Evaluate the final model on the selected clients' test data. The trained
    # weights are copied into the evaluation process; calling
    # iterative_process.next on the test data would instead run one more
    # training round on it and report a training metric.
    federated_test_data = make_federated_data(emnist_test, sample_clients, 1)
    evaluation_state = evaluation_process.initialize()
    evaluation_state = evaluation_process.set_model_weights(
        evaluation_state, iterative_process.get_model_weights(state))
    evaluation_output = evaluation_process.next(evaluation_state, federated_test_data)
    test_metrics = evaluation_output.metrics['client_work']['eval']['current_round_metrics']
    print('Test accuracy= {}%'.format(test_metrics['sparse_categorical_accuracy']*100))

In [None]:
## TODO: you need to iteratively change NUM_EPOCHS for part 'b'
# PART B
# Part b asks us to fix the number of clients to 5.
NUM_CLIENTS = 5

# total number of server and client interactions (training rounds)
NUM_ROUNDS = 10

# Neither process depends on the number of local epochs, so both are built
# once here instead of being rebuilt on every loop iteration.
iterative_process = tff.learning.algorithms.build_weighted_fed_avg(
    model_fn,
    client_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=0.01),
    server_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=1.0))
# Dedicated evaluation process: computes metrics without updating the model.
evaluation_process = tff.learning.algorithms.build_fed_eval(model_fn)

# Iterate over the number of local training epochs with the client count fixed.
for NUM_EPOCHS in [5, 50, 100]:
    # replace=False draws distinct clients; with the default (replace=True)
    # the same client can be picked several times, so fewer than NUM_CLIENTS
    # different clients would actually take part.
    sample_clients = np.random.choice(
        emnist_train.client_ids, NUM_CLIENTS, replace=False)
    print("Number of clients selected: ", NUM_CLIENTS)

    # consider data from only the selected clients
    federated_train_data = make_federated_data(emnist_train, sample_clients, NUM_EPOCHS)
    print(f'Number of epochs considered: {NUM_EPOCHS}')

    # Re-initialise the model parameters for every configuration so each run
    # starts from the same untrained state.
    state = iterative_process.initialize()

    for round_num in range(1, NUM_ROUNDS + 1):
        # Read the round's output through its documented attributes instead
        # of unpacking it like a tuple.
        output = iterative_process.next(state, federated_train_data)
        state = output.state
        metrics = output.metrics
        print('round {:2d}, training accuracy= {}%'.format(round_num, metrics['client_work']['train']['sparse_categorical_accuracy']*100))

    # Evaluate the final model on the selected clients' test data. The trained
    # weights are copied into the evaluation process; calling
    # iterative_process.next on the test data would instead run one more
    # training round on it and report a training metric.
    federated_test_data = make_federated_data(emnist_test, sample_clients, 1)
    evaluation_state = evaluation_process.initialize()
    evaluation_state = evaluation_process.set_model_weights(
        evaluation_state, iterative_process.get_model_weights(state))
    evaluation_output = evaluation_process.next(evaluation_state, federated_test_data)
    test_metrics = evaluation_output.metrics['client_work']['eval']['current_round_metrics']
    print('Test accuracy= {}%'.format(test_metrics['sparse_categorical_accuracy']*100))