<a href="https://colab.research.google.com/github/Herdol/DoubleDIP/blob/master/FedSim_Beta.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Federated Learning Notebook
## 1. Installing Necessary Packages
Because Colab runs on a remote virtual machine, the packages must be reinstalled every time the runtime is disconnected from the remote server.


A local runtime avoids this. However, the code will then run locally and its performance will depend on the local machine's hardware.

---

The original example does not install "*tensorflow_federated*". However, if it is not installed, later cells raise errors.




In [None]:
#@test {"skip": true}
# Install TensorFlow Federated and nest_asyncio into the current runtime.
# NOTE(review): both the nightly and the stable TFF packages are installed
# here, and the pip log recorded for this cell reports dependency conflicts --
# consider keeping only one of them and pinning its version.
!pip install --quiet --upgrade tensorflow_federated_nightly
!pip install --quiet --upgrade nest_asyncio
!pip install tensorflow_federated
# Patch asyncio via nest_asyncio; presumably required so TFF can run inside the
# notebook's already-running event loop -- TODO confirm.
import nest_asyncio
nest_asyncio.apply()

# Load the TensorBoard notebook extension, which provides the `%tensorboard`
# magic used near the end of the notebook.
%load_ext tensorboard

[K     |████████████████████████████████| 522kB 2.8MB/s 
[K     |████████████████████████████████| 153kB 10.0MB/s 
[K     |████████████████████████████████| 394.3MB 42kB/s 
[K     |████████████████████████████████| 112kB 52.4MB/s 
[K     |████████████████████████████████| 1.1MB 39.7MB/s 
[K     |████████████████████████████████| 174kB 38.0MB/s 
[K     |████████████████████████████████| 3.0MB 47.2MB/s 
[K     |████████████████████████████████| 10.6MB 47.6MB/s 
[K     |████████████████████████████████| 1.3MB 43.2MB/s 
[K     |████████████████████████████████| 471kB 45.5MB/s 
[?25h  Building wheel for absl-py (setup.py) ... [?25l[?25hdone
[31mERROR: datascience 0.10.6 has requirement folium==0.2.1, but you'll have folium 0.8.3 which is incompatible.[0m
[31mERROR: tf-nightly 2.4.0.dev20201022 has requirement absl-py~=0.10, but you'll have absl-py 0.9.0 which is incompatible.[0m
[31mERROR: tf-nightly 2.4.0.dev20201022 has requirement grpcio~=1.32.0, but you'll have grpcio 

In [None]:
# Remove every TensorBoard build that is already present so that exactly one
# flavour is installed afterwards. `-y` answers pip's confirmation prompt up
# front; without it the uninstall waits for input, which stalls an unattended
# "Run all".
# NOTE(review): `%load_ext tensorboard` runs in an earlier cell, before this
# reinstall -- confirm the extension still works or reload it afterwards.
!pip uninstall -q -y tensorboard tb-nightly
!pip install -q tb-nightly  # or tensorboard, but not both

This cell enables the TensorBoard dashboard.

## 2. Importing of libraries

If the output is `b'Hello, World!'`, then everything (or at least most things) is working fine.

In [None]:
import collections

import numpy as np
import tensorflow as tf
import tensorflow_federated as tff

# Seed NumPy's global random generator.
# NOTE(review): this seeds NumPy only; the tf.data shuffle used later in the
# notebook is called without a seed.
np.random.seed(0)

# Smoke test: build a trivial federated computation and invoke it right away.
tff.federated_computation(lambda: 'Hello, World!')()

b'Hello, World!'

In [None]:
# Load the EMNIST data from TFF's simulation datasets as a (train, test) pair.
emnist_train, emnist_test = tff.simulation.datasets.emnist.load_data()

## 3. Data Distribution
In this cell, the data is distributed among the clients and the number of examples per label is shown in graphs.

In [None]:
# Number of examples per label for a sample of clients
from matplotlib import pyplot as plt

f = plt.figure(figsize=(12, 7))
f.suptitle('Label Counts for a Sample of Clients')
for i in range(6):
  # Build the local dataset of the i-th client (data distribution to clients).
  client_id = emnist_train.client_ids[i]
  client_dataset = emnist_train.create_tf_dataset_for_client(client_id)

  # Store every label occurrence under its own key, so that each label is
  # drawn by a separate hist call below.
  plot_data = collections.defaultdict(list)
  for example in client_dataset:
    label = example['label'].numpy()
    plot_data[label].append(label)

  # One panel per client in a 2 x 3 grid.
  plt.subplot(2, 3, i + 1)
  plt.title('Client {}'.format(i))
  bin_edges = list(range(11))  # edges 0, 1, ..., 10
  for j in range(10):
    plt.hist(plot_data[j], density=False, bins=bin_edges)

Each client has a different dataset. Therefore, the mean images differ from client to client. This cell illustrates the difference between the client datasets.

In [None]:
# Each client has different mean images, meaning each client will be nudging
# the model in their own directions locally.

for i in range(5):
  client_dataset = emnist_train.create_tf_dataset_for_client(
      emnist_train.client_ids[i])
  # Group this client's images by their label.
  plot_data = collections.defaultdict(list)
  for example in client_dataset:
    plot_data[example['label'].numpy()].append(example['pixels'].numpy())
  # One figure per client, with one panel per label in a 2 x 5 grid.
  f = plt.figure(i, figsize=(12, 5))
  f.suptitle("Client #{}'s Mean Image Per Label".format(i))
  for j in range(10):
    plt.subplot(2, 5, j+1)
    images = plot_data.get(j, [])
    if images:
      # Average the images element-wise over the examples of label j.
      mean_img = np.mean(images, 0)
      plt.imshow(mean_img.reshape((28, 28)))
    # A client may have no example for label j. The mean of an empty list is
    # a scalar nan that cannot be reshaped to 28 x 28, so such a panel is
    # simply left empty instead of raising.
    plt.axis('off')

## 4. Data preparation
Each image is reshaped into a 1 x 784 element vector.

In [None]:
NUM_CLIENTS = 10      # number of client ids sampled for training
NUM_EPOCHS = 5        # how many times each client dataset is repeated
BATCH_SIZE = 20       # examples per batch
SHUFFLE_BUFFER = 100  # buffer size handed to Dataset.shuffle
PREFETCH_BUFFER = 10  # buffer size handed to Dataset.prefetch


def preprocess(dataset):
  """Turn a raw client dataset into repeated, shuffled, flattened batches.

  The stages are applied in this order: repeat, shuffle, batch, map (to the
  `x`/`y` structure), prefetch.

  Args:
    dataset: A dataset whose elements provide `'pixels'` and `'label'` entries.

  Returns:
    The dataset produced by the final `prefetch` stage.
  """

  def batch_format_fn(element):
    """Flatten a batch of `pixels` and return the features as an `OrderedDict`."""
    features = tf.reshape(element['pixels'], [-1, 784])
    labels = tf.reshape(element['label'], [-1, 1])
    return collections.OrderedDict(x=features, y=labels)

  repeated = dataset.repeat(NUM_EPOCHS)
  shuffled = repeated.shuffle(SHUFFLE_BUFFER)
  batched = shuffled.batch(BATCH_SIZE)
  formatted = batched.map(batch_format_fn)
  return formatted.prefetch(PREFETCH_BUFFER)

In [None]:
def make_federated_data(client_data, client_ids):
  """Build one preprocessed dataset per requested client.

  Args:
    client_data: Object exposing `create_tf_dataset_for_client(client_id)`.
    client_ids: Iterable of client ids to build datasets for.

  Returns:
    A list with the result of `preprocess` for every id, in the order the ids
    were given.
  """
  federated_data = []
  for client_id in client_ids:
    client_dataset = client_data.create_tf_dataset_for_client(client_id)
    federated_data.append(preprocess(client_dataset))
  return federated_data

In [None]:
# Train on the first NUM_CLIENTS client ids only.
sample_clients = emnist_train.client_ids[:NUM_CLIENTS]
federated_train_data = make_federated_data(emnist_train, sample_clients)

# Report how many client datasets were built and show the first one.
count_message = 'Number of client datasets: {l}'.format(
    l=len(federated_train_data))
print(count_message)
first_message = 'First dataset: {d}'.format(d=federated_train_data[0])
print(first_message)

In [None]:
def create_keras_model():
  """Build a new, uncompiled Keras model.

  The model takes 784 input features, applies a single 10-unit dense layer
  whose kernel is initialised to zeros, and finishes with a softmax.

  Returns:
    A `tf.keras.models.Sequential` instance.
  """
  layers = [
      tf.keras.layers.Input(shape=(784,)),
      tf.keras.layers.Dense(10, kernel_initializer='zeros'),
      tf.keras.layers.Softmax(),
  ]
  return tf.keras.models.Sequential(layers)

In [None]:
def model_fn():
  """Wrap a newly created Keras model for use with TFF.

  Reads the module-level `preprocessed_example_dataset` for its element spec,
  so that name must be defined before this function is called.

  Returns:
    The result of `tff.learning.from_keras_model` for a fresh Keras model, with
    sparse categorical cross-entropy loss and sparse categorical accuracy.
  """
  # The model has to be constructed inside this function rather than captured
  # from an outer scope: TFF invokes model_fn within different graph contexts.
  fresh_model = create_keras_model()
  element_spec = preprocessed_example_dataset.element_spec
  loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
  accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
  return tff.learning.from_keras_model(
      fresh_model,
      input_spec=element_spec,
      loss=loss_fn,
      metrics=[accuracy])

In [None]:
# Use the first client's dataset as a reference example.
first_client_id = emnist_train.client_ids[0]
example_dataset = emnist_train.create_tf_dataset_for_client(first_client_id)
example_element = next(iter(example_dataset))

# `model_fn` reads the element spec of this preprocessed dataset.
preprocessed_example_dataset = preprocess(example_dataset)
first_batch = next(iter(preprocessed_example_dataset))
sample_batch = tf.nest.map_structure(lambda x: x.numpy(), first_batch)


def _client_optimizer_fn():
  """Return a new SGD optimizer for the clients (learning rate 0.02)."""
  return tf.keras.optimizers.SGD(learning_rate=0.02)


def _server_optimizer_fn():
  """Return a new SGD optimizer for the server (learning rate 1.0)."""
  return tf.keras.optimizers.SGD(learning_rate=1.0)


# Build the federated averaging process from the model and optimizer factories.
iterative_process = tff.learning.build_federated_averaging_process(
    model_fn,
    client_optimizer_fn=_client_optimizer_fn,
    server_optimizer_fn=_server_optimizer_fn)

In [None]:
# Display the type signature of the `initialize` computation as a string.
str(iterative_process.initialize.type_signature)

In [None]:
# Create the initial state that is later passed to `iterative_process.next`.
state = iterative_process.initialize()

In [None]:
# Run one round on the sampled clients' datasets; `next` returns the updated
# state together with that round's metrics.
state, metrics = iterative_process.next(state, federated_train_data)
print('round  1, metrics={}'.format(metrics))

In [None]:
# Round 1 was run in the previous cell, so this loop continues with rounds
# 2 .. NUM_ROUNDS - 1 (the upper bound of `range` is exclusive).
NUM_ROUNDS = 11
for round_num in range(2, NUM_ROUNDS):
  # Each iteration feeds the state from the previous round back in.
  state, metrics = iterative_process.next(state, federated_train_data)
  print('round {:2d}, metrics={}'.format(round_num, metrics))

In [None]:
#@test {"skip": true}
# Directory for the training scalars; it lies below /tmp/logs/scalars/, the
# log tree that the `%tensorboard` cell points at.
logdir = "/tmp/logs/scalars/training/"
summary_writer = tf.summary.create_file_writer(logdir)
# Start again from a freshly initialised state; the state trained in the
# cells above is replaced.
state = iterative_process.initialize()

In [None]:
#@test {"skip": true}
# Re-run the rounds 1 .. NUM_ROUNDS - 1, this time writing every entry of
# metrics['train'] as a scalar summary with the round number as the step.
with summary_writer.as_default():
  for round_num in range(1, NUM_ROUNDS):
    state, metrics = iterative_process.next(state, federated_train_data)
    for name, value in metrics['train'].items():
      tf.summary.scalar(name, value, step=round_num)

In [None]:
# Open TensorBoard inline on the log tree written by the training cell.
# NOTE(review): `--port=0` presumably lets TensorBoard pick a free port -- confirm.
%tensorboard --logdir /tmp/logs/scalars/ --port=0

In [None]:
#@test {"skip": true}
# Run this cell to clear old output from the log directory, so that future
# graphs drawn from this directory start clean.
!rm -R /tmp/logs/scalars/*