In [1]:
import numpy as np
import random
import cv2
import os
from imutils import paths
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score

In [2]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from tensorflow.keras import backend as K


#Import ipynb file

In [4]:
!pip install import-ipynb
import import_ipynb
# Install the PyDrive wrapper & import libraries.
# This only needs to be done once per notebook.
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate and create the PyDrive client.
# This only needs to be done once per notebook.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Copy the link and remove the front part of the link (i.e. https://drive.google.com/open?id=) to get the file ID.
your_module = drive.CreateFile({'id':'19XOxqFe7EA8WJ5Yd522iZYqPvHDNZ9Kb'})
your_module.GetContentFile('federated_implementation_utils.ipynb')
from federated_implementation_utils import *

importing Jupyter notebook from federated_implementation_utils.ipynb


In [5]:
#declear path to your mnist data folder
img_path = '/content/drive/My Drive/295-1/archive.zip (Unzipped Files)/trainingSample/trainingSample'

In [6]:
#get the path list using the path object
image_paths = list(paths.list_images(img_path))

In [7]:
#apply our function
image_list, label_list = load(image_paths, verbose=50)

[INFO] processed 50/600
[INFO] processed 100/600
[INFO] processed 150/600
[INFO] processed 200/600
[INFO] processed 250/600
[INFO] processed 300/600
[INFO] processed 350/600
[INFO] processed 400/600
[INFO] processed 450/600
[INFO] processed 500/600
[INFO] processed 550/600
[INFO] processed 600/600


In [8]:
#binarize the labels
lb = LabelBinarizer()
label_list = lb.fit_transform(label_list)

In [9]:
#split data into training and test set
X_train, X_test, y_train, y_test = train_test_split(image_list, 
                                                    label_list, 
                                                    test_size=0.1, 
                                                    random_state=42)

In [10]:
#create clients
clients = create_clients(X_train, y_train, num_clients=5, initial='client')

In [11]:
#process and batch the training data for each client
clients_batched = dict()
for (client_name, data) in clients.items():
    clients_batched[client_name] = batch_data(data)

In [12]:
#process and batch the test set  
test_batched = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(len(y_test))

In [13]:
comms_round = 100

In [14]:
#create optimizer
lr = 0.01 
loss='categorical_crossentropy'
metrics = ['accuracy']
optimizer = SGD(lr=lr, 
                decay=lr / comms_round, 
                momentum=0.9
               ) 

In [15]:
#initialize global model
smlp_global = SimpleMLP()
global_model = smlp_global.build(784, 10)

In [18]:
#commence global training loop
for comm_round in range(comms_round):
            
    # get the global model's weights - will serve as the initial weights for all local models
    global_weights = global_model.get_weights()
    
    #initial list to collect local model weights after scalling
    scaled_local_weight_list = list()

    #randomize client data - using keys
    client_names= list(clients_batched.keys())
    random.shuffle(client_names)
    
    #loop through each client and create new local model
    for client in client_names:
        smlp_local = SimpleMLP()
        local_model = smlp_local.build(784, 10)
        local_model.compile(loss=loss, 
                      optimizer=optimizer, 
                      metrics=metrics)
        
        #set local model weight to the weight of the global model
        local_model.set_weights(global_weights)
        
        #fit local model with client's data
        local_model.fit(clients_batched[client], epochs=1, verbose=0)
        
        #scale the model weights and add to list
        scaling_factor = weight_scalling_factor(clients_batched, client)
        scaled_weights = scale_model_weights(local_model.get_weights(), scaling_factor)
        scaled_local_weight_list.append(scaled_weights)
        
        #clear session to free memory after each communication round
        K.clear_session()
        
    #to get the average over all the local model, we simply take the sum of the scaled weights
    average_weights = sum_scaled_weights(scaled_local_weight_list)
    
    #update global model 
    global_model.set_weights(average_weights)

    #test global model and print out metrics after each communications round
    for(X_test, Y_test) in test_batched:
        global_acc, global_loss = test_model(X_test, Y_test, global_model, comm_round,False)
        SGD_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(len(y_train)).batch(320)


comm_round: 0 | global_acc: 28.333% | global_loss: 2.2875120639801025
comm_round: 1 | global_acc: 36.667% | global_loss: 2.2818262577056885
comm_round: 2 | global_acc: 40.000% | global_loss: 2.2755138874053955
comm_round: 3 | global_acc: 43.333% | global_loss: 2.268787384033203
comm_round: 4 | global_acc: 50.000% | global_loss: 2.261622428894043
comm_round: 5 | global_acc: 55.000% | global_loss: 2.254131555557251
comm_round: 6 | global_acc: 60.000% | global_loss: 2.245543956756592
comm_round: 7 | global_acc: 61.667% | global_loss: 2.236078977584839
comm_round: 8 | global_acc: 63.333% | global_loss: 2.226475477218628
comm_round: 9 | global_acc: 68.333% | global_loss: 2.216399908065796
comm_round: 10 | global_acc: 71.667% | global_loss: 2.2056996822357178
comm_round: 11 | global_acc: 71.667% | global_loss: 2.193910837173462
comm_round: 12 | global_acc: 71.667% | global_loss: 2.1815361976623535
comm_round: 13 | global_acc: 73.333% | global_loss: 2.1690993309020996
comm_round: 14 | global_

In [19]:
smlp_SGD = SimpleMLP()
SGD_model = smlp_SGD.build(784, 10) 

In [20]:
SGD_model.compile(loss=loss, 
              optimizer=optimizer, 
              metrics=metrics)

In [21]:
# fit the SGD training data to model
_ = SGD_model.fit(SGD_dataset, epochs=100, verbose=0)

In [23]:
#test the SGD global model and print out metrics
for(X_test, Y_test) in test_batched:
        SGD_acc, SGD_loss = test_model(X_test, Y_test, SGD_model, 1,True)

tf.Tensor(
[1 7 9 1 3 4 0 6 1 5 0 1 1 9 7 3 0 3 3 1 0 9 8 9 7 8 1 2 9 6 9 2 1 4 3 6 2
 8 6 4 7 0 8 9 0 3 6 1 1 8 4 4 1 9 9 5 3 2 2 3], shape=(60,), dtype=int64)
tf.Tensor(
[1 9 9 1 2 4 0 6 1 1 0 1 1 9 7 9 0 3 3 1 0 9 4 5 7 3 5 2 9 6 9 2 1 6 3 6 6
 8 6 4 7 0 8 9 0 3 6 1 1 5 4 4 1 9 9 5 9 2 2 3], shape=(60,), dtype=int64)
comm_round: 1 | global_acc: 80.000% | global_loss: 1.7105252742767334
