# Initial Setup

In [None]:
# Mount Google Drive in the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import nest_asyncio
nest_asyncio.apply()

import numpy as np
import tensorflow as tf
import random

# --- Federated-learning hyperparameters -------------------------------------
# Share of all clients that takes part in a training round.
FRACTION = 0.1
# Minibatch size handed to the local Keras `fit` call.
# NOTE(review): the original note reads "inf = -1"; confirm how -1 is actually
# handled before relying on it.
BATCH_SIZE = 10
# Local epochs each client trains per round (kept fixed).
NUM_EPOCHS = 5
# Upper bound on the number of communication rounds.
TRAINING_ROUNDS = 100

# False: one client subset is drawn once and reused for every round.
# True : a fresh client subset is drawn at the start of each round.
CLIENTS_SHUFFLE_PER_ROUND = False

In [None]:
import os
import time
import sys
import csv
import pandas as pd

class ParameterSaver:
    """Write model parameters of a federated run into a timestamped directory.

    Directory layout produced by the methods below::

        <save_path>/parameter_set_<YYYYmmddHHMMSS>/
            initial_parameter.csv
            round_<NNNNNN>/
                local_parameter_cli_<client_id>.xlsx
                aggregated_global_parameter.xlsx

    Each workbook has one sheet per (kernel, bias) pair. The kernel is
    flattened to 2-D and the bias is appended as one extra final row.
    A parameter set is expected to be an indexable sequence of eight arrays
    ordered kernel, bias, kernel, bias, ... with sizes matching
    ``_SHEET_LAYOUT``.
    """

    # (sheet name, 2-D kernel shape) for each kernel/bias pair, in the order
    # the pairs appear in the parameter sequence.
    _SHEET_LAYOUT = (
        ("conv_layer1", (25, 32)),          # 5*5 kernel x 32 filters
        ("conv_layer2", (5 * 5 * 32, 64)),  # 5*5*32 kernel x 64 filters
        ("dense_1", (3136, 512)),
        ("dense_2", (512, 10)),
    )

    def __init__(self):
        """Create the run directory and print where it was created."""
        self.save_path = "/content/drive/MyDrive/Federated-Learning/fl-model-parameters-dataset"

        self.directory_name = "parameter_set_" + time.strftime("%Y%m%d%H%M%S", time.localtime())

        # makedirs also creates a missing save_path and tolerates an existing
        # run directory (two savers created within the same second).
        os.makedirs(os.path.join(self.save_path, self.directory_name), exist_ok=True)
        print(f"{self.directory_name} directory is created in {self.save_path}")

        # Set by round_start(); the workbook writers save into this sub-directory.
        self.current_round_directory_name = ""

    def save_initial_parameter(self, initial_parameter):
        """Write ``initial_parameter.csv`` with one text entry per tensor.

        NOTE: every entry is the ``str()`` of a whole array, which NumPy
        abbreviates for large arrays, so this file is a summary rather than
        a lossless dump.
        """
        # The tensors have different shapes. Pack them into an explicit object
        # array instead of relying on implicit ragged conversion, which recent
        # NumPy releases reject with a ValueError.
        packed = np.empty(len(initial_parameter), dtype=object)
        for position, tensor in enumerate(initial_parameter):
            packed[position] = tensor
        np.savetxt(
            os.path.join(self.save_path, self.directory_name, "initial_parameter.csv"),
            packed,
            fmt='%s',
            delimiter=',',
        )

    def round_start(self, round_number):
        """Select (and create if needed) the ``round_<NNNNNN>`` directory."""
        self.current_round_directory_name = f"round_{round_number:06d}"
        # exist_ok lets a round be re-run without FileExistsError.
        os.makedirs(
            os.path.join(self.save_path, self.directory_name, self.current_round_directory_name),
            exist_ok=True,
        )

    def _layer_frames(self, parameter):
        """Return ``[(sheet_name, DataFrame), ...]`` for one parameter set."""
        frames = []
        for pair_index, (sheet_name, kernel_shape) in enumerate(self._SHEET_LAYOUT):
            kernel = np.asarray(parameter[2 * pair_index]).reshape(kernel_shape)
            bias = np.asarray(parameter[2 * pair_index + 1]).reshape(1, kernel_shape[1])
            # Bias becomes the last row below the flattened kernel.
            frames.append((sheet_name, pd.DataFrame(np.concatenate((kernel, bias)))))
        return frames

    def _save_workbook(self, file_name, parameter):
        """Write ``parameter`` as a multi-sheet workbook in the current round directory."""
        target = os.path.join(
            self.save_path, self.directory_name, self.current_round_directory_name, file_name
        )
        frames = self._layer_frames(parameter)
        with pd.ExcelWriter(target) as writer:
            for sheet_name, frame in frames:
                frame.to_excel(writer, sheet_name=sheet_name)

    def save_local_parameter(self, client_id, local_parameter):
        """Save one client's parameters as ``local_parameter_cli_<client_id>.xlsx``."""
        self._save_workbook(f"local_parameter_cli_{client_id}.xlsx", local_parameter)

    def save_aggreated_global_parameter(self, aggregated_global_parameter):
        """Save the aggregated parameters as ``aggregated_global_parameter.xlsx``.

        The data written is the ``aggregated_global_parameter`` argument.
        Earlier code read a module-level ``local_parameter`` here and so
        stored a single client's weights under this file name.
        """
        self._save_workbook("aggregated_global_parameter.xlsx", aggregated_global_parameter)

    # Correctly spelled alias; the original (misspelled) name stays for callers.
    save_aggregated_global_parameter = save_aggreated_global_parameter


        
#---------------------------------------------------------------------------------
# Constructing the saver creates this run's timestamped output directory and
# prints its name; the training cell uses this instance to store parameters.
parameter_saver= ParameterSaver() # make directory for saving parameter set

parameter_set_20210303044924 directory is created in /content/drive/MyDrive/Federated-Learning/fl-model-parameters-dataset


# Build the Preprocessed (I.I.D.) Dataset

In [None]:
# Plain MNIST (not EMNIST), loaded through Keras.
mnist_train, mnist_test = tf.keras.datasets.mnist.load_data() # This dataset is not "E"mnist. Don't confuse!

# Scale pixels to [0, 1], add a channel axis, and pair every image with its
# label so that shuffling keeps the two aligned.
scaled_images = mnist_train[0].reshape(-1, 28, 28, 1).astype("float32") / 255.0
float_labels = mnist_train[1].astype("float32")
raw_dataset_for_iid = list(zip(scaled_images, float_labels))
random.shuffle(raw_dataset_for_iid)

# Cut the shuffled samples into consecutive shards of `el_size` examples, one
# shard per simulated client; a trailing incomplete shard is not emitted.
el_size = 600
federated_train_data_for_iid = []
for shard_start in range(0, len(raw_dataset_for_iid) - el_size + 1, el_size):
    shard_images, shard_labels = zip(*raw_dataset_for_iid[shard_start:shard_start + el_size])
    federated_train_data_for_iid.append(
        (np.array(shard_images, dtype="float32"), np.array(shard_labels, dtype="float32"))
    )

federated_train_data = federated_train_data_for_iid

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


# Build the MNIST CNN (~99% Accuracy) Model with Keras

In [None]:
# CNN for 28x28x1 inputs: two conv + max-pool stages followed by two dense layers.
# The layer order fixes the order of the tensors returned by get_weights()
# (kernel, bias per layer), which the parameter-saving code indexes by position.
keras_model = tf.keras.models.Sequential([
    tf.keras.Input(shape=(28, 28, 1)),
    tf.keras.layers.Conv2D(32, kernel_size=(5, 5), activation="relu", padding='same'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2), padding='same'),

    tf.keras.layers.Conv2D(64, kernel_size=(5, 5), activation="relu", padding='same'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2), padding='same'),

    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(10, activation="softmax"),
])

keras_model.summary()

keras_model.compile(
    optimizer='adam',
    # The last layer already applies softmax, so the model outputs
    # probabilities, not logits. from_logits must therefore be False;
    # with True the loss would apply softmax a second time.
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['accuracy'],
)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 28, 28, 32)        832       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 14, 14, 64)        51264     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 7, 7, 64)          0         
_________________________________________________________________
flatten (Flatten)            (None, 3136)              0         
_________________________________________________________________
dense (Dense)                (None, 512)               1606144   
_________________________________________________________________
dense_1 (Dense)              (None, 10)                5

# Start Training

In [None]:
# Federated averaging: every round the selected clients train locally from the
# current global weights, then their weights are averaged into the next global
# weights. Local and aggregated weights are stored via `parameter_saver`.
TOTAL_CLIENTS = len(federated_train_data)
# At least one client must train, otherwise there is nothing to aggregate.
SELECTED_CLIENTS = max(1, int(TOTAL_CLIENTS * FRACTION))
print("total client :", TOTAL_CLIENTS, ", selected client :", SELECTED_CLIENTS)

# Initial client sample; reused for every round unless CLIENTS_SHUFFLE_PER_ROUND is set.
selected_clients_list = clients_status_list = np.random.choice(TOTAL_CLIENTS, size=SELECTED_CLIENTS, replace=False)

global_parameter = keras_model.get_weights()
parameter_saver.save_initial_parameter(global_parameter)

print("-- prameter shape --")
for layer in global_parameter:
    print(layer.shape)

# Per-round accumulators, emptied at the end of every round.
list_of_local_parameter = []
list_of_local_dataset_size = []
list_of_local_accuracy = []
list_of_local_loss = []

# Zero-based index of the first round to run. Keep 0 for a fresh run. A larger
# value only makes sense when `keras_model` still holds the weights of an
# interrupted session, because `global_parameter` above is read from it.
START_ROUND = 0

# `round_index` (not `round`) so the builtin round() is not shadowed.
for round_index in range(START_ROUND, TRAINING_ROUNDS):
    print("\n▶ Round", round_index + 1, "◀")
    parameter_saver.round_start(round_index + 1)

    # Optionally draw a new client subset for this round.
    if CLIENTS_SHUFFLE_PER_ROUND:
        selected_clients_list = np.random.choice(TOTAL_CLIENTS, size=SELECTED_CLIENTS, replace=False)

    # Local training: each client starts from the same global weights.
    for client_dataset in selected_clients_list:
        train_images, train_labels = federated_train_data[client_dataset]

        keras_model.set_weights(global_parameter)

        train_result = keras_model.fit(train_images, train_labels, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS, verbose=0)

        local_parameter = keras_model.get_weights()
        list_of_local_parameter.append(local_parameter)
        parameter_saver.save_local_parameter(client_dataset, local_parameter)
        list_of_local_dataset_size.append(len(train_images))
        list_of_local_accuracy.append(train_result.history["accuracy"][-1])
        list_of_local_loss.append(train_result.history["loss"][-1])

    # Aggregation: average layer by layer, weighting each client by its sample
    # count. With equally sized shards this equals the plain mean. Stacking per
    # layer avoids building one ragged array out of differently shaped tensors,
    # which recent NumPy releases reject.
    shard_weights = np.asarray(list_of_local_dataset_size, dtype="float32")
    global_parameter = [
        np.average(np.stack(layer_copies), axis=0, weights=shard_weights)
        for layer_copies in zip(*list_of_local_parameter)
    ]
    parameter_saver.save_aggreated_global_parameter(global_parameter)

    # Mean of the last-epoch training metrics reported by the clients.
    current_mean_accuracy = np.mean(np.array(list_of_local_accuracy, dtype="float32"))
    current_mean_loss = np.mean(np.array(list_of_local_loss, dtype="float32"))
    print(f"  evaluation mean : accuracy - {current_mean_accuracy}, loss - {current_mean_loss}")   

    list_of_local_parameter.clear()
    list_of_local_dataset_size.clear()
    list_of_local_accuracy.clear()
    list_of_local_loss.clear()

print("\n\n▶▶▶ Round is over.")

total client : 100 , selected client : 10
-- prameter shape --
(5, 5, 1, 32)
(32,)
(5, 5, 32, 64)
(64,)
(3136, 512)
(512,)
(512, 10)
(10,)

▶ Round 94 ◀


  return array(a, dtype, copy=False, order=order)
  return array(a, dtype, copy=False, order=order, subok=True)


  evaluation mean : accuracy - 0.9923332929611206, loss - 0.027462929487228394

▶ Round 95 ◀
  evaluation mean : accuracy - 0.9979999661445618, loss - 0.00956199411302805

▶ Round 96 ◀
  evaluation mean : accuracy - 0.9985000491142273, loss - 0.004739153664559126

▶ Round 97 ◀
  evaluation mean : accuracy - 0.9985000491142273, loss - 0.00439043901860714

▶ Round 98 ◀
  evaluation mean : accuracy - 0.999666690826416, loss - 0.001970861107110977

▶ Round 99 ◀
  evaluation mean : accuracy - 0.9991666674613953, loss - 0.0022725150920450687

▶ Round 100 ◀
  evaluation mean : accuracy - 0.9983333349227905, loss - 0.007015646900981665


▶▶▶ Round is over.
