# FedAvg algorithm (Homogeneous Data)

The following script contains the results of the FSGD algorithm for 3 servers and 60000 datapoints. The training data for each of these clients is common and homogeneous in classes.

In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
from tensorflow.keras.datasets import mnist

**Loading Data**

The models train on the MNIST dataset, which consists of grayscale images.

The images are normalized over the pixel intensity range (to values between 0 and 1) and reshaped to 28 x 28 x 1, i.e. a single channel, which is the input shape the CNN expects.

In [None]:
def FSGD(weights,config):
    """Performs Federated Average Stochastic Gradient Descent.

    Every client's layer arrays are scaled by that client's share of the
    total number of datapoints (nk / n) and summed layer by layer. The
    result is loaded into a fresh model rebuilt from `config`.

    Args:
        weights: A non-empty list of dictionaries, one per client, each
                 containing "weights" (a list of arrays, one per model
                 variable) and "datapoints" (nk).
        config: A config dictionary generated by the model's
                get_config() method.

    Returns:
        new_model: A Sequential model rebuilt from `config` that holds
                   the averaged weights. It is not compiled here.

    Raises:
        ValueError: If `weights` is empty, or if the total number of
                    datapoints is not positive.
    """
    # Fail early with a clear message instead of an IndexError further down.
    if not weights:
        raise ValueError("FSGD requires at least one set of client weights")

    total = float(sum(client["datapoints"] for client in weights))
    # A non-positive total would make the nk / n shares meaningless.
    if total <= 0:
        raise ValueError("total number of datapoints must be positive")

    # One zero accumulator per layer, shaped like the first client's layers.
    final_weights = [np.zeros(shape=layer.shape) for layer in weights[0]["weights"]]
    for client in weights:
        share = client["datapoints"] / total
        for j, layer in enumerate(client["weights"]):
            final_weights[j] = np.add(final_weights[j], share * layer)

    new_model = tf.keras.Sequential.from_config(config)
    new_model.set_weights(final_weights)
    return new_model

In [None]:
from sklearn.model_selection import KFold
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.optimizers import SGD

**The CNN Model**

The model is a 2D CNN followed by a 100-unit dense layer, an architecture commonly used for MNIST classification.

In [None]:
def define_model():
    """Build and compile the CNN that every client trains.

    The network is one 3x3 convolution with 32 filters, a 2x2 max-pool,
    a flatten step, a 100-unit ReLU dense layer and a 10-way softmax
    output. It is compiled with SGD (learning rate 0.01, momentum 0.9),
    categorical cross-entropy loss and the accuracy metric.

    Returns:
        The compiled Sequential model.
    """
    stack = [
        Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', input_shape=(28, 28, 1)),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(100, activation='relu', kernel_initializer='he_uniform'),
        Dense(10, activation='softmax'),
    ]
    model = Sequential(stack)
    model.compile(
        optimizer=SGD(learning_rate=0.01, momentum=0.9),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

In [None]:

# load train and test dataset
def load_dataset():
    """Load MNIST and shape it for the CNN.

    Images are reshaped to (N, 28, 28, 1) so that they carry an explicit
    single channel, and the labels are one-hot encoded.

    Returns:
        A tuple (trainX, trainY, testX, testY).
    """
    train_split, test_split = mnist.load_data()
    train_images, train_labels = train_split
    test_images, test_labels = test_split

    # Add the trailing channel dimension.
    train_images = train_images.reshape((len(train_images), 28, 28, 1))
    test_images = test_images.reshape((len(test_images), 28, 28, 1))

    return (
        train_images,
        to_categorical(train_labels),
        test_images,
        to_categorical(test_labels),
    )

In [None]:

# scale pixels
def prep_pixels(train, test):
    """Scale integer pixel arrays into the 0-1 range as float32.

    Args:
        train: Array of training images.
        test: Array of test images.

    Returns:
        A tuple (train_norm, test_norm) of float32 arrays. The inputs
        are not modified.
    """
    # Cast both arrays first, then rescale each copy.
    as_float = [images.astype('float32') for images in (train, test)]
    train_norm, test_norm = (images / 255.0 for images in as_float)
    return train_norm, test_norm

In [None]:

# evaluate a model using k-fold cross-validation
def evaluate_model(dataX, dataY,weights,n_folds=5):
    """Train one model per K-fold split, each starting from `weights`.

    For every split a fresh model is built with define_model(), loaded
    with `weights`, fitted for one epoch on the training part of the
    split and evaluated on the held-out part. The held-out accuracy is
    printed as a percentage.

    Args:
        dataX: Array of input samples, indexable by an index array.
        dataY: Array of targets aligned with `dataX`.
        weights: Starting weights, as accepted by model.set_weights().
        n_folds: Number of K-fold splits, i.e. models trained.

    Returns:
        A tuple (scores, histories, models): the held-out accuracy per
        split, the fit history per split, and one dictionary per split
        holding the trained "weights" and the number of training
        "datapoints".
    """
    scores = []
    histories = []
    models = []
    # Fixed seed: the same shuffled splits are produced on every call.
    splitter = KFold(n_folds, shuffle=True, random_state=1)
    for fit_idx, held_idx in splitter.split(dataX):
        client = define_model()
        client.set_weights(weights)

        fit_x, fit_y = dataX[fit_idx], dataY[fit_idx]
        held_x, held_y = dataX[held_idx], dataY[held_idx]

        run = client.fit(
            fit_x,
            fit_y,
            epochs=1,
            batch_size=32,
            validation_data=(held_x, held_y),
            verbose=0,
        )
        _, acc = client.evaluate(held_x, held_y, verbose=0)
        print('> %.3f' % (acc * 100.0))

        scores.append(acc)
        histories.append(run)
        models.append({"weights": client.get_weights(), "datapoints": len(fit_x)})
    return scores, histories, models

In [None]:
# Load the MNIST train/test splits (images reshaped to 28x28x1, labels one-hot encoded).
trainX, trainY, testX, testY = load_dataset()
# Convert the pixel data to float32 and scale it into the 0-1 range.
trainX, testX = prep_pixels(trainX, testX)

**FedAvg**

This function is the main federated averaging algorithm. It initializes the CNN model and runs the evaluate_model() method, which trains 5 models on 5 different splits of the data and returns the 5 models' weights along with the number of datapoints each one was trained on. This list of weights is then sent to the FSGD() method, which averages the weights to produce the updated model. This process is repeated "rounds" times.

In [None]:
def FedAvg(trainX, trainY, testX, testY,rounds = 10):
    """Run federated averaging for a number of communication rounds.

    Each round hands the current global weights to evaluate_model(),
    which trains the client models, and then replaces the global model
    with the datapoint-weighted average produced by FSGD().

    Args:
        trainX: Training inputs passed on to evaluate_model().
        trainY: Training targets passed on to evaluate_model().
        testX: Unused; kept so existing calls keep working.
        testY: Unused; kept so existing calls keep working.
        rounds: Number of averaging rounds to run.

    Returns:
        The global model after the last round. With rounds >= 1 this is
        the model returned by FSGD(); with rounds == 0 it is the
        untrained model from define_model().
    """
    model = define_model()
    for _ in range(rounds):
        # Only the per-client weights are needed; scores and histories are discarded.
        _, _, models = evaluate_model(trainX, trainY, model.get_weights())
        model = FSGD(models, model.get_config())
    return model


**Result**

The federated averaging algorithm is run for 10 rounds with 1 local epoch per round (no. of clients = 5).

We observe that with every 5 observations (one round) the average accuracy increases, showing that the quality of the starting weights of the final model keeps improving. After only 1 round the averaged model gives an accuracy of 49 percent, and it gives an accuracy of 98.9 percent after 10 rounds of updates.

In [None]:
# Run 10 federated rounds; evaluate_model() prints one held-out accuracy per fold in each round.
model = FedAvg(trainX, trainY, testX, testY,rounds = 10)


> 97.283
> 97.325
> 97.083
> 97.233
> 97.675
> 98.150
> 97.800
> 97.942
> 98.342
> 98.133
> 98.550
> 98.417
> 98.325
> 98.767
> 98.842
> 99.050
> 99.217
> 98.525
> 99.142
> 99.083
> 99.383
> 98.942
> 99.125
> 99.400
> 99.283
> 99.350
> 99.467
> 99.308
> 99.417
> 99.425
> 99.392
> 99.733
> 99.617
> 99.650
> 99.658
> 99.750
> 99.725
> 99.850
> 99.742
> 99.850
> 99.842
> 99.867
> 99.917
> 99.875
> 99.758
> 99.933
> 99.967
> 99.900
> 99.958
> 99.883


  output, from_logits = _get_logits(


ValueError: ignored

In [None]:
# FSGD() rebuilds the averaged model from its config and never compiles it,
# so compile it here (same optimizer, loss and metric as define_model())
# before calling evaluate().
opt = SGD(learning_rate=0.01, momentum=0.9)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Score the aggregated model on the MNIST test split; the result is [loss, accuracy].
model.evaluate(testX,testY)



[0.03756684064865112, 0.989300012588501]