# Initial Federated Learning models

This script creates a deep learning model for the detection of cancer given a number of features, this is then compared to a federated learning model. In the interest of fairness, the initial model is trained on the same amount of data as a single node in the federated learning model.

Importing Section

In [23]:
#Imports
import os    
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import csv
from sklearn import preprocessing
from sklearn import metrics
from keras.datasets import mnist

Activation and loss functions

In [24]:
#Activation function
def sigmoid(v):
    return 1/(1+np.exp(-v))
def sigmoid_der(v):
    return sigmoid(v)*(1-sigmoid(v))

#Loss Function
def crossEntrop(o,y):
    return (-y*(np.log(o)) - (1-y)* np.log(1-o))
def crossEntrDeriv(o,y):
    return -(y/o - (1-y)/(1-o))

Reading data and converting to two training and testing nodes for experimentation. We use the MinMaxScaler preprocessing to prevent overflow later

In [25]:
df = pd.read_csv("Mixcancer.csv")
df = df.values

df[:,1:] = preprocessing.MinMaxScaler().fit_transform(df[:,1:])

zeros = df[df[:,0] == 0]
ones = df[df[:,0] == 1]

no0sper = int(zeros.shape[0]/2)
no1sper = int(ones.shape[0]/2)

node1 = np.concatenate([zeros[:int(0.4*zeros.shape[0]),:], ones[:int(0.4*ones.shape[0]),:]])
node2 = np.concatenate([zeros[int(0.4*zeros.shape[0]):int(0.8*zeros.shape[0]),:], ones[int(0.4*ones.shape[0]):int(0.8*ones.shape[0]),:]])
test1 = np.concatenate([zeros[int(0.8*zeros.shape[0]):int(0.9*zeros.shape[0]),:], ones[int(0.8*ones.shape[0]):int(0.9*ones.shape[0]),:]])
test2 = np.concatenate([zeros[int(0.9*zeros.shape[0]):,:], ones[int(0.9*ones.shape[0]):,:]])
fullTrain = np.concatenate([zeros[:int(0.8*zeros.shape[0]),:], ones[:int(0.8*ones.shape[0]),:]])
fullTest = np.concatenate([zeros[int(0.8*zeros.shape[0]):,:], ones[int(0.8*ones.shape[0]):,:]])


Splitting the nodes training and test data into features and target feature. And reshaping acitvation feature for use in DNN

In [26]:
x_train1 = node1[:,1:]
y_train1 = node1[:,0]
x_train2 = node2[:,1:]
y_train2 = node2[:,0]

x_test1 = test1[:,1:]
y_test1 = test1[:,0]
x_test2 = test2[:,1:]
y_test2 = test2[:,0]

x_train = fullTrain[:,1:]
y_train = fullTrain[:,0]
x_test = fullTest[:,1:]
y_test = fullTest[:,0]

def reshape(data):
    data = data.reshape(len(data),1)
    return data

y_train = reshape(y_train)
y_test = reshape(y_test)
y_train1 = reshape(y_train1)
y_test1 = reshape(y_test1)
y_train2 = reshape(y_train2)
y_test2 = reshape(y_test2)

In [42]:
#Altering the DNN algorithm to train two individual models
def DNN(x_train,y_train):
    np.random.seed(42)
    neuronNo = 5
    w1 = np.random.uniform(-1,1,[len(x_train[0]),neuronNo])
    w2 = np.random.uniform(-1,1,[neuronNo,1])
    b1 = np.zeros([1,neuronNo])
    b2 = np.zeros([1,1])
    l = 0.01
    epochs = 1
    miniBatch = 400

    train_L = []
    test_L = []
    train_Acc = []
    test_Acc = []

    for epoch in range(epochs):
        for i in range(0, len(x_train[0]), miniBatch):

            x_trainSample = x_train[i:i+miniBatch,:]
            y_trainSample = y_train[i:i+miniBatch,:]

            #feedforward
            in1 = x_trainSample@w1 + b1
            o1 = sigmoid(in1)
            in2 = o1@w2 + b2
            o2 = sigmoid(in2)

            #backpropagation output layer
            dE_dO2 = crossEntrDeriv(o2, y_trainSample)
            dO2_dIn2 = sigmoid_der(in2)
            dIn2_dW2 = o1
            dIn2_B2 = 1
            dE_dW2 = (1/miniBatch)*dIn2_dW2.T@(dE_dO2*dO2_dIn2)
            dE_dB2 = (1/miniBatch)*np.ones([1,len(x_trainSample)])@(dE_dO2*dO2_dIn2)

            #backpropagation hidden layer
            dIn2_dO1 = w2
            dO1_dIn1 = sigmoid_der(in1)
            dIn1_dW1 = x_trainSample
            dE_dW1 = (1/miniBatch)*dIn1_dW1.T@((dE_dO2*dO2_dIn2@dIn2_dO1.T)*dO1_dIn1)
            dE_dB1 = (1/miniBatch)*np.ones([len(x_trainSample)])@((dE_dO2*dO2_dIn2@dIn2_dO1.T)*dO1_dIn1)

            #updating parameters
            b2-=l*dE_dB2
            w2-=l*dE_dW2
            b1-=l*dE_dB1
            w1-=l*dE_dW1

        #Error
        error = crossEntrop(o2 ,y_trainSample).mean()
        train_L.append(error)

        #Accuracy
        pred_train = np.where(o2 > 0.5, 1,0)
        train_Acc.append(metrics.accuracy_score(y_trainSample,pred_train))

    #print("Training: Loss: {0}. Accuracy: {1}. Error: {2}".format(train_L[-1],train_Acc[-1], 1 - train_Acc[-1]))
    print("Training: Loss: {0}. Accuracy: {1}".format(train_L[-1],train_Acc[-1]))
    
    return w1,w2,b1,b2

Training The Models

In [43]:
print("Training on Client One")
nodeOneW1,nodeOneW2,nodeOneB1,nodeOneB2 = DNN(x_train1,y_train1)
print("Training on Client Two")
nodeTwoW1,nodeTwoW2,nodeTwoB1,nodeTwoB2 = DNN(x_train2,y_train2)
print("Training on all data")
totalW1,totalW2,totalB1,totalB2 = DNN(x_train,y_train)

Training on Client One
Training: Loss: 0.7145093617748137. Accuracy: 0.44
Training on Client Two
Training: Loss: 0.6985157381751381. Accuracy: 0.505
Training on all data
Training: Loss: 0.7065125499749757. Accuracy: 0.4725


We now have the weights and biases of two models trained individually, lets perform aggregation and see how our new model performs

In [44]:
def simpleFedAvg():
    globalW1 = (nodeOneW1 + nodeOneW1)/2
    globalW2 = (nodeOneW2 + nodeOneW2)/2
    globalB1 = (nodeOneB1 + nodeOneB1)/2
    globalB2 = (nodeOneB2 + nodeOneB2)/2
    return globalW1, globalW2, globalB1, globalB2
    
globalW1, globalW2, globalB1, globalB2 = simpleFedAvg()

Function to run against test data

In [45]:
#running against test data
def testingModel(w1,w2,b1,b2,x_test,y_test): 
    pred_test = np.where(sigmoid(sigmoid(x_test@w1+b1)@w2+b2) > 0.5,1,0)
    return metrics.accuracy_score(y_test,pred_test)

Testing each model. 

In [46]:
nodeOneTestAcc = testingModel(nodeOneW1, nodeOneW2, nodeOneB1, nodeOneB2, x_test1, y_test1)
nodeTwoTestAcc = testingModel(nodeTwoW1, nodeTwoW2, nodeTwoB1, nodeTwoB2, x_test2, y_test2)
fedNodeOneTestAcc = testingModel(globalW1, globalW2, globalB1, globalB2, x_test1, y_test1)
fedNodeTwoTestAcc = testingModel(globalW1, globalW2, globalB1, globalB2, x_test2, y_test2)
nonFedTotalAcc = testingModel(totalW1, totalW2, totalB1, totalB2, x_test, y_test)

avIndividualAcc = (nodeOneTestAcc + nodeTwoTestAcc)/2
avFedAcc = (fedNodeOneTestAcc + fedNodeOneTestAcc)/2

In [47]:
print("Average Non-Federated accuracy across both nodes (locally trained models): {0}\nAverage Federated accuracy across both nodes: {1}\nNon-Federated accuracy across both nodes(Globally trained models): {2}".format(avIndividualAcc, avFedAcc, nonFedTotalAcc))

Average Non-Federated accuracy across both nodes (locally trained models): 0.429171668667467
Average Federated accuracy across both nodes: 0.3877551020408163
Non-Federated accuracy across both nodes(Globally trained models): 0.43


Not getting an improvement using federated learning. Let's Run this again with non-uniform data

In [48]:
from sklearn.model_selection import train_test_split
df = pd.read_csv("Mixcancer.csv")
df = df.values
x = df[:,1:]
y = df[:,0]
np.random.seed(42) 
x = preprocessing.MinMaxScaler().fit_transform(x)
y=y.reshape(len(y),1)

x_train, x_test, y_train, y_test = train_test_split(x, y,test_size=0.5, random_state=0,shuffle = True)
x_train1 = x_train[:int(0.5*x_train.shape[0]),:]
x_train2 = x_train[int(0.5*x_train.shape[0]):,:]
x_test1 = x_test[:int(0.5*x_test.shape[0]),:]
x_test2 = x_test[int(0.5*x_test.shape[0]):,:]


y_train = reshape(y_train)
y_test = reshape(y_test)
y_train1 = reshape(y_train[:int(0.5*y_train.shape[0]),:])
y_test1 = reshape(y_test[:int(0.5*y_test.shape[0]),:])
y_train2 = reshape(y_train[int(0.5*y_train.shape[0]):,:])
y_test2 = reshape(y_test[int(0.5*y_test.shape[0]):,:])

In [50]:
print("Training on Client One")
nodeOneW1,nodeOneW2,nodeOneB1,nodeOneB2 = DNN(x_train1,y_train1)
print("Training on Client Two")
nodeTwoW1,nodeTwoW2,nodeTwoB1,nodeTwoB2 = DNN(x_train2,y_train2)
print("Training on all data")
totalW1,totalW2,totalB1,totalB2 = DNN(x_train,y_train)

globalW1, globalW2, globalB1, globalB2 = simpleFedAvg()

nodeOneTestAcc = testingModel(nodeOneW1, nodeOneW2, nodeOneB1, nodeOneB2, x_test1, y_test1)
nodeTwoTestAcc = testingModel(nodeTwoW1, nodeTwoW2, nodeTwoB1, nodeTwoB2, x_test2, y_test2)
fedNodeOneTestAcc = testingModel(globalW1, globalW2, globalB1, globalB2, x_test1, y_test1)
fedNodeTwoTestAcc = testingModel(globalW1, globalW2, globalB1, globalB2, x_test2, y_test2)
nonFedTotalAcc = testingModel(totalW1, totalW2, totalB1, totalB2, x_test, y_test)

avIndividualAcc = (nodeOneTestAcc + nodeTwoTestAcc)/2
avFedAcc = (fedNodeOneTestAcc + fedNodeOneTestAcc)/2

print("Average Non-Federated accuracy across both nodes (locally trained models): {0}\nAverage Federated accuracy across both nodes: {1}\nNon-Federated accuracy across both nodes(Globally trained models): {2}".format(avIndividualAcc, avFedAcc, nonFedTotalAcc))

Training on Client One
Training: Loss: 0.7038885874603892. Accuracy: 0.448
Training on Client Two
Training: Loss: 0.6995044898832302. Accuracy: 0.48
Training on all data
Training: Loss: 0.7016965386718098. Accuracy: 0.464
Average Non-Federated accuracy across both nodes (locally trained models): 0.46399999999999997
Average Federated accuracy across both nodes: 0.488
Non-Federated accuracy across both nodes(Globally trained models): 0.464


Results still very close to maximum limit (if we view the total accuracy trained across the whole data-set as the maximum potential accuracy) so lets change up our approach so that we have an improvement when using federated learning, in order to achieve more interesting data. Getting greater accuracy than a global model is also unrealistic which is another reason to move on.

To do this we are going to train a CNN model to evaluate the CIFAR-10 dataset

Import tensorflow functionality and download CIFAR-10 dataset

In [51]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer','dog', 'frog', 'horse', 'ship', 'truck']
(train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()
train_images, test_images = train_images / 255.0, test_images / 255.0

For experimentation, let's create a function to split our data into N clients

In [52]:
def splittingData(x_train,y_train,x_test,y_test,noClients):
    if (len(x_train) % noClients != 0):
        print("Data does not divide Equally")
        return 0,0,0,0
    trainLen = len(x_train)
    clientSize = trainLen/noClients
    
    x_train_clients = []
    y_train_clients = []
    x_test_clients = []
    y_test_clients = []
    
    x_train_splits = np.split(x_train,noClients)
    y_train_splits = np.split(y_train,noClients)
    x_test_splits = np.split(x_test,noClients)
    y_test_splits = np.split(y_test,noClients)
    
    for i in range(noClients):
        x_train_clients.append(x_train_splits[i])
        y_train_clients.append(y_train_splits[i])
        x_test_clients.append(x_test_splits[i])
        y_test_clients.append(y_test_splits[i])
    
    return x_train_clients, y_train_clients, x_test_clients, y_test_clients

In [53]:
numClients = 8
x_train_clients, y_train_clients, x_test_clients, y_test_clients = splittingData(train_images, train_labels, test_images, test_labels, numClients)

Creating a CNN model

In [54]:
def createModel():
    model = models.Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.Flatten())
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(10))
    return model

In [55]:
def trainModels(numClients):
    models = []
    for i in range(numClients):
        print("Creating Model {0}".format(i))
        model = createModel()
        model.compile(optimizer='adam', loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),metrics=['accuracy'])
        model.fit(x_train_clients[i], y_train_clients[i], epochs=1, verbose=0)
        models.append(model)
    print("Creating Total Model")
    model = createModel()
    model.compile(optimizer='adam', loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),metrics=['accuracy'])
    model.fit(train_images, train_labels, epochs=1, verbose=0)
    
    return models, model

In [56]:
models_, totalModel= trainModels(numClients)

Creating Model 0
Creating Model 1
Creating Model 2
Creating Model 3
Creating Model 4
Creating Model 5
Creating Model 6
Creating Model 7
Creating Total Model


In [66]:
def testModels(numClients,models,totalMod):
    localAccs = 0
    globalAccs = 0
    globalOnLocalAccs = 0
    
    print("Testing Local Models on Local Data")
    for i in range(numClients):
        loss, acc = models[i].evaluate(x_test_clients[i],  y_test_clients[i], verbose=0)
        localAccs += acc/numClients
    
    print("Testing Local Models On All Data")
    for i in range(numClients):
        loss,acc = models[i].evaluate(test_images,  test_labels, verbose=0)
        globalAccs += acc/numClients
        
    print("Testing Global model On Local Data")
    for i in range(numClients):
        loss,acc = totalMod.evaluate(x_test_clients[i],  y_test_clients[i], verbose=0)
        globalOnLocalAccs += acc/numClients
    
    print("Testing Global model On Global Data")
    totalLoss, totalAccTotal = totalModel.evaluate(test_images,  test_labels, verbose=0)
        
    return localAccs, globalAccs, globalOnLocalAccs, totalAccTotal

In [67]:
avLocalAcc, avGlobalAcc, avGlobalOnLocalAcc,totalAccTotal = testModels(numClients,models_,totalModel)
print("Average accuracy using local models on local data  = {0}\nAverage accuracy using local models on all data = {1}\nAverage accuracy using all data to train a model on all data = {2}\nAverage accuracy using all data to train a model on local data = {3}\n".format(avLocalAcc, avGlobalAcc, totalAccTotal, avGlobalOnLocalAcc))

Testing On Local Models on Local Data
Testing Local Models On All Data
Testing Global model On Local Data
Testing Global model On Global Data
Average accuracy using local models on local data  = 0.3911999985575676
Average accuracy using local models on all data = 0.3934124931693077
Average accuracy using all data to train a model on all data = 0.6021000146865845
Average accuracy using all data to train a model on local data = 0.6020999997854233

