In [2]:
import tensorflow as tf
import numpy as np
import pandas as pd
import math

In [3]:
# Number of target classes; also the length of every one-hot label vector.
num_classes = 10

# Fetch the MNIST train/test splits through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Flatten each image into one row vector and divide by 255
# (presumably rescaling 8-bit pixel intensities to [0, 1]).
x_train = x_train.reshape(x_train.shape[0], -1) / 255
x_test = x_test.reshape(x_test.shape[0], -1) / 255

# One-hot encode the integer class labels.
y_train = tf.keras.utils.to_categorical(y_train, num_classes)
y_test = tf.keras.utils.to_categorical(y_test, num_classes)

x_train.shape

(60000, 784)

In [4]:
# Architecture hyperparameters read by the User network definition.
K = 4  # first convolutional layer output depth
L = 8  # second convolutional layer output depth
M = 12  # third convolutional layer output depth
N = 200  # fully connected layer width
stride1 = 1  # stride of the first convolution: with 'SAME' padding the 28x28 input stays 28x28
stride2 = 2  # stride of the second and third convolutions: 28x28 -> 14x14 -> 7x7


In [5]:
class User:
    """One participant that owns a private copy of the convolutional network.

    Every instance builds its own placeholders, trainable variables and
    forward graph. Previously all of these were class attributes, so they
    were created once and shared by every ``User()``: all users were the same
    network and every ``user_a.W - user_b.W`` coupling term was identically
    zero.

    Instance attributes:
        x_in, y_true: placeholders for flattened images ``[None, 784]`` and
            one-hot labels ``[None, 10]``.
        W1..W5, b1..b5: trainable weights and biases of the five layers.
        lambda_W1..lambda_W5, lambda_b1..lambda_b5: zero tensors shaped like
            the matching parameter, used as multipliers by the coupling loss.
            They are plain tensors, not variables, so nothing can be assigned
            to them.
        ylogits, y_pred: network output before and after the softmax.

    Class attributes:
        x, y: references to the full training images and labels.
    """

    # The training set is shared by reference; each user is given its own
    # slice of it by the code that creates the users.
    x = x_train
    y = y_train

    def __init__(self):
        """Create this user's placeholders, parameters and forward graph."""
        self.x_in = tf.placeholder(tf.float32, shape=[None, 784])
        self.y_true = tf.placeholder(tf.float32, [None, 10])
        self.x_image = tf.reshape(self.x_in, [-1, 28, 28, 1])

        # Layer 1: 5x5 convolution, 1 -> K channels.
        self.W1 = self._weights([5, 5, 1, K])
        self.lambda_W1 = tf.zeros([5, 5, 1, K])
        self.b1 = self._biases(K)
        self.lambda_b1 = tf.zeros([K])

        # Layer 2: 5x5 convolution, K -> L channels.
        self.W2 = self._weights([5, 5, K, L])
        self.lambda_W2 = tf.zeros([5, 5, K, L])
        self.b2 = self._biases(L)
        self.lambda_b2 = tf.zeros([L])

        # Layer 3: 4x4 convolution, L -> M channels.
        self.W3 = self._weights([4, 4, L, M])
        self.lambda_W3 = tf.zeros([4, 4, L, M])
        self.b3 = self._biases(M)
        self.lambda_b3 = tf.zeros([M])

        # Layer 4: fully connected, 7*7*M -> N.
        self.W4 = self._weights([7 * 7 * M, N])
        self.lambda_W4 = tf.zeros([7 * 7 * M, N])
        self.b4 = self._biases(N)
        self.lambda_b4 = tf.zeros([N])

        # Layer 5: fully connected read-out, N -> 10 classes.
        self.W5 = self._weights([N, 10])
        self.lambda_W5 = tf.zeros([N, 10])
        self.b5 = self._biases(10)
        self.lambda_b5 = tf.zeros([10])

        # The model
        self.y1 = tf.nn.relu(tf.nn.conv2d(self.x_image, self.W1, strides=[1, stride1, stride1, 1], padding='SAME') + self.b1)
        self.y2 = tf.nn.relu(tf.nn.conv2d(self.y1, self.W2, strides=[1, stride2, stride2, 1], padding='SAME') + self.b2)
        self.y3 = tf.nn.relu(tf.nn.conv2d(self.y2, self.W3, strides=[1, stride2, stride2, 1], padding='SAME') + self.b3)
        # Reshape the output of the third convolution for the fully connected layer.
        self.yy = tf.reshape(self.y3, shape=[-1, 7 * 7 * M])
        self.y4 = tf.nn.relu(tf.matmul(self.yy, self.W4) + self.b4)
        self.ylogits = tf.matmul(self.y4, self.W5) + self.b5
        self.y_pred = tf.nn.softmax(self.ylogits)

    @staticmethod
    def _weights(shape):
        """Return a new weight variable drawn from a truncated normal (stddev 0.1)."""
        return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

    @staticmethod
    def _biases(size):
        """Return a new bias variable of length ``size`` initialised to 0.1."""
        return tf.Variable(tf.ones([size]) / 10)

    

In [8]:
numberOfUsers = 10
samples_per_user = 1000  # number of training examples in each user's shard

users = []      # the User objects
x_data = []     # per-user training images
y_data = []     # per-user training labels
entropies = []  # per-user cross-entropy tensors

# Create `numberOfUsers` users, each with its own loss tensor and data shard.
for nums in range(numberOfUsers):
    user = User()
    # Mean softmax cross-entropy over the fed batch, scaled by 100.
    entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(logits=user.ylogits, labels=user.y_true)
    ) * 100

    # Disjoint shards. The previous slice [nums:nums + 1000] shifted the
    # window by a single sample per user, so the ten "private" data sets were
    # about 99% identical.
    start = nums * samples_per_user
    stop = start + samples_per_user
    xData = user.x[start:stop]
    yData = user.y[start:stop]

    users.append(user)
    entropies.append(entropy)
    x_data.append(xData)
    y_data.append(yData)

In [6]:
# Penalty coefficient (10.0): scales the pairwise parameter-difference terms
# of the regulariser and the step taken by the multiplier update helpers.
rho = tf.constant(1e1)

def sum_lambdas_w(user_a, user_b):
    """Return the multiplier term for the weight tensors of two users.

    For each layer ``k`` in 1..5 this accumulates the inner product
    ``sum(user_a.lambda_Wk * (user_a.Wk - user_b.Wk))`` over every entry of
    the tensor.

    The previous version passed the elementwise product through
    ``diag_part`` before summing, which keeps only the diagonal entries of
    the innermost 2-D slices and silently discards the rest of the product.

    Args:
        user_a: object exposing ``W1..W5`` and ``lambda_W1..lambda_W5``.
        user_b: object exposing ``W1..W5``.

    Returns:
        A scalar tensor: the sum of the five per-layer inner products.
    """
    total = None
    for layer in ("W1", "W2", "W3", "W4", "W5"):
        multiplier = getattr(user_a, "lambda_" + layer)
        gap = getattr(user_a, layer) - getattr(user_b, layer)
        term = tf.reduce_sum(tf.math.multiply(multiplier, gap))
        total = term if total is None else total + term
    return total


def sum_lambdas_b(user_a, user_b):
    """Return the multiplier term for the bias vectors of two users.

    For each layer ``k`` in 1..5 the contribution is the contraction
    ``tensordot(transpose(user_a.lambda_bk), user_a.bk - user_b.bk, 1)``;
    the five contributions are added in layer order.

    Args:
        user_a: object exposing ``b1..b5`` and ``lambda_b1..lambda_b5``.
        user_b: object exposing ``b1..b5``.

    Returns:
        The sum of the five per-layer contractions.
    """
    total = None
    for layer in ("b1", "b2", "b3", "b4", "b5"):
        multiplier = tf.transpose(getattr(user_a, "lambda_" + layer))
        gap = getattr(user_a, layer) - getattr(user_b, layer)
        term = tf.tensordot(multiplier, gap, 1)
        total = term if total is None else total + term
    return total

def lambdas_w_update(user_a, user_b):
    """Compute stepped weight multipliers for a pair of users.

    For each layer ``k`` in 1..5 the result is
    ``user_a.lambda_Wk + rho * (user_a.Wk - user_b.Wk)``. Nothing is
    assigned; the new values are only returned.

    Args:
        user_a: object exposing ``W1..W5`` and ``lambda_W1..lambda_W5``.
        user_b: object exposing ``W1..W5``.

    Returns:
        A 5-tuple with the stepped multiplier of layers 1 to 5, in order.
    """
    def stepped(layer):
        # Current multiplier plus rho times the parameter gap.
        current = getattr(user_a, "lambda_" + layer)
        gap = getattr(user_a, layer) - getattr(user_b, layer)
        return current + rho * gap

    return tuple(stepped(layer) for layer in ("W1", "W2", "W3", "W4", "W5"))

def lambdas_b_update(user_a, user_b):
    """Compute stepped bias multipliers for a pair of users.

    For each layer ``k`` in 1..5 the result is
    ``user_a.lambda_bk + rho * (user_a.bk - user_b.bk)``. Nothing is
    assigned; the new values are only returned.

    Args:
        user_a: object exposing ``b1..b5`` and ``lambda_b1..lambda_b5``.
        user_b: object exposing ``b1..b5``.

    Returns:
        A 5-tuple with the stepped multiplier of layers 1 to 5, in order.
    """
    def stepped(layer):
        # Current multiplier plus rho times the parameter gap.
        current = getattr(user_a, "lambda_" + layer)
        gap = getattr(user_a, layer) - getattr(user_b, layer)
        return current + rho * gap

    return tuple(stepped(layer) for layer in ("b1", "b2", "b3", "b4", "b5"))
    
# Build the joint objective: data term + quadratic coupling between
# consecutive users (i, i + 1) + multiplier terms.
weight_names = ("W1", "W2", "W3", "W4", "W5")
bias_names = ("b1", "b2", "b3", "b4", "b5")

regW = 0
regb = 0
lambda_w = 0
lambda_b = 0

# Iterate over neighbouring pairs; the last user has no right-hand neighbour.
for i in range(numberOfUsers - 1):
    user_a = users[i]
    user_b = users[i + 1]
    # tf.nn.l2_loss(t) already returns sum(t ** 2) / 2, so multiplying by rho
    # gives the (rho / 2) * ||a - b||^2 penalty. The previous
    # `rho/2 * l2_loss` halved twice, making the penalty rho / 4 and
    # inconsistent with the rho-sized step of the multiplier updates.
    for name in weight_names:
        regW += rho * tf.nn.l2_loss(getattr(user_a, name) - getattr(user_b, name))
    for name in bias_names:
        regb += rho * tf.nn.l2_loss(getattr(user_a, name) - getattr(user_b, name))
    lambda_w += sum_lambdas_w(user_a, user_b)
    lambda_b += sum_lambdas_b(user_a, user_b)

reg = regW + regb
lambdas = lambda_w + lambda_b

# `cross_entropy` was referenced here but never defined anywhere in the
# notebook (NameError on a fresh kernel). It is taken to be the total of the
# per-user cross-entropies -- NOTE(review): confirm this is the intended
# data term.
cross_entropy = tf.add_n(entropies)
loss = cross_entropy + reg + lambdas

In [7]:
# One Adam optimiser instance (learning rate 0.005); every per-user training
# op is created from this same object.
optimizer = tf.train.AdamOptimizer(learning_rate=0.005)

In [8]:
# Per-user training ops: entry j minimises the shared `loss`, but only with
# respect to user j's own weights and biases.
train_data = []

for user in users:
    # Ordered W1..W5 followed by b1..b5.
    own_variables = [
        getattr(user, prefix + str(layer))
        for prefix in ("W", "b")
        for layer in range(1, 6)
    ]
    train_data.append(optimizer.minimize(loss, var_list=own_variables))

In [9]:
def mini_batches(X, Y, mb_size = 100):
    """Draw one random mini-batch of paired rows, without replacement.

    A permutation of the row indices of ``X`` is drawn from NumPy's global
    random state and its first ``mb_size`` entries select the rows. Only the
    selected rows are copied; the previous version permuted and copied the
    entire ``X`` and ``Y`` on every call just to keep the first ``mb_size``
    rows. The random draw is unchanged, so a seeded run yields the same
    batch as before.

    Args:
        X: array whose first axis indexes the samples.
        Y: array with at least as many rows as ``X``; row ``i`` is paired
            with row ``i`` of ``X``.
        mb_size: number of rows to return. If it exceeds the number of rows
            in ``X``, all rows are returned in shuffled order.

    Returns:
        A tuple ``(X_batch, Y_batch)`` holding the same randomly chosen rows
        of ``X`` and ``Y``.
    """
    m = X.shape[0]
    chosen = np.random.permutation(m)[:mb_size]
    return X[chosen], Y[chosen]

In [10]:
# Op that initialises every global variable present in the graph at this
# point, so it has to be created after the variables it should cover.
init = tf.global_variables_initializer()

In [45]:

num_steps = 100   # training iterations
batch_size = 100  # samples drawn per user and iteration
n_users = len(users)

# Accuracy ops are built once, before the session starts. They used to be
# re-created on every iteration, which added new nodes to the graph on each
# step and made every later step slower.
accuracy_ops = []
for user in users:
    matches = tf.equal(tf.argmax(user.y_pred, 1), tf.argmax(user.y_true, 1))
    accuracy_ops.append(tf.reduce_mean(tf.cast(matches, tf.float32)))

# Multiplier-update expressions for each neighbouring pair (j, j + 1), also
# built once instead of once per iteration.
# NOTE(review): as in the original code these tensors are never evaluated or
# written back, so the lambda_* multipliers keep their initial value for the
# whole run. Applying the update needs lambda_* to be assignable variables.
lambda_w_update = [lambdas_w_update(users[j], users[j + 1]) for j in range(n_users - 1)]
lambda_b_update = [lambdas_b_update(users[j], users[j + 1]) for j in range(n_users - 1)]

# Users are trained in two sweeps: even positions first, then odd positions.
update_order = list(range(0, n_users, 2)) + list(range(1, n_users, 2))

avgAcc = []  # mean test accuracy over all users, one entry per iteration

with tf.Session() as sess:
    sess.run(init)

    for step in range(num_steps):
        # Draw every user's batch first (in user order), then train.
        batches = [mini_batches(x_data[j], y_data[j], batch_size) for j in range(n_users)]

        for j in update_order:
            batch_x, batch_y = batches[j]
            sess.run(train_data[j], feed_dict={users[j].x_in: batch_x, users[j].y_true: batch_y})

        # Evaluate each user's network on the full test set.
        test_accs = [
            sess.run(accuracy_ops[j], feed_dict={users[j].x_in: x_test, users[j].y_true: y_test})
            for j in range(n_users)
        ]
        avgAcc.append(sum(test_accs) / n_users)

        # The loss used to be rebuilt here on every step. The training ops in
        # `train_data` are bound to the loss they were created with, so the
        # rebuilt tensor was never used (and it referenced the undefined name
        # `cross_entropy`); the rebuild has been removed.
        