In [1]:
import numpy as np

import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.ticker import LinearLocator
import csv

np.random.seed(0)

In [7]:
def get_client_data(n,d,alpha,beta,samples_per_client=30):
  """Generate a synthetic linear-regression dataset for ``n`` clients.

  All randomness comes from NumPy's global generator (``np.random.*``), so
  call ``np.random.seed`` beforehand for reproducible output.

  Parameters
  ----------
  n : int
      Number of clients.
  d : int
      Feature dimension.
  alpha : float
      Standard deviation of the per-client mean used to draw that client's
      weight vector (larger values make client models differ more).
  beta : float
      Standard deviation of the per-client offset ``B[i]`` around which the
      client's feature means are drawn (larger values make client inputs
      differ more).
  samples_per_client : int, optional
      Number of samples generated for every client (default 30).

  Returns
  -------
  X_split : list of ndarray
      ``n`` arrays of shape ``(samples_per_client, d)`` holding the features.
  y_split : list of ndarray
      ``n`` arrays of shape ``(samples_per_client,)`` holding the noiseless
      targets ``X.dot(W)`` for the client's own weight vector ``W``.
  """
  dimension = d
  NUM_CLASS = 1
  NUM_USER = n

  samples_per_user = [samples_per_client] * NUM_USER

  #### define some prior ####
  # Per-client mean of the weight entries and per-client feature offset.
  mean_W = np.random.normal(0, alpha, NUM_USER)
  B = np.random.normal(0, beta, NUM_USER)

  # Features are drawn with identity covariance around a client-specific mean.
  cov_x = np.eye(dimension)

  # All feature means are drawn before any per-client data so that the order
  # of draws from the global random stream stays fixed.
  mean_x = np.zeros((NUM_USER, dimension))
  for i in range(NUM_USER):
    mean_x[i] = np.random.normal(B[i], 1, dimension)

  X_split = []
  y_split = []
  for i in range(NUM_USER):
    W = np.random.normal(mean_W[i], 1, (dimension, NUM_CLASS))
    xx = np.random.multivariate_normal(mean_x[i], cov_x, samples_per_user[i])

    X_split.append(xx)
    y_split.append(np.dot(xx, W).flatten())

  return X_split, y_split

In [8]:
# Synthetic data: 20 clients, 1000 features, alpha = beta = 0.1.
A_client, b_client = get_client_data(n=20, d=1000, alpha=0.1, beta=0.1)

In [9]:
# Build the per-client quadratic problems used by every algorithm below.
#
# Problem sizes are read off the generated data so they cannot drift from
# the arguments given to get_client_data.
n = len(A_client)          # number of clients
s, d = A_client[0].shape   # samples per client, feature dimension


A = np.zeros((s*n, d))     # all normalized client features, stacked
b = np.zeros(s*n)          # all normalized client targets, stacked

w_sol = np.zeros((n, d))   # per-client minimum-norm interpolating solution


J = []       # J[i] = A_i^T A_i  (Hessian of client i's least-squares loss)
e = []       # e[i] = A_i^T b_i  (linear term of client i's gradient)
c = []       # per-client zero vectors (re-created by the training cell)
V = []       # V[i] = projector onto the first s right-singular vectors of A_i

J_sum = np.zeros((d, d))
e_sum = np.zeros(d)

extra_dim = 0
A_mat = []

for i in range(n):

    # Normalize every row to unit Euclidean norm. The division creates a new
    # array, so the raw data in A_client is left untouched.
    A_i = A_client[i] / np.linalg.norm(A_client[i], axis=1, keepdims=True)

    # Scale the whole target vector of the client to unit norm.
    b_i = b_client[i][0:s]
    b_i = b_i / np.linalg.norm(b_i)

    A[i*s:(i+1)*s] = A_i
    b[i*s:(i+1)*s] = b_i

    A_mat.append(A_i)

    # Only the leading right-singular vectors are needed, so the reduced SVD
    # is enough and avoids building a d x d matrix.
    vh = np.linalg.svd(A_i, full_matrices=False)[2]
    v_t = vh[0:s]
    V_t = v_t.T.dot(v_t)
    V.append(V_t)

    H_i = A_i.dot(A_i.T)
    J_i = A_i.T.dot(A_i)
    e_i = A_i.T.dot(b_i)

    J_sum += J_i
    e_sum += e_i

    J.append(J_i)
    e.append(e_i)
    c.append(np.zeros((d,)))

    # Minimum-norm least-squares solution A_i^T (A_i A_i^T)^+ b_i.
    w_sol[i] = A_i.T.dot(np.linalg.pinv(H_i).dot(b_i))


# Averages over clients of the projectors and of the local solutions.
V_sum = np.zeros((d, d))
for V_t in V:
    V_sum += V_t
V_sum = V_sum/n

w_sum = w_sol.sum(axis=0)/n



In [10]:
def do_local_gd(i,tau,w,eta):
  """Run ``tau`` plain gradient-descent steps on client ``i``'s objective.

  The gradient at a point ``x`` is ``J[i].dot(x) - e[i]``, using the
  module-level lists ``J`` and ``e``. The input ``w`` is not modified.

  Parameters: ``i`` client index, ``tau`` number of local steps, ``w``
  starting point, ``eta`` local step size.

  Returns ``w - w_local``, i.e. the starting point minus the point reached
  after the local steps.
  """
  local_w = np.array(w, copy=True)

  for _ in range(tau):
    local_w = local_w - eta*(np.dot(J[i], local_w) - e[i])

  return w - local_w

In [11]:
def scaffold(i,tau,w,eta,c,c_avg):
  """Run ``tau`` control-variate-corrected local steps on client ``i``.

  Each step uses the local gradient ``J[i].dot(x) - e[i]`` (module-level
  ``J`` and ``e``) shifted by ``- c[i] + c_avg``. After the local steps the
  client's control variate ``c[i]`` is overwritten in place with
  ``c[i] - c_avg + (w - w_local)/(eta*tau)``.

  Parameters: ``i`` client index, ``tau`` number of local steps, ``w``
  starting point (not modified), ``eta`` local step size, ``c`` indexable
  collection of per-client control variates (entry ``i`` is replaced),
  ``c_avg`` the control variate subtracted/added as the global correction.

  Returns ``w - w_local``.
  """
  local_w = np.array(w, copy=True)

  for _ in range(tau):
    corrected = (np.dot(J[i], local_w) - e[i]) - c[i] + c_avg
    local_w = local_w - eta*corrected

  displacement = w - local_w

  # Refresh this client's control variate from the net local movement.
  c[i] = c[i] - c_avg + displacement/(eta*tau)

  return displacement

In [12]:
def fedprox(i,tau,w,eta,mu):
  """Run ``tau`` proximal local steps on client ``i``'s objective.

  Each step uses the local gradient ``J[i].dot(x) - e[i]`` (module-level
  ``J`` and ``e``) plus the proximal pull ``mu*(x - w)`` towards the
  starting point. The input ``w`` is not modified.

  Parameters: ``i`` client index, ``tau`` number of local steps, ``w``
  starting point, ``eta`` local step size, ``mu`` proximal coefficient.

  Returns ``w - w_local``.
  """
  local_w = np.array(w, copy=True)

  for _ in range(tau):
    local_grad = np.dot(J[i], local_w) - e[i]
    prox_pull = mu*(local_w - w)
    local_w = local_w - eta*(local_grad + prox_pull)

  return w - local_w

In [13]:
# Short aliases for the algorithm names; the strings are also used as keys
# and labels for the recorded results.
s1, s2, s3, s4, s5 = 'scaffold', 'fedavg', 'fedexp', 'fedadagrad', 'fedprox'


In [14]:
# Empty per-algorithm containers (one independent dict per name).
# NOTE(review): none of these is read or written in the cells visible here.
(loss_alg, w_dist_alg, traj_alg_x,
 traj_alg_y, w_final, grad_div_alg) = ({} for _ in range(6))

In [17]:
# Experiment configuration shared by the training and plotting cells.
T = 200    # number of communication rounds
tau = 20   # local steps per client per round

# Common starting point for every algorithm: the zero vector of length d.
w_0 = np.zeros(d)

# dict_results[alg] maps result names to the series recorded for that algorithm.
dict_results = dict()

# Results are written as text to "<filename>.txt".
filename = "".join(("results_", "linear_regression"))
filename_txt = f"{filename}.txt"


In [None]:
#### Hyperparameter optimization code
#
# Runs each algorithm for T rounds from the same starting point w_0 and
# records, every 5th round, the global loss ||A w - b||^2 / n together with
# the global step size used in every round. After each algorithm finishes,
# all results gathered so far are (re)written to filename_txt.


algs = [s1,s2,s3,s4,s5]

# Global (server) and local (client) step sizes per algorithm.
eta_g_alg = {s1:10,s2:10,s3:10, s4:0.1, s5:10}
eta_l_alg = {s1:0.1,s2:0.1,s3:0.1, s4:0.1, s5:0.1}
mu = 0.01   # proximal coefficient passed to fedprox



for alg in algs:
  dict_results[alg] = {}
  eta_g = eta_g_alg[alg]
  eta_l = eta_l_alg[alg]

  c = np.zeros((n,d))      # per-client control variates, updated in place by scaffold()
  w = w_0.copy()
  w_prev = w_0.copy()      # iterate from the previous round
  loss = []
  eta_g_var = []
  delta = np.zeros((d,))   # running sum of squared averaged updates (fedadagrad)


  for t in range(T):

    # Average control variate at the start of the round; it stays fixed while
    # the individual c[i] are refreshed inside the client loop.
    c_avg = c.sum(axis=0)/n

    # fedexp is evaluated at the average of the last two iterates,
    # every other algorithm at the current iterate.
    if alg == s3:
      w_loss = (w+w_prev)/2
    else:
      w_loss = w

    if t%5 == 0:
        # Stored as a plain Python float so the text output stays free of
        # NumPy scalar wrappers.
        F = float((np.linalg.norm(A.dot(w_loss)-b)**2)/n)
        loss.append(F)
        print (alg, t, F)

    grad_avg = np.zeros((d,))
    grad_norm_avg = 0

    for i in range(n):
      if alg in (s2, s3, s4):
        grad = do_local_gd(i,tau,w,eta_l)
      elif alg == s1:
        grad = scaffold(i,tau,w,eta_l,c,c_avg)
      elif alg == s5:
        grad = fedprox(i,tau,w,eta_l,mu)
      else:
        # Without this, an unknown name would silently reuse a stale `grad`.
        raise ValueError("unknown algorithm: " + str(alg))

      grad_avg += grad
      grad_norm_avg += np.linalg.norm(grad)**2


    grad_avg = grad_avg/n                         # mean client update
    grad_norm_avg = grad_norm_avg/n               # mean squared norm of client updates
    grad_avg_norm = np.linalg.norm(grad_avg)**2   # squared norm of the mean update


    # Remember the pre-update iterate. It must NOT be overwritten again after
    # the server step, otherwise (w + w_prev)/2 above collapses to w.
    w_prev = w.copy()
    scale = eta_g

    if alg in (s1, s2, s5):

      w = w-eta_g*grad_avg

    elif alg == s4:
      # NOTE(review): no epsilon in the denominator; a coordinate whose
      # accumulated square is exactly zero would yield 0/0.
      delta = delta + grad_avg**2
      grad_avg = grad_avg/np.sqrt(delta)

      w = w-eta_g*grad_avg

    elif alg == s3:
      # Adaptive global step size computed from the spread of client updates.
      # NOTE(review): undefined if the averaged update is exactly zero.
      scale = float(0.5*grad_norm_avg/(grad_avg_norm))

      w = w - (scale)*grad_avg

    eta_g_var.append(scale)


  print (loss)
  dict_results[alg][alg+"_training_loss"] = loss
  dict_results[alg][alg+"_global_learning_rate"] = eta_g_var

  # Rewrite the whole results file so it is current after every algorithm.
  with open(filename_txt, 'w') as f:
        for i in dict_results.keys():
          for key, value in dict_results[i].items():
            f.write(key+" ")
            f.write(str(value))
            f.write("\n")





In [None]:
# Rounds at which the loss was recorded; the step of 5 must match the
# `t%5 == 0` logging interval of the training cell.
x = list(range(0, T, 5))

fig, ax = plt.subplots()

for alg in dict_results:
  ax.plot(x, dict_results[alg][alg+"_training_loss"], label=alg)

# The log scale is a property of the axes, so it is set once rather than
# once per plotted curve.
ax.set_yscale('log')
ax.set_xlabel("round")
ax.set_ylabel("training loss")
ax.set_title("Training loss per algorithm")
ax.legend()
plt.show()