In [1]:
import numpy as np
from numpy.linalg import norm
import pickle
import matplotlib.pyplot as plt
import itertools
from scipy.stats import norm as norm_d
from scipy.stats import expon
from scipy.stats import weibull_min as weibull
from scipy.stats import burr12 as burr
from scipy.stats import randint
from scipy.stats import uniform
from scipy.optimize import minimize
import copy
import math
import time
from scipy.optimize import minimize
from scipy.sparse.linalg import svds
from scipy.linalg import svdvals
import scipy
from sklearn.datasets import load_svmlight_file
import pickle
from pathlib import Path
from functions import *
from utils import *
from sklearn.preprocessing import StandardScaler

# Algorithms
from byz_vr_marina import *
from byz_marina_no_sync import *
from byz_dasha_page import *
from byz_ef21 import *

%matplotlib inline

# Dataset Inspection

In [2]:
%%time
# Experiment-wide configuration read by the cells below.
dataset = "phishing"                 # name handed to prepare_data / compute_L
num_of_workers, num_of_byz = 16, 3   # total workers and how many of them are Byzantine
l2 = 0.1                             # presumably the l2-regularisation weight -- TODO confirm in functions.py
setting = 'NC'                       # passed through to the loss/algorithms; presumably "non-convex" -- TODO confirm

CPU times: total: 0 ns
Wall time: 0 ns


In [3]:
# Load the dataset. The five returned values are unpacked as: the sparse data matrix A
# (it is densified with .toarray() later), the targets y (presumably labels -- TODO confirm),
# the data size m, the dimension n, and the sparsity of A (all three are printed below).
A, y, m, n, sparsity_A = prepare_data(dataset)

In [4]:
%%time

# G = number of non-Byzantine workers. The data is truncated to mul * G rows so that
# the row count is an exact multiple of G.
G = num_of_workers - num_of_byz
mul = int(m / G)
A, y = A[:mul * G], y[:mul * G]

# Dense copy of A; this is what the losses and algorithms below receive.
denseA = A.toarray()

# Split the data across workers.
# (Alternative left by the author: hand every worker the full data, i.e.
#  clients_A = [A for i in range(num_of_workers)] and likewise for clients_y.)
clients_A, clients_y = distrib_data(A, y, num_of_workers, num_of_byz)

# Smoothness constants used by the theoretical stepsizes further down.
L, average_L, worst_L = compute_L(dataset, A, clients_A, num_of_byz, l2)

# Note: m is the size returned by prepare_data, i.e. before the truncation above.
print("Datasize: ", m, ", dimension: ", n)
for _label, _value in (
    ("Smoothness constant of f         : ", L),
    ("Average smoothness const of f_i : ", average_L),
    ("Worst smoothness const of f_ij   : ", worst_L),
    ("Sparsity of A: ", sparsity_A),
):
    print(_label, _value)

Datasize:  11055 , dimension:  68
Smoothness constant of f         :  0.26256412732071766
Average smoothness const of f_i :  0.266030184485412
Worst smoothness const of f_ij   :  0.3499885367500001
Sparsity of A:  0.4411764705882353
CPU times: total: 0 ns
Wall time: 17.8 ms


# True Solution

In [5]:
# Inputs for the reference solve: start from the all-ones vector, with l1 set to 0.
filename = f"{dataset}_x_init_all_ones"
x_init = np.ones(n)
l1 = 0

In [6]:
%%time
# Reference solve: minimise F with L-BFGS-B starting from x_init. The result is
# stored by the next cell and later read back as (x_star, f_star).
#
# `param` is intentionally a list: SciPy wraps a non-tuple `args` into a
# one-element tuple, so F and the gradient receive the whole list as a single
# extra argument. Do not turn it into a tuple.
param = [A, y, l2, setting, l1]
res_solver = minimize(F, x_init, args=param, jac=logreg_grad_plus_lasso,
                      method='L-BFGS-B',
                      options={'maxiter': 10000, 'disp': True}, tol=1e-30)

print(res_solver.success)
if not res_solver.success:
    # NOTE(review): tol=1e-30 is below float64 resolution, which is presumably why
    # the recorded run reports success=False. Print the solver's own termination
    # message so the reason is visible before the result is saved as x^*.
    print("solver message:", res_solver.message)

False
CPU times: total: 0 ns
Wall time: 74.5 ms


In [7]:
# Store the solver's final point and objective value together with (dataset, l2, l1).
# This runs unconditionally, i.e. also when res_solver.success is False (as in the recorded run).
save_solution(dataset, l2, l1, np.array(res_solver.x), res_solver.fun)

In [8]:
# Load the stored reference solution once (the original called read_solution twice
# to fetch the two entries of the same result).
shift_param = 1.0
saved_solution = read_solution(dataset, l2, l1)
x_star, f_star = saved_solution[0], saved_solution[1]

# Starting point for all algorithm runs: the reference solution shifted by
# shift_param in every coordinate.
x_init = x_star + shift_param*np.ones(n)

In [9]:
# Report the loss at the starting point and at the reference solution.
for _label, _point in (("f(x^0) = ", x_init), ("f(x^*) = ", x_star)):
    print(_label, logreg_loss(_point, [denseA, y, l2, setting]))

f(x^0) =  3.8770525500065443
f(x^*) =  0.4871545436722106


# Algorithm Run

In [10]:
# Prefix passed as `filename` to every algorithm run and to make_plots.
filename = f"{dataset}_x_star_shifted_by_all_ones_shift_{shift_param!s}_{setting}_"

# Attack and aggregation rule used during stepsize tuning
# (attack is overridden before the runtime runs).
attack, agg = "ROP", "CM"

T = 5000                 # iteration budget passed as T to the algorithms
save_info_period = 30    # passed as save_info_period to every run
mul = int(m / G)

# Communication budget; T_m and T_dp below are this budget divided by the per-round cost.
bits_cieling = 1e4

In [11]:
def gamma_th_ef_f(k):
    """Return the stepsize 1 / (L + R_ef) for compression parameter k.

    The result is stored as gamma_th_ef and is the base of the stepsize grid
    for byz_ef21. Reads the notebook globals n, num_of_byz, num_of_workers,
    average_L and L.
    """
    byz_term = np.sqrt(8*(256/15)*num_of_byz/num_of_workers)
    R_ef = (2*n/k) * (1 + byz_term) * average_L
    return 1 / (L + R_ef)

def p_m_f(k):
    """Return 1 / (omega + 1) where omega = n/k - 1 (n is a notebook global).

    The result is stored as p_m and passed as `p` to byz_marina_no_sync.
    """
    return 1 / ((n/k - 1) + 1)

def gamma_th_m(k):
    """Return the stepsize 1 / (L + sqrt(R_no_sync)) for compression parameter k.

    The result is stored as gamma_th_m_no_sync. Reads the notebook globals n,
    average_L, G, num_of_byz, num_of_workers and L.

    NOTE: the "MARINA hyperparameters" cell later rebinds the name `gamma_th_m`
    to a float, so this function is only callable until that cell has run.
    """
    omega_m = n/k - 1
    robust_factor = (np.sqrt(1/G) + np.sqrt(8*(256/15)*num_of_byz/num_of_workers))**2
    R_no_sync = omega_m**2 * average_L**2 * robust_factor
    return 1 / (L + R_no_sync ** 0.5)

# --- MARINA without synchronisation: randk compressor with k = 1 ---
sparsificator_m = randk
sparsificator_params_m = [1, 'rand-1']
p_m = p_m_f(sparsificator_params_m[0])
gamma_th_m_no_sync = gamma_th_m(sparsificator_params_m[0])

# --- EF21: topK compressor with k = 1 (separate "up" and "down" settings) ---
sparsificator_ef_up = topK
sparsificator_ef_down = topK
sparsificator_params_ef_up = [1, 'top-1']
sparsificator_params_ef_down = [1, 'top-1']
gamma_th_ef = gamma_th_ef_f(sparsificator_params_ef_up[0])

## MARINA hyperparameters

In [12]:
# Byz-VR-MARINA settings: batch = 1% of mul, randk with k = int(0.1 * n).
# NOTE(review): the label stays "rand-1" although k is int(0.1 * n); confirm the
# label is not used to identify saved results.
batch_size = int(0.01*mul)

sparsificator = randk
sparsificator_params = [int(0.1 * n), "rand-1"]

p = min((batch_size / mul), 1/n)

# Named pieces of R_m; the arithmetic is combined in the same order as the
# original one-line expression.
omega_marina = n/sparsificator_params[0] - 1
scale_marina = max(omega_marina, mul/batch_size)
var_marina = (average_L**2 + worst_L**2/batch_size)*omega_marina + worst_L**2/batch_size
byz_marina = 1/G + (8*(256/15)*num_of_byz/num_of_workers)/p
R_m = 12 * scale_marina * var_marina * byz_marina + 24 * scale_marina * (4*(256/15)*num_of_byz/num_of_workers)/p

# NOTE: this rebinds `gamma_th_m`, which until here named the function gamma_th_m(k).
gamma_th_m = 1 / (L + R_m ** 0.5)

# Per-round communication cost and the number of rounds that fit into bits_cieling.
bits_per_round = p*n + (1 - p)*sparsificator_params[0]
T_m = bits_cieling/bits_per_round

In [18]:
# Display T_m = bits_cieling / bits_per_round computed in the previous cell.
T_m

1518.9421015010723

## DASHA hyperparameters

In [19]:
# Byz-DASHA-PAGE settings; batch size and compressor are set to the same values
# as in the MARINA hyperparameters cell.
batch_size = int(0.01*mul)

sparsificator = randk
sparsificator_params = [int(0.1 * n), "rand-1"]

p_d = (batch_size / mul)
omega = n/sparsificator_params[0]-1
mom = 1/(2*(n/sparsificator_params[0])-1)   # passed as `mom` to byz_dasha_page

# Named pieces of R_dp; the arithmetic is combined in the same order as the
# original one-line expression.
lipschitz_dp = average_L**2 + worst_L**2/batch_size
robust_dp = (np.sqrt(1/G) + np.sqrt(8*(256/15)*num_of_byz/num_of_workers))**2
R_dp = (12*omega*(2*omega+1)*lipschitz_dp + 2*(mul/batch_size)* worst_L**2/batch_size) * robust_dp
gamma_th_dp = 1 / (L + R_dp ** 0.5)

# Every round costs k = sparsificator_params[0] units of the bits_cieling budget.
T_dp = bits_cieling/sparsificator_params[0]

In [22]:
# Display T_dp = bits_cieling / sparsificator_params[0] computed in the previous cell.
T_dp 

1666.6666666666667

## Finetuning Stepsize

In [23]:
# Stepsize grids: each theoretical stepsize multiplied by 2^0, 2^2, 2^4, 2^6, 2^8.
gamma_multipliers = [2**i for i in range(0, 10, 2)]

gammas_m_ns = [mult * gamma_th_m_no_sync for mult in gamma_multipliers]
gammas_dp = [mult * gamma_th_dp for mult in gamma_multipliers]
gammas_m = [mult * gamma_th_m for mult in gamma_multipliers]
gammas_ef = [mult * gamma_th_ef for mult in gamma_multipliers]

In [None]:
# Stepsize sweep for MARINA without synchronisation.
#
# Fixes relative to the original cell:
#   * calls byz_marina_no_sync, the function the runtime section actually runs
#     (the original called byz_vr_marina_no_sync);
#   * uses p_m and the rand-1 compressor settings (sparsificator_m /
#     sparsificator_params_m). gammas_m_ns was derived from these, and the
#     finetuning plot looks results up by p_m and sparsificator_params_m[0];
#   * drops batch_size, which the runtime call to byz_marina_no_sync does not pass.
for gamma in gammas_m_ns:
    byz_marina_no_sync(filename=filename, x_init=x_init, A=denseA, y=y, clients_A=clients_A, clients_y=clients_y,
                       gamma=gamma, num_of_byz=num_of_byz, p=p_m, num_of_workers=num_of_workers, attack=attack,
                       agg=agg, sparsificator=sparsificator_m, sparsificator_params=sparsificator_params_m, setting=setting,
                       l2=l2, T=T, max_t=np.inf, save_info_period=save_info_period, x_star=x_star, f_star=f_star)

In [None]:
# Stepsize sweep for Byz-VR-MARINA: one run of T iterations per candidate stepsize.
# Everything except gamma is identical across runs, so it is collected once.
marina_sweep_kwargs = dict(
    filename=filename, x_init=x_init, A=denseA, y=y,
    clients_A=clients_A, clients_y=clients_y,
    num_of_byz=num_of_byz, p=p, num_of_workers=num_of_workers,
    attack=attack, agg=agg,
    sparsificator=sparsificator, sparsificator_params=sparsificator_params,
    setting=setting, l2=l2, T=T, max_t=np.inf, batch_size=batch_size,
    save_info_period=save_info_period, x_star=x_star, f_star=f_star,
)
for gamma in gammas_m:
    byz_vr_marina(gamma=gamma, **marina_sweep_kwargs)

In [None]:
# Stepsize sweep for Byz-DASHA-PAGE: one run of T iterations per candidate stepsize.
# Everything except gamma is identical across runs, so it is collected once.
dasha_sweep_kwargs = dict(
    filename=filename, x_init=x_init, A=denseA, y=y,
    clients_A=clients_A, clients_y=clients_y,
    num_of_byz=num_of_byz, p=p_d, mom=mom, num_of_workers=num_of_workers,
    attack=attack, agg=agg,
    sparsificator=sparsificator, sparsificator_params=sparsificator_params,
    setting=setting, l2=l2, T=T, max_t=np.inf, batch_size=batch_size,
    save_info_period=save_info_period, x_star=x_star, f_star=f_star,
)
for gamma in gammas_dp:
    byz_dasha_page(gamma=gamma, **dasha_sweep_kwargs)

In [None]:
# Stepsize sweep for Byz-EF21.
#
# Fixes relative to the original cell, which was copied from the DASHA sweep:
#   * uses the top-1 compressor (sparsificator_ef_up / sparsificator_params_ef_up),
#     the one gammas_ef was derived from and the one the runtime run uses;
#   * drops p, mom and batch_size, which the runtime call to byz_ef21 does not pass.
for gamma in gammas_ef:
    byz_ef21(filename=filename, x_init=x_init, A=denseA, y=y, clients_A=clients_A, clients_y=clients_y, gamma=gamma,
             num_of_byz=num_of_byz, num_of_workers=num_of_workers, attack=attack, agg=agg, sparsificator=sparsificator_ef_up,
             sparsificator_params=sparsificator_params_ef_up, setting=setting, l2=l2, T=T, max_t=np.inf,
             save_info_period=save_info_period, x_star=x_star, f_star=f_star)

In [None]:
# One curve per candidate stepsize of the no-sync MARINA sweep; the label shows the
# power of two applied to the theoretical stepsize (2^0, 2^2, ...).
methods = [
    [
        'Byz_MARINA',
        [gamma, l2, p_m, T, num_of_workers, num_of_byz, attack, agg, sparsificator_params_m[0]],
        None,
        "$2^" + str(2 * idx) + "$" + "gamma_th",
    ]
    for idx, gamma in enumerate(gammas_m_ns)
]

# Axes: gradient norm against communicated bits.
mode_y, mode_x = 'norm_grad', 'bits_passes'

# Figure geometry and font/marker sizes, packed in the order make_plots receives them.
figsize = (12, 8)
title_size, linewidth, markersize, legend_size = 30, 2, 15, 20
xlabel_size, ylabel_size, xticks_size, yticks_size = 30, 40, 20, 30
sizes = [title_size, linewidth, markersize, legend_size, xlabel_size, ylabel_size, xticks_size, yticks_size]

title = f"{dataset}, Byz_MARINA, {attack}"
bbox_to_anchor = (1, 1)
legend_loc = "upper right"

# [save flag, output file name]
save_fig = [
    True,
    f"{filename}_Byz_VR_MARINA_only_{attack}_{agg}_l2_{l2!s}"
    f"_num_of_workers_{num_of_workers!s}_byz_{num_of_byz!s}_iters.pdf",
]

args_for_plots = [filename, mode_y, mode_x, figsize, sizes, title, methods, bbox_to_anchor, legend_loc, "finetuning", save_fig]
make_plots(args_for_plots)

## Runtime

In [24]:
# Stepsizes chosen after the tuning sweeps: hard-coded indices into the grids,
# where index j corresponds to the multiplier 2^(2j).
gamma_m, gamma_m_no_sync = gammas_m[4], gammas_m_ns[2]
gamma_dp, gamma_ef = gammas_dp[3], gammas_ef[4]

In [25]:
# Switch the attack for the runtime comparison; this replaces the "ROP" value used for tuning.
attack = 'LF'

In [26]:
%%time
# Seed both generators before the run.
# (`random` is not imported explicitly in this notebook; it is presumably supplied
# by one of the star imports.)
seed = 123
np.random.seed(seed)
random.seed(seed)

# Byz-VR-MARINA with the tuned stepsize, run for int(T/6) iterations.
res = byz_vr_marina(
    filename=filename,
    x_init=x_init,
    A=denseA,
    y=y,
    clients_A=clients_A,
    clients_y=clients_y,
    gamma=gamma_m,
    num_of_byz=num_of_byz,
    p=p,
    num_of_workers=num_of_workers,
    attack=attack,
    agg=agg,
    sparsificator=sparsificator,
    sparsificator_params=sparsificator_params,
    setting=setting,
    l2=l2,
    T=int(T / 6),
    max_t=np.inf,
    batch_size=batch_size,
    save_info_period=save_info_period,
    x_star=x_star,
    f_star=f_star,
)

Data distributed correctly
CPU times: total: 3.3 s
Wall time: 3.52 s


In [27]:
%%time
# Seed both generators before the run.
# (`random` is not imported explicitly in this notebook; it is presumably supplied
# by one of the star imports.)
seed = 123
np.random.seed(seed)
random.seed(seed)

# Byz-DASHA-PAGE with the tuned stepsize, run for int(T/6) iterations.
res_2 = byz_dasha_page(
    filename=filename,
    x_init=x_init,
    A=denseA,
    y=y,
    clients_A=clients_A,
    clients_y=clients_y,
    gamma=gamma_dp,
    num_of_byz=num_of_byz,
    p=p_d,
    mom=mom,
    num_of_workers=num_of_workers,
    attack=attack,
    agg=agg,
    sparsificator=sparsificator,
    sparsificator_params=sparsificator_params,
    setting=setting,
    l2=l2,
    T=int(T / 6),
    max_t=np.inf,
    batch_size=batch_size,
    save_info_period=save_info_period,
    x_star=x_star,
    f_star=f_star,
)

Data distributed correctly
CPU times: total: 3.39 s
Wall time: 3.5 s


In [28]:
%%time
# Seed both generators before the run.
# (`random` is not imported explicitly in this notebook; it is presumably supplied
# by one of the star imports.)
seed = 123
np.random.seed(seed)
random.seed(seed)

# Byz-EF21 with the top-1 compressor and the tuned stepsize, run for the full T iterations.
res3 = byz_ef21(
    filename=filename,
    x_init=x_init,
    A=denseA,
    y=y,
    clients_A=clients_A,
    clients_y=clients_y,
    gamma=gamma_ef,
    num_of_byz=num_of_byz,
    num_of_workers=num_of_workers,
    attack=attack,
    agg=agg,
    sparsificator=sparsificator_ef_up,
    sparsificator_params=sparsificator_params_ef_up,
    setting=setting,
    l2=l2,
    T=T,
    max_t=np.inf,
    save_info_period=save_info_period,
    x_star=x_star,
    f_star=f_star,
)

Data distributed correctly
CPU times: total: 12.7 s
Wall time: 13.4 s


In [29]:
%%time
# Seed both generators before the run.
# (`random` is not imported explicitly in this notebook; it is presumably supplied
# by one of the star imports.)
seed = 123
np.random.seed(seed)
random.seed(seed)

# MARINA without synchronisation (rand-1 compressor, p = p_m), run for the full T iterations.
res4 = byz_marina_no_sync(
    filename=filename,
    x_init=x_init,
    A=denseA,
    y=y,
    clients_A=clients_A,
    clients_y=clients_y,
    gamma=gamma_m_no_sync,
    num_of_byz=num_of_byz,
    p=p_m,
    num_of_workers=num_of_workers,
    attack=attack,
    agg=agg,
    sparsificator=sparsificator_m,
    sparsificator_params=sparsificator_params_m,
    setting=setting,
    l2=l2,
    T=T,
    max_t=np.inf,
    save_info_period=save_info_period,
    x_star=x_star,
    f_star=f_star,
)

Data distributed correctly
CPU times: total: 10.8 s
Wall time: 11.1 s


# Visualisation

In [None]:
# One entry per algorithm: [method key, run parameters, None, legend label].
# The parameter lists repeat the values used in the runtime runs above
# (int(T/6) iterations for the two variance-reduced methods, T for the others).
ef21_entry = ['Byz_EF21',
              [gamma_ef, l2, T, num_of_workers, num_of_byz, attack, agg, sparsificator_params_ef_up[0]],
              None, 'Byz-EF21']
vr_marina_entry = ['Byz_VR_MARINA',
                   [gamma_m, l2, p, int(T/6), num_of_workers, batch_size, num_of_byz, attack, agg],
                   None, 'Byz-VR-MARINA']
dasha_entry = ['Byz_DASHA_PAGE',
               [gamma_dp, l2, p_d, int(T/6), num_of_workers, batch_size, num_of_byz, attack, agg],
               None, 'Byz-DASHA-PAGE']
marina_no_sync_entry = ['Byz_MARINA',
                        [gamma_m_no_sync, l2, p_m, T, num_of_workers, num_of_byz, attack, agg, sparsificator_params_m[0]],
                        None, 'Byz-VR-MARINA 2.0']
methods = [ef21_entry, vr_marina_entry, dasha_entry, marina_no_sync_entry]

# Axes: gradient norm against communicated bits.
mode_y, mode_x = 'norm_grad', 'bits_passes'

# Figure geometry and font/marker sizes, packed in the order make_plots receives them.
figsize = (12, 8)
title_size, linewidth, markersize, legend_size = 30, 2, 15, 20
xlabel_size, ylabel_size, xticks_size, yticks_size = 30, 40, 20, 30
sizes = [title_size, linewidth, markersize, legend_size, xlabel_size, ylabel_size, xticks_size, yticks_size]

title = f"{dataset}, {attack}"
bbox_to_anchor = (1, 1)
legend_loc = "upper right"

# [save flag, output file name]
save_fig = [
    True,
    f"{filename}_All_{attack}_{agg}_epochs_{T!s}_l2_{l2!s}"
    f"_num_of_workers_{num_of_workers!s}_byz_{num_of_byz!s}_batch_{batch_size!s}_iters.pdf",
]

args_for_plots = [filename, mode_y, mode_x, figsize, sizes, title, methods, bbox_to_anchor, legend_loc, "comparison", save_fig]
make_plots(args_for_plots)