In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor
from tqdm import tqdm


# Load both MNIST splits from ./data (download=True). Each split gets its own
# transform pipeline: convert the image to a tensor, then standardize it with
# mean 0.1307 and std 0.3081.
training_data, test_data = (
    datasets.MNIST(
        root="data",
        train=is_train,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),                       # image -> tensor
            transforms.Normalize((0.1307,), (0.3081,)),  # standardize
        ]),
    )
    for is_train in (True, False)  # training split first, then the test split
)

In [None]:
# Labels of the two splits.
y, y_test = training_data.targets, test_data.targets

# Images flattened to one float row of 28 * 28 = 784 values per sample.
# NOTE(review): this reads the datasets' `.data` attribute directly; the
# ToTensor/Normalize transform given to datasets.MNIST is presumably applied
# only on item access, so X / X_test would hold un-normalized pixel values --
# confirm this is intended.
X, X_test = (
    split.data.reshape(-1, 28 * 28).float()
    for split in (training_data, test_data)
)

In [None]:
def dist_items(ds, n):
    """Turn fractional shares into integer counts that add up to ``n``.

    Every share first gets ``int(d * n)`` items (truncation). While the total
    is still below ``n``, one more item is given to the entry whose target
    share ``ds[i]`` exceeds its currently realised share
    ``n_items[i] / sum(n_items)`` by the largest margin.

    Args:
        ds: sequence of target shares (e.g. one Dirichlet draw).
        n: total number of items to hand out.

    Returns:
        Integer ``np.ndarray`` with one count per entry of ``ds``.
    """
    ds = np.asarray(ds, dtype=float)
    n_items = np.array([int(d * n) for d in ds])

    while n_items.sum() < n:
        assigned = n_items.sum()
        if assigned > 0:
            realised = n_items / assigned
        else:
            # Nothing handed out yet: avoid 0/0 (NaN), which would make argmax
            # fall back to index 0 instead of the largest target share.
            realised = np.zeros(len(ds))
        n_items[np.argmax(ds - realised)] += 1
    return n_items

def distribute_data(X, y, N, alpha, n_labels=10):
    """Split ``(X, y)`` label by label across up to ``N`` workers.

    The samples are shuffled once. Then, for each label ``l`` in
    ``range(n_labels)``, proportions are drawn from a Dirichlet distribution
    with parameter ``alpha * ones(N)``, turned into integer counts by
    ``dist_items`` and used to hand consecutive slices of that label's samples
    to the workers.

    Args:
        X: samples, indexed along the first dimension (used with ``torch.cat``).
        y: labels aligned with ``X``.
        N: number of workers to distribute over.
        alpha: Dirichlet concentration, identical for all workers.
        n_labels: labels ``0 .. n_labels - 1`` are distributed.

    Returns:
        ``(X_workers, y_workers)``: per-worker tensors. Workers that received
        no sample at all are dropped, so both lists may be shorter than ``N``.
    """
    sfl = np.random.permutation(len(y))
    X = X[sfl]
    y = y[sfl]

    alphas = alpha * np.ones((N,))
    X_ws = [[] for _ in range(N)]
    y_ws = [[] for _ in range(N)]

    for l in range(n_labels):
        ds = np.random.dirichlet(alphas)
        indices = y == l
        X_l = X[indices]
        y_l = y[indices]

        # len(y_l) is the number of samples with label l
        n_items = dist_items(ds, len(y_l))

        # hand out consecutive, non-overlapping slices of this label's samples
        next_unused = 0
        for w in range(N):
            if n_items[w] > 0:
                X_ws[w].append(X_l[next_unused:next_unused + n_items[w]])
                y_ws[w].append(y_l[next_unused:next_unused + n_items[w]])
            next_unused += n_items[w]

    # keep only workers that received at least one chunk
    X_workers = [torch.cat(X_ws[w]) for w in range(N) if len(y_ws[w]) > 0]
    y_workers = [torch.cat(y_ws[w]) for w in range(N) if len(y_ws[w]) > 0]
    return X_workers, y_workers
def iid_data(X, y, N, m_w):
    """Shuffle ``(X, y)`` and cut it into ``N`` consecutive chunks of ``m_w`` samples.

    Args:
        X: samples, indexed along the first dimension.
        y: labels aligned with ``X``.
        N: number of workers.
        m_w: number of samples per worker.

    Returns:
        ``(X_workers_iid, y_workers_iid)``: two lists of length ``N``. Worker
        ``w`` gets rows ``m_w * w .. m_w * (w + 1) - 1`` of the shuffled data;
        if fewer than ``N * m_w`` samples exist, the trailing chunks are
        shorter or empty. The chunks are slices of one shuffled copy.
    """
    sfl = np.random.permutation(len(y))

    # apply the permutation once instead of once per worker
    X_shuffled = X[sfl]
    y_shuffled = y[sfl]

    X_workers_iid = [X_shuffled[m_w * w:m_w * (w + 1)] for w in range(N)]
    y_workers_iid = [y_shuffled[m_w * w:m_w * (w + 1)] for w in range(N)]
    return X_workers_iid, y_workers_iid



In [None]:
# Build a small class-balanced subset: for each digit 0..9 keep the first m_w
# samples, then split that subset IID over N workers.
N = 10     # number of workers
m_w = 30   # samples kept per label, and samples per IID worker
X2, y2 = (
    torch.cat([full[y == l][:m_w] for l in range(10)])
    for full in (X, y)
)
X_workers_iid, y_workers_iid = iid_data(X2, y2, N, m_w)


In [None]:
class LogRegression(nn.Module):
        """Multinomial logistic regression: one linear layer from 28*28 inputs to 10 logits."""

        def __init__(self):
            super().__init__()
            self.linear1 = nn.Linear(in_features=28 * 28, out_features=10, bias=True)

        def forward(self, x, verbose=False):
            # `verbose` is accepted but not used; the raw logits are returned.
            return self.linear1(x)

In [None]:
# Accuracy of a model on a labelled data set
def calculate_accuracy(mdl, X, y):
    """Return the fraction of samples in ``X`` whose predicted class equals ``y``.

    ``X`` is reshaped to ``(len(X), 28 * 28)`` floats before being fed to
    ``mdl``. The model is switched to eval mode and stays in it afterwards.
    The result is a Python float.
    """
    mdl.eval()
    flat_inputs = X.reshape(len(X), 28 * 28).float()
    with torch.no_grad():
        class_probs = torch.softmax(mdl(flat_inputs), 1)
        hits = class_probs.argmax(dim=1) == y
        accuracy = hits.sum() / len(y)
    return accuracy.item()

In [None]:
def share_data(X_workers1, y_workers1, p, rf=1):
    """Replicate a random fraction of all data points onto other workers.

    ``int(p * m)`` of the ``m`` points held by all workers together are drawn
    without replacement. Each drawn point is appended to ``rf`` distinct
    workers other than its owner. Every copy of a shared point (the owner's
    original and the ``rf`` replicas) gets loss weight ``1 / (rf + 1)``;
    all other points keep weight 1.

    Args:
        X_workers1: list with one sample tensor per worker.
        y_workers1: list with one label tensor per worker, aligned with
            ``X_workers1``.
        p: fraction of all points to share.
        rf: number of receiving workers per shared point.

    Returns:
        ``(X_ws, y_ws, loss_weights_workers)``: per-worker samples and labels
        (own points first, received points appended in the order they were
        shared) and the matching per-sample loss weights. The inputs are not
        modified.

    Raises:
        ValueError: if points are shared and ``rf`` exceeds the number of
            other workers.
    """
    N = len(X_workers1)
    ms_original = np.array([len(X_workers1[w]) for w in range(N)])
    m = int(ms_original.sum())

    X = torch.cat([X_workers1[w] for w in range(N)])
    y = torch.cat([y_workers1[w] for w in range(N)])

    c = int(p * m)
    if c > 0 and rf > N - 1:
        raise ValueError(
            f"rf={rf} receivers requested but only {N - 1} other workers exist"
        )
    shared_indices = np.random.choice(m, c, replace=False)

    # ends[w] is the index in X one past the last point owned by worker w
    ends = np.cumsum(ms_original)

    shared_indices_worker = [[] for _ in range(N)]  # owner-local indices of shared points
    received_X = [[] for _ in range(N)]
    received_y = [[] for _ in range(N)]
    for si in shared_indices:
        owner = np.argmax(ends > si)  # first worker whose range extends past si
        shared_indices_worker[owner].append(int(si - (ends[owner] - ms_original[owner])))

        possible_receivers = [w for w in range(N) if w != owner]
        receivers = np.random.choice(possible_receivers, rf, replace=False)
        for receiver in receivers:
            received_X[receiver].append(torch.unsqueeze(X[si], 0))
            received_y[receiver].append(torch.unsqueeze(y[si], 0))

    shared_weight = 1 / (rf + 1)
    X_ws, y_ws, loss_weights_workers = [], [], []
    for w in range(N):
        # Concatenate once per worker instead of once per received point;
        # clone when nothing was received so the caller's tensor is never aliased.
        if received_X[w]:
            X_ws.append(torch.cat([X_workers1[w]] + received_X[w]))
            y_ws.append(torch.cat([y_workers1[w]] + received_y[w]))
        else:
            X_ws.append(X_workers1[w].clone())
            y_ws.append(y_workers1[w].clone())

        weights = torch.ones((len(y_ws[w]),))
        for si_w in shared_indices_worker[w]:
            weights[si_w] = shared_weight           # own points that were shared
        weights[int(ms_original[w]):] = shared_weight  # received replicas (the tail)
        loss_weights_workers.append(weights)

    return X_ws, y_ws, loss_weights_workers


In [None]:
def gradient_estimator_second_moment(
    model, p, X_workers, y_workers, weights_workers=None, n_iter=25
):
    """Monte-Carlo estimate of the mean squared norm of the straggler-affected gradient.

    In each of ``n_iter`` rounds every worker independently responds with
    probability ``1 - p``. The weighted cross-entropy loss over the responding
    workers' data, divided by ``(1 - p) * m`` with ``m`` the total loss weight
    of all workers, is back-propagated, and the squared norm of the resulting
    gradient of ``model.linear1`` (weight and bias) is recorded.

    Args:
        model: module with a ``linear1`` layer; its ``.grad`` buffers are
            overwritten and it is left in train mode.
        p: probability that a worker is a straggler in a given round.
        X_workers: list with one sample tensor per worker.
        y_workers: list with one label tensor per worker.
        weights_workers: per-sample loss weights per worker; all ones if None.
        n_iter: number of sampling rounds.

    Returns:
        Mean of the recorded squared norms, or NaN if no round had a
        responding worker.
    """
    N = len(X_workers)

    if weights_workers is None:
        weights_workers = [torch.ones(len(X_workers[w])) for w in range(N)]

    m = sum(ws.sum() for ws in weights_workers)

    gradient_squared_norms = []
    criterion = nn.CrossEntropyLoss(reduction="none")
    model.train()
    for iteration in range(n_iter):
        # Use the `p` argument (not a global) so the estimate matches the
        # straggler probability the caller asked for.
        choose_ws = [np.random.choice(2, p=[p, 1 - p]) for w in range(N)]
        ws = [i for i in range(N) if choose_ws[i] == 1]

        # NOTE(review): rounds without any responding worker are skipped
        # rather than counted as a zero gradient -- confirm this is intended.
        if len(ws) > 0:
            inputs = torch.cat([X_workers[w] for w in ws])
            labels = torch.cat([y_workers[w] for w in ws])
            weights = torch.cat([weights_workers[w] for w in ws])

            model.zero_grad()
            y_pred = model(inputs)
            losses = criterion(y_pred, labels.type(torch.long)) * weights
            loss = losses.sum() / ((1 - p) * m)
            loss.backward()

            gradient_squared_norm = (
                torch.norm(model.linear1.weight.grad) ** 2
                + torch.norm(model.linear1.bias.grad) ** 2
            )
            gradient_squared_norms.append(gradient_squared_norm.item())

    if not gradient_squared_norms:
        return float("nan")
    return np.mean(gradient_squared_norms)

In [None]:
def run_simulation(p_straggler, X_workers, y_workers, weights_workers):
    """Train a fresh LogRegression for ``n_iter`` steps with random stragglers.

    ``n_iter``, ``X_test`` and ``y_test`` are read from the notebook globals.
    Each step first records the test accuracy, then lets every worker respond
    with probability ``1 - p_straggler``, takes one SGD step (lr 0.05) on the
    weighted cross-entropy of the responding workers' data scaled by
    ``1 / ((1 - p_straggler) * total weight)``, and finally records
    ``gradient_estimator_second_moment`` for the updated model. Steps in which
    no worker responds leave the model unchanged.

    Returns:
        ``(accuracies, moments)``: two arrays of length ``n_iter``.
    """
    n_workers = len(X_workers)
    total_weight = sum(weights.sum() for weights in weights_workers)

    model = LogRegression()
    optimizer = optim.SGD(model.parameters(), lr=0.05)
    criterion = nn.CrossEntropyLoss(reduction="none")

    accuracies = np.zeros(n_iter)
    moments = np.zeros(n_iter)
    for step in range(n_iter):
        # accuracy is measured before this step's update
        accuracies[step] = calculate_accuracy(model, X_test, y_test)
        model.train()

        # one Bernoulli draw per worker: 1 = responds, 0 = straggles
        responding = [
            w for w in range(n_workers)
            if np.random.choice(2, p=[p_straggler, 1 - p_straggler]) == 1
        ]
        if responding:
            batch_inputs = torch.cat([X_workers[w] for w in responding])
            batch_labels = torch.cat([y_workers[w] for w in responding])
            batch_weights = torch.cat([weights_workers[w] for w in responding])

            model.zero_grad()
            per_sample = criterion(model(batch_inputs), batch_labels.type(torch.long)) * batch_weights
            scaled_loss = per_sample.sum() / ((1 - p_straggler) * total_weight)
            scaled_loss.backward()
            optimizer.step()

        moments[step] = gradient_estimator_second_moment(
            model, p_straggler, X_workers, y_workers, weights_workers
        )

    return accuracies, moments

In [None]:
# Experiment parameters
n_iter = 50    # training steps per simulation (read as a global by run_simulation)
n_sim = 200    # simulations per configuration
alpha = 0.1    # Dirichlet concentration passed to distribute_data
m_w = 30       # samples per IID worker
N = 10         # number of workers
p_straggler_list = [0.3, 0.5, 0.8]

# (p, rf) pairs handed to share_data.
# prfs_comm: p * rf is 0.3 for every pair; prfs_data: p is fixed at 0.3.
prfs_comm = [(0.3 / rf, rf) for rf in (4, 3, 2, 1)]
prfs_data = [(0.3, rf) for rf in (7, 5, 2, 1)]



In [None]:
# Train model for iid data: every simulation re-shuffles X2/y2 into N workers
# of m_w samples each, all samples with loss weight 1.

# Create the output directory up front so the np.save calls at the end cannot
# fail on a missing folder after all simulations have run.
os.makedirs("thesisdata", exist_ok=True)

accuracies_iid = np.zeros((len(p_straggler_list), n_sim, n_iter))
moments_iid = np.zeros((len(p_straggler_list), n_sim, n_iter))

weights_workers = [torch.ones(m_w) for _ in range(N)]

for i_straggler, p_straggler in enumerate(p_straggler_list):
    for sim in tqdm(range(n_sim), desc=f"Running simulations for p = {p_straggler}"):
        X_workers_iid, y_workers_iid = iid_data(X2, y2, N, m_w)
        accuracies, moments = run_simulation(p_straggler, X_workers_iid, y_workers_iid, weights_workers)
        accuracies_iid[i_straggler, sim, :] = accuracies
        moments_iid[i_straggler, sim, :] = moments
np.save("thesisdata/accuracies_iid", accuracies_iid)
np.save("thesisdata/moments_iid", moments_iid)


In [None]:
# Train model for non-iid dirichlet data: every simulation draws a new
# Dirichlet partition of X2/y2 (concentration alpha), all loss weights are 1.

# Create the output directory up front so the np.save calls at the end cannot
# fail on a missing folder after all simulations have run.
os.makedirs("thesisdata", exist_ok=True)

accuracies_noniid = np.zeros((len(p_straggler_list), n_sim, n_iter))
moments_noniid = np.zeros((len(p_straggler_list), n_sim, n_iter))

for i_straggler, p_straggler in enumerate(p_straggler_list):
    for sim in tqdm(range(n_sim), desc=f"Running simulations for p = {p_straggler}"):
        # distribute_data may return fewer than N workers (empty ones are dropped)
        X_workers, y_workers = distribute_data(X2, y2, N, alpha)
        weights_workers = [torch.ones(len(X_w)) for X_w in X_workers]
        accuracies, moments = run_simulation(p_straggler, X_workers, y_workers, weights_workers)
        accuracies_noniid[i_straggler, sim, :] = accuracies
        moments_noniid[i_straggler, sim, :] = moments
np.save("thesisdata/accuracies_noniid", accuracies_noniid)
np.save("thesisdata/moments_noniid", moments_noniid)


In [None]:
# Train model with different values of p/rf (pairs from prfs_comm) and
# dirichlet distributed data.

# Create the output directory up front so the np.save calls at the end cannot
# fail on a missing folder after all simulations have run.
os.makedirs("thesisdata", exist_ok=True)

accuracies_comm = np.zeros((len(prfs_comm), len(p_straggler_list), n_sim, int(n_iter)))
moments_comm = np.zeros((len(prfs_comm), len(p_straggler_list), n_sim, int(n_iter)))

for i_straggler, p_straggler in enumerate(p_straggler_list):
    for i, (p, rf) in enumerate(prfs_comm):
        for sim in tqdm(range(n_sim), desc=f"Running simulations for p,c,d = {p_straggler,p,rf}"):
            # NOTE(review): distribute_data can return fewer than N workers;
            # share_data needs at least rf + 1 of them -- confirm this holds.
            X_workers, y_workers = distribute_data(X2, y2, N, alpha)
            X_workers_share, y_workers_share, weights_workers = share_data(X_workers, y_workers, p, rf=rf)
            accuracies, moments = run_simulation(p_straggler, X_workers_share, y_workers_share, weights_workers)
            accuracies_comm[i, i_straggler, sim, :] = accuracies
            moments_comm[i, i_straggler, sim, :] = moments
np.save("thesisdata/accuracies_comm", accuracies_comm)
np.save("thesisdata/moments_comm", moments_comm)



In [None]:
# Train model with different replication factors at fixed p (pairs from
# prfs_data) and dirichlet distributed data.

# Create the output directory up front so the np.save calls at the end cannot
# fail on a missing folder after all simulations have run.
os.makedirs("thesisdata", exist_ok=True)

# Sized by prfs_data, the list this cell actually iterates over.
accuracies_data = np.zeros((len(prfs_data), len(p_straggler_list), n_sim, int(n_iter)))
moments_data = np.zeros((len(prfs_data), len(p_straggler_list), n_sim, int(n_iter)))

for i_straggler, p_straggler in enumerate(p_straggler_list):
    for i, (p, rf) in enumerate(prfs_data):
        for sim in tqdm(range(n_sim), desc=f"Running simulations for p,c,d = {p_straggler,p,rf}"):
            # NOTE(review): distribute_data can return fewer than N workers;
            # share_data needs at least rf + 1 of them -- confirm this holds.
            X_workers, y_workers = distribute_data(X2, y2, N, alpha)
            X_workers_share, y_workers_share, weights_workers = share_data(X_workers, y_workers, p, rf=rf)
            accuracies, moments = run_simulation(p_straggler, X_workers_share, y_workers_share, weights_workers)
            accuracies_data[i, i_straggler, sim, :] = accuracies
            moments_data[i, i_straggler, sim, :] = moments
np.save("thesisdata/accuracies_data", accuracies_data)
np.save("thesisdata/moments_data", moments_data)



In [None]:
# Divide the data without dirichlet distribution: one worker per digit label,
# worker l holding a copy of exactly the samples of X2/y2 with label l.
X_workers = [X2[y2 == l].clone() for l in range(10)]
y_workers = [y2[y2 == l].clone() for l in range(10)]


In [None]:
# Train model for non-iid data using the fixed split currently held in
# X_workers / y_workers (in top-to-bottom execution order: one label per
# worker, no Dirichlet draw). All loss weights are 1.

# Create the output directory up front so the np.save calls at the end cannot
# fail on a missing folder after all simulations have run.
os.makedirs("thesisdata", exist_ok=True)

accuraciesHom_noniid = np.zeros((len(p_straggler_list), n_sim, n_iter))
momentsHom_noniid = np.zeros((len(p_straggler_list), n_sim, n_iter))

# The split does not change between simulations, so neither do the weights.
weights_workers = [torch.ones(len(X_w)) for X_w in X_workers]

for i_straggler, p_straggler in enumerate(p_straggler_list):
    for sim in tqdm(range(n_sim), desc=f"Running simulations for p = {p_straggler}"):
        accuracies, moments = run_simulation(p_straggler, X_workers, y_workers, weights_workers)
        accuraciesHom_noniid[i_straggler, sim, :] = accuracies
        momentsHom_noniid[i_straggler, sim, :] = moments
np.save("thesisdata/accuraciesHom_noniid", accuraciesHom_noniid)
np.save("thesisdata/momentsHom_noniid", momentsHom_noniid)


In [None]:
# Train model with different values of p/rf (pairs from prfs_comm), sharing
# data on top of the fixed split held in X_workers / y_workers.

# Create the output directory up front so the np.save calls at the end cannot
# fail on a missing folder after all simulations have run.
os.makedirs("thesisdata", exist_ok=True)

accuraciesHom_comm = np.zeros((len(prfs_comm), len(p_straggler_list), n_sim, int(n_iter)))
momentsHom_comm = np.zeros((len(prfs_comm), len(p_straggler_list), n_sim, int(n_iter)))

for i_straggler, p_straggler in enumerate(p_straggler_list):
    for i, (p, rf) in enumerate(prfs_comm):
        for sim in tqdm(range(n_sim), desc=f"Running simulations for p,c,d = {p_straggler,p,rf}"):
            # a fresh random selection of shared points for every simulation
            X_workers_share, y_workers_share, weights_workers = share_data(X_workers, y_workers, p, rf=rf)
            accuracies, moments = run_simulation(p_straggler, X_workers_share, y_workers_share, weights_workers)
            accuraciesHom_comm[i, i_straggler, sim, :] = accuracies
            momentsHom_comm[i, i_straggler, sim, :] = moments
np.save("thesisdata/accuraciesHom_comm", accuraciesHom_comm)
np.save("thesisdata/momentsHom_comm", momentsHom_comm)



In [None]:
# Train model with different replication factors at fixed p (pairs from
# prfs_data), sharing data on top of the fixed split held in
# X_workers / y_workers.

# Create the output directory up front so the np.save calls at the end cannot
# fail on a missing folder after all simulations have run.
os.makedirs("thesisdata", exist_ok=True)

# Sized by prfs_data, the list this cell actually iterates over.
accuraciesHom_data = np.zeros((len(prfs_data), len(p_straggler_list), n_sim, int(n_iter)))
momentsHom_data = np.zeros((len(prfs_data), len(p_straggler_list), n_sim, int(n_iter)))

for i_straggler, p_straggler in enumerate(p_straggler_list):
    for i, (p, rf) in enumerate(prfs_data):
        for sim in tqdm(range(n_sim), desc=f"Running simulations for p,c,d = {p_straggler,p,rf}"):
            # a fresh random selection of shared points for every simulation
            X_workers_share, y_workers_share, weights_workers = share_data(X_workers, y_workers, p, rf=rf)
            accuracies, moments = run_simulation(p_straggler, X_workers_share, y_workers_share, weights_workers)
            accuraciesHom_data[i, i_straggler, sim, :] = accuracies
            momentsHom_data[i, i_straggler, sim, :] = moments
np.save("thesisdata/accuraciesHom_data", accuraciesHom_data)
np.save("thesisdata/momentsHom_data", momentsHom_data)



In [None]:
# Train model with increased straggler probability with replication: the
# probability handed to run_simulation is the base 0.8 plus rf / 200, so it
# grows with the replication factor.

# Create the output directory up front so the np.save calls at the end cannot
# fail on a missing folder after all simulations have run.
os.makedirs("thesisdata", exist_ok=True)

p_straggler = 0.8
# Sized by prfs_data, the list this cell actually iterates over.
accuraciesHom_data_strag = np.zeros((len(prfs_data), n_sim, int(n_iter)))
momentsHom_data_strag = np.zeros((len(prfs_data), n_sim, int(n_iter)))

for i, (p, rf) in enumerate(prfs_data):
    # NOTE(review): the progress label shows the base p_straggler, not the
    # raised value actually passed to run_simulation.
    for sim in tqdm(range(n_sim), desc=f"Running simulations for p,c,d = {p_straggler,p,rf}"):
        X_workers_share, y_workers_share, weights_workers = share_data(X_workers, y_workers, p, rf=rf)
        accuracies, moments = run_simulation(p_straggler + (rf / 200), X_workers_share, y_workers_share, weights_workers)
        accuraciesHom_data_strag[i, sim, :] = accuracies
        momentsHom_data_strag[i, sim, :] = moments
np.save("thesisdata/accuraciesHom_data_strag", accuraciesHom_data_strag)
np.save("thesisdata/momentsHom_data_strag", momentsHom_data_strag)
