In [1]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import numpy as np
from tqdm import tqdm


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1)
        self.fc1 = nn.Linear(2048, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = self.conv3(x)
        x = F.relu(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output


class GradientStocker:
    def __init__(self, model_names):
        for item in model_names:
            setattr(self, item, 0)

    def get_attributes(self):
        return self.__dict__

    def add_gradient(self, model):
        for name, param in model.named_parameters():
            setattr(self, name, getattr(self, name) + param.grad.data.cpu())

    def euclidian_distance(self, grad_current_agent):
        """Computes the relative euclidean distance of the flattened tensor between the current model and the global model"""
        flattened_grad_selected = self.flatten(list(self.get_attributes().values()))
        flattened_grad_current = self.flatten(list(grad_current_agent.get_attributes().values()))
        return torch.dist(flattened_grad_selected, flattened_grad_current, 2) / torch.norm(flattened_grad_selected, 2)

    def flatten(self, gradient_list):
        """Returns an aggregated tensor of all the gradients for one model"""
        gradients = list(map(lambda g : torch.flatten(g), gradient_list))
        return torch.cat(gradients, 0)


def client_update(client_model, optimizer, train_loader, epoch=5):
    """Train a client_model on the train_loder data."""
    model_names = []
    for name, param in client_model.named_parameters():
        model_names.append(name)
    gradient_stocker = GradientStocker(model_names)
    
    for e in range(epoch):
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.cuda(), target.cuda()
            optimizer.zero_grad()
            output = client_model(data)
            loss = F.nll_loss(output, target)
            loss.backward()
            optimizer.step()
            gradient_stocker.add_gradient(client_model)
    return loss.item(), gradient_stocker


def weighted_average_gradients(gradients, weights):
    """Compute the weighted average gradient."""
    weighted_averages = {}
    for key in gradients[0].get_attributes().keys():
        weighted_averages[key] = weighted_average_from_key(key, gradients, weights)
    return weighted_averages

def weighted_average_from_key(key, gradients, weights):
    n = 0
    d = 0 
    for idx, g_dict in enumerate(gradients) :
        n += g_dict.get_attributes()[key] * weights[idx]
        d += weights[idx]
    return n / d

def compute_weight(alpha_prev, round, relative_distance, data_size, batch_size, distance_penalty, size_penalty):
    """Computes the weight alpha for round r"""
    size_factor = (1 + size_penalty * math.floor(((round - 1) * batch_size) / data_size)) 
    distance_factor = distance_penalty * relative_distance
    alpha = alpha_prev - size_factor * distance_factor 
    return max(0,alpha)

def update_grad(model, gradient, alpha): 
    for name, param in model.named_parameters():
        param.data -= gradient[name].cuda() * alpha
    return model 

def share_weight_erosion_model(shared_model, client_models):
    for model in client_models:
        model.load_state_dict(shared_model.state_dict())

def evaluate(global_model, data_loader):
    """Compute loss and accuracy of a model on a data_loader."""
    global_model.eval()
    loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in data_loader:
            data, target = data.cuda(), target.cuda()
            output = global_model(data)
            loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    loss /= len(data_loader.dataset)
    acc = correct / len(data_loader.dataset)

    return loss, acc

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cuda


In [3]:
distance_penalty = 0.05
size_penalty = 2

def runWeightErosion(train_loader,test_loader,num_clients,batch_size,selected_agent_index,num_rounds,epochs):

    print("=== Weight Erosion ===")
    np.set_printoptions(precision=3)

    # Instantiate models and optimizers
    shared_model = Net().cuda()
    client_models = [Net().cuda() for _ in range(num_clients)]
    for model in client_models:
        model.load_state_dict(shared_model.state_dict())

    opt = [optim.SGD(model.parameters(), lr=0.1) for model in client_models]

    grad_vector = [None for _ in range(num_clients)]
    weight_vector = np.ones(num_clients)

    for r in range(num_rounds):

        print('%d-th round' % r)

        # client update
        loss = np.zeros(num_clients)
        for i in range(num_clients):
            loss_tmp, grad_vector[i] = client_update(client_models[i], opt[i], train_loader[i], epoch=epochs)
            loss[i] = loss_tmp
            d_rel = grad_vector[selected_agent_index].euclidian_distance(grad_vector[i])
            weight_vector[i] = compute_weight(weight_vector[i], r + 1, d_rel, len(train_loader[i]), batch_size, distance_penalty, size_penalty)


        # Weight Erosion Scheme
        weighted_mean_gradient = weighted_average_gradients(grad_vector, weight_vector)
        shared_model = update_grad(shared_model, weighted_mean_gradient, 0.1)

        # Share model to all agents
        share_weight_erosion_model(shared_model, client_models)

        # Evalutate on the global test set (for now)
        test_loss, acc = evaluate(shared_model, test_loader)


        print(f"Weight : {weight_vector}")
        print(f"Loss   : {loss}")
        print('Test loss %0.3g | Test acc: %0.3f \n' % (test_loss, acc))

In [4]:
distance_penalty = 0.05
size_penalty = 2

def run_weight_erosion_non_IID(train_loader,test_loader,num_clients,batch_size,selected_agent_index,num_rounds,epochs):

    print("=== Weight Erosion Non-IID ===")
    np.set_printoptions(precision=3)

    # Instantiate models and optimizers
    shared_model = Net().cuda()
    client_models = [Net().cuda() for _ in range(num_clients)]
    for model in client_models:
        model.load_state_dict(shared_model.state_dict())

    opt = [optim.SGD(model.parameters(), lr=0.1) for model in client_models]

    grad_vector = [None for _ in range(num_clients)]
    weight_vector = np.ones(num_clients)

    for r in range(num_rounds):

        print('%d-th round' % r)

        # client update
        loss = np.zeros(num_clients)
        for i in range(num_clients):
            loss_tmp, grad_vector[i] = client_update(client_models[i], opt[i], train_loader[i], epoch=epochs)
            loss[i] = loss_tmp
            d_rel = grad_vector[selected_agent_index].euclidian_distance(grad_vector[i])
            weight_vector[i] = compute_weight(weight_vector[i], r + 1, d_rel, len(train_loader[i]), batch_size, distance_penalty, size_penalty)


        # Weight Erosion Scheme
        weighted_mean_gradient = weighted_average_gradients(grad_vector, weight_vector)
        shared_model = update_grad(shared_model, weighted_mean_gradient, 0.1)

        # Share model to all agents
        share_weight_erosion_model(shared_model, client_models)

        # Evalutate on the global test set
        test_acc = np.zeros(num_clients)
        for idx in range(num_clients):
          test_loss, test_acc[idx] = evaluate(shared_model, test_loader[idx])

        print(f"Weight : {weight_vector}")
        print(f"Loss   : {loss}")
        np.set_printoptions(precision=5)
        print(f"Test acc : {test_acc}")

In [5]:
def runFederated(train_loader,test_loader,num_clients,batch_size,selected_agent_index,num_rounds,epochs):

    print("=== Federated ===")
    np.set_printoptions(precision=3)

    # Instantiate models and optimizers
    shared_model = Net().cuda()
    client_models = [Net().cuda() for _ in range(num_clients)]
    for model in client_models:
        model.load_state_dict(shared_model.state_dict())

    opt = [optim.SGD(model.parameters(), lr=0.1) for model in client_models]

    grad_vector = [None for _ in range(num_clients)]
    weight_vector = np.ones(num_clients)

    for r in range(num_rounds):

        print('%d-th round' % r)

        # client update
        loss = np.zeros(num_clients)
        for i in range(num_clients):
            loss_tmp, grad_vector[i] = client_update(client_models[i], opt[i], train_loader[i], epoch=epochs)
            loss[i] = loss_tmp
            weight_vector[i] = 1/num_clients


        # Weight Erosion Scheme
        weighted_mean_gradient = weighted_average_gradients(grad_vector, weight_vector)
        shared_model = update_grad(shared_model, weighted_mean_gradient, 0.1)

        # Share model to all agents
        share_weight_erosion_model(shared_model, client_models)

        # Evalutate on the global test set (for now)
        test_loss, acc = evaluate(shared_model, test_loader)

        print(f"Loss   : {loss}")
        print('Test loss %0.3g | Test acc: %0.3f\n' % (test_loss, acc))

In [12]:
def runLocal(train_loader,test_loader,num_clients,batch_size,selected_agent_index,epochs):

    print("=== Local ===")
    np.set_printoptions(precision=3)

    # Instantiate models and optimizers
    shared_model = Net().cuda()
    client_models = [Net().cuda() for _ in range(num_clients)]
    for model in client_models:
        model.load_state_dict(shared_model.state_dict())

    opt = [optim.SGD(model.parameters(), lr=0.1) for model in client_models]

    grad_vector = 0
    weight_vector = np.ones(num_clients)

    # client update
    loss = 0

    print('%d-th Client' % selected_agent_index)
    loss_tmp, grad_vector = client_update(client_models[selected_agent_index], opt[selected_agent_index], train_loader[selected_agent_index], epoch=epochs)
    loss = loss_tmp

    # Evalutate on the global test set (for now)
    test_loss, acc = evaluate(client_models[selected_agent_index], test_loader)

    print(f"Loss   : {loss}")
    print('Test loss %0.3g | Test acc: %0.3f\n' % (test_loss, acc))

In [7]:
def get_iid_loader(num_clients,batch_size):
    if homogeneity:
        traindata = datasets.MNIST('./data', train=True, download=True,transform=transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.1307,), (0.3081,))]))
        traindata_split = torch.utils.data.random_split(traindata, [int(traindata.data.shape[0] / num_clients) for _ in range(num_clients)])
        train_loader = [torch.utils.data.DataLoader(x, batch_size=batch_size, shuffle=True) for x in traindata_split]
        test_loader = torch.utils.data.DataLoader(datasets.MNIST('./data', train=False, transform=transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.1307,),(0.3081,))])), batch_size=batch_size, shuffle=True)

        return train_loader, test_loader

def get_non_iid_loader_distribution(num_clients,batch_size,distribution,selected_agent_index):
    traindata = datasets.MNIST('./data', train=True, download=True,transform=transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.1307,), (0.3081,))]))
    testdata = datasets.MNIST('./data', train=False, download=True,transform=transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.1307,), (0.3081,))]))

    target_labels = torch.stack([traindata.targets == i for i in range(10)])
    target_labels_test = torch.stack([testdata.targets == i for i in range(10)])
    target_labels_split = []
    target_labels_split_test = []

    #divide each target labels in small samples
    target_label_division = 100 #need to check if with this number we have len(target_labels_split) = 10 * target_label_division
    for i in range(10):
        target_labels_data =torch.where(target_labels[i])[0]

        target_labels_split += torch.split(target_labels_data, int((len(target_labels_data)) / (target_label_division-1)))
        target_labels_split_test += torch.split(torch.where(target_labels_test[i%10])[0], int((len(torch.where(target_labels_test[i])[0]))))

        target_labels_split = target_labels_split[:target_label_division*(i+1)] #remove when the split not givin you target_label_division samples but target_label_division +1 samples

    #merge selected samples in each client
    distribution = [target_label_division * x / (max(num_clients,10)/10) for x in distribution]
    samples_used = [0,0,0,0,0,0,0,0,0,0]
    next_samples_used = [0,0,0,0,0,0,0,0,0,0]
    split_client = []
    test_data = torch.tensor([],dtype=torch.long)

    for i in range(num_clients):
        split_client.append(torch.tensor([],dtype=torch.long))
        for n in range(10):
            next_samples_used[n] = samples_used[n] + distribution[n]
        distribution = distribution[1:] + distribution[:1] # shift to left

        for number in range(10):
            if i == selected_agent_index and samples_used[number] < next_samples_used[number]:
                test_data = torch.cat((test_data, target_labels_split_test[number]),0)

            while samples_used[number] < next_samples_used[number]:
                split_client[i] = torch.cat((split_client[i], target_labels_split[number*target_label_division+samples_used[number]]),0)
                samples_used[number] += 1

            if samples_used[number] > next_samples_used[number]:
                samples_used[number] -= 1

    traindata_split = [torch.utils.data.Subset(traindata, tl) for tl in split_client]
    testdata_split = torch.utils.data.Subset(testdata, test_data)
    train_loader = [torch.utils.data.DataLoader(x, batch_size=batch_size, shuffle=True) for x in traindata_split]
    test_loader = torch.utils.data.DataLoader(testdata_split, batch_size=batch_size, shuffle=True)

    return train_loader, test_loader

def get_specific_non_IID_loader(num_clients,batch_size,homogeneity):

    traindata = datasets.MNIST('./data', train=True, download=True,transform=transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.1307,), (0.3081,))]))

    target_labels = torch.stack([traindata.targets == i for i in range(10)])

    target_labels_split = []
    split_size = int(60000 / num_clients)

    for i in range(num_clients):
        target_labels_split += torch.split(torch.where(target_labels[(2 * i):(2 * (i + 1))].sum(0))[0][:split_size], split_size)

    traindata_split = [torch.utils.data.Subset(traindata, tl) for tl in target_labels_split]
    train_loader = []
    test_loader = []
    for x in traindata_split:
      x_size = len(x)
      size_train = int(math.ceil(x_size * 0.7))
      size_test = int(math.floor(x_size * 0.3))
      #print(x_size == size_train + size_test, size_train, size_test)
      train_set, test_set = torch.utils.data.random_split(x, [size_train, size_test])
      train_loader.append(torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True))
      test_loader.append(torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=True))

    return train_loader, test_loader

def get_non_IID_loader_digit_pairs(num_clients,batch_size,homogeneity):

        traindata = datasets.MNIST('./data', train=True, download=True,transform=transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.1307,), (0.3081,))]))
        testdata = datasets.MNIST('./data', train=False, transform=transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.1307,), (0.3081,))]))
        
        train_target_labels = torch.stack([traindata.targets == i for i in range(10)])
        test_target_labels = torch.stack([testdata.targets == i for i in range(10)])
        
        train_split_size = int(60000 / num_clients)
        test_split_size = int(10000 / num_clients)

        train_target_labels_split = []
        test_target_labels_split = []

        for i in range(num_clients):
            train_target_labels_split += torch.split(torch.where(train_target_labels[(2 * i):(2 * (i + 1))].sum(0))[0][:train_split_size], train_split_size)
            test_target_labels_split += torch.split(torch.where(test_target_labels[(2 * i):(2 * (i + 1))].sum(0))[0][:test_split_size], test_split_size)

        traindata_split = [torch.utils.data.Subset(traindata, tl) for tl in train_target_labels_split]
        testdata_split = [torch.utils.data.Subset(testdata, tl) for tl in test_target_labels_split]

        train_loader = [torch.utils.data.DataLoader(x, batch_size=batch_size, shuffle=True) for x in traindata_split]
        test_loader = [torch.utils.data.DataLoader(x, batch_size=batch_size, shuffle=True) for x in testdata_split]

        return train_loader, test_loader

def get_non_IID_loader_digit_trios(num_clients,batch_size,homogeneity):

        traindata = datasets.MNIST('./data', train=True, download=True,transform=transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.1307,), (0.3081,))]))
        testdata = datasets.MNIST('./data', train=False, transform=transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.1307,), (0.3081,))]))
        
        train_target_labels = torch.stack([traindata.targets == i for i in range(10)])
        test_target_labels = torch.stack([testdata.targets == i for i in range(10)])
        
        train_split_size = int(60000 / num_clients)
        test_split_size = int(10000 / num_clients)

        train_target_labels_split = []
        test_target_labels_split = []

        triplets = generate_permutations(num_clients)

        for i in range(num_clients):
            i_labels = triplets[i]
            print(f"Agent {i} is assigned labels {i_labels}")
            train_target_labels_split += torch.split(torch.where(train_target_labels[i_labels].sum(0))[0][:train_split_size], train_split_size)
            test_target_labels_split += torch.split(torch.where(test_target_labels[i_labels].sum(0))[0][:test_split_size], test_split_size)

        traindata_split = [torch.utils.data.Subset(traindata, tl) for tl in train_target_labels_split]
        testdata_split = [torch.utils.data.Subset(testdata, tl) for tl in test_target_labels_split]

        train_loader = [torch.utils.data.DataLoader(x, batch_size=batch_size, shuffle=True) for x in traindata_split]
        test_loader = [torch.utils.data.DataLoader(x, batch_size=batch_size, shuffle=True) for x in testdata_split]

        return train_loader, test_loader

In [8]:
# We want to give each agent 3 different digits 
# I'd say we do want to have all digitis at least once 
def generate_permutations(nb_agents=5, sample_size=3):
  available_labels = np.array([0,1,2,3,4,5,6,7,8,9])
  triplets = {}

  valid = False 
  while not valid :
    all_digits = []
    for i in range(nb_agents):
      triplets[i] = np.random.choice(available_labels,sample_size,replace=False)
      all_digits.extend(triplets[i])
    valid = len(np.unique(all_digits)) == len(available_labels)
  return triplets
 

      
  
  

In [13]:
!pip install cumulator
from cumulator import base

#=== IID Case 

#=== parameters for Schemes
selected_agent_index = 0
num_rounds = 10
epochs = 1

#=== parameters for training and testing
num_clients = 10 #if num_clients < 10, sum(distribution) should be = 10/num_clients with max 1 at each index
batch_size = 32
homogeneity = False
distribution = [0,0,0,0.25,0.5,0.25,0,0,0,0]


train_loader, test_loader = get_non_iid_loader_distribution(num_clients,batch_size,distribution,selected_agent_index) #lot of change needed

#cumulator not done yet
cumulator = base.Cumulator()
cumulator.on()

#runWeightErosion(train_loader,test_loader,num_clients,batch_size,selected_agent_index,num_rounds,epochs)
#runFederated(train_loader,test_loader,num_clients,batch_size,selected_agent_index,num_rounds,epochs)
runLocal(train_loader,test_loader,num_clients,batch_size,selected_agent_index,num_rounds*epochs)

cumulator.off()
dontknow = cumulator.computation_costs()
print(dontknow)

Defaulting to user installation because normal site-packages is not writeable
=== Local ===
0-th Client
Loss   : 0.0
Test loss 0.0306 | Test acc: 0.991

0.6004105812311172


In [None]:
#=== Non-IID case 
#=== parameters for Schemes
selected_agent_index = 0
num_rounds = 20
epochs = 1

#=== parameters for training and testing
num_clients = 5
batch_size = 32
homogeneity = False

#train_loader, test_loader = get_specific_non_IID_loader(num_clients,batch_size,homogeneity) 
train_loader, test_loader = get_non_IID_loader_digit_trios(num_clients,batch_size,homogeneity)
#cumulator not done yet
cumulator = base.Cumulator()
cumulator.on()

run_weight_erosion_non_IID(train_loader,test_loader,num_clients,batch_size,selected_agent_index,num_rounds,epochs)


Agent 0 is assigned labels [2 5 3]
Agent 1 is assigned labels [4 8 9]
Agent 2 is assigned labels [7 0 8]
Agent 3 is assigned labels [9 1 8]
Agent 4 is assigned labels [7 9 6]
=== Weight Erosion Non-IID ===
0-th round
Weight : [1.    0.933 0.94  0.936 0.936]
Loss   : [0.028 0.043 0.04  0.074 0.013]
Test acc : [0.     0.653  0.3345 0.6225 0.3395]
1-th round
Weight : [1.      0.86565 0.88156 0.87467 0.8754 ]
Loss   : [0.00299 0.00686 0.00049 0.15042 0.00301]
Test acc : [0.0415 0.668  0.5885 0.6665 0.5775]
2-th round
Weight : [1.      0.80024 0.82368 0.81457 0.81449]
Loss   : [0.03506 0.09882 0.00092 0.03042 0.00655]
Test acc : [0.312  0.726  0.809  0.9105 0.758 ]
3-th round
Weight : [1.      0.73307 0.76304 0.75045 0.75146]
Loss   : [0.00168 0.08008 0.008   0.00095 0.00497]
Test acc : [0.6085 0.7665 0.89   0.9575 0.857 ]
4-th round
Weight : [1.      0.66029 0.70133 0.68202 0.68258]
Loss   : [0.01779 0.05878 0.01646 0.00124 0.00314]
Test acc : [0.722  0.857  0.921  0.9775 0.867 ]
5-th roun

In [None]:
# IID case: all the clients have images of all the classes

# Hyperparameters

num_clients = 5
num_rounds = 10
epochs = 1
batch_size = 32
distance_penalty = 0.05
size_penalty = 2
selected_agent_index = 0

# weight_vector

weight_vector = np.ones(num_clients)

# Creating decentralized datasets

traindata = datasets.MNIST('./data', train=True, download=True,
                       transform=transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.1307,), (0.3081,))])
                       )
traindata_split = torch.utils.data.random_split(traindata, [int(traindata.data.shape[0] / num_clients) for _ in range(num_clients)])
train_loader = [torch.utils.data.DataLoader(x, batch_size=batch_size, shuffle=True) for x in traindata_split]
test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./data', train=False, transform=transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.1307,), (0.3081,))])
        ), batch_size=batch_size, shuffle=True)


# Instantiate models and optimizers
shared_model = Net().cuda()
client_models = [Net().cuda() for _ in range(num_clients)]
for model in client_models:
    model.load_state_dict(shared_model.state_dict())

opt = [optim.SGD(model.parameters(), lr=0.1) for model in client_models]

grad_vector = [None for _ in range(num_clients)]
# Runnining Weight Erosion 

for r in range(num_rounds):
    # client update
    loss = 0
    for i in range(num_clients):
        loss_tmp, grad_vector[i] = client_update(client_models[i], opt[i], train_loader[i], epoch=epochs)
        loss += loss_tmp
        d_rel = grad_vector[0].euclidian_distance(grad_vector[i])
        weight_vector[i] = compute_weight(weight_vector[i], r + 1, d_rel, len(train_loader[i]), batch_size, distance_penalty, size_penalty)
        print(f"Weight alpha for agent {i} : {weight_vector[i]}")
    
    # Weight Erosion Scheme 
    weighted_mean_gradient = weighted_average_gradients(grad_vector, weight_vector)
    shared_model = update_grad(shared_model, weighted_mean_gradient, 0.1)
    
    # Share model to all agents 
    share_weight_erosion_model(shared_model, client_models)
    
    # Evalutate on the global test set (for now)
    test_loss, acc = evaluate(shared_model, test_loader)

    print('%d-th round' % r)
    print('average train loss %0.3g | test loss %0.3g | test acc: %0.3f' % (loss / num_clients, test_loss, acc))

Weight alpha for agent 0 : 1.0
Weight alpha for agent 1 : 0.9728857278823853
Weight alpha for agent 2 : 0.9722303152084351
Weight alpha for agent 3 : 0.9726162552833557
Weight alpha for agent 4 : 0.9697747826576233
0-th round
average train loss 0.18 | test loss 0.143 | test acc: 0.958
Weight alpha for agent 0 : 1.0
Weight alpha for agent 1 : 0.9249209761619568
Weight alpha for agent 2 : 0.9247506260871887
Weight alpha for agent 3 : 0.9261295199394226
Weight alpha for agent 4 : 0.9224545359611511
1-th round
average train loss 0.181 | test loss 0.0819 | test acc: 0.973
Weight alpha for agent 0 : 1.0
Weight alpha for agent 1 : 0.8674433827400208
Weight alpha for agent 2 : 0.8652442693710327
Weight alpha for agent 3 : 0.8685817718505859
Weight alpha for agent 4 : 0.8642969727516174
2-th round
average train loss 0.148 | test loss 0.0639 | test acc: 0.979
Weight alpha for agent 0 : 1.0
Weight alpha for agent 1 : 0.8044438362121582
Weight alpha for agent 2 : 0.8012921810150146
Weight alpha fo

In [None]:
# NON-IID case: every client has images of two categories chosen from [0, 1], [2, 3], [4, 5], [6, 7], or [8, 9].

# Hyperparameters

num_clients = 5
num_rounds = 10
epochs = 1
batch_size = 32
distance_penalty = 0.05
size_penalty = 2
selected_agent_index = 0

# weight_vector

weight_vector = np.ones(num_clients)

# Creating decentralized datasets

traindata = datasets.MNIST('./data', train=True, download=True,
                       transform=transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.1307,), (0.3081,))])
                       )
target_labels = torch.stack([traindata.targets == i for i in range(10)])
target_labels_split = []
for i in range(5):
    target_labels_split += torch.split(torch.where(target_labels[(2 * i):(2 * (i + 1))].sum(0))[0], int(60000 / num_clients))
traindata_split = [torch.utils.data.Subset(traindata, tl) for tl in target_labels_split]
train_loader = [torch.utils.data.DataLoader(x, batch_size=batch_size, shuffle=True) for x in traindata_split]

test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./data', train=False, transform=transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.1307,), (0.3081,))])
        ), batch_size=batch_size, shuffle=True)

# Instantiate models and optimizers

to_share_model = Net().cuda()
client_models = [Net().cuda() for _ in range(num_clients)]
for model in client_models:
    model.load_state_dict(to_share_model.state_dict())

opt = [optim.SGD(model.parameters(), lr=0.1) for model in client_models]


for r in range(num_rounds):
    # client update
    loss = 0
    for i in range(num_clients):
        loss_tmp, grad_vector[i] = client_update(client_models[i], opt[i], train_loader[i], epoch=epochs)
        loss += loss_tmp
        d_rel = relative_distance_vector(grad_vector[selected_agent_index], grad_vector[i])
        weight_vector[i] = compute_weight(weight_vector[i], r + 1, d_rel, len(train_loader[i]), batch_size, distance_penalty, size_penalty)
        print(f"Weight alpha for agent {i} : {weight_vector[i]}")
    
    # Weight Erosion Scheme 
    weighted_mean_gradient = weighted_average_gradients(grad_vector, weight_vector)
    to_share_model = update_grad(to_share_model, weighted_mean_gradient, 0.1)
    
    # Share model to all agents 
    share_weight_erosion_model(to_share_model, client_models)
    
    # Evalutate on the global test set (for now)
    test_loss, acc = evaluate(to_share_model, test_loader)

    print('%d-th round' % r)
    print('average train loss %0.3g | test loss %0.3g | test acc: %0.3f' % (loss / num_clients, test_loss, acc))

NameError: name 'relative_distance_vector' is not defined

In [None]:
# IID case: all the clients have images of all the classes

# Hyperparameters

num_clients = 5
num_rounds = 5
epochs = 1
batch_size = 32

# Communication matrix

comm_matrix = np.ones((num_clients, num_clients)) / num_clients
# comm_matrix = np.eye(num_clients)

# Creating decentralized datasets

traindata = datasets.MNIST('./data', train=True, download=True,
                       transform=transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.1307,), (0.3081,))])
                       )
traindata_split = torch.utils.data.random_split(traindata, [int(traindata.data.shape[0] / num_clients) for _ in range(num_clients)])
train_loader = [torch.utils.data.DataLoader(x, batch_size=batch_size, shuffle=True) for x in traindata_split]

test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./data', train=False, transform=transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.1307,), (0.3081,))])
        ), batch_size=batch_size, shuffle=True)

# Instantiate models and optimizers

global_model = Net().cuda()
client_models = [Net().cuda() for _ in range(num_clients)]
for model in client_models:
    model.load_state_dict(global_model.state_dict())

opt = [optim.SGD(model.parameters(), lr=0.1) for model in client_models]

# Runnining Decentralized training

for r in range(num_rounds):
    # client update
    loss = 0
    for i in range(num_clients):
        loss += client_update(client_models[i], opt[i], train_loader[i], epoch=epochs)
    
    # diffuse params
    diffuse_params(client_models, comm_matrix)

    average_models(global_model, client_models)
    test_loss, acc = evaluate(global_model, test_loader)
    
    print('%d-th round' % r)
    print('average train loss %0.3g | test loss %0.3g | test acc: %0.3f' % (loss / num_clients, test_loss, acc))