In [1]:
import torch
from torch import nn
from torchvision import datasets
import fastai 
from torchvision.transforms import ToTensor
# from fastai.data.core import DataLoader
from torch.utils.data import DataLoader
from fastai.data.core import DataLoaders
from fastai.callback.core import Callback
from fastai.vision.all import Learner, Metric
from fastai import optimizer
import torch.nn.functional as F
from torch.utils.data import Subset
import copy


In [2]:
training_data = datasets.MNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
)

test_data = datasets.MNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
)

In [3]:
batch_size = 256

# Create data loaders.
train_dataloader = DataLoader(training_data, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

for X, y in test_dataloader:
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

Shape of X [N, C, H, W]: torch.Size([256, 1, 28, 28])
Shape of y: torch.Size([256]) torch.int64


In [4]:
# Get cpu, gpu or mps device for training.
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

Using cuda device


In [5]:
num_clients = 5
train_size = len(training_data)
# indices = list(range(train_size))

RANDOM_SEED = 42
torch.manual_seed(RANDOM_SEED)
torch.random.manual_seed(RANDOM_SEED)
indices = torch.randperm(train_size).tolist()

subset_size = train_size // num_clients
client_subsets = [] 
for i in range(num_clients):
    start_idx = i * subset_size
    end_idx = start_idx + subset_size

    if i == num_clients - 1:
        end_idx = train_size

    subset_indices = indices[start_idx:end_idx]
    client_subsets.append(Subset(training_data, subset_indices))

client_loaders = [DataLoader(sub, batch_size=batch_size, shuffle=True) for sub in client_subsets]

In [102]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(), 
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10)
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits 

model = NeuralNetwork().to(device)
original_shapes = []
for p in model.parameters():
    original_shapes.append(p.shape)
model

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)

In [138]:
# We now have 5 different datasets, each with some sort of representation of the data that is unknown, ie, we have no 
# statistical information on the data that each of these clients would have
# We now need to implement variations of the 3 protocols, namely, the encoding protocol, the communication protocol and the decoding protocol

# For communication protocol for fixed size encoder, we set the seed. So the seed is communicated with the values. 
SEED = 41
torch.manual_seed(SEED)

# Encoders
def variable_size_encoder(grad_vectors, mu, p=0.1):
    # Lets take p = 0.1
    new_grad_vectors = []
    with torch.no_grad():
        for i in range(len(grad_vectors)):
            mask = torch.rand_like(grad_vectors[i], device=grad_vectors[i].device) < p
            Y = torch.empty_like(grad_vectors[i], device=grad_vectors[i].device)
            Y[mask] = (grad_vectors[i][mask] - mu[i] * (1-p))/p
            Y[~mask] = mu[i]
            new_grad_vectors.append(Y)
    return new_grad_vectors

def fixed_size_encoder(grad_vectors, mu, k=1000):
    # k can vary
    orig = k
    torch.manual_seed(SEED) 
    new_grad_vectors = []
    with torch.no_grad():
        for i in range(len(grad_vectors)):
            k = orig
            shape = grad_vectors[i].shape
            # Flattening the parameters to permutate over them
            flat_grad = grad_vectors[i].view(-1)
            C = shape[-1]
            # Get the length of the flat_grad array
            d = flat_grad.numel()
            k = min(k, d)
            # print(k, d, C)
            # Shuffle the list [1, 2, ... d] and get the first k elements
            indices = torch.randperm(d, device=flat_grad.device)[:k]
             
            mask = torch.zeros(d, dtype=torch.bool, device=flat_grad.device)
            mask[indices] = True
            
            Y = torch.empty_like(flat_grad)
            # Encode the parameters
            chosen_vals = (d/k)*flat_grad[mask] - ((d-k)/k)*mu[i]
            Y[mask] = chosen_vals
            Y[~mask] = mu[i]
            Y = Y.view(shape)
            new_grad_vectors.append(Y)
    return new_grad_vectors
            
            
# Decoders : I wont be making use of this later on
def averaging_decoder(grad_vectors_list):
    if isinstance(grad_vectors_list, list):
        grad_vectors_list = torch.stack(grad_vectors_list, dim=0)
    return torch.mean(grad_vectors_list, dim=0)

# Communication protocols
def sparse_for_variable_size_encoder(encoded_vectors, mu):
    final_vectors = []
    with torch.no_grad():
        for i in range(len(encoded_vectors)):
            flat_vector = encoded_vectors[i].view(-1)
            mask = flat_vector != mu[i]
            # vals = encoded_vectors[i][mask]
            indices = torch.nonzero(mask, as_tuple=False).view(-1)
            values = flat_vector[mask]
            final_vectors.append(list(zip(indices, values)))

    
    return final_vectors, mu
    
def sparse_for_fixed_size_encoder(encoded_vectors, mu):
    final_vectors = []
    with torch.no_grad():
        for i in range(len(encoded_vectors)):
            flat_vector = encoded_vectors[i].view(-1)
            mask = torch.zeros(len(flat_vector), dtype=torch.bool, device=flat_vector.device)
            mask[flat_vector != mu[i]] = True
            values = flat_vector[mask]
            final_vectors.append(values)

    return final_vectors, mu, SEED

def rebuild_from_protocol_1(final_vectors, mu, original_shapes):
    rebuilt_vectors = []
    with torch.no_grad():
        for i, vec_data in enumerate(final_vectors):
            num_elements = 1
            
            for dim_size in original_shapes[i]:
                num_elements *= dim_size
    
            
            Y_flat = torch.full((num_elements,), mu[i], dtype=torch.float32, device=mu[i].device)
    
            indices = torch.tensor([pair[0] for pair in vec_data], dtype=torch.long, device=Y_flat.device)
            values = torch.tensor([pair[1] for pair in vec_data], dtype=Y_flat.dtype, device=Y_flat.device)
            Y_flat[indices] = values
            Y = Y_flat.view(original_shapes[i])
            rebuilt_vectors.append(Y)
    return rebuilt_vectors

def rebuild_from_protocol_2(final_vectors, mu, SEED, original_shapes):
    rebuilt_vectors = []
    with torch.no_grad():
        for i, values in enumerate(final_vectors):
            
            num_elements = 1
            for dim_size in original_shapes[i]:
                num_elements *= dim_size
    
            torch.manual_seed(SEED)
            k = len(values)  # number of chosen elements
            d = num_elements
            indices = torch.randperm(d, device=device)[:k]
    
            Y_flat = torch.full((num_elements,), float(mu[i]), dtype=torch.float32, device=mu[i].device)
            Y_flat[indices] = values
    
            # Now we have Y, we must invert to get X
            # mask for chosen elements: Y != mu[i]
            chosen_mask = (Y_flat != mu[i])

            # Apply X = (k/d)*Y + ((d-k)/d)*mu if chosen
            X_flat = torch.empty_like(Y_flat)
            X_flat[chosen_mask] = (k/d)*Y_flat[chosen_mask] + ((d-k)/d)*mu[i]
            X_flat[~chosen_mask] = mu[i]
            
            X = X_flat.view(original_shapes[i])
            rebuilt_vectors.append(X)
    return rebuilt_vectors


parameters = list(model.parameters())
mu_1 = []
with torch.no_grad():
    for p in parameters:
        mu_1.append(torch.mean(p))

encoded_vectors = fixed_size_encoder(parameters, mu_1)
final_vectors, mu, SEED = sparse_for_fixed_size_encoder(encoded_vectors, mu_1)
rebuilt_vectors = rebuild_from_protocol_2(final_vectors, mu_1, SEED, original_shapes)

In [140]:
rebuilt_vectors

[tensor([[-0.0049, -0.0049, -0.0049,  ..., -0.0049, -0.0049, -0.0049],
         [-0.0049, -0.0049, -0.0049,  ..., -0.0049, -0.0049, -0.0049],
         [-0.0049, -0.0049, -0.0049,  ..., -0.0049, -0.0049, -0.0049],
         ...,
         [-0.0049, -0.0049, -0.0049,  ..., -0.0049, -0.0049, -0.0049],
         [-0.0049, -0.0049, -0.0049,  ..., -0.0049, -0.0049, -0.0049],
         [-0.0049, -0.0049, -0.0049,  ..., -0.0049, -0.0049, -0.0049]],
        device='cuda:0'),
 tensor([-4.5572e-02, -4.6445e-04, -1.9688e-02, -5.2854e-03, -4.6341e-02,
         -4.0416e-02, -1.3053e-03,  8.8912e-04, -4.5348e-02, -4.6200e-02,
         -2.7820e-02, -2.3195e-02, -3.8668e-04, -3.0886e-02, -4.3926e-02,
          4.9793e-04, -3.3293e-02, -7.5375e-02, -2.8687e-02, -3.1260e-02,
         -3.6790e-02, -1.1273e-01, -4.8944e-03, -2.7433e-02, -3.5393e-02,
         -1.5347e-02, -5.0623e-02,  1.2642e-02,  1.6567e-02, -8.0175e-02,
         -3.8870e-02, -5.3798e-02, -1.4552e-02,  1.2231e-03, -5.8537e-03,
         -2.692

In [8]:
a = torch.FloatTensor([[1, 2, 3], [2, 3, 4], [4, 5, 6]])
averaging_decoder(a)

tensor([2.3333, 3.3333, 4.3333])

In [10]:
# parameters = list(model.parameters())
# variable_size_encoder(parameters)

In [11]:
class ProxSGDWithLinearSearch:
    def __init__(self, params, lr):
        self.params, self.lr = list(params), lr
        self.state = {p: {} for p in self.params}
        self.hypers = [{'lr': lr}]
        self.max_iter = 5
        self.eta = 1e-5
        
    def soft_threshold(self, x, eta):
        # Apply the soft-thresholding operator
        return F.softshrink(x, lambd=eta)
        
    def prox_operator(self, x):
        # Use the soft-thresholding operator as the proximal step
        return self.soft_threshold(x, self.eta)

    def Gt(self, x, step_size, x_grad):
        return (1/step_size) * (x - self.prox_operator(x - step_size * x_grad))
        
    def step(self, *args, **kwargs):
        model = kwargs.get("model")
        loss_fn = kwargs.get("loss_fn")
        X = kwargs.get("X")
        y = kwargs.get("y")
        
        orig_params = [p.data.clone() for p in self.params]
        step_size = self.lr
        with torch.no_grad():
            pred = model(X)
            old_loss = loss_fn(pred, y)
        flag = True
        for _ in range(self.max_iter):
            for p in self.params:
                if p.grad is not None: 
                    Gt_val = self.Gt(p.data, step_size, p.grad.data)
                    p.data = p.data - step_size * Gt_val
            with torch.no_grad():
                pred = model(X)
                new_loss = loss_fn(pred, y)
            if new_loss < old_loss:
                flag = False
                break
            else:
                for i, j in zip(self.params, orig_params):
                    i.data.copy_(j)
                step_size *= 0.5
        if flag: 
            for p in self.params:
                if p.grad is not None: 
                    Gt_val = self.Gt(p.data, step_size, p.grad.data)
                    p.data = p.data - step_size * Gt_val
        else:
            self.lr = step_size
        # print(self.lr)
    def zero_grad(self, *args, **kwargs):
        for p in self.params:
            p.grad = None

    def set_hypers(self, **kwargs):
        if 'lr' in kwargs:
            self.lr = kwargs['lr']
            self.hypers[0]['lr'] = kwargs['lr']


    

In [12]:
loss_fn = nn.CrossEntropyLoss()

In [39]:
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step(model=model, loss_fn=loss_fn, X=X, y=y)
        optimizer.zero_grad()

        if batch % 100 == 0:
            print(f"BATCH: {batch} of {size/batch_size} batches")
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [14]:
def test(dataloader, model, loss_fn, number="main"):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error for client {number}: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [47]:
encoded_vectors = None

class Client:
    def __init__(self, model, train_dataloader, loss_fn, mu, max_iter=2):
        self.model = model
        # self.optimizer = ProxSGDWithLinearSearch(model.parameters(), 20)
        self.train_dataloader = train_dataloader
        self.max_iter = max_iter
        self.loss_fn = loss_fn
        self.mu = mu
        
    def train(self):
        for i in range(0, self.max_iter):
            train(self.train_dataloader, self.model, self.loss_fn, ProxSGDWithLinearSearch(self.model.parameters(), 20))

    def test(self, test_dataloader, number):
        test(test_dataloader, self.model, self.loss_fn, number)

    def set_parameters(self, model):
        self.model = copy.deepcopy(model)
            
    def get_encoded_1(self, p):
        encoded = variable_size_encoder(list(self.model.parameters()), self.mu, p)
        final_vectors, mu = sparse_for_variable_size_encoder(encoded, self.mu)
        return final_vectors, mu

    def get_encoded_2(self, k):
        encoded = fixed_size_encoder(list(self.model.parameters()), self.mu, k)
        final_vectors, mu, seed = sparse_for_fixed_size_encoder(encoded, self.mu)
        encoded_vectors = encoded
        return final_vectors, mu, seed

    # Built the member function to test if continuous update of the mean is helpful or not
    def update_mean(self):
        with torch.no_grad():
            params = list(self.model.parameters())
            self.mu = [torch.mean(p) for p in params]

In [156]:
decoded_vectors = None
class Master:
    def __init__(self, model, mu, loss_fn):
        self.model = model
        self.original_shapes = [p.shape for p in model.parameters()]
        self.mu = mu
        self.loss_fn = loss_fn

    def set_mean(self, mu):
        self.mu = copy.deepcopy(mu)
        
    def update_global_model_from_protocol_1(self, clients_data):
        # clients_data is a list of tuples (final_vectors, mu, p) from each client
        # Decode each client's parameters and then compute updates
        decoded_params_list = []
        for (final_vectors, mu) in clients_data:
            decoded = rebuild_from_protocol_1(final_vectors, mu, self.original_shapes)
            decoded_params_list.append(decoded)

        # Now decoded_params_list is a list of parameter lists from each client
        # Convert each client's param list into a tensor stack and average updates
        # First, get master_params for reference
        master_params = list(self.model.parameters())
        
        # Compute updates: (client_params - master_params) for each client, then average
        all_updates = []
        with torch.no_grad():
            for decoded_params in decoded_params_list:
                updates = [(dp - mp) for dp, mp in zip(decoded_params, master_params)]
                all_updates.append(updates)

            # Average updates across clients
            # Stack each parameter across clients and mean
            averaged_updates = []
            num_clients = len(all_updates)
            for param_idx in range(len(master_params)):
                # Gather this param_idx from all clients
                stack = torch.stack([all_updates[c][param_idx] for c in range(num_clients)], dim=0)
                avg = torch.mean(stack, dim=0)
                averaged_updates.append(avg)

            # Apply averaged updates to master model
            for mp, au in zip(master_params, averaged_updates):
                mp.data.add_(au)

    def update_global_model_from_protocol_2(self, clients_data):
        # clients_data is a list of tuples (final_vectors, mu, seed, k) from each client
        decoded_params_list = []
        global decoded_vectors
        for (final_vectors, mu, seed) in clients_data:
            decoded = rebuild_from_protocol_2(final_vectors, mu, seed, self.original_shapes)
            decoded_params_list.append(decoded)
            decoded_vectors = decoded
            # print(decoded)

        master_params = list(self.model.parameters())
        all_updates = []
        with torch.no_grad():
            for decoded_params in decoded_params_list:
                updates = [(dp - mp) for dp, mp in zip(decoded_params, master_params)]
                all_updates.append(updates)

            num_clients = len(all_updates)
            averaged_updates = []
            for param_idx in range(len(master_params)):
                stack = torch.stack([all_updates[c][param_idx] for c in range(num_clients)], dim=0)
                avg = torch.mean(stack, dim=0)
                averaged_updates.append(avg)

            # Apply averaged updates to master model
            
            for mp, au in zip(master_params, averaged_updates):
                mp.data.add_(au)

    def test(self, test_dataloader):
        test(test_dataloader, self.model, self.loss_fn, "master")


In [157]:
model = NeuralNetwork().to(device)

parameters = list(model.parameters())
mu_1 = []
with torch.no_grad():
    for p in parameters:
        mu_1.append(torch.mean(p))
# mu_1 = torch.zeros(len(parameters), device=device)        
master = Master(model, mu_1, loss_fn) 
clients = [Client(NeuralNetwork().to(device), client_loaders[i], loss_fn, mu_1, 2) for i in range(num_clients)]


In [94]:
# Encoder 1

for i in range(1):
    # Master sends global parameters to the client
    for client in clients:
        client.set_parameters(master.model)
        # client.update_mean()
    
    # Clients are trained locally
    for client in clients:
        client.train()
    
    clients_data_protocol_1 = []
    for client in clients:
        final_vectors, mu = client.get_encoded_1(p=0.5)
        clients_data_protocol_1.append((final_vectors, mu))
    
    master.update_global_model_from_protocol_1(clients_data_protocol_1)

    master.test(test_dataloader) 

BATCH: 0 of 46.875 batches
loss: 2.305751  [  256/12000]
BATCH: 0 of 46.875 batches
loss: 0.527659  [  256/12000]
BATCH: 0 of 46.875 batches
loss: 2.302649  [  256/12000]
BATCH: 0 of 46.875 batches
loss: 0.371232  [  256/12000]
BATCH: 0 of 46.875 batches
loss: 2.300845  [  256/12000]
BATCH: 0 of 46.875 batches
loss: 0.509848  [  256/12000]
BATCH: 0 of 46.875 batches
loss: 2.307515  [  256/12000]
BATCH: 0 of 46.875 batches
loss: 0.499726  [  256/12000]
BATCH: 0 of 46.875 batches
loss: 2.303705  [  256/12000]
BATCH: 0 of 46.875 batches
loss: 0.393872  [  256/12000]


Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x7f307fbe5910>>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 775, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(

KeyboardInterrupt: 


KeyboardInterrupt: 

In [158]:
# Encoder 2
flag = False
m1 = None
m2 = None
for i in range(2):
    # Master sends global parameters to the client
    for client in clients:
        client.set_parameters(master.model)
        client.update_mean()
    
    # Clients are trained locally
    for client in clients:
        client.train()
    
    clients_data_protocol_2 = []
    for client in clients:
        final_vectors, mu, seed = client.get_encoded_2(k=1000)
        print(seed)
        clients_data_protocol_2.append((final_vectors, mu, seed))
    
    master.update_global_model_from_protocol_2(clients_data_protocol_2)
    if not flag:
        flag = True
        m1 = list(master.model.parameters())
    else:
        m2 = list(master.model.parameters())
    master.test(test_dataloader) 

BATCH: 0 of 46.875 batches
loss: 2.303441  [  256/12000]
BATCH: 0 of 46.875 batches
loss: 0.696943  [  256/12000]
BATCH: 0 of 46.875 batches
loss: 2.303581  [  256/12000]
BATCH: 0 of 46.875 batches
loss: 0.554367  [  256/12000]
BATCH: 0 of 46.875 batches
loss: 2.308671  [  256/12000]
BATCH: 0 of 46.875 batches
loss: 0.626522  [  256/12000]
BATCH: 0 of 46.875 batches
loss: 2.306719  [  256/12000]
BATCH: 0 of 46.875 batches
loss: 0.509755  [  256/12000]
BATCH: 0 of 46.875 batches
loss: 2.305079  [  256/12000]
BATCH: 0 of 46.875 batches
loss: 0.520982  [  256/12000]
41
41
41
41
41
AU
tensor([[-0.0214,  0.0237,  0.0168,  ..., -0.0039, -0.0227, -0.0193],
        [-0.0155,  0.0242,  0.0104,  ..., -0.0126,  0.0356, -0.0008],
        [ 0.0098,  0.0036,  0.0103,  ..., -0.0341,  0.0119, -0.0239],
        ...,
        [ 0.0245,  0.0310,  0.0008,  ...,  0.0177, -0.0197, -0.0199],
        [ 0.0161,  0.0150,  0.0232,  ...,  0.0238,  0.0290,  0.0313],
        [-0.0201,  0.0144,  0.0347,  ..., -0.0337

In [149]:
m2

[Parameter containing:
 tensor([[2.5442e-05, 2.5442e-05, 2.5442e-05,  ..., 2.5442e-05, 2.5442e-05,
          2.5442e-05],
         [2.5442e-05, 2.5442e-05, 2.5442e-05,  ..., 2.5442e-05, 2.5442e-05,
          2.5442e-05],
         [2.5442e-05, 2.5442e-05, 2.5442e-05,  ..., 2.5442e-05, 2.5442e-05,
          2.5442e-05],
         ...,
         [2.5442e-05, 2.5442e-05, 2.5442e-05,  ..., 2.5442e-05, 2.5442e-05,
          2.5442e-05],
         [2.5442e-05, 2.5442e-05, 2.5442e-05,  ..., 2.5442e-05, 2.5442e-05,
          2.5442e-05],
         [2.5442e-05, 2.5442e-05, 2.5442e-05,  ..., 2.5442e-05, 2.5442e-05,
          2.5442e-05]], device='cuda:0', requires_grad=True),
 Parameter containing:
 tensor([-6.6256e-02, -3.4606e-02, -5.4255e-02, -3.5483e-02, -2.3226e-02,
         -3.5855e-02, -1.0605e-02, -4.2455e-02, -3.7819e-02, -3.6189e-02,
         -4.0422e-02, -9.4494e-03, -5.0297e-02, -1.0827e-02, -2.1747e-02,
         -1.3506e-02, -3.8381e-02, -3.3967e-03, -4.2109e-02, -1.8381e-02,
         -3

In [136]:
decoded_vectors

[tensor([[-0.0119,  0.0179,  0.0064,  ..., -0.0062, -0.0387,  0.0264],
         [-0.0016, -0.0247,  0.0323,  ...,  0.0325, -0.0031,  0.0219],
         [ 0.0069,  0.0186,  0.0192,  ...,  0.0130, -0.0224,  0.0001],
         ...,
         [-0.0089, -0.0162, -0.0201,  ...,  0.0022,  0.0027, -0.0130],
         [ 0.0312, -0.0130, -0.0255,  ..., -0.0427,  0.0132, -0.0063],
         [ 0.0151, -0.0246,  0.0305,  ...,  0.0127,  0.0278,  0.0264]],
        device='cuda:0'),
 tensor([ 1.2529e-02, -4.6513e-02, -2.2060e-02, -1.3401e-01, -6.8725e-02,
          2.1339e-03, -5.9777e-03, -3.7459e-02,  2.2330e-02, -3.8302e-02,
         -1.4649e-05, -4.6191e-02, -9.5482e-02,  2.0120e-02, -9.8750e-03,
         -3.2757e-02, -1.8433e-01, -9.3645e-02, -5.5934e-02, -5.7587e-02,
         -1.1895e-01, -1.2388e-03, -6.8861e-02, -2.0461e-02, -2.1278e-02,
         -8.1022e-02, -1.4796e-02,  1.8898e-04, -3.5278e-02,  3.1165e-02,
         -7.7811e-02,  5.1968e-04,  9.5230e-04, -1.7101e-02, -1.0388e-01,
          6.624

In [60]:
master.test(test_dataloader) 

Test Error for client master: 
 Accuracy: 9.6%, Avg loss: 2.302585 



In [164]:
for i in range(len(clients)):
    clients[i].test(test_dataloader, i)

Test Error for client 0: 
 Accuracy: 93.8%, Avg loss: 0.199948 

Test Error for client 1: 
 Accuracy: 94.3%, Avg loss: 0.184125 

Test Error for client 2: 
 Accuracy: 94.2%, Avg loss: 0.184488 

Test Error for client 3: 
 Accuracy: 94.6%, Avg loss: 0.170945 

Test Error for client 4: 
 Accuracy: 94.0%, Avg loss: 0.196233 



In [110]:
print(list(model.parameters()))

[Parameter containing:
tensor([[-0.0034, -0.0293,  0.0045,  ..., -0.0706, -0.0303, -0.0350],
        [ 0.0277,  0.0182,  0.0111,  ..., -0.0082, -0.0259, -0.0036],
        [-0.0215,  0.0046, -0.0207,  ...,  0.0268,  0.0262, -0.0042],
        ...,
        [-0.0049,  0.0225, -0.0007,  ...,  0.0260, -0.0093,  0.0071],
        [ 0.0043,  0.0010, -0.0133,  ..., -0.0349, -0.0701, -0.0245],
        [-0.0099, -0.0102, -0.0349,  ...,  0.0226,  0.0103, -0.0269]],
       device='cuda:0', requires_grad=True), Parameter containing:
tensor([-9.9580e-02, -1.2118e-02, -4.5072e-02,  2.2311e-02,  1.5570e-02,
        -6.3918e-02, -4.1587e-02, -4.3540e-02,  1.0151e-02, -3.2280e-02,
        -2.6119e-02, -6.0773e-03, -3.0981e-02, -2.0027e-03, -2.6738e-02,
        -7.8128e-02,  3.8500e-02, -6.6534e-03, -3.7936e-02, -5.0390e-02,
        -1.0271e-02, -1.7515e-02,  6.2471e-03, -1.5648e-02, -1.3823e-02,
        -3.6772e-02, -1.3515e-02, -2.6886e-02, -8.3774e-03, -1.7551e-02,
        -9.0413e-03,  1.0268e-02, -4.7