1. Load the MNIST dataset (or any other dataset like HAM 10000)
2. Extract two subsets of 600 data points each (without intersection)
3. Create a simple Convolutional Neural Network (2 convolutional layers and 2 dense layers, for example)
4. Create a function average_model_parameters(models: iterable, average_weight): iterable that takes a list of models as an argument and returns the weighted average of the parameters of each model.
5. Create a function that updates the parameters of a model from a list of values
6. Create a script/code/function that reproduces Algorithm 1, considering that both models are on your machine. Use an average_weight=[1/2, 1/2]. Reuse the same setup as in the article (50 examples per local batch)
7. Train your models without initializing the common parameters and measure the performance on the entire dataset.
8. Train your models with the initialization of common parameters and verify that the performance is better.
9. Reduce the number of data points in each sub-batch. What is the minimum number of data points necessary for the final model to have acceptable performance? Repeat the study on CIFAR-10

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [3]:
# 1. Load the data
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])
train_dataset = datasets.MNIST(root="./data", train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root="./data", train=False, transform=transform, download=True)

In [4]:
# 2. Extract two subsets of 600 data points each (without intersection)
subset1, subset2 = random_split(train_dataset, [600, len(train_dataset) - 600])
subset2, rest = random_split(subset2, [600, len(subset2) - 600])

train_loader1 = DataLoader(subset1, batch_size=50, shuffle=True)
train_loader2 = DataLoader(subset2, batch_size=50, shuffle=True)

In [5]:
# 3. Create a simple Convolutional Neural Network (2 convolutional layers and 2 dense layers, for example)
class CNN(nn.Module):
    def __init__(self, input_size=1):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(input_size, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5)
        self.fc1 = nn.Linear(1024, 128)
        self.fc2 = nn.Linear(128, 10)
        
    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2(x), 2))
        x = x.view(-1, 1024)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x
        
        
def train_cnn(model, train_loader, epochs=10, lr=1e-3):
    model.to(device)
    model.train()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr)
    for epoch in range(epochs):
        avg_loss = 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            avg_loss += loss.item()
        avg_loss /= len(train_loader)
        print(f"Epoch {epoch}, avg_loss: {avg_loss}")

def evaluate_cnn(model, test_loader):
    model.to(device)
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct / total

In [6]:
# 4. Create a function average_model_parameters(models: iterable, average_weight): iterable that takes a list of models as an argument and returns the weighted average of the parameters of each model.
def average_model_parameters(models: list, average_weight: list[float]):
    n_models = len(models)
    for param in zip(*[model.parameters() for model in models]):
        tmp = torch.zeros_like(param[0])
        for i in range(n_models):
            tmp += param[i] * average_weight[i]
        yield tmp

In [7]:
# 5. Create a function that updates the parameters of a model from a list of values
def update_model_parameters(model, values):
    for model_params, value in zip(model.parameters(), values):
        model_params.data = value

In [8]:
# 6. Create a script/code/function that reproduces Algorithm 1, considering that both models are on your machine. Use an average_weight=[1/2, 1/2]. Reuse the same setup as in the article (50 examples per local batch)
import random
def server_update(models: list, train_loaders, C = 0.8, K = 10, n_rounds = 2, input_size=1):
    central_model = CNN(input_size)
    for _ in range(n_rounds):
        m = max(int(C * K), 1)
        S = random.sample(range(K), m)
        models_selected = [models[k] for k in S]
        for client_k in S:
            client_update(models, train_loaders, client_k, central_model.state_dict())
        update_model_parameters(central_model, list(average_model_parameters(models_selected, [1/m] * m)))
    return central_model
        
def client_update(models: list, train_loaders: list, k: int, w):
    model = models[k]
    model.load_state_dict(w)
    
    model.to(device)
    model.train()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    for epoch in range(10):
        for images, labels in train_loaders[k]:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

In [9]:
# 7. Train your models without initializing the common parameters and measure the performance on the entire dataset.
cnn1 = CNN()
cnn2 = CNN()
train_cnn(cnn1, train_loader1, epochs=20)
train_cnn(cnn2, train_loader2, epochs=20)

acc_1 = evaluate_cnn(cnn1, DataLoader(test_dataset, batch_size=50))
acc_2 = evaluate_cnn(cnn2, DataLoader(test_dataset, batch_size=50))
print(f"Accuracy of model 1: {acc_1}")
print(f"Accuracy of model 2: {acc_2}")

average_model = CNN()
update_model_parameters(average_model, list(average_model_parameters([cnn1, cnn2], [0.5, 0.5])))
acc_avg = evaluate_cnn(average_model, DataLoader(test_dataset, batch_size=50))
print(f"Accuracy of the average model: {acc_avg}")

Epoch 0, avg_loss: 2.0143082439899445
Epoch 1, avg_loss: 0.999852642416954
Epoch 2, avg_loss: 0.5145983522137006
Epoch 3, avg_loss: 0.36232953642805416
Epoch 4, avg_loss: 0.2512669339776039
Epoch 5, avg_loss: 0.1779667946199576
Epoch 6, avg_loss: 0.14279506107171377
Epoch 7, avg_loss: 0.09343061254670222
Epoch 8, avg_loss: 0.06588266743347049
Epoch 9, avg_loss: 0.044175000550846256
Epoch 10, avg_loss: 0.032474757016946874
Epoch 11, avg_loss: 0.023645147603626054
Epoch 12, avg_loss: 0.014716748303423325
Epoch 13, avg_loss: 0.011454202137732258
Epoch 14, avg_loss: 0.008813771981901178
Epoch 15, avg_loss: 0.007679475781818231
Epoch 16, avg_loss: 0.005148227928051104
Epoch 17, avg_loss: 0.004345070008033265
Epoch 18, avg_loss: 0.003678150572037945
Epoch 19, avg_loss: 0.0032373669770701476
Epoch 0, avg_loss: 2.0800111095110574
Epoch 1, avg_loss: 1.0061410864194233
Epoch 2, avg_loss: 0.5042655194799105
Epoch 3, avg_loss: 0.3519003912806511
Epoch 4, avg_loss: 0.22582276413838068
Epoch 5, avg_

Simple averaging is bad.

In [10]:
# 8. Train your models with the initialization of common parameters and verify that the performance is better.
K = 2
models = [CNN() for _ in range(K)]
train_loaders = [train_loader1, train_loader2]
server_model = server_update(models, train_loaders, C=0.8, K=K, n_rounds=2)
acc_server = evaluate_cnn(server_model, DataLoader(test_dataset, batch_size=50))
client_models_acc = [evaluate_cnn(model, DataLoader(test_dataset, batch_size=50)) for model in models]

In [11]:
print(f"Accuracy of model 1: {client_models_acc[0]}")
print(f"Accuracy of model 2: {client_models_acc[1]}")
print(f"Accuracy of the server model: {acc_server}")

Accuracy of model 1: 0.9539
Accuracy of model 2: 0.9351
Accuracy of the server model: 0.9539


Much better!

In [12]:
# 9. Reduce the number of data points in each sub-batch. What is the minimum number of data points necessary for the final model to have acceptable performance?
def test_different_batch_sizes(K=2, C=0.8, n_rounds=2):
    batch_sizes = [1, 5, 10, 20, 50]
    models = [CNN() for _ in range(K)]
    for batch_size in batch_sizes:
        train_loaders = [DataLoader(subset, batch_size=batch_size, shuffle=True) for subset in [subset1, subset2]]
        server_model = server_update(models, train_loaders, C=C, K=K, n_rounds=n_rounds)
        acc_server = evaluate_cnn(server_model, DataLoader(test_dataset, batch_size=50))
        print(f"Accuracy of the server model with batch size {batch_size}: {acc_server}")

test_different_batch_sizes()

Accuracy of the server model with batch size 1: 0.9192
Accuracy of the server model with batch size 5: 0.9498
Accuracy of the server model with batch size 10: 0.9512
Accuracy of the server model with batch size 20: 0.9559
Accuracy of the server model with batch size 50: 0.9555


Reducing the sub-batch size doesn't seem to affect the performance whatsoever.

In [13]:
# Repeat the study with cifar10
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

train_dataset = datasets.CIFAR10(root="./data", train=True, transform=transform, download=True)
test_dataset = datasets.CIFAR10(root="./data", train=False, transform=transform, download=True)

subset1, subset2 = random_split(train_dataset, [600, len(train_dataset) - 600])
subset2, rest = random_split(subset2, [600, len(subset2) - 600])

train_loader1 = DataLoader(subset1, batch_size=50, shuffle=True)
train_loader2 = DataLoader(subset2, batch_size=50, shuffle=True)

Files already downloaded and verified
Files already downloaded and verified


In [14]:
class Cifar10CNN(nn.Module):
    def __init__(self):
        super(Cifar10CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5)
        self.fc1 = nn.Linear(1600, 128)
        self.fc2 = nn.Linear(128, 10)
        
    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2(x), 2))
        x = x.view(-1, 1600)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [15]:
def server_update(models: list, train_loaders, C = 0.8, K = 10, n_rounds = 2, input_size=1):
    central_model = Cifar10CNN()
    for _ in range(n_rounds):
        m = max(int(C * K), 1)
        S = random.sample(range(K), m)
        models_selected = [models[k] for k in S]
        for client_k in S:
            client_update(models, train_loaders, client_k, central_model.state_dict())
        update_model_parameters(central_model, list(average_model_parameters(models_selected, [1/m] * m)))
    return central_model
        
def client_update(models: list, train_loaders: list, k: int, w):
    model = models[k]
    model.load_state_dict(w)
    
    model.to(device)
    model.train()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    for epoch in range(10):
        for images, labels in train_loaders[k]:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

In [16]:
def test_different_batch_sizes_cifar(K=2, C=0.8, n_rounds=2):
    batch_sizes = [1, 5, 10, 20, 50]
    models = [Cifar10CNN() for _ in range(K)]
    for batch_size in batch_sizes:
        train_loaders = [DataLoader(subset, batch_size=batch_size, shuffle=True) for subset in [subset1, subset2]]
        server_model = server_update(models, train_loaders, C=C, K=K, n_rounds=n_rounds)
        acc_server = evaluate_cnn(server_model, DataLoader(test_dataset, batch_size=50))
        print(f"Accuracy of the server model with batch size {batch_size}: {acc_server}")

test_different_batch_sizes_cifar()

Accuracy of the server model with batch size 1: 0.3002
Accuracy of the server model with batch size 5: 0.3386
Accuracy of the server model with batch size 10: 0.4101
Accuracy of the server model with batch size 20: 0.3678
Accuracy of the server model with batch size 50: 0.3644


In [17]:
simple_model = Cifar10CNN()
train_cnn(simple_model, train_loader1, epochs=20)
acc_simple = evaluate_cnn(simple_model, DataLoader(test_dataset, batch_size=50))
print(f"Accuracy of the simple model: {acc_simple}")

Epoch 0, avg_loss: 2.253656884034475
Epoch 1, avg_loss: 2.0900201201438904
Epoch 2, avg_loss: 1.9536284903685253
Epoch 3, avg_loss: 1.8264440695444744
Epoch 4, avg_loss: 1.6993524432182312
Epoch 5, avg_loss: 1.5973156690597534
Epoch 6, avg_loss: 1.4910090565681458
Epoch 7, avg_loss: 1.3618240753809612
Epoch 8, avg_loss: 1.2309393286705017
Epoch 9, avg_loss: 1.1016557763020198
Epoch 10, avg_loss: 1.0436801314353943
Epoch 11, avg_loss: 0.9547400325536728
Epoch 12, avg_loss: 0.8242685794830322
Epoch 13, avg_loss: 0.6593535989522934
Epoch 14, avg_loss: 0.5840179920196533
Epoch 15, avg_loss: 0.4907771398623784
Epoch 16, avg_loss: 0.4069172367453575
Epoch 17, avg_loss: 0.3123160886267821
Epoch 18, avg_loss: 0.20530709127585092
Epoch 19, avg_loss: 0.1638720209399859
Accuracy of the simple model: 0.3461


The performance are worse than for mnist but training the model as is without client-server, perform the same?