In [1]:
from torch.utils.data import DataLoader
from benchmark.toolkits import CustomDataset
from benchmark.mnist.model.mlp import Model
import torch, json, os, numpy as np, copy
from torchvision import datasets, transforms

### MNIST data

In [2]:
# Load the MNIST train and test splits from the local data directory
# (download is disabled). Every split gets its own transform pipeline:
# tensor conversion followed by Normalize((0.1307,), (0.3081,)).
training_data, testing_data = (
    datasets.MNIST(
        root="./benchmark/mnist/data",
        train=is_train_split,
        download=False,
        transform=transforms.Compose(
            [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
        ),
    )
    # The train split is built first, then the test split.
    for is_train_split in (True, False)
)

### CIFAR10 data

In [None]:
# Optional alternative dataset.
#
# Loading CIFAR10 rebinds `training_data` and `testing_data`, i.e. it replaces
# the MNIST splits loaded in the previous cell. The remaining cells read MNIST
# index files and use the MNIST MLP, so this cell is opt-in: with the flag left
# at False a "Restart & Run All" keeps the MNIST data intact.
USE_CIFAR10 = False  # set to True to switch the notebook to CIFAR10

if USE_CIFAR10:
    # Same preprocessing for both splits: tensor conversion followed by
    # per-channel normalization with the constants below.
    cifar10_transform = transforms.Compose(
        [
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
        ]
    )
    training_data = datasets.CIFAR10(
        "./benchmark/cifar10/data",
        train=True,
        download=False,
        transform=cifar10_transform,
    )
    testing_data = datasets.CIFAR10(
        "./benchmark/cifar10/data",
        train=False,
        download=False,
        transform=cifar10_transform,
    )

### Training

In [3]:
def train(dataloader, model, loss_fn, optimizer, device):
    """Run one pass over ``dataloader``, updating ``model`` after every batch.

    Args:
        dataloader: Iterable yielding ``(inputs, targets)`` pairs.
        model: Module to optimize; it is switched to training mode first.
        loss_fn: Callable mapping ``(predictions, targets)`` to a scalar loss.
        optimizer: Optimizer whose ``step()`` is applied after each backward pass.
        device: Device every batch is moved to before the forward pass.

    Returns:
        list: One Python float loss value per batch, in iteration order.
    """
    model.train()
    batch_losses = []

    for inputs, targets in dataloader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        batch_loss = loss_fn(model(inputs), targets)

        # Clear stale gradients, backpropagate, then apply the update.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        batch_losses.append(batch_loss.item())

    return batch_losses


def test(model, testing_data, device="cuda"):
    test_loader = DataLoader(testing_data, batch_size=32, shuffle=True, drop_last=False)
    model = model.to(device)

    loss_fn = torch.nn.CrossEntropyLoss()

    size = len(test_loader.dataset)
    num_batches = len(test_loader)
    model.eval()
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in test_loader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    return correct

In [33]:
# Partition settings; both values are embedded in the index-folder path below
# (`alpha` in the dir(...) segment, `nclient` in the "<n>client" segment).
alpha, nclient = 0.1, 100
# Folder holding the per-client index JSON files for this partition.
folder = f"./dataset_idx/mnist/dirichlet/dir({alpha})_sparse/{nclient}client"

def read_data(folder_path):
    """Build per-client datasets and their pooled training set from JSON files.

    Reads ``mnist_sparse.json`` and ``mnist_sparse_test.json`` from
    ``folder_path``. Each file is loaded as a mapping keyed by client id; the
    value stored for a client is handed to ``CustomDataset`` together with the
    notebook-level ``training_data`` / ``testing_data``.
    NOTE(review): the values are presumably lists of sample indices into those
    datasets; confirm against the partitioning script.

    Args:
        folder_path: Directory containing the two JSON files.

    Returns:
        tuple: ``(num_client, training_set, testing_set, singleset)`` where
        ``training_set`` / ``testing_set`` hold one ``CustomDataset`` per
        client (in the key order of the training file) and ``singleset`` is a
        ``CustomDataset`` over all clients' training entries concatenated.

    Raises:
        KeyError: If a client id from the training file is absent from the
            test file.
    """
    # Context managers close both files even if JSON parsing fails.
    with open(f"{folder_path}/mnist_sparse.json", 'r') as train_file:
        training_jsons = json.load(train_file)
    with open(f"{folder_path}/mnist_sparse_test.json", 'r') as test_file:
        testing_jsons = json.load(test_file)

    training_set = []
    testing_set = []
    singleset_json = []

    for client_id, client_train_entries in training_jsons.items():
        training_set.append(CustomDataset(training_data, client_train_entries))
        testing_set.append(CustomDataset(testing_data, testing_jsons[client_id]))
        # Accumulate every client's training entries for the pooled dataset.
        singleset_json.extend(client_train_entries)

    singleset = CustomDataset(training_data, singleset_json)

    return len(training_set), training_set, testing_set, singleset

In [34]:
# Load the client count, the per-client train/test datasets and the pooled
# training set from the index files under `folder`.
num_client, training_set, testing_set, singleset = read_data(folder)

In [35]:
# Train one independent model per client, each starting from a copy of the same
# initial `global_model`, and record its accuracy on that client's own test set.

# Seed torch so weight initialization and DataLoader shuffling are repeatable
# from one run of this cell to the next.
SEED = 0
torch.manual_seed(SEED)

# Prefer the second GPU (the original setting), but fall back to the first GPU
# or the CPU so the cell also runs on machines with fewer devices.
if torch.cuda.is_available():
    device = 'cuda:1' if torch.cuda.device_count() > 1 else 'cuda:0'
else:
    device = 'cpu'

global_model = Model().to(device)
batch_size = 4
epochs = 500
accs = []  # one accuracy value per client, in client order

for client_id in range(num_client):
    print("    Client {} training... ".format(client_id), end="")
    # Training process
    my_training_dataset = training_set[client_id]
    my_testing_dataset = testing_set[client_id]

    # Every client starts from the same untouched initial weights.
    local_model = copy.deepcopy(global_model).to(device)

    train_dataloader = DataLoader(my_training_dataset, batch_size=batch_size, shuffle=True, drop_last=False)
    loss_fn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(local_model.parameters(), lr=1e-3)

    # Mean batch loss of each epoch; the printed value averages over all epochs.
    epoch_loss = []
    for t in range(epochs):
        epoch_loss.append(np.mean(train(train_dataloader, local_model, loss_fn, optimizer, device)))

    # Testing the local_model to its own data
    acc = test(local_model, my_testing_dataset, device=device)
    print(f"Done! Aver. round loss: {np.mean(epoch_loss):>.3f}, acc {acc:>.3f}")
    accs.append(acc)

    Client 0 training... Done! Aver. round loss: 0.093, acc 0.392
    Client 1 training... Done! Aver. round loss: 0.139, acc 0.250
    Client 2 training... Done! Aver. round loss: 0.223, acc 0.331
    Client 3 training... Done! Aver. round loss: 0.141, acc 0.250
    Client 4 training... Done! Aver. round loss: 0.109, acc 0.328
    Client 5 training... Done! Aver. round loss: 0.113, acc 0.357
    Client 6 training... Done! Aver. round loss: 0.097, acc 0.343
    Client 7 training... Done! Aver. round loss: 0.329, acc 0.143
    Client 8 training... Done! Aver. round loss: 0.202, acc 0.354
    Client 9 training... Done! Aver. round loss: 0.197, acc 0.224
    Client 10 training... Done! Aver. round loss: 0.122, acc 0.338
    Client 11 training... Done! Aver. round loss: 0.108, acc 0.313
    Client 12 training... Done! Aver. round loss: 0.291, acc 0.151
    Client 13 training... Done! Aver. round loss: 0.107, acc 0.323
    Client 14 training... Done! Aver. round loss: 0.136, acc 0.268
    C

In [36]:
# Mean and standard deviation of the per-client accuracies collected in `accs`.
np.mean(accs), np.std(accs)

(0.2886185161511579, 0.07459735865549506)