In [9]:
from torch.utils.data import DataLoader
from benchmark.toolkits import CustomDataset
from benchmark.cifar10.model.mlp import Model
import torch, json, os, numpy as np, copy
from torchvision import datasets, transforms

### MNIST data

In [2]:
# MNIST train/test splits, converted to tensors and normalised with a single
# (mean, std) pair for the one grey channel.
# download=False: the files are expected to already be under ./benchmark/mnist/data.
# NOTE: the CIFAR10 cell further down assigns the same `training_data` /
# `testing_data` names, so whichever data cell ran last is the one in effect.
mnist_kwargs = dict(
    root="./benchmark/mnist/data",
    download=False,
    transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]),
)
training_data = datasets.MNIST(train=True, **mnist_kwargs)
testing_data = datasets.MNIST(train=False, **mnist_kwargs)

### CIFAR10 data

In [2]:
# CIFAR10 train/test splits, converted to tensors and normalised per RGB channel.
# download=False: the files are expected to already be under ./benchmark/cifar10/data.
# NOTE: this rebinds `training_data` / `testing_data`, replacing whatever the
# MNIST cell above assigned to those names.
cifar10_root = "./benchmark/cifar10/data"
cifar10_transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))]
)
training_data = datasets.CIFAR10(cifar10_root, train=True, download=False, transform=cifar10_transform)
testing_data = datasets.CIFAR10(cifar10_root, train=False, download=False, transform=cifar10_transform)

### Training

In [3]:
def train(dataloader, model, loss_fn, optimizer, device):
    """Run one optimisation pass over `dataloader`, updating `model` in place.

    Args:
        dataloader: iterable yielding ``(inputs, targets)`` pairs of tensors.
        model: module to optimise; it is switched to training mode.
        loss_fn: callable ``loss_fn(predictions, targets)`` returning a scalar tensor.
        optimizer: optimiser bound to ``model``'s parameters.
        device: device each batch is moved to before the forward pass.

    Returns:
        list[float]: the loss of every batch, in iteration order
        (empty when `dataloader` yields nothing).
    """
    model.train()
    batch_losses = []

    for inputs, targets in dataloader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        batch_loss = loss_fn(model(inputs), targets)

        # Clear stale gradients, back-propagate, then take one optimiser step.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        batch_losses.append(batch_loss.item())

    return batch_losses


def test(model, testing_data, device="cuda"):
    test_loader = DataLoader(testing_data, batch_size=32, shuffle=True, drop_last=False)
    model = model.to(device)

    loss_fn = torch.nn.CrossEntropyLoss()

    size = len(test_loader.dataset)
    num_batches = len(test_loader)
    model.eval()
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in test_loader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    return correct

In [4]:
def read_data(folder_path):
    """Build the per-client datasets described by the JSON files in `folder_path`.

    Reads ``cifar10_sparse.json`` (training) and ``cifar10_sparse_test.json``
    (testing). Both are JSON objects keyed by client id; each value is handed
    to ``CustomDataset`` together with the module-level ``training_data`` /
    ``testing_data`` (presumably a list of sample indices; confirm against
    ``CustomDataset``).

    Args:
        folder_path: directory containing the two JSON files.

    Returns:
        tuple: ``(num_client, training_set, testing_set, singleset)`` where
        ``training_set`` and ``testing_set`` are lists with one dataset per
        client, in the key order of the training JSON, and ``singleset`` is a
        single dataset built from the concatenation of every client's
        training entry.

    Raises:
        KeyError: if a client id in the training JSON is missing from the
            testing JSON.
    """
    # Context managers close the files promptly instead of leaving the
    # handles open until garbage collection.
    with open(f"{folder_path}/cifar10_sparse.json", 'r') as train_file:
        training_jsons = json.load(train_file)
    with open(f"{folder_path}/cifar10_sparse_test.json", 'r') as test_file:
        testing_jsons = json.load(test_file)

    training_set = []
    testing_set = []
    singleset_json = []

    # One pass over the clients builds both the per-client datasets and the
    # pooled entry list.
    for client_id, client_train_entry in training_jsons.items():
        training_set.append(CustomDataset(training_data, client_train_entry))
        testing_set.append(CustomDataset(testing_data, testing_jsons[client_id]))
        singleset_json.extend(client_train_entry)

    singleset = CustomDataset(training_data, singleset_json)

    return len(training_set), training_set, testing_set, singleset

In [7]:
# Select which pre-computed partition to load. Both values are only used to
# build the folder name below (alpha is presumably the Dirichlet concentration
# used when the partition was generated; confirm against the generator script).
alpha, nclient = 1, 100
folder = f"./dataset_idx/cifar10/dirichlet/dir_{alpha}_sparse/{nclient}client"

# read_data returns: client count, per-client training sets, per-client
# testing sets, and one dataset pooling every client's training entries.
num_client, training_set, testing_set, singleset = read_data(folder)

In [10]:
# Train one independent model per client on that client's own data and record
# its accuracy on the client's own test split.

# Preferred device is the second GPU. Fall back instead of crashing when the
# host has fewer than two GPUs or no CUDA at all.
device = 'cuda:1'
if not torch.cuda.is_available():
    device = 'cpu'
    print(f"CUDA is not available, falling back to {device}")
elif torch.cuda.device_count() < 2:
    device = 'cuda:0'
    print(f"Only one GPU is visible, falling back to {device}")

batch_size = 4
epochs = 200
accs = []  # one accuracy value per client, in client order

for client_id in range(num_client):
    print("    Client {} training... ".format(client_id), end="")
    # Training process
    my_training_dataset = training_set[client_id]
    my_testing_dataset = testing_set[client_id]

    # A fresh model, loss and optimiser per client: nothing is shared across clients.
    local_model = Model().to(device)

    train_dataloader = DataLoader(my_training_dataset, batch_size=batch_size, shuffle=True, drop_last=False)
    loss_fn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(local_model.parameters(), lr=1e-3)

    # Mean batch loss of each epoch; train() returns the per-batch losses.
    epoch_loss = [
        np.mean(train(train_dataloader, local_model, loss_fn, optimizer, device))
        for _ in range(epochs)
    ]

    # Testing the local_model to its own data
    acc = test(local_model, my_testing_dataset, device=device)
    # The reported loss is averaged over all epochs, not just the last one.
    print(f"Done! Aver. round loss: {np.mean(epoch_loss):>.3f}, acc {acc:>.3f}")
    accs.append(acc)

    Client 0 training... Done! Aver. round loss: 0.182, acc 0.500
    Client 1 training... Done! Aver. round loss: 0.104, acc 0.806
    Client 2 training... Done! Aver. round loss: 0.160, acc 0.682
    Client 3 training... Done! Aver. round loss: 0.162, acc 0.545
    Client 4 training... Done! Aver. round loss: 0.183, acc 0.528
    Client 5 training... Done! Aver. round loss: 0.120, acc 0.950
    Client 6 training... Done! Aver. round loss: 0.167, acc 0.375
    Client 7 training... Done! Aver. round loss: 0.140, acc 0.797
    Client 8 training... Done! Aver. round loss: 0.214, acc 0.600
    Client 9 training... Done! Aver. round loss: 0.148, acc 1.000
    Client 10 training... Done! Aver. round loss: 0.144, acc 0.712
    Client 11 training... Done! Aver. round loss: 0.188, acc 0.607
    Client 12 training... Done! Aver. round loss: 0.070, acc 0.985
    Client 13 training... Done! Aver. round loss: 0.130, acc 0.850
    Client 14 training... Done! Aver. round loss: 0.089, acc 0.812
    C

In [11]:
# Summary over clients: mean accuracy and its population standard deviation
# (numpy's default ddof=0).
per_client_acc = np.asarray(accs)
per_client_acc.mean(), per_client_acc.std()

(0.6889037238668623, 0.1818306580866311)