# This tutorial is extracted from  [Preserving Data Privacy in Deep Learning | Part 1](https://towardsdatascience.com/preserving-data-privacy-in-deep-learning-part-1-a04894f78029)


## 1. Importing libraries


In [2]:
import os
import random
from tqdm import tqdm
import numpy as np
import torch, torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data.dataset import Dataset, Subset

# Turn on cuDNN benchmark mode. Per the PyTorch documentation this lets cuDNN
# try several convolution algorithms and keep the fastest one; it is a speed
# setting only and has no effect when cuDNN is not in use.
torch.backends.cudnn.benchmark = True


## 2. Hyperparameters for federated learning 


In [None]:
# Federated-learning configuration.
num_clients = 20  # total number of clients the training data is split across
num_selected = 6  # clients picked at random from num_clients in each communication round
num_rounds = 20  # number of communication rounds
epochs = 5  # local training epochs run on each selected client per round
batch_size = 32  # mini-batch size used by the client loaders and the test loader

In [3]:
# Use the GPU when PyTorch can see one (recommended); otherwise run on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


## 3. Creating desired data distribution among clients 

In this case, the data is distributed evenly (balanced) among the clients.

In [None]:
# Image augmentation (applied to the training images only)
transform_train = transforms.Compose(
    [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        # Per-channel mean and standard deviation passed to Normalize
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ]
)

# Loading CIFAR10 using torchvision.datasets (downloaded if not already present)
traindata = datasets.CIFAR10(
    "./experiments/data",
    train=True,
    download=True,
    transform=transform_train,
)

# Train on a random subset of at most `length_dataset` images. The subset size
# is rounded down to a multiple of num_clients so that every client receives
# the same number of images and the lengths handed to random_split sum exactly
# to the subset size (random_split raises when they do not).
length_dataset = 1000
samples_per_client = min(length_dataset, len(traindata)) // num_clients
indices = np.random.permutation(len(traindata))[: samples_per_client * num_clients]
traindata = Subset(traindata, indices)

# Dividing the training data into num_clients, with each client having equal number of images
traindata_split = torch.utils.data.random_split(
    traindata, [samples_per_client] * num_clients
)

# Creating one pytorch loader per client; train_loader[i] serves client i
train_loader = [
    torch.utils.data.DataLoader(x, batch_size=batch_size, shuffle=True)
    for x in traindata_split
]

# Normalizing the test images (no augmentation at evaluation time)
transform_test = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ]
)

# Loading the test images and wrapping them in a test_loader. The pipeline
# defined above is reused instead of being rebuilt inline, and the loader is
# not shuffled: evaluation only needs one pass over the test set, and a fixed
# order keeps it reproducible.
test_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10(
        "./experiments/data",
        train=False,
        transform=transform_test,
    ),
    batch_size=batch_size,
    shuffle=False,
)



## 4. Neural Network model 


In [None]:
# Layer layouts of the supported VGG variants. An integer is the number of
# output channels of a 3x3 convolution block; "M" inserts a 2x2 max-pooling layer.
cfg = {
    "VGG11": [64, "M", 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"],
    "VGG13": [
        64, 64, "M",
        128, 128, "M",
        256, 256, "M",
        512, 512, "M",
        512, 512, "M",
    ],
    "VGG16": [
        64, 64, "M",
        128, 128, "M",
        256, 256, 256, "M",
        512, 512, 512, "M",
        512, 512, 512, "M",
    ],
    "VGG19": [
        64, 64, "M",
        128, 128, "M",
        256, 256, 256, 256, "M",
        512, 512, 512, 512, "M",
        512, 512, 512, 512, "M",
    ],
}


class VGG(nn.Module):
    """VGG-style convolutional classifier that outputs log-probabilities over 10 classes.

    ``vgg_name`` must be a key of the module-level ``cfg`` table; it selects
    the layout of the convolutional feature extractor.
    """

    def __init__(self, vgg_name):
        super().__init__()
        # Convolutional feature extractor built from the chosen layout.
        self.features = self._make_layers(cfg[vgg_name])
        # Fully connected head: 512 -> 512 -> 512 -> 10.
        head = [
            nn.Linear(512, 512),
            nn.ReLU(inplace=True),
            nn.Linear(512, 512),
            nn.ReLU(inplace=True),
            nn.Linear(512, 10),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Return the per-class log-softmax scores for the batch ``x``."""
        feats = self.features(x)
        flat = feats.view(feats.size(0), -1)  # one flat feature vector per sample
        return F.log_softmax(self.classifier(flat), dim=1)

    def _make_layers(self, cfg):
        """Build the feature extractor described by the layout list ``cfg``."""
        stack = []
        channels = 3  # the first convolution takes a 3-channel input
        for spec in cfg:
            if spec == "M":
                stack.append(nn.MaxPool2d(kernel_size=2, stride=2))
                continue
            # Convolution block: conv -> batch norm -> ReLU.
            stack.extend(
                (
                    nn.Conv2d(channels, spec, kernel_size=3, padding=1),
                    nn.BatchNorm2d(spec),
                    nn.ReLU(inplace=True),
                )
            )
            channels = spec
        stack.append(nn.AvgPool2d(kernel_size=1, stride=1))
        return nn.Sequential(*stack)


## 5. Helper functions for Federated training


In [None]:
def client_update(client_model, optimizer, train_loader, epoch=5):
    """Train ``client_model`` on one client's data and return the last batch loss.

    Args:
        client_model: Model trained in place. Its output is fed to
            ``F.nll_loss``, so it is expected to produce log-probabilities.
        optimizer: Optimizer whose ``zero_grad``/``step`` are called for every
            batch (assumed to wrap ``client_model``'s parameters; the caller
            is responsible for pairing them).
        train_loader: Iterable yielding ``(data, target)`` batches.
        epoch: Number of passes over ``train_loader``.

    Returns:
        The loss of the final processed batch as a Python float, or ``0.0``
        when no batch was processed (``epoch == 0`` or an empty loader).
    """
    # Switch the model being trained into training mode. This must be the
    # argument itself, not whatever module-level `model` happens to exist.
    client_model.train()
    loss = None
    for _ in range(epoch):
        for data, target in train_loader:
            # Batches are moved to the module-level `device`.
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = client_model(data)
            loss = F.nll_loss(output, target)
            loss.backward()
            optimizer.step()
    return loss.item() if loss is not None else 0.0


def server_aggregate(global_model, client_models):
    """Average the client weights into ``global_model`` and re-sync the clients.

    Every entry of the state dict (parameters and buffers) is replaced by the
    element-wise mean of that entry across ``client_models``. The averaged
    weights are loaded into ``global_model`` and then copied back into every
    client model, so all models hold identical weights afterwards.

    Args:
        global_model: Model that receives the averaged weights (updated in place).
        client_models: Non-empty sequence of models with the same state-dict
            layout as ``global_model`` (each is updated in place).
    """
    global_dict = global_model.state_dict()
    # Build each client's state dict once instead of once per key.
    client_states = [client.state_dict() for client in client_models]
    for key, current in global_dict.items():
        averaged = torch.stack([state[key].float() for state in client_states], 0).mean(0)
        # The mean is computed in float; cast back so integer buffers
        # (e.g. BatchNorm's num_batches_tracked) keep their original dtype.
        global_dict[key] = averaged.to(current.dtype)
    global_model.load_state_dict(global_dict)

    # Broadcast the aggregated weights back to every client.
    synced = global_model.state_dict()
    for client in client_models:
        client.load_state_dict(synced)


def test(global_model, test_loader):
    """Evaluate ``global_model`` on ``test_loader``.

    Args:
        global_model: Model to evaluate. Its output is fed to ``F.nll_loss``,
            so it is expected to produce log-probabilities.
        test_loader: Loader yielding ``(data, target)`` batches and exposing
            a ``dataset`` attribute whose length is the number of samples.

    Returns:
        A ``(test_loss, accuracy)`` tuple: the summed loss and the number of
        correct predictions, each divided by ``len(test_loader.dataset)``.
    """
    # Put the model under evaluation into eval mode. This must be the argument
    # itself, not whatever module-level `model` happens to exist.
    global_model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            # Batches are moved to the module-level `device`.
            data, target = data.to(device), target.to(device)
            output = global_model(data)
            test_loss += F.nll_loss(
                output, target, reduction="sum"
            ).item()  # sum up batch loss
            pred = output.argmax(
                dim=1, keepdim=True
            )  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    acc = correct / len(test_loader.dataset)

    return test_loss, acc


## 6. Initializing models and optimizer 


In [None]:
# Global (server) model.
global_model = VGG("VGG11").to(device)

# One client model per selected slot; each starts from the global weights.
client_models = [VGG("VGG11").to(device) for _ in range(num_selected)]
shared_weights = global_model.state_dict()
# NOTE: this loop leaves `model` bound at module scope; keep the name unless
# you have checked that nothing else in the file reads it.
for model in client_models:
    model.load_state_dict(shared_weights)  # initial synchronizing with global model

# One SGD optimizer per client model, index-aligned with client_models.
opt = [optim.SGD(client.parameters(), lr=0.1) for client in client_models]


## 7. Training

In [None]:
# Per-round history of the federated run.
losses_train, losses_test = [], []
acc_train, acc_test = [], []

# Running FL
for r in range(num_rounds):
    # Sample the clients that take part in this round.
    client_idx = np.random.permutation(num_clients)[:num_selected]

    # Local training: model/optimizer slot i trains on the data of the i-th sampled client.
    loss = 0
    for i in tqdm(range(num_selected)):
        shard = train_loader[client_idx[i]]
        loss += client_update(client_models[i], opt[i], shard, epoch=epochs)
    losses_train.append(loss)  # stored as the sum over the selected clients

    # Average the client weights into the global model and re-sync the clients.
    server_aggregate(global_model, client_models)

    # Evaluate the aggregated model on the test set.
    test_loss, acc = test(global_model, test_loader)
    losses_test.append(test_loss)
    acc_test.append(acc)

    print("%d-th round" % r)
    round_stats = (loss / num_selected, test_loss, acc)
    print("average train loss %0.3g | test loss %0.3g | test acc: %0.3f" % round_stats)
