In [None]:
# Install the packages this notebook relies on via IPython shell escapes.
# No versions are pinned here, so each run installs whatever pip currently resolves.
# NOTE(review): `%pip install` would target the running kernel's environment - confirm before switching.
!pip3 install 'torch'
!pip3 install 'torchvision'
!pip3 install 'Pillow-SIMD'
!pip3 install 'tqdm'




PARAMETERS

In [None]:
DEVICE = 'cuda' # Device every model and batch is moved to: 'cuda' or 'cpu'

NUM_CLASSES = 10   # Number of target classes (CIFAR-10); also the range classes are sampled from in the non-IID split

BATCH_SIZE = 100     # Larger batch sizes allow larger learning rates. An empirical heuristic suggests that, when changing
                     # the batch size, the learning rate should change by the same factor to obtain comparable results

LR = 0.001      # The initial learning rate (used by the centralized and by every client optimizer)
MOMENTUM = 0.9       # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-5  # Regularization strength. NOTE(review): no optimizer in the visible cells receives this value

NUM_EPOCHS = 30   # Training epochs for the centralized model AND local epochs per client in each federated round
STEP_SIZE = 5    # How many epochs before decreasing the learning rate (StepLR, centralized training only)
GAMMA = 0.1          # Multiplicative factor for the learning rate step-down

LOG_FREQUENCY = 30   # NOTE(review): not referenced by any visible cell

MODEL = "LeNet5"   # Architecture returned by get_net(): "LeNet5" or "mobilenetV2"

NUM_CLIENTS = 100   # Size of the client pool

NUM_SELECTED_CLIENTS = 3 # Number of clients that take part in each round

ROUNDS = 6   # Number of federated communication rounds

PREPARE DATASET

In [None]:
from torchvision import transforms as transforms
import torchvision
import torch.utils.data

# Both splits only get converted to tensors; no augmentation or normalization is applied.
train_transform = transforms.Compose([transforms.ToTensor()])
test_transform = transforms.Compose([transforms.ToTensor()])

# Download (when missing) and load the CIFAR-10 train and test splits.
train_set = torchvision.datasets.CIFAR10(root='./CIFAR10', train=True, download=True, transform=train_transform)
test_set = torchvision.datasets.CIFAR10(root='./CIFAR10', train=False, download=True, transform=test_transform)

# The training loader reshuffles at every epoch so SGD does not see the batches in
# the same fixed order each time; evaluation does not depend on order, so the test
# loader stays sequential.
train_loader = torch.utils.data.DataLoader(dataset=train_set, batch_size=BATCH_SIZE, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_set, batch_size=BATCH_SIZE, shuffle=False)

# Human-readable names of the 10 labels, in label-index order.
CLASSES = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# Check dataset sizes
print('Train Dataset: {}'.format(len(train_set)))
print('Test Dataset: {}'.format(len(test_set)))


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./CIFAR10/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./CIFAR10/cifar-10-python.tar.gz to ./CIFAR10
Files already downloaded and verified
Train Dataset: 50000
Test Dataset: 10000


**DEFINE MODELS**

*Here we define the networks (LeNet5, mobilenetV2)*

In [None]:
import torch.backends.cudnn as cudnn
import numpy as np
import torch.nn as nn
import argparse
from statistics import mean 
from tqdm import tqdm
import torch.nn.functional as func

#DEFINE NETWORKS
class LeNet5(nn.Module):
    """LeNet-5 style CNN for 3-channel 32x32 images.

    Args:
        num_classes: size of the output layer. Defaults to 10, the value that
            was previously hard-coded, so existing ``LeNet5()`` calls are unchanged.

    Layer attribute names (``conv1``, ``conv2``, ``fc1``, ``fc2``, ``fc3``) are kept
    as-is because they are the keys of the ``state_dict`` exchanged between models.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        # 16*5*5 matches a 32x32 input: 32 -> 28 (conv) -> 14 (pool) -> 10 (conv) -> 5 (pool).
        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Return the raw (un-normalized) class scores for a batch of images."""
        x = func.max_pool2d(func.relu(self.conv1(x)), 2)
        x = func.max_pool2d(func.relu(self.conv2(x)), 2)
        # Flatten everything except the batch dimension before the dense layers.
        x = x.view(x.size(0), -1)
        x = func.relu(self.fc1(x))
        x = func.relu(self.fc2(x))
        return self.fc3(x)

def mobilenetV2(pretrain=True, num_classes=10):
  """Load MobileNetV2 from torch.hub and size its classifier for this task.

  Args:
    pretrain: forwarded to the hub entry point as ``pretrained``.
    num_classes: number of outputs of the final linear layer. The hub model
      ships with the ImageNet head, so the last layer is replaced (freshly
      initialized) whenever its size differs. Pass ``None`` to keep the
      original head untouched.

  Returns:
    The MobileNetV2 module.
  """
  model = torch.hub.load('pytorch/vision:v0.10.0', 'mobilenet_v2', pretrained=pretrain)
  head = model.classifier[1]  # classifier is Sequential(Dropout, Linear) in torchvision
  if num_classes is not None and head.out_features != num_classes:
    model.classifier[1] = nn.Linear(head.in_features, num_classes)
  return model

def get_net():
  """Build a fresh network of the architecture named by the global ``MODEL``.

  Returns:
    A new ``LeNet5`` or MobileNetV2 instance.

  Raises:
    ValueError: if ``MODEL`` is not one of the supported names. Previously the
      function fell through and returned ``None``, which only failed later at
      the first use of the model.
  """
  if MODEL == "LeNet5":
    return LeNet5()
  if MODEL == "mobilenetV2":
    return mobilenetV2()
  raise ValueError("Unknown MODEL '{}': expected 'LeNet5' or 'mobilenetV2'".format(MODEL))

    
#DEFINE TEST FUNCTION
def evaluate(net, criterion,dataloader,  print_tqdm = True):
  """Compute the average loss and the accuracy of ``net`` on ``dataloader``.

  Args:
    net: model to evaluate; it is moved to ``DEVICE`` and left in eval mode.
    criterion: loss function called as ``criterion(outputs, labels)``. The
      per-sample average assumes it returns the mean over the batch
      (the default reduction of ``nn.CrossEntropyLoss``).
    dataloader: loader yielding ``(images, labels)`` batches.
    print_tqdm: accepted for backward compatibility; currently ignored.

  Returns:
    ``(loss, accuracy)`` where ``loss`` is the per-sample average loss and
    ``accuracy`` is the fraction of correct predictions over the whole dataset.

  Raises:
    ValueError: if the loader's dataset is empty (no metric can be computed).
  """
  dataset_size = len(dataloader.dataset)
  if dataset_size == 0:
    raise ValueError("evaluate() received a dataloader with an empty dataset")

  net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
  net.train(False) # Set Network to evaluation mode
  running_corrects = 0
  loss_sum = 0.0
  samples_seen = 0
  with torch.no_grad():
    for images, labels in dataloader:
      images = images.to(DEVICE)
      labels = labels.to(DEVICE)
      # Forward Pass
      outputs = net(images)
      # Weight each batch loss by its size so a shorter last batch does not
      # count as much as a full one.
      batch_size = labels.size(0)
      loss_sum += criterion(outputs, labels).item() * batch_size
      samples_seen += batch_size
      # Get predictions
      _, preds = torch.max(outputs, 1)
      # Update Corrects
      running_corrects += torch.sum(preds == labels).item()
  # Calculate Accuracy
  accuracy = running_corrects / float(dataset_size)
  return loss_sum / max(samples_seen, 1), accuracy

**CENTRALIZED MODEL**

Baseline experiment with the standard (centralized) approach: create a single model and train it on the whole training set.

In [None]:
#STANDARD APPROACH
import torch.nn as nn
import torch.nn.functional as F
from torch.backends import cudnn
import torch.optim as optim
from tqdm import tqdm
from statistics import mean 
import torch.nn.functional as func

# Baseline: one model trained on the full training set with SGD and a step LR schedule.
centralized_model = get_net()

centralized_optimizer = torch.optim.SGD(centralized_model.parameters(), lr=LR, momentum=MOMENTUM)
centralized_criterion = nn.CrossEntropyLoss()
scheduler = optim.lr_scheduler.StepLR(centralized_optimizer, step_size=STEP_SIZE, gamma=GAMMA)

centralized_model = centralized_model.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda

# The flag has to be assigned; merely referencing `cudnn.benchmark` does nothing.
cudnn.benchmark = True


# Start iterating over the epochs
for epoch in range(0,NUM_EPOCHS):
  print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, scheduler.get_last_lr()))

  # evaluate() leaves the model in eval mode, so switch back once per epoch.
  centralized_model.train()

  # Iterate over the dataset
  for images, labels in train_loader:
    # Bring data over the device of choice
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    centralized_optimizer.zero_grad() # Zero-ing the gradients

    # Forward pass to the network
    outputs = centralized_model(images)
    # Compute loss based on output and ground truth
    loss = centralized_criterion(outputs, labels)

    # Compute gradients for each layer and update weights
    loss.backward()  # backward pass: computes gradients
    centralized_optimizer.step() # update weights based on accumulated gradients
  # Step the scheduler
  scheduler.step()
  # Keep the test loss under its own name instead of overwriting the training `loss`.
  test_loss, accuracy = evaluate(centralized_model,centralized_criterion, test_loader)
  print('\nTest Accuracy: {}'.format(accuracy)) 

# Test accuracy after the last epoch, reported next to the federated results later on.
centralized_accuracy=accuracy

DIRICHLET DISTRIBUTION

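Here the training set is split across the clients. Three strategies are available: the Dirichlet-based split published with the federated vision datasets (link below), an IID split, and a non-IID split in which every client keeps between 1 and 4 classes.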

https://github.com/google-research/google-research/tree/master/federated_vision_datasets

In [None]:
import collections
import csv
from os import path
import os
import urllib.request 
import zipfile
import random


def cifar_parser(line, is_train=True):
  """Unpack one CSV row of a federated CIFAR split file.

  Args:
    line: sequence with three fields (train rows) or two fields (test rows).
    is_train: selects which of the two row layouts to expect.

  Returns:
    ``(user_id, image_id, class_id)`` for train rows,
    ``(image_id, class_id)`` otherwise.
  """
  if not is_train:
    image_id, class_id = line
    return image_id, class_id

  user_id, image_id, class_id = line
  return user_id, image_id, class_id


def dirichlet_distribution(alpha, data_dir="/content/cifar10_csv"):    # generate trainset split from csv
  """Load the per-user training split for one Dirichlet concentration value.

  The split files are fetched (once) as a zip archive and extracted into
  ``data_dir``; the same directory is then used to read the CSV, so the
  function no longer depends on the current working directory.

  Args:
    alpha: concentration value as it appears in the file name, e.g. ``'0.10'``.
    data_dir: directory holding (or receiving) the extracted CSV files. The
      default is the location the archive was previously always written to.

  Returns:
    Dict mapping each user id (string, as read from the CSV) to the list of its
    image ids (ints), or ``None`` if the CSV for ``alpha`` is still missing
    after the download.
  """
  url="http://storage.googleapis.com/gresearch/federated-vision-datasets/cifar10_v1.1.zip"
  train_file = os.path.join(data_dir, "federated_train_alpha_"+alpha+".csv")
  print('Train file: %s' % train_file)

  # Download and extract only when the requested split is not on disk yet.
  if not path.exists(train_file):
    os.makedirs(data_dir, exist_ok=True)
    zip_path = os.path.join(data_dir, "cifar.zip")
    urllib.request.urlretrieve(url, zip_path)
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
      zip_ref.extractall(data_dir)

  if not path.exists(train_file):
    print('Error: file does not exist.')
    return

  user_images={}
  with open(train_file, newline='') as f:
    reader = csv.reader(f)
    next(reader)  # skip header.
    for line in reader:
      # Train rows are (user_id, image_id, class_id); the class is not needed here.
      user_id, image_id, _class_id = line
      user_images.setdefault(user_id, []).append(int(image_id))
  return user_images

def cifar_iid(): # all clients have all classes with the same data distribution
  """Split the training set evenly across ``NUM_CLIENTS`` clients, class by class.

  Indices are grouped by label (in order of first appearance) and then dealt
  round-robin to the clients, so every client receives the same class mix.

  Returns:
    Dict mapping the client id as a string (``'0'`` ... ``str(NUM_CLIENTS-1)``)
    to the list of training-set indices assigned to that client.
  """
  # Read labels from the dataset's `targets` list when it exists: indexing
  # `train_set[i]` decodes and transforms the image just to discard it.
  targets = getattr(train_set, "targets", None)
  if targets is not None:
    labels = [int(t) for t in targets]
  else:
    labels = [train_set[i][1] for i in range(len(train_set))]

  classes_dict={}
  for i, label in enumerate(labels):
    classes_dict.setdefault(label, []).append(i)

  # Concatenate the per-class index lists so equal labels are contiguous.
  classes_index=[]
  for indices in classes_dict.values():
    classes_index+=indices

  # Deal the class-ordered indices to the clients one at a time.
  user_images={}
  for position, image_index in enumerate(classes_index):
    user_images.setdefault(str(position % NUM_CLIENTS), []).append(image_index)
  return user_images

def cifar_noniid(): # every client keeps between 1 and 4 classes of its IID share
  """Build a non-IID split by dropping classes from each client's IID share.

  Starting from ``cifar_iid()``, each client draws a random number of classes
  between 1 and 4 and keeps only its samples that belong to those classes.

  Returns:
    Dict mapping the client id (string) to its filtered list of training-set
    indices.
  """
  # Look labels up in `targets` when available instead of decoding each image.
  targets = getattr(train_set, "targets", None)

  def label_of(index):
    if targets is not None:
      return int(targets[index])
    return int(train_set[index][1])

  user_images=cifar_iid()
  for key in user_images.keys():
    n_classes=random.randint(1,4)
    kept_classes=set(random.sample(range(0, NUM_CLASSES), n_classes))
    user_images[key]=[i for i in user_images[key] if label_of(i) in kept_classes]
  return user_images


# Choose one of the three split strategies (1, 2 or 3).
distribution=3

if(distribution==1):  # https://github.com/google-research/google-research/tree/master/federated_vision_datasets
  train_user_images=dirichlet_distribution(alpha='0.10') #'0.00', '0.05', '0.10', '0.20', '0.50', '1.00', '10.00', '100.00'

elif(distribution==2):
  train_user_images=cifar_iid()

elif(distribution==3):  # https://towardsdatascience.com/preserving-data-privacy-in-deep-learning-part-2-6c2e9494398b
  train_user_images=cifar_noniid()

else:
  # Fail here with a clear message rather than with a NameError further down.
  raise ValueError("distribution must be 1, 2 or 3, got {!r}".format(distribution))


# One training DataLoader per client, keyed by the client id string.
train_loader_list={}   #TRAIN LOADER DICT
for user_id in train_user_images.keys():
  dataset_ = torch.utils.data.Subset(train_set, train_user_images[user_id])
  # Shuffle local batches: the IID/non-IID splits store each client's indices grouped
  # by class, so sequential batches would each contain very few distinct classes.
  dataloader = torch.utils.data.DataLoader(dataset=dataset_, batch_size=BATCH_SIZE, shuffle=True)
  train_loader_list[user_id]=dataloader

DEFINE CLIENTS

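Create the pool of clients. Each client owns a model, an optimizer, a loss function, its training loader and a test loader restricted to the classes present in its training data.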

In [None]:
def generated_test_distribution(classes, num_samples):    # generate testset with specific class and size
  """Build a test loader restricted to ``classes`` with a per-class quota.

  For every class, test-set indices of that class are collected in dataset
  order until the running total reaches ``int(num_samples/len(classes)+1)``
  times the number of classes processed so far.

  Args:
    classes: labels to include.
    num_samples: approximate total size requested; the quota per class is
      ``int(num_samples/len(classes)+1)``.

  Returns:
    A non-shuffled ``DataLoader`` over the selected subset of ``test_set``.
  """
  # Read the labels once. Indexing `test_set[i]` decodes and transforms the image,
  # which the previous per-class scan repeated for every class of every client.
  labels = getattr(test_set, "targets", None)
  if labels is None:
    labels = [test_set[i][1] for i in range(len(test_set))]

  test_user_images=[]
  for count, c in enumerate(classes, start=1):
    # Cumulative size to reach once this class has been processed.
    target_size = int(num_samples/len(classes)+1)*count
    for i, label in enumerate(labels):
      if(label==c):
        test_user_images.append(i)
      if(len(test_user_images)==target_size):
        break
  dataset_ = torch.utils.data.Subset(test_set, test_user_images)
  dataloader = torch.utils.data.DataLoader(dataset=dataset_, batch_size=BATCH_SIZE, shuffle=False)
  return dataloader


#Client datastructure
class Client():
    """One federated participant: a model plus everything needed to train and test it."""

    def __init__(self, id,net,train_lr,optimizer,criterion):
        """Store the client's components and derive its personal test loader.

        Args:
            id: identifier of the client.
            net: the client's local model.
            train_lr: DataLoader over the client's training samples.
            optimizer: optimizer bound to ``net``'s parameters.
            criterion: loss function used for training and evaluation.
        """
        self.id = id
        self.net = net
        self.train_loader = train_lr
        self.optimizer = optimizer
        self.criterion = criterion

        # Labels present in the local training data, in order of first appearance.
        local_dataset = train_lr.dataset
        seen_labels = []
        for sample in local_dataset:
            label = sample[1]
            if label in seen_labels:
                continue
            seen_labels.append(label)

        # Client-specific test set: same classes, sized at 15% of the training data.
        requested_size = int(len(local_dataset)*0.15)
        self.test_loader = generated_test_distribution(seen_labels, requested_size)


# Instantiate the pool of clients: every client gets its own network,
# SGD optimizer and loss function, plus the loader built for its id.
clients_list = []
for i in range(NUM_CLIENTS):
  net = get_net()
  opt = torch.optim.SGD(net.parameters(), lr=LR, momentum=MOMENTUM)
  crt = nn.CrossEntropyLoss()
  client = Client(
      id=i,
      net=net,
      train_lr=train_loader_list[str(i)],
      optimizer=opt,
      criterion=crt,
  )
  clients_list.append(client)




# Sanity check: per-class sample counts of one client's train and test data.
ind = 4
for header, loader in (
    ("Distribution of trainset for client ", clients_list[ind].train_loader),
    ("Distribution of testset for client  ", clients_list[ind].test_loader),
):
  l = [0] * 10  # one counter per class label
  samples = loader.dataset
  for i in range(len(samples)):
    index = samples[i][1]
    l[index] += 1
  print(header+str(ind), l)

Distribution of trainset for client 4 [0, 0, 0, 0, 50, 0, 0, 0, 0, 0]
Distribution of testset for client  4 [0, 0, 0, 0, 8, 0, 0, 0, 0, 0]


FEDERATED AVG FUNCTIONS

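Helper functions for federated averaging: send the main model to the clients, train the clients locally, average their weights back into the main model, compute the weighted accuracy, and select the clients for a round.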

In [None]:
import copy
import random
#https://github.com/AshwinRJ/Federated-Learning-PyTorch/blob/master/src/utils.py

#Calculate AVG for each clients layers
def average_weights(w,clients):
    """Return the FedAvg aggregate of the clients' state dicts.

    Every entry is averaged across clients, weighted by the number of training
    samples each client holds.

    Args:
        w: list of state dicts, one per client, all with the same keys.
        clients: clients in the same order as ``w``; only
            ``len(client.train_loader.dataset)`` is read from each.

    Returns:
        A new state dict (deep copy of ``w[0]``'s container) holding the
        weighted averages. Floating-point entries keep their dtype; integer
        entries (e.g. batch-norm step counters) are averaged with floor
        division and keep their integer dtype instead of silently turning
        into floats.

    Raises:
        ValueError: if ``w`` is empty, if ``w`` and ``clients`` differ in
            length, or if the clients hold no training samples at all.
    """
    if len(w) == 0:
        raise ValueError("average_weights() needs at least one state dict")
    if len(w) != len(clients):
        raise ValueError("average_weights() needs exactly one state dict per client")

    sample_counts = [len(c.train_loader.dataset) for c in clients]
    total_samples = sum(sample_counts)
    if total_samples == 0:
        raise ValueError("average_weights() cannot weight clients that have no training samples")

    # Deep copy keeps the container type (and any metadata) of the first state dict.
    w_avg = copy.deepcopy(w[0])
    for key in w_avg.keys():
        reference = w[0][key]
        is_integer = not (reference.is_floating_point() or reference.is_complex())
        acc_dtype = torch.int64 if is_integer else reference.dtype

        weighted_sum = torch.zeros_like(reference, dtype=acc_dtype)
        for state, n_samples in zip(w, sample_counts):
            weighted_sum += state[key].to(acc_dtype) * n_samples

        if is_integer:
            # True division would promote integer buffers to float.
            averaged = torch.div(weighted_sum, total_samples, rounding_mode="floor")
            w_avg[key] = averaged.to(reference.dtype)
        else:
            w_avg[key] = torch.div(weighted_sum, total_samples)

    # An unweighted variant (plain mean over clients) would divide the
    # un-weighted sum by len(w) instead.
    return w_avg


#CLIENTS -> MAIN MODEL & AVERAGE
def set_averaged_weights_as_main_model_weights_and_update_main_model(main_model,clients):
  """Aggregate the clients' weights and load the result into ``main_model``.

  Args:
    main_model: global model that receives the averaged weights.
    clients: clients whose ``net`` parameters are aggregated.

  Returns:
    ``main_model`` itself, after loading the averaged state dict.
  """
  # Snapshot each client's parameters so the averaging works on independent copies.
  client_states = [copy.deepcopy(client.net.state_dict()) for client in clients]

  averaged_state = average_weights(client_states, clients) # update global weights
  main_model.load_state_dict(averaged_state)
  return main_model



#MAIN MODEL -> CLIENTS
def send_main_model_to_nodes_and_update_clients(main_model, clients):
    """Overwrite every client's model with a copy of the main model's weights.

    Args:
        main_model: global model whose state dict is broadcast.
        clients: clients whose ``net`` receives the weights.

    Returns:
        The same ``clients`` list that was passed in.
    """
    with torch.no_grad():
      global_state = main_model.state_dict()
      for client in clients:
        # Each client loads its own deep copy of the global state.
        client.net.load_state_dict(copy.deepcopy(global_state))
    return clients



#TRAIN ALL CLIENTS
def start_train_nodes(clients):
    """Train every client's model locally for ``NUM_EPOCHS`` epochs.

    Args:
        clients: clients to train; each uses its own ``net``, ``optimizer``,
            ``criterion`` and ``train_loader``.

    Returns:
        The same ``clients`` list, with the models updated in place and left
        on ``DEVICE`` in training mode.
    """
    # The flag has to be assigned; merely referencing `cudnn.benchmark` does nothing.
    cudnn.benchmark = True

    for client in clients:
        client.net = client.net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
        # Nothing inside the loops below changes the mode, so set it once per client.
        client.net.train()

        for epoch in range(NUM_EPOCHS):
            for images, labels in client.train_loader:
                # Bring data over the device of choice
                images = images.to(DEVICE)
                labels = labels.to(DEVICE)

                client.optimizer.zero_grad() # Zero-ing the gradients
                # Forward pass to the network
                outputs = client.net(images)
                # Compute loss based on output and ground truth
                loss = client.criterion(outputs, labels)

                # Compute gradients for each layer and update weights
                loss.backward()  # backward pass: computes gradients
                client.optimizer.step() # update weights based on accumulated gradients
    return clients


#WEIGHTED ACCURACY
def weighted_accuracy(clients):
  """Average the clients' test accuracies, weighted by training-set size.

  Each client's model is evaluated on that client's own test loader and its
  accuracy is weighted by ``len(client.train_loader.dataset)``.

  Args:
    clients: clients to evaluate.

  Returns:
    The weighted mean accuracy, or ``0.0`` when the clients hold no training
    samples (including an empty ``clients`` list).
  """
  weighted_total = 0  # avoids shadowing the builtin `sum`
  num_samples = 0
  for client in clients:
    _, test_accuracy = evaluate(client.net, client.criterion, client.test_loader)
    weight = len(client.train_loader.dataset)
    num_samples += weight
    weighted_total += test_accuracy * weight

  if num_samples == 0:
    return 0.0
  return weighted_total / num_samples


#SELECT CLIENTS
def selectClients(clients):
  """Pick ``NUM_SELECTED_CLIENTS`` distinct clients uniformly at random.

  The input list is left untouched. It was previously shuffled in place
  several times, which reordered the caller's client pool as a side effect.

  Args:
    clients: pool of clients to draw from.

  Returns:
    A new list with the selected clients.

  Raises:
    ValueError: if the pool holds fewer than ``NUM_SELECTED_CLIENTS`` clients.
  """
  if NUM_SELECTED_CLIENTS > len(clients):
    raise ValueError(
        "Cannot select {} clients from a pool of {}".format(NUM_SELECTED_CLIENTS, len(clients)))
  # random.sample draws without replacement and does not modify its input.
  return random.sample(clients, NUM_SELECTED_CLIENTS)

TRAIN THE SYSTEM AND TEST

In [None]:
#INSTANCE MAIN MODEL
# Global (server) model with its optimizer and loss function.
main_model = get_net()
main_optimizer = torch.optim.SGD(main_model.parameters(), lr=LR, momentum=MOMENTUM)
main_criterion = nn.CrossEntropyLoss()

for i in range(ROUNDS):
  # 1. Draw the clients that take part in this round.
  round_clients_list = selectClients(clients_list)

  # 2. Broadcast the current global weights to them.
  round_clients_list = send_main_model_to_nodes_and_update_clients(main_model, round_clients_list)

  # 3. Let every selected client train locally.
  round_clients_list = start_train_nodes(round_clients_list)

  # 4. Aggregate the local models back into the global one.
  main_model = set_averaged_weights_as_main_model_weights_and_update_main_model(main_model,round_clients_list)

  # 5. Global model on the full test set, selected clients on their own test sets.
  test_loss, main_model_accuracy = evaluate(main_model,main_criterion, test_loader)
  w_accuracy = weighted_accuracy(round_clients_list)
  print("After round "+str(i+1)+" main model accuracy: "+str(main_model_accuracy)+"   weighted accuracy: "+str(w_accuracy))

# Final summary: federated results next to the centralized baseline.
print("-----------------------------------------")
print("Final Accuracy of Main Model Federated Learning: "+str(main_model_accuracy))
print("Final Weighted Accuracy of Federated Learning: "+str(w_accuracy))
print("Final Accuracy of standard approach: "+str(centralized_accuracy))


After round 1 main model accuracy: 0.0957   weighted accuracy: 0.5
After round 2 main model accuracy: 0.1   weighted accuracy: 0.40625
After round 3 main model accuracy: 0.1   weighted accuracy: 0.40625
After round 4 main model accuracy: 0.1   weighted accuracy: 0.42857142857142855
After round 5 main model accuracy: 0.1   weighted accuracy: 0.390625
After round 6 main model accuracy: 0.1   weighted accuracy: 0.42857142857142855
-----------------------------------------
Final Accuracy of Main Model Federated Learning: 0.1
Final Weighted Accuracy of Federated Learning: 0.42857142857142855
Final Accuracy of standard approach: 0.2745
