In [None]:
!pip install opendatasets

In [None]:
!pip install tensorflow

In [None]:
!pip install tensorflow_federated

In [None]:
!nvidia-smi

In [None]:
!pip install torch==1.9.0+cu111 torchvision==0.10.0+cu111 torchaudio===0.9.0 -f https://download.pytorch.org/whl/torch_stable.html

In [1]:
import opendatasets as od
from tensorflow import keras
import numpy as np

In [2]:
# Fetch both Kaggle datasets into the working directory (opendatasets skips
# datasets that are already present, as the cell output shows).
for dataset_url in (
    "https://www.kaggle.com/prashant268/chest-xray-covid19-pneumonia",
    "https://www.kaggle.com/sudalairajkumar/novel-corona-virus-2019-dataset",
):
    od.download(dataset_url)

Skipping, found downloaded files in ".\chest-xray-covid19-pneumonia" (use force=True to force download)
Skipping, found downloaded files in ".\novel-corona-virus-2019-dataset" (use force=True to force download)


In [3]:
# Learning rate intended for the optimizers; the trailing 0.0001 looks like an
# alternative value kept for reference.
LEARNING_RATE = 0.001 # 0.0001
# Upper bound on training rounds: run_federated_training loops over range(MAX_EPOCHS).
MAX_EPOCHS = 25
# Folder name used by the (currently commented-out) torch.save checkpoint calls.
TARGET_FOLDER = "weights"

In [4]:
import torch
import torchvision
from torchvision import datasets, transforms

In [5]:
# Pre-processing applied to every image: resize to a fixed 244x244 and convert to a tensor.
transform = transforms.Compose(
    [
        transforms.Resize((244, 244)),
        transforms.ToTensor(),
        # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # find mean and std of dataset
    ]
)

# ImageFolder derives one class index per sub-directory of the given root.
test_set = datasets.ImageFolder('chest-xray-covid19-pneumonia/Data/test', transform=transform)

# `dataset` is kept as a second name for the training set, as in the original cell.
train_set = dataset = datasets.ImageFolder('chest-xray-covid19-pneumonia/Data/train', transform=transform)

In [6]:
def label_preparation(labels):
    """Collapse class indices to binary: every label greater than 0 becomes 1.

    labels -- sequence of numeric class indices (the input is not modified)

    Returns a new list with the binarised labels.
    """
    binary_labels = np.array(labels)  # np.array copies, so the caller's data stays untouched
    is_positive = binary_labels > 0
    binary_labels[is_positive] = 1
    return list(binary_labels)

def label_preparation_tensor(labels):
    """Binarise a label tensor in place: every entry greater than 0 becomes 1.

    labels -- tensor of class indices; it is modified in place

    Returns the same tensor object that was passed in.
    """
    return labels.masked_fill_(labels > 0, 1)

# Binarise the stored target lists of both datasets (any class index > 0 becomes 1).
# NOTE(review): batches coming out of the loaders are binarised again with
# label_preparation_tensor in the train/test loops — confirm whether this
# assignment alone changes what the loaders yield.
for image_set in (train_set, test_set):
    image_set.targets = label_preparation(image_set.targets)

In [7]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

print(f"Current device: {device}")

Current device: cuda:0


In [8]:
def calc_accuracy(result, labels):
    """Return the percentage of predictions that match `labels`.

    result -- raw model outputs (logits); they are passed through a sigmoid and
              rounded to obtain 0/1 predictions
    labels -- tensor compared element-wise with the predictions; its first
              dimension is used as the sample count

    Returns a scalar tensor holding the accuracy in percent.
    """
    predictions = result.sigmoid().round()
    hits = predictions.eq(labels).sum().float()
    return (hits / labels.shape[0]) * 100


# Federated
https://blog.openmined.org/upgrade-to-federated-learning-in-10-lines/

I don't think we need syft (PySyft) if we just simulate the federation on one machine.

In [9]:
from collections import OrderedDict

In [10]:
# Number of simulated federated clients; one model and one training loader is created per client.
number_of_clients = 4

In [11]:
# Global (server-side) model: ResNet-18 without pretrained weights and with a single output.
# It is not moved to `device` here; test_federated moves it there for evaluation and back to the CPU.
federated_model = torchvision.models.resnet18(pretrained=False, num_classes=1)
# federated_model.to(device)

In [12]:
# One untrained ResNet-18 replica per simulated client.
client_models = [
    torchvision.models.resnet18(pretrained=False, num_classes=1)
    for _ in range(number_of_clients)
]

# One shuffled training loader per client.
# NOTE(review): every loader iterates the complete `train_set`; the data is not
# partitioned between clients — confirm this is the intended simulation setup.
client_training_loader = [
    torch.utils.data.DataLoader(
        train_set,
        batch_size=32,
        shuffle=True,
        num_workers=0,
        pin_memory=True,
    )
    for _ in range(number_of_clients)
]

# Single, unshuffled loader used to evaluate the averaged model.
federated_test_loader = torch.utils.data.DataLoader(
    test_set,
    batch_size=32,
    shuffle=False,
    num_workers=0,
)

In [13]:
def update_client(federated_model, client):
    """Overwrite the client's parameters and buffers with those of the federated model.

    federated_model -- model whose state dict is the source of the weights
    client -- model that receives the weights (strict key matching)

    Returns the updated `client` (the same object that was passed in).
    """
    global_state = federated_model.state_dict()
    client.load_state_dict(global_state, strict=True)
    return client
    
def federated_average(federated_model, client_models):
    """Load the element-wise mean of all client state dicts into the federated model.

    federated_model -- model that receives the averaged weights
    client_models -- non-empty sequence of models with identical state-dict keys
                     and tensor shapes, all living on the same device

    Every client contributes with equal weight 1 / len(client_models); the weight
    is derived from the list that is actually passed in rather than from a global
    client count.

    Returns `federated_model` (the same object, updated in place).
    Raises ValueError if `client_models` is empty.
    """
    if not client_models:
        raise ValueError("federated_average needs at least one client model")

    client_states = [client_model.state_dict() for client_model in client_models]
    average_weights = OrderedDict()

    for key, reference in client_states[0].items():
        stacked = torch.stack([state[key] for state in client_states])
        if reference.is_floating_point():
            average_weights[key] = stacked.mean(dim=0)
        else:
            # Integer buffers (e.g. BatchNorm's num_batches_tracked) cannot be
            # averaged in their own dtype: average in float64 and cast back
            # explicitly instead of relying on an implicit cast while loading.
            average_weights[key] = stacked.double().mean(dim=0).to(reference.dtype)

    federated_model.load_state_dict(average_weights, True)
    return federated_model
    

In [14]:
def train_federated(model, data_loader, optimizer, loss):
    """
    Train `model` for one full pass over `data_loader`.

    model -- neural net; it is not moved here, inputs are sent to the global `device`
    data_loader -- dataloader for train images, yielding (images, labels) batches
    optimizer -- optimizer stepping the parameters of `model`
    loss -- callable applied to (model output, float targets of shape (N, 1))
    """
    model.train()

    accuracy = 0
    for step, (batch_images, batch_labels) in enumerate(data_loader, start=1):
        batch_images = batch_images.to(device)
        # binarise the labels of the batch (any class index > 0 becomes 1)
        batch_labels = label_preparation_tensor(batch_labels.to(device))
        column_labels = batch_labels.unsqueeze(1)

        optimizer.zero_grad()

        result = model(batch_images)
        loss_value = loss(result.float(), column_labels.float())

        # backpropagation
        loss_value.backward()
        optimizer.step()

        # only every 10th batch is scored and reported
        if step % 10 != 0:
            continue

        accuracy += calc_accuracy(result, column_labels)
        print(f"TRAINING - Step: {step}, loss: {loss_value}, rolling accuracy: {accuracy*10/step}")

In [15]:
def test_federated(model, test_loader, loss):
    """
    Evaluate `model` on `test_loader`, print loss and accuracy, return the accuracy.

    model -- neural net; moved to `device` for the evaluation and back to the CPU afterwards
    test_loader -- dataloader of test images, yielding (images, labels) batches
    loss -- callable applied to (model output, float targets of shape (N, 1));
            assumed to return the mean loss of the batch (the default reduction
            of BCEWithLogitsLoss)

    Loss and accuracy are averaged over samples, not over batches, so a smaller
    final batch is not over-weighted.

    Returns the accuracy over all test samples in percent (scalar tensor).
    Raises ValueError if `test_loader` yields no samples.
    """
    model.eval()
    model.to(device)

    try:
        with torch.no_grad():
            loss_value = 0
            accuracy = 0
            sample_count = 0
            for images, labels in test_loader:
                images = images.to(device)
                labels = label_preparation_tensor(labels.to(device))
                batch_size = labels.shape[0]

                result = model(images)
                column_labels = labels.unsqueeze(1)

                # weight each batch by its size so the totals are per-sample sums
                loss_value += loss(result, column_labels.float()) * batch_size
                accuracy += calc_accuracy(result, column_labels) * batch_size
                sample_count += batch_size

            if sample_count == 0:
                raise ValueError("test_loader yielded no samples")

            loss_value /= sample_count
            accuracy /= sample_count
    finally:
        # always hand the model back on the CPU, even if the evaluation failed
        model.to("cpu")
        if device.type == "cuda":
            torch.cuda.empty_cache()

    print(f"TESTING - Loss: {loss_value}, Accuracy: {accuracy}")
    return accuracy

In [16]:
def run_federated_training(federated_model, client_models, client_training_loader):
    """
    Simulate federated training on a single machine.

    Each epoch, every client is reset to the current federated weights, trained
    for one pass over its own loader, and moved back to the CPU; afterwards the
    client weights are averaged into the federated model, which is evaluated on
    `federated_test_loader`. Training stops early once the test accuracy exceeds
    97 % after more than five epochs, or after MAX_EPOCHS epochs.

    federated_model -- global model receiving the averaged weights
    client_models -- list of client models
    client_training_loader -- list of training dataloaders, one per client model

    Raises ValueError if the number of models and loaders differ.
    """
    if len(client_models) != len(client_training_loader):
        raise ValueError("client_models and client_training_loader must have the same length")

    # use pos weights because of unbalanced data set
    federated_loss = torch.nn.BCEWithLogitsLoss(pos_weight=torch.tensor([1./10])).to(device) # binary crossentropy
    # federated_loss = torch.nn.CrossEntropyLoss(weight=torch.tensor([1./10])).to(device) # sparce categorical crossentropy (federated)

    # start training
    for epoch in range(MAX_EPOCHS):
        # iterate over the clients that were actually passed in
        for client_idx, (client_model, training_loader) in enumerate(zip(client_models, client_training_loader)):
            print(f"+++ FEDERATED MODEL {client_idx}, EPOCH: {epoch+1} +++++++++")

            client_model.to(device)
            client_model = update_client(federated_model, client_model)
            # a fresh optimizer per round, using the notebook-wide learning rate
            client_optimizer = torch.optim.Adam(client_model.parameters(), lr=LEARNING_RATE)

            train_federated(client_model, training_loader, client_optimizer, federated_loss)

            if device.type == "cuda":
                torch.cuda.empty_cache()

            # keep idle clients on the CPU; averaging happens there as well
            client_model.to("cpu")

            # save interim weights
            #torch.save(client_model.state_dict(), f'./{TARGET_FOLDER}/client_model_{client_idx}_epoch_{epoch}.ckpt')

        federated_model = federated_average(federated_model, client_models)

        # save interim weights
        #torch.save(federated_model.state_dict(), f'./{TARGET_FOLDER}/epoch_{epoch}.ckpt')

        if test_federated(federated_model, federated_test_loader, federated_loss) > 97 and epoch > 4:
            print("Early return: SUCCESS")
            break


In [None]:
# Start the simulated federated training with the global model, the client models and their loaders.
run_federated_training(federated_model, client_models, client_training_loader)