In [1]:
from openfl.interface.interactive_api.federation import Federation
from openfl.interface.interactive_api.experiment import ModelInterface, FLExperiment
import torch
import torch.nn as nn
import torch.optim as optim
from copy import deepcopy

from gear_shard_dataset import GearSD
from kvasir_shard_dataset import KvasirSD

from loss import *
from models import *
from tasks import Task

import matplotlib.pyplot as plt
import time
import os
import copy

NUM_CLASSES=1
ROUND_TO_TRAIN=4

client_id = 'frontend'
director_node_fqdn = 'localhost'
director_port = 50053
experiment_name = 'gear_test_experiment'
shard_name = 'one'
LEARNING_RATE=5e-4
TRAIN_BS=4
VALID_BS=8

CRITERION=soft_dice_loss
CRITERION_VAL=soft_dice_coef


## don't forget to launch envoy service
### bash start_envoy.sh env_on localhost

In [2]:
# please use the same identificator that was used in signed certificate
federation = Federation(
    client_id=client_id,
    director_node_fqdn=director_node_fqdn,
    director_port=director_port,
    tls=False
)
shard_registry = federation.get_shard_registry()
shard_registry
federation.target_shape
dummy_shard_desc = federation.get_dummy_shard_descriptor(size=10)
dummy_shard_dataset = dummy_shard_desc.get_dataset('train')
sample, target = dummy_shard_dataset[0]
f"Sample shape: {sample.shape}, target shape: {target.shape}"

fed_dataset = KvasirSD(train_bs=TRAIN_BS, valid_bs=VALID_BS)
fed_dataset.shard_descriptor = dummy_shard_desc
for i, (sample, target) in enumerate(fed_dataset.get_train_loader()):
    print("Sample shape : "+str(sample.shape))
    print("Target shape : "+str(target.shape))



Sample shape : torch.Size([4, 3, 332, 332])
Target shape : torch.Size([4, 1, 332, 332])
Sample shape : torch.Size([4, 3, 332, 332])
Target shape : torch.Size([4, 1, 332, 332])
Sample shape : torch.Size([1, 3, 332, 332])
Target shape : torch.Size([1, 1, 332, 332])


In [3]:
d = DeepLabv3()
model= d.build_deeplab(NUM_CLASSES, alpha=0.7)
# take low learning rate for Tversky loss and to not change so much the current trained weights
optimizer_adam = optim.Adam(model.parameters(), lr=LEARNING_RATE)

framework_adapter = 'openfl.plugins.frameworks_adapters.pytorch_adapter.FrameworkAdapterPlugin'
MI = ModelInterface(model=model, optimizer=optimizer_adam, framework_plugin=framework_adapter)
# Save the initial model state
initial_model = deepcopy(model)

TI, validate = Task.createTask(CRITERION, CRITERION_VAL, d)

# create an experimnet in federation
fl_experiment = FLExperiment(federation=federation, experiment_name=experiment_name)

# The following command zips the workspace and python requirements to be transfered to collaborator nodes
fl_experiment.start(model_provider=MI, 
                    task_keeper=TI,
                    data_loader=fed_dataset,
                    rounds_to_train=ROUND_TO_TRAIN,
                    opt_treatment='CONTINUE_GLOBAL',
                    device_assignment_policy='CUDA_PREFERRED')

[*] Changing head for 1 classes and removing aux classifier
[!] This model will be trained using alpha freezing coef = 0.7 meaning 135/193 layers will be freeze


In [4]:
# we can stream metrics 
#fl_experiment.stream_metrics()
best_model = fl_experiment.get_best_model()
# We remove exremove_experiment_datamove_experiment_datamove_experiment_datariment data from director
fl_experiment.remove_experiment_data()

# Compare initial model 
validate(initial_model, fed_dataset.get_valid_loader(), 'cpu')

# With the best model 
validate(best_model, fed_dataset.get_valid_loader(), 'cpu')

# We can save the best and use it on runtime 
# TODO save ..

  new_state[k] = pt.from_numpy(tensor_dict.pop(k)).to(device)


# We can also improve the model 

In [None]:
MI = ModelInterface(model=best_model, optimizer=optimizer_adam, framework_plugin=framework_adapter)
fl_experiment.start(model_provider=MI, task_keeper=TI, data_loader=fed_dataset, rounds_to_train=4, \
                              opt_treatment='CONTINUE_GLOBAL')