# Fedbiomed Researcher

Use for developing (autoreloads changes made across packages)

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from fedbiomed.researcher.requests import Requests
req = Requests()
req.list(verbose=True)

2021-12-10 17:19:58,146 fedbiomed INFO - Component environment:
2021-12-10 17:19:58,147 fedbiomed INFO - - type = ComponentType.RESEARCHER
2021-12-10 17:19:58,817 fedbiomed INFO - Messaging researcher_3f89ab24-0886-42c3-b625-d0ade13b2a44 successfully connected to the message broker, object = <fedbiomed.common.messaging.Messaging object at 0x10f8c0910>
2021-12-10 17:19:58,896 fedbiomed INFO - Listing available datasets in all nodes... 
2021-12-10 17:19:58,904 fedbiomed INFO - log from: node_5ef29a9f-9647-4c43-b45a-37a67ce9b237 / DEBUG - Message received: {'researcher_id': 'researcher_3f89ab24-0886-42c3-b625-d0ade13b2a44', 'command': 'list'}
2021-12-10 17:19:58,906 fedbiomed INFO - log from: node_9261632d-ca98-4d57-81a1-8c109560d8bd / DEBUG - Message received: {'researcher_id': 'researcher_3f89ab24-0886-42c3-b625-d0ade13b2a44', 'command': 'list'}
2021-12-10 17:19:58,907 fedbiomed INFO - log from: node_84ef4966-1dae-4d55-aff3-d2bf17c3d68a / DEBUG - Message received: {'researcher_id': 'res

{'node_5ef29a9f-9647-4c43-b45a-37a67ce9b237': [{'name': 'mednist',
   'data_type': 'images',
   'tags': ['mednist'],
   'description': 'bla',
   'shape': [18000, 3, 64, 64]}],
 'node_9261632d-ca98-4d57-81a1-8c109560d8bd': [{'name': 'mednist',
   'data_type': 'images',
   'tags': ['mednist'],
   'description': 'bla',
   'shape': [16954, 3, 64, 64]}],
 'node_84ef4966-1dae-4d55-aff3-d2bf17c3d68a': [{'name': 'mednist',
   'data_type': 'images',
   'tags': ['mednist'],
   'description': 'bla',
   'shape': [18000, 3, 64, 64]}]}

## Setting the client up
It is necessary to previously configure a node:
1. `./scripts/fedbiomed_run node add`
  * Select option 2 (default) to add MNIST to the client
  * Confirm default tags by hitting "y" and ENTER
  * Pick the folder where MNIST is downloaded (this is due torch issue https://github.com/pytorch/vision/issues/3549)
  * Data must have been added (if you get a warning saying that data must be unique is because it's been already added)
  
2. Check that your data has been added by executing `./scripts/fedbiomed_run node list`
3. Run the node using `./scripts/fedbiomed_run node run`. Wait until you get `Connected with result code 0`. it means you are online.

## Create an experiment to train a model on the data found

Declare a torch.nn MyTrainingPlan class to send for training on the node

In [3]:
from fedbiomed.researcher.environ import environ
import tempfile
tmp_dir_model = tempfile.TemporaryDirectory(dir=environ['TMP_DIR']+'/')
model_file = tmp_dir_model.name + '/class_export_mnist.py'

In [4]:
from monai.apps import download_and_extract
from monai.config import print_config
from monai.data import decollate_batch
from monai.metrics import ROCAUCMetric
from monai.networks.nets import DenseNet121
from monai.transforms import (
    Activations,
    AddChannel,
    AsDiscrete,
    Compose,
    LoadImage,
    RandFlip,
    RandRotate,
    RandZoom,
    ScaleIntensity,
    EnsureType,
)
from monai.utils import set_determinism

Note : write **only** the code to export in the following cell

In [5]:
%%writefile "$model_file"

import os
import numpy as np
import torch
import torch.nn as nn
from fedbiomed.common.torchnn import TorchTrainingPlan
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

from monai.apps import download_and_extract
from monai.config import print_config
from monai.data import decollate_batch
from monai.metrics import ROCAUCMetric
from monai.networks.nets import DenseNet121
from monai.transforms import (
    Activations,
    AddChannel,
    AsDiscrete,
    Compose,
    LoadImage,
    RandFlip,
    RandRotate,
    RandZoom,
    ScaleIntensity,
    EnsureType,
)
from monai.utils import set_determinism


# Here we define the model to be used. 
# You can use any class name (here 'Net')
class MyTrainingPlan(TorchTrainingPlan):
    def __init__(self, kwargs):
        super(MyTrainingPlan, self).__init__()
        
        # Here we define the custom dependencies that will be needed by our custom Dataloader
        # In this case, we need the torch DataLoader classes
        # Since we will train on MNIST, we need datasets and transform from torchvision
        deps = ["import numpy as np",
                "import os",
                "from torch.utils.data import DataLoader",
                "from monai.apps import download_and_extract",
                "from monai.config import print_config",
                "from monai.data import decollate_batch",
                "from monai.metrics import ROCAUCMetric",
                "from monai.networks.nets import DenseNet121",
                "from monai.transforms import ( Activations, AddChannel, AsDiscrete, Compose, LoadImage, RandFlip, RandRotate, RandZoom, ScaleIntensity, EnsureType, )",
                "from monai.utils import set_determinism",]
        self.add_dependency(deps)
         
        self.num_class =  kwargs['num_class']  
        self.model = DenseNet121(spatial_dims=2, in_channels=1,
                    out_channels = self.num_class)
        
        self.loss_function = torch.nn.CrossEntropyLoss()

    def forward(self, x):
        return self.model(x)

    class MedNISTDataset(torch.utils.data.Dataset):
            def __init__(self, image_files, labels, transforms):
                self.image_files = image_files
                self.labels = labels
                self.transforms = transforms

            def __len__(self):
                return len(self.image_files)

            def __getitem__(self, index):
                return self.transforms(self.image_files[index]), self.labels[index]
    
    def parse_data(self, path):
        print(self.dataset_path)
        class_names = sorted(x for x in os.listdir(path)
                     if os.path.isdir(os.path.join(path, x)))
        num_class = len(class_names)
        image_files = [
                        [
                            os.path.join(path, class_names[i], x)
                            for x in os.listdir(os.path.join(path, class_names[i]))
                        ]
                        for i in range(num_class)
                      ]
        
        return image_files, num_class
    
    def training_data(self, batch_size = 48):
        self.image_files, num_class = self.parse_data(self.dataset_path)
        
        if self.num_class!=num_class:
                raise Exception('number of available classes does not match declared classes')
        
        num_each = [len(self.image_files[i]) for i in range(self.num_class)]
        image_files_list = []
        image_class = []
        
        for i in range(self.num_class):
            image_files_list.extend(self.image_files[i])
            image_class.extend([i] * num_each[i])
        num_total = len(image_class)
        
        
        length = len(image_files_list)
        indices = np.arange(length)
        np.random.shuffle(indices)

        val_split = int(1. * length) 
        train_indices = indices[:val_split]

        train_x = [image_files_list[i] for i in train_indices]
        train_y = [image_class[i] for i in train_indices]
#         val_x = [image_files_list[i] for i in val_indices]
#         val_y = [image_class[i] for i in val_indices]
#         test_x = [image_files_list[i] for i in test_indices]
#         test_y = [image_class[i] for i in test_indices]


        train_transforms = Compose(
            [
                LoadImage(image_only=True),
                AddChannel(),
                ScaleIntensity(),
                RandRotate(range_x=np.pi / 12, prob=0.5, keep_size=True),
                RandFlip(spatial_axis=0, prob=0.5),
                RandZoom(min_zoom=0.9, max_zoom=1.1, prob=0.5),
                EnsureType(),
            ]
        )

        val_transforms = Compose(
            [LoadImage(image_only=True), AddChannel(), ScaleIntensity(), EnsureType()])

        y_pred_trans = Compose([EnsureType(), Activations(softmax=True)])
        y_trans = Compose([EnsureType(), AsDiscrete(to_onehot=num_class)])

        print(
            f"Training count: {len(train_x)}")
        
        
        train_ds = self.MedNISTDataset(train_x, train_y, train_transforms)
        train_loader = torch.utils.data.DataLoader(
            train_ds, batch_size, shuffle=True)
        
        return train_loader
    
    def training_step(self, data, target):
        output = self.forward(data)
        loss   = self.loss_function(output, target)
        return loss


Writing /Users/mlorenzi/works/temp/fedbiomed/var/tmp/tmp2dunummj/class_export_mnist.py


This group of arguments correspond respectively:
* `model_args`: a dictionary with the arguments related to the model (e.g. number of layers, features, etc.). This will be passed to the model class on the client side.
* `training_args`: a dictionary containing the arguments for the training routine (e.g. batch size, learning rate, epochs, etc.). This will be passed to the routine on the client side.

**NOTE:** typos and/or lack of positional (required) arguments will raise error. 🤓

In [6]:
model_args = {'num_class':6,}

training_args = {
    'batch_size': 20, 
    'lr': 1e-5, 
    'epochs': 1, 
    'dry_run': False,  
    'batch_maxnum':250 # Fast pass for development : only use ( batch_maxnum * batch_size ) samples
}





Define an experiment
- search nodes serving data for these `tags`, optionally filter on a list of client ID with `clients`
- run a round of local training on nodes with model defined in `model_path` + federation with `aggregator`
- run for `rounds` rounds, applying the `client_selection_strategy` between the rounds

In [7]:
from fedbiomed.researcher.experiment import Experiment
from fedbiomed.researcher.aggregators.fedavg import FedAverage

tags =  ['mednist']
rounds = 3

exp = Experiment(tags=tags,
                 #clients=None,
                 model_path=model_file,
                 model_args=model_args,
                 model_class='MyTrainingPlan',
                 training_args=training_args,
                 rounds=rounds,
                 aggregator=FedAverage(),
                 node_selection_strategy=None
                )

2021-12-10 17:20:12,076 fedbiomed INFO - Searching dataset with data tags: ['mednist'] for all nodes
2021-12-10 17:20:12,082 fedbiomed INFO - log from: node_5ef29a9f-9647-4c43-b45a-37a67ce9b237 / DEBUG - Message received: {'researcher_id': 'researcher_3f89ab24-0886-42c3-b625-d0ade13b2a44', 'tags': ['mednist'], 'command': 'search'}
2021-12-10 17:20:12,083 fedbiomed INFO - log from: node_84ef4966-1dae-4d55-aff3-d2bf17c3d68a / DEBUG - Message received: {'researcher_id': 'researcher_3f89ab24-0886-42c3-b625-d0ade13b2a44', 'tags': ['mednist'], 'command': 'search'}
2021-12-10 17:20:12,084 fedbiomed INFO - log from: node_9261632d-ca98-4d57-81a1-8c109560d8bd / DEBUG - Message received: {'researcher_id': 'researcher_3f89ab24-0886-42c3-b625-d0ade13b2a44', 'tags': ['mednist'], 'command': 'search'}
2021-12-10 17:20:22,082 fedbiomed INFO - Node selected for training -> node_5ef29a9f-9647-4c43-b45a-37a67ce9b237
2021-12-10 17:20:22,083 fedbiomed INFO - Node selected for training -> node_84ef4966-1dae-

Let's start the experiment.

By default, this function doesn't stop until all the `rounds` are done for all the clients

In [8]:
exp.run()

2021-12-10 17:20:24,710 fedbiomed INFO - Sampled nodes in round 0 ['node_5ef29a9f-9647-4c43-b45a-37a67ce9b237', 'node_84ef4966-1dae-4d55-aff3-d2bf17c3d68a', 'node_9261632d-ca98-4d57-81a1-8c109560d8bd']
2021-12-10 17:20:24,711 fedbiomed INFO - Send message to node node_5ef29a9f-9647-4c43-b45a-37a67ce9b237 - {'researcher_id': 'researcher_3f89ab24-0886-42c3-b625-d0ade13b2a44', 'job_id': 'c68e1cc2-3f28-4830-9b0a-35712ba3137f', 'training_args': {'batch_size': 20, 'lr': 1e-05, 'epochs': 1, 'dry_run': False, 'batch_maxnum': 250}, 'model_args': {'num_class': 6}, 'command': 'train', 'model_url': 'http://localhost:8844/media/uploads/2021/12/10/my_model_6e197884-6d27-4284-ae17-9c993e69e9c4.py', 'params_url': 'http://localhost:8844/media/uploads/2021/12/10/my_model_22bf4d36-e779-4d5b-b0a6-50d54898a1c7.pt', 'model_class': 'MyTrainingPlan', 'training_data': {'node_5ef29a9f-9647-4c43-b45a-37a67ce9b237': ['dataset_f07755c7-cde0-474d-8a87-7ae542957c4b']}}
2021-12-10 17:20:24,712 fedbiomed DEBUG - resea

2021-12-10 17:20:27,216 fedbiomed INFO - log from: node_84ef4966-1dae-4d55-aff3-d2bf17c3d68a / DEBUG - Dataset_path/Users/mlorenzi/works/temp/MedNIST/client_1


2021-12-10 17:24:21,695 fedbiomed INFO - log from: 7ca15267-7673-45a7-98d2-03c70729b49b / ERROR - mqtt+console ERROR message
2021-12-10 17:24:21,697 fedbiomed INFO - log from: 7ca15267-7673-45a7-98d2-03c70729b49b / ERROR - YYY-FIND_THIS_IN_TEMPFILE-XXX
2021-12-10 17:24:23,717 fedbiomed INFO - log from: 7ca15267-7673-45a7-98d2-03c70729b49b / ERROR - researcher_id: '<class 'bool'>' instead of '<class 'str'>'
2021-12-10 17:24:23,721 fedbiomed INFO - log from: 7ca15267-7673-45a7-98d2-03c70729b49b / ERROR - node_id: '<class 'bool'>' instead of '<class 'str'>'
2021-12-10 17:24:23,728 fedbiomed INFO - log from: 7ca15267-7673-45a7-98d2-03c70729b49b / ERROR - errnum: '<class 'bool'>' instead of '<enum 'ErrorNumbers'>'
2021-12-10 17:24:23,733 fedbiomed INFO - log from: 7ca15267-7673-45a7-98d2-03c70729b49b / ERROR - extra_msg: '<class 'list'>' instead of '<class 'str'>'
2021-12-10 17:24:23,738 fedbiomed INFO - log from: 7ca15267-7673-45a7-98d2-03c70729b49b / ERROR - extra_msg: '<class 'list'>' in

2021-12-10 17:24:23,839 fedbiomed INFO - log from: 7ca15267-7673-45a7-98d2-03c70729b49b / ERROR - command: '<class 'bool'>' instead of '<class 'str'>'
2021-12-10 17:24:23,841 fedbiomed INFO - log from: 7ca15267-7673-45a7-98d2-03c70729b49b / ERROR - count: '<class 'str'>' instead of '<class 'int'>'
2021-12-10 17:24:23,843 fedbiomed INFO - log from: 7ca15267-7673-45a7-98d2-03c70729b49b / ERROR - researcher_id: '<class 'bool'>' instead of '<class 'str'>'
2021-12-10 17:24:23,844 fedbiomed INFO - log from: 7ca15267-7673-45a7-98d2-03c70729b49b / ERROR - node_id: '<class 'bool'>' instead of '<class 'str'>'
2021-12-10 17:24:23,846 fedbiomed INFO - log from: 7ca15267-7673-45a7-98d2-03c70729b49b / ERROR - command: '<class 'bool'>' instead of '<class 'str'>'
2021-12-10 17:24:23,847 fedbiomed INFO - log from: 7ca15267-7673-45a7-98d2-03c70729b49b / ERROR - databases: '<class 'str'>' instead of '<class 'list'>'
2021-12-10 17:24:23,849 fedbiomed INFO - log from: 7ca15267-7673-45a7-98d2-03c70729b49b /

2021-12-10 17:25:08,936 fedbiomed INFO - log from: node_9261632d-ca98-4d57-81a1-8c109560d8bd / DEBUG - Reached 250 batches for this epoch, ignore remaining data
2021-12-10 17:25:09,314 fedbiomed INFO - log from: node_84ef4966-1dae-4d55-aff3-d2bf17c3d68a / DEBUG - Reached 250 batches for this epoch, ignore remaining data
2021-12-10 17:25:09,879 fedbiomed INFO - log from: node_5ef29a9f-9647-4c43-b45a-37a67ce9b237 / DEBUG - Reached 250 batches for this epoch, ignore remaining data
2021-12-10 17:25:13,706 fedbiomed INFO - log from: node_9261632d-ca98-4d57-81a1-8c109560d8bd / INFO - results uploaded successfully 
2021-12-10 17:25:14,247 fedbiomed INFO - log from: node_84ef4966-1dae-4d55-aff3-d2bf17c3d68a / INFO - results uploaded successfully 
2021-12-10 17:25:14,526 fedbiomed INFO - log from: node_5ef29a9f-9647-4c43-b45a-37a67ce9b237 / INFO - results uploaded successfully 
2021-12-10 17:25:19,880 fedbiomed INFO - Downloading model params after training on node_9261632d-ca98-4d57-81a1-8c109

2021-12-10 17:25:24,519 fedbiomed INFO - log from: node_84ef4966-1dae-4d55-aff3-d2bf17c3d68a / DEBUG - [TASKS QUEUE] Item:{'researcher_id': 'researcher_3f89ab24-0886-42c3-b625-d0ade13b2a44', 'job_id': 'c68e1cc2-3f28-4830-9b0a-35712ba3137f', 'params_url': 'http://localhost:8844/media/uploads/2021/12/10/researcher_params_b9bb68d0-208e-49dd-af08-1cc137b46dcd.pt', 'training_args': {'batch_size': 20, 'lr': 1e-05, 'epochs': 1, 'dry_run': False, 'batch_maxnum': 250}, 'training_data': {'node_84ef4966-1dae-4d55-aff3-d2bf17c3d68a': ['dataset_b6fecfe7-8211-4319-b669-7a8abad28173']}, 'model_args': {'num_class': 6}, 'model_url': 'http://localhost:8844/media/uploads/2021/12/10/my_model_6e197884-6d27-4284-ae17-9c993e69e9c4.py', 'model_class': 'MyTrainingPlan', 'command': 'train'}
2021-12-10 17:25:26,285 fedbiomed INFO - log from: node_5ef29a9f-9647-4c43-b45a-37a67ce9b237 / INFO - {'monitor': <fedbiomed.node.history_monitor.HistoryMonitor object at 0x1373d68e0>, 'batch_size': 20, 'lr': 1e-05, 'epochs'

2021-12-10 17:25:52,408 fedbiomed INFO - log from: ea0b831b-3b5f-4e96-9169-a710619f8994 / ERROR - researcher_id: '<class 'bool'>' instead of '<class 'str'>'
2021-12-10 17:25:52,410 fedbiomed INFO - log from: ea0b831b-3b5f-4e96-9169-a710619f8994 / ERROR - job_id: '<class 'int'>' instead of '<class 'str'>'
2021-12-10 17:25:52,411 fedbiomed INFO - log from: ea0b831b-3b5f-4e96-9169-a710619f8994 / ERROR - model_url: '<class 'int'>' instead of '<class 'str'>'
2021-12-10 17:25:52,414 fedbiomed INFO - log from: ea0b831b-3b5f-4e96-9169-a710619f8994 / ERROR - command: '<class 'bool'>' instead of '<class 'str'>'
2021-12-10 17:25:52,415 fedbiomed INFO - log from: ea0b831b-3b5f-4e96-9169-a710619f8994 / ERROR - researcher_id: '<class 'bool'>' instead of '<class 'str'>'
2021-12-10 17:25:52,416 fedbiomed INFO - log from: ea0b831b-3b5f-4e96-9169-a710619f8994 / ERROR - node_id: '<class 'bool'>' instead of '<class 'str'>'
2021-12-10 17:25:52,418 fedbiomed INFO - log from: ea0b831b-3b5f-4e96-9169-a710619f

2021-12-10 17:25:53,466 fedbiomed INFO - log from: ea0b831b-3b5f-4e96-9169-a710619f8994 / INFO - Listing available datasets in all nodes... 
2021-12-10 17:25:53,471 fedbiomed INFO - log from: ea0b831b-3b5f-4e96-9169-a710619f8994 / INFO - Listing available datasets in all nodes... 
2021-12-10 17:25:53,472 fedbiomed INFO - log from: ea0b831b-3b5f-4e96-9169-a710619f8994 / INFO - Listing available datasets in all nodes... 
2021-12-10 17:25:53,499 fedbiomed INFO - log from: ea0b831b-3b5f-4e96-9169-a710619f8994 / DEBUG - torchnn saved model filename: ./tmp_model.py


2021-12-10 17:30:31,374 fedbiomed INFO - log from: 71e8ab41-7f86-4521-be43-fef6e0afe647 / ERROR - mqtt+console ERROR message
2021-12-10 17:30:31,375 fedbiomed INFO - log from: 71e8ab41-7f86-4521-be43-fef6e0afe647 / ERROR - YYY-FIND_THIS_IN_TEMPFILE-XXX
2021-12-10 17:30:33,389 fedbiomed INFO - log from: 71e8ab41-7f86-4521-be43-fef6e0afe647 / ERROR - researcher_id: '<class 'bool'>' instead of '<class 'str'>'
2021-12-10 17:30:33,392 fedbiomed INFO - log from: 71e8ab41-7f86-4521-be43-fef6e0afe647 / ERROR - node_id: '<class 'bool'>' instead of '<class 'str'>'
2021-12-10 17:30:33,396 fedbiomed INFO - log from: 71e8ab41-7f86-4521-be43-fef6e0afe647 / ERROR - errnum: '<class 'bool'>' instead of '<enum 'ErrorNumbers'>'
2021-12-10 17:30:33,404 fedbiomed INFO - log from: 71e8ab41-7f86-4521-be43-fef6e0afe647 / ERROR - extra_msg: '<class 'list'>' instead of '<class 'str'>'
2021-12-10 17:30:33,410 fedbiomed INFO - log from: 71e8ab41-7f86-4521-be43-fef6e0afe647 / ERROR - extra_msg: '<class 'list'>' in

2021-12-10 17:30:33,511 fedbiomed INFO - log from: 71e8ab41-7f86-4521-be43-fef6e0afe647 / ERROR - command: '<class 'bool'>' instead of '<class 'str'>'
2021-12-10 17:30:33,513 fedbiomed INFO - log from: 71e8ab41-7f86-4521-be43-fef6e0afe647 / ERROR - command: '<class 'bool'>' instead of '<class 'str'>'
2021-12-10 17:30:33,514 fedbiomed INFO - log from: 71e8ab41-7f86-4521-be43-fef6e0afe647 / ERROR - count: '<class 'str'>' instead of '<class 'int'>'
2021-12-10 17:30:33,515 fedbiomed INFO - log from: 71e8ab41-7f86-4521-be43-fef6e0afe647 / ERROR - researcher_id: '<class 'bool'>' instead of '<class 'str'>'
2021-12-10 17:30:33,517 fedbiomed INFO - log from: 71e8ab41-7f86-4521-be43-fef6e0afe647 / ERROR - node_id: '<class 'bool'>' instead of '<class 'str'>'
2021-12-10 17:30:33,519 fedbiomed INFO - log from: 71e8ab41-7f86-4521-be43-fef6e0afe647 / ERROR - command: '<class 'bool'>' instead of '<class 'str'>'
2021-12-10 17:30:33,521 fedbiomed INFO - log from: 71e8ab41-7f86-4521-be43-fef6e0afe647 / E

2021-12-10 17:30:37,120 fedbiomed INFO - log from: 71e8ab41-7f86-4521-be43-fef6e0afe647 / INFO - Recreating hashing for : pytorch-usedcars.txt 	 model_f1df4dcb-aa72-4880-9fcd-3a00610d29a2
2021-12-10 17:30:37,156 fedbiomed INFO - log from: 71e8ab41-7f86-4521-be43-fef6e0afe647 / INFO - Recreating hashing for : pytorch-celaba.txt 	 model_f07257a6-dad2-47a7-b7e1-267e4504e7be
2021-12-10 17:30:37,212 fedbiomed INFO - log from: 71e8ab41-7f86-4521-be43-fef6e0afe647 / INFO - Recreating hashing for : sklearn-sgdregressor.txt 	 model_3ddca86c-f227-495d-82ff-fdd5abd9dd39
2021-12-10 17:30:37,223 fedbiomed INFO - log from: 71e8ab41-7f86-4521-be43-fef6e0afe647 / INFO - Recreating hashing for : sklearn-perceptron.txt 	 model_da36a82f-6cd4-4f24-bfb3-d9d9e6869697
2021-12-10 17:30:37,233 fedbiomed INFO - log from: 71e8ab41-7f86-4521-be43-fef6e0afe647 / INFO - Recreating hashing for : pytorch-csv.txt 	 model_d1f31f0f-1279-46e1-a667-52ca6b85fc43
2021-12-10 17:30:37,263 fedbiomed INFO - log from: 71e8ab41-7

2021-12-10 17:30:37,961 fedbiomed INFO - log from: 71e8ab41-7f86-4521-be43-fef6e0afe647 / INFO - Recreating hashing for : sklearn-perceptron.txt 	 model_da36a82f-6cd4-4f24-bfb3-d9d9e6869697
2021-12-10 17:30:37,973 fedbiomed INFO - log from: 71e8ab41-7f86-4521-be43-fef6e0afe647 / INFO - Recreating hashing for : pytorch-csv.txt 	 model_d1f31f0f-1279-46e1-a667-52ca6b85fc43
2021-12-10 17:30:38,004 fedbiomed INFO - log from: 71e8ab41-7f86-4521-be43-fef6e0afe647 / INFO - Recreating hashing for : pytorch-mnist.txt 	 model_b6f7f930-305a-4834-bf97-99b3db42c733
2021-12-10 17:30:38,032 fedbiomed INFO - log from: 71e8ab41-7f86-4521-be43-fef6e0afe647 / INFO - {'name': 'variational-autoencoder.txt', 'description': 'Default model', 'hash': '18121f88d204c57dc88abcac58f999c49e355c15d65bf775f937101ba128e06e0572d8b910c1e6b35802df70def59b3b0c4e60d08542dfc51ef6c2f9a0a20aef', 'model_path': '/tmp/default_models/variational-autoencoder.txt', 'model_id': 'model_52aedf24-d922-4969-9215-fa412c709192', 'model_typ

2021-12-10 17:30:52,375 fedbiomed INFO - log from: node_5ef29a9f-9647-4c43-b45a-37a67ce9b237 / DEBUG - Reached 250 batches for this epoch, ignore remaining data
2021-12-10 17:30:52,821 fedbiomed INFO - log from: node_9261632d-ca98-4d57-81a1-8c109560d8bd / DEBUG - Reached 250 batches for this epoch, ignore remaining data
2021-12-10 17:30:53,446 fedbiomed INFO - log from: node_84ef4966-1dae-4d55-aff3-d2bf17c3d68a / DEBUG - Reached 250 batches for this epoch, ignore remaining data
2021-12-10 17:30:56,269 fedbiomed INFO - log from: node_5ef29a9f-9647-4c43-b45a-37a67ce9b237 / INFO - results uploaded successfully 
2021-12-10 17:30:57,060 fedbiomed INFO - log from: node_9261632d-ca98-4d57-81a1-8c109560d8bd / INFO - results uploaded successfully 
2021-12-10 17:30:57,715 fedbiomed INFO - log from: node_84ef4966-1dae-4d55-aff3-d2bf17c3d68a / INFO - results uploaded successfully 
2021-12-10 17:31:04,721 fedbiomed INFO - Downloading model params after training on node_5ef29a9f-9647-4c43-b45a-37a67

2021-12-10 17:31:09,318 fedbiomed INFO - log from: node_9261632d-ca98-4d57-81a1-8c109560d8bd / DEBUG - [TASKS QUEUE] Item:{'researcher_id': 'researcher_3f89ab24-0886-42c3-b625-d0ade13b2a44', 'job_id': 'c68e1cc2-3f28-4830-9b0a-35712ba3137f', 'params_url': 'http://localhost:8844/media/uploads/2021/12/10/researcher_params_3f26dfd2-f93d-4e32-94b8-2e6e039c07c3.pt', 'training_args': {'batch_size': 20, 'lr': 1e-05, 'epochs': 1, 'dry_run': False, 'batch_maxnum': 250}, 'training_data': {'node_9261632d-ca98-4d57-81a1-8c109560d8bd': ['dataset_132eb8e2-aa42-45db-b35e-cfc490157683']}, 'model_args': {'num_class': 6}, 'model_url': 'http://localhost:8844/media/uploads/2021/12/10/my_model_6e197884-6d27-4284-ae17-9c993e69e9c4.py', 'model_class': 'MyTrainingPlan', 'command': 'train'}
2021-12-10 17:31:11,445 fedbiomed INFO - log from: node_5ef29a9f-9647-4c43-b45a-37a67ce9b237 / INFO - {'monitor': <fedbiomed.node.history_monitor.HistoryMonitor object at 0x139f80310>, 'batch_size': 20, 'lr': 1e-05, 'epochs'

2021-12-10 17:39:30,732 fedbiomed INFO - log from: node_84ef4966-1dae-4d55-aff3-d2bf17c3d68a / DEBUG - Reached 250 batches for this epoch, ignore remaining data
2021-12-10 17:39:31,461 fedbiomed INFO - log from: node_9261632d-ca98-4d57-81a1-8c109560d8bd / DEBUG - Reached 250 batches for this epoch, ignore remaining data
2021-12-10 17:39:31,916 fedbiomed INFO - log from: node_5ef29a9f-9647-4c43-b45a-37a67ce9b237 / DEBUG - Reached 250 batches for this epoch, ignore remaining data
2021-12-10 17:39:35,060 fedbiomed INFO - log from: node_84ef4966-1dae-4d55-aff3-d2bf17c3d68a / INFO - results uploaded successfully 
2021-12-10 17:39:36,026 fedbiomed INFO - log from: node_5ef29a9f-9647-4c43-b45a-37a67ce9b237 / INFO - results uploaded successfully 
2021-12-10 17:39:36,163 fedbiomed INFO - log from: node_9261632d-ca98-4d57-81a1-8c109560d8bd / INFO - results uploaded successfully 
2021-12-10 17:39:42,099 fedbiomed INFO - Downloading model params after training on node_84ef4966-1dae-4d55-aff3-d2bf1

Local training results for each round and each node are available in `exp.training_replies` (index 0 to (`rounds` - 1) ).

For example you can view the training results for the last round below.

Different timings (in seconds) are reported for each dataset of a node participating in a round :
- `rtime_training` real time (clock time) spent in the training function on the node
- `ptime_training` process time (user and system CPU) spent in the training function on the node
- `rtime_total` real time (clock time) spent in the researcher between sending the request and handling the response, at the `Job()` layer

In [9]:
print("\nList the training rounds : ", exp.training_replies.keys())

print("\nList the clients for the last training round and their timings : ")
round_data = exp.training_replies[rounds - 1].data
for c in range(len(round_data)):
    print("\t- {id} :\
    \n\t\trtime_training={rtraining:.2f} seconds\
    \n\t\tptime_training={ptraining:.2f} seconds\
    \n\t\trtime_total={rtotal:.2f} seconds".format(id = round_data[c]['node_id'],
        rtraining = round_data[c]['timing']['rtime_training'],
        ptraining = round_data[c]['timing']['ptime_training'],
        rtotal = round_data[c]['timing']['rtime_total']))
print('\n')
    
exp.training_replies[rounds - 1].dataframe


List the training rounds :  dict_keys([0, 1, 2])

List the clients for the last training round and their timings : 
	- node_84ef4966-1dae-4d55-aff3-d2bf17c3d68a :    
		rtime_training=326.65 seconds    
		ptime_training=417.12 seconds    
		rtime_total=340.24 seconds
	- node_5ef29a9f-9647-4c43-b45a-37a67ce9b237 :    
		rtime_training=327.90 seconds    
		ptime_training=418.85 seconds    
		rtime_total=341.09 seconds
	- node_9261632d-ca98-4d57-81a1-8c109560d8bd :    
		rtime_training=327.39 seconds    
		ptime_training=418.22 seconds    
		rtime_total=342.03 seconds




Unnamed: 0,success,msg,dataset_id,node_id,params_path,params,timing
0,True,,dataset_b6fecfe7-8211-4319-b669-7a8abad28173,node_84ef4966-1dae-4d55-aff3-d2bf17c3d68a,/Users/mlorenzi/works/temp/fedbiomed/var/tmp/m...,{'model.features.conv0.weight': [[tensor([[-0....,"{'rtime_training': 326.645386779, 'ptime_train..."
1,True,,dataset_f07755c7-cde0-474d-8a87-7ae542957c4b,node_5ef29a9f-9647-4c43-b45a-37a67ce9b237,/Users/mlorenzi/works/temp/fedbiomed/var/tmp/m...,{'model.features.conv0.weight': [[tensor([[-0....,"{'rtime_training': 327.9035163699999, 'ptime_t..."
2,True,,dataset_132eb8e2-aa42-45db-b35e-cfc490157683,node_9261632d-ca98-4d57-81a1-8c109560d8bd,/Users/mlorenzi/works/temp/fedbiomed/var/tmp/m...,{'model.features.conv0.weight': [[tensor([[-0....,"{'rtime_training': 327.39025077200006, 'ptime_..."


Federated parameters for each round are available in `exp.aggregated_params` (index 0 to (`rounds` - 1) ).

For example you can view the federated parameters for the last round of the experiment :

In [10]:
print("\nList the training rounds : ", exp.aggregated_params.keys())

print("\nAccess the federated params for the last training round :")
print("\t- params_path: ", exp.aggregated_params[rounds - 1]['params_path'])
print("\t- parameter data: ", exp.aggregated_params[rounds - 1]['params'].keys())



List the training rounds :  dict_keys([0, 1, 2])

Access the federated params for the last training round :
	- params_path:  /Users/mlorenzi/works/temp/fedbiomed/var/tmp/researcher_params_2273403c-8048-4b25-828a-0dded721e9d4.pt
	- parameter data:  odict_keys(['model.features.conv0.weight', 'model.features.norm0.weight', 'model.features.norm0.bias', 'model.features.norm0.running_mean', 'model.features.norm0.running_var', 'model.features.norm0.num_batches_tracked', 'model.features.denseblock1.denselayer1.layers.norm1.weight', 'model.features.denseblock1.denselayer1.layers.norm1.bias', 'model.features.denseblock1.denselayer1.layers.norm1.running_mean', 'model.features.denseblock1.denselayer1.layers.norm1.running_var', 'model.features.denseblock1.denselayer1.layers.norm1.num_batches_tracked', 'model.features.denseblock1.denselayer1.layers.conv1.weight', 'model.features.denseblock1.denselayer1.layers.norm2.weight', 'model.features.denseblock1.denselayer1.layers.norm2.bias', 'model.features

## Testing


In [11]:
import os
import shutil
import tempfile
import PIL
import torch
import numpy as np
from sklearn.metrics import classification_report

from monai.apps import download_and_extract
from monai.config import print_config
from monai.data import decollate_batch
from monai.metrics import ROCAUCMetric
from monai.networks.nets import DenseNet121
from monai.transforms import (
    Activations,
    AddChannel,
    AsDiscrete,
    Compose,
    LoadImage,
    RandFlip,
    RandRotate,
    RandZoom,
    ScaleIntensity,
    EnsureType,
)
from monai.utils import set_determinism

print_config()

data_dir = '/Users/mlorenzi/works/temp/MedNIST_testing'

MONAI version: 0.8.0
Numpy version: 1.19.1
Pytorch version: 1.10.0
MONAI flags: HAS_EXT = False, USE_COMPILED = False
MONAI rev id: 714d00dffe6653e21260160666c4c201ab66511b

Optional dependencies:
Pytorch Ignite version: NOT INSTALLED or UNKNOWN VERSION.
Nibabel version: NOT INSTALLED or UNKNOWN VERSION.
scikit-image version: NOT INSTALLED or UNKNOWN VERSION.
Pillow version: 8.4.0
Tensorboard version: 2.7.0
gdown version: 4.2.0
TorchVision version: 0.11.1
tqdm version: 4.62.3
lmdb version: NOT INSTALLED or UNKNOWN VERSION.
psutil version: NOT INSTALLED or UNKNOWN VERSION.
pandas version: 1.3.4
einops version: NOT INSTALLED or UNKNOWN VERSION.
transformers version: NOT INSTALLED or UNKNOWN VERSION.
mlflow version: NOT INSTALLED or UNKNOWN VERSION.

For details about installing the optional dependencies, please visit:
    https://docs.monai.io/en/latest/installation.html#installing-the-recommended-dependencies



In [12]:
class_names = sorted(x for x in os.listdir(data_dir)
                     if os.path.isdir(os.path.join(data_dir, x)))
num_class = len(class_names)
image_files = [
    [
        os.path.join(data_dir, class_names[i], x)
        for x in os.listdir(os.path.join(data_dir, class_names[i]))
    ]
    for i in range(num_class)
]

num_each = [len(image_files[i]) for i in range(num_class)]
image_files_list = []

image_class = []
for i in range(num_class):
    image_files_list.extend(image_files[i])
    image_class.extend([i] * num_each[i])
num_total = len(image_class)
image_width, image_height = PIL.Image.open(image_files_list[0]).size

print(f"Total image count: {num_total}")
print(f"Image dimensions: {image_width} x {image_height}")
print(f"Label names: {class_names}")
print(f"Label counts: {num_each}")

Total image count: 6000
Image dimensions: 64 x 64
Label names: ['AbdomenCT', 'BreastMRI', 'CXR', 'ChestCT', 'Hand', 'HeadCT']
Label counts: [1000, 1000, 1000, 1000, 1000, 1000]


In [13]:
length = len(image_files_list)
indices = np.arange(length)
np.random.shuffle(indices)


test_split = int(0.1 * length)
test_indices = indices[:test_split]

test_x = [image_files_list[i] for i in test_indices]
test_y = [image_class[i] for i in test_indices]

val_transforms = Compose(
    [LoadImage(image_only=True), AddChannel(), ScaleIntensity(), EnsureType()])

y_pred_trans = Compose([EnsureType(), Activations(softmax=True)])
y_trans = Compose([EnsureType(), AsDiscrete(to_onehot=num_class)])

In [14]:
class MedNISTDataset(torch.utils.data.Dataset):
    def __init__(self, image_files, labels, transforms):
        self.image_files = image_files
        self.labels = labels
        self.transforms = transforms

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, index):
        return self.transforms(self.image_files[index]), self.labels[index]


test_ds = MedNISTDataset(test_x, test_y, val_transforms)
test_loader = torch.utils.data.DataLoader(
    test_ds, batch_size=300)

In [15]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = DenseNet121(spatial_dims=2, in_channels=1,
                    out_channels=num_class).to(device)
loss_function = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), 1e-5)
max_epochs = 4
val_interval = 1
auc_metric = ROCAUCMetric()

In [16]:
model = exp.model_instance
model.load_state_dict(exp.aggregated_params[rounds - 1]['params'])

<All keys matched successfully>

In [17]:
y_true = []
y_pred = []
with torch.no_grad():
    for test_data in test_loader:
        test_images, test_labels = (
            test_data[0].to(device),
            test_data[1].to(device),
        )
        pred = model(test_images).argmax(dim=1)
        for i in range(len(pred)):
            y_true.append(test_labels[i].item())
            y_pred.append(pred[i].item())


In [18]:
print(classification_report(
    y_true, y_pred, target_names=class_names, digits=4))

              precision    recall  f1-score   support

   AbdomenCT     0.9914    0.9829    0.9871       117
   BreastMRI     0.9881    1.0000    0.9940        83
         CXR     1.0000    0.9890    0.9945        91
     ChestCT     0.9798    1.0000    0.9898        97
        Hand     0.9900    1.0000    0.9950        99
      HeadCT     1.0000    0.9823    0.9911       113

    accuracy                         0.9917       600
   macro avg     0.9915    0.9924    0.9919       600
weighted avg     0.9918    0.9917    0.9917       600

