# MedNIST dataset using DenseNet Transfer Learning

In [2]:
import torch
import torch.nn as nn
from fedbiomed.common.training_plans import TorchTrainingPlan
from fedbiomed.common.data import DataManager
from torchvision import datasets, transforms
from torchvision.models import densenet121
from torchvision import datasets, transforms, models
from torch.utils.data import Dataset
from fedbiomed.researcher.experiment import Experiment
from fedbiomed.researcher.aggregators.fedavg import FedAverage

In [11]:
# Query the nodes for the datasets they have registered, so the tags used when
# creating an experiment can be checked against what is actually available.
from fedbiomed.researcher.requests import Requests
req  = Requests()
# Last expression of the cell: the returned listing is shown as the cell output.
req.list()

2024-01-24 11:16:26,736 fedbiomed DEBUG - Node: NODE_7842724a-cafa-49cc-862d-149288bbbb22 polling for the tasks


{'NODE_7842724a-cafa-49cc-862d-149288bbbb22': [{'name': 'Cancer image labels',
   'data_type': 'csv',
   'tags': ['cancer'],
   'description': 'This dataset keeps the img path for all classes of cancer images',
   'shape': [4600, 4],
   'dataset_id': 'dataset_eefc95a8-dd37-476c-90d0-0ee33bcee8c9',
   'dataset_parameters': None},
  {'name': '',
   'data_type': 'flamby',
   'tags': ['ixi'],
   'description': '',
   'shape': [249, 1, 48, 60, 48],
   'dataset_id': 'dataset_557acc1b-481a-4cfa-8b84-bc74f69f2ce5',
   'dataset_parameters': None,
   'dlp_id': 'dlp_29d946c8-e6e5-43a6-b4e0-a4089f7c8609'},
  {'name': 'MEDNIST',
   'data_type': 'mednist',
   'tags': ['#MEDNIST', '#dataset'],
   'description': 'MEDNIST dataset',
   'shape': [1000, 3, 64, 64],
   'dataset_id': 'dataset_7170504c-a818-441d-bab4-40dc5d13be65',
   'dataset_parameters': None}]}

## Train only the classification layer

In [12]:
class MyTrainingPlan2(TorchTrainingPlan):
    """Transfer-learning plan: pre-trained DenseNet-121 with a new classifier head.

    The feature extractor is frozen (except its last sub-module, see
    `init_model`) and the original classifier is replaced by a small
    fully-connected head sized for `model_args['num_classes']`.
    """

    def init_model(self, model_args):
        """Build the DenseNet-121 model used for training.

        Args:
            model_args: dict that must contain the key 'num_classes'
                (number of output classes of the new classifier).

        Returns:
            The DenseNet-121 module with frozen feature layers and a freshly
            initialised classifier.
        """
        # Load the pre-trained DenseNet model.
        # NOTE(review): recent torchvision releases deprecate `pretrained=` in
        # favour of `weights=` - confirm against the version installed on the nodes.
        model = models.densenet121(pretrained=True)

        # Freeze every sub-module of the feature extractor except the last one
        # (the `[:-1]` slice leaves the final entry of `model.features` trainable).
        for param in model.features[:-1].parameters():
            param.requires_grad = False

        num_classes = model_args['num_classes']
        num_ftrs = model.classifier.in_features

        # Replace the original classifier by a new head sized for our classes.
        model.classifier = nn.Sequential(
            nn.Linear(num_ftrs, 1024),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(1024, num_classes)
        )

        return model

    def init_dependencies(self):
        """Return the import statements the training plan code relies on."""
        return [
            "import pandas as pd",
            "from PIL import Image",
            "from torchvision import datasets, transforms, models",
            "from torch.utils.data import Dataset",
            "import torch.optim as optim",
            "from torchvision.models import densenet121"
        ]

    def init_optimizer(self, optimizer_args):
        """Create the Adam optimizer.

        Args:
            optimizer_args: dict that must contain the learning rate under 'lr'.
        """
        return optim.Adam(self.model().parameters(), lr=optimizer_args["lr"])

    def training_data(self):
        """Build the DataManager over the image folder at `self.dataset_path`.

        Returns:
            A DataManager wrapping a torchvision `ImageFolder` dataset, with
            shuffling enabled.
        """
        # Resize to the 224x224 input size, apply random augmentations, then
        # normalise with the mean/std below (presumably the ImageNet statistics
        # matching the pre-trained weights - TODO confirm).
        preprocess = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomVerticalFlip(p=0.5),
            transforms.RandomRotation(30),
            transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

        train_data = datasets.ImageFolder(self.dataset_path, transform=preprocess)
        return DataManager(dataset=train_data, shuffle=True)

    def training_step(self, data, target):
        """Compute the cross-entropy loss of the model on one batch.

        Args:
            data: batch of inputs fed to the model.
            target: class labels for the batch.

        Returns:
            The loss tensor for this batch.
        """
        # Call the module itself rather than `.forward()` so that any
        # registered hooks are executed.
        output = self.model()(data)
        loss_func = nn.CrossEntropyLoss()
        return loss_func(output, target)




In [13]:
# Training settings passed to the Experiment.
training_args = dict(
    loader_args=dict(batch_size=32),
    optimizer_args=dict(lr=1e-4),
    epochs=1,
    dry_run=False,
    # Fast pass for development: only use (batch_maxnum * batch_size) samples
    batch_maxnum=100,
)

# Arguments read by the training plan's `init_model` (number of output classes).
model_args = dict(num_classes=6)

In [14]:


from fedbiomed.common.metrics import MetricTypes

# Datasets are selected by tag; a single federated round is run.
tags = ['#MEDNIST', '#dataset']
rounds = 1

exp = Experiment(
    tags=tags,
    training_plan_class=MyTrainingPlan2,
    model_args=model_args,
    training_args=training_args,
    round_limit=rounds,
    aggregator=FedAverage(),
)

# Validation settings: test ratio of 0.1, evaluated on local updates with the
# accuracy metric.
exp.set_test_ratio(.1)
exp.set_test_on_local_updates(True)
exp.set_test_metric(MetricTypes.ACCURACY)

# Kept as the last expression so its return value stays the cell output.
exp.set_tensorboard(True)


2024-01-24 11:16:34,486 fedbiomed DEBUG - Node: NODE_7842724a-cafa-49cc-862d-149288bbbb22 polling for the tasks
2024-01-24 11:16:34,487 fedbiomed INFO - Node selected for training -> NODE_7842724a-cafa-49cc-862d-149288bbbb22
2024-01-24 11:16:34,489 fedbiomed DEBUG - Model file has been saved: /home/ebirgy/development/fedbiomed_github/fedbiomed/var/experiments/Experiment_0034/model_984c29ac-5d12-4b90-8b4b-95193e98d0a2.py
2024-01-24 11:16:34,665 fedbiomed DEBUG - using native torch optimizer
2024-01-24 11:16:34,668 fedbiomed DEBUG - Experimentation training_args updated for `job`
2024-01-24 11:16:34,668 fedbiomed DEBUG - Experimentation training_args updated for `job`
2024-01-24 11:16:34,669 fedbiomed DEBUG - Experimentation training_args updated for `job`


True

In [28]:
# Look up the directory configured for TensorBoard results in the researcher
# environment (e.g. to point a TensorBoard instance at it).
from fedbiomed.researcher.environ import environ
tensorboard_dir = environ['TENSORBOARD_RESULTS_DIR']

In [None]:
# Run the experiment configured above; progress is reported in the log output.
exp.run()

2024-01-24 11:16:44,512 fedbiomed INFO - Sampled nodes in round 0 ['NODE_7842724a-cafa-49cc-862d-149288bbbb22']
2024-01-24 11:16:44,520 fedbiomed INFO - [1mSending request[0m 
					[1m To[0m: NODE_7842724a-cafa-49cc-862d-149288bbbb22 
					[1m Request: [0m: TRAIN
 -----------------------------------------------------------------
2024-01-24 11:16:44,793 fedbiomed DEBUG - Node: NODE_7842724a-cafa-49cc-862d-149288bbbb22 polling for the tasks
2024-01-24 11:16:46,915 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_7842724a-cafa-49cc-862d-149288bbbb22 
					 Round 1 Epoch: 1 | Iteration: 1/100 (1%) | Samples: 32/3200
 					 Loss: [1m1.911242[0m 
					 ---------
2024-01-24 11:17:07,610 fedbiomed INFO - [1mTRAINING[0m 
					 NODE_ID: NODE_7842724a-cafa-49cc-862d-149288bbbb22 
					 Round 1 Epoch: 1 | Iteration: 10/100 (10%) | Samples: 320/3200
 					 Loss: [1m1.597413[0m 
					 ---------
2024-01-24 11:17:23,828 fedbiomed DEBUG - Node: NODE_7842724a-cafa-49cc-862d-149288bbbb2

In [None]:
## Same as above, but also training the last layers

## Training the model's last layers

In [None]:
class MyTrainingPlan3(TorchTrainingPlan):
    """Transfer-learning plan that also fine-tunes the last DenseNet-121 feature modules.

    All but the last three sub-modules of `model.features` are frozen, the
    classifier is replaced by a new head, and training images are read from a
    CSV file listing image paths and labels.
    """

    def init_model(self, model_args):
        """Build the DenseNet-121 model used for training.

        Args:
            model_args: dict that must contain the key 'num_classes'
                (number of output classes of the new classifier).

        Returns:
            The DenseNet-121 module with partially frozen feature layers and a
            freshly initialised classifier.
        """
        # NOTE(review): recent torchvision releases deprecate `pretrained=` in
        # favour of `weights=` - confirm against the version installed on the nodes.
        model = models.densenet121(pretrained=True)

        # Freeze every sub-module of the feature extractor except the last
        # three, which stay trainable and are fine-tuned with the classifier.
        for param in model.features[:-3].parameters():
            param.requires_grad = False

        # Modify the classifier to match the number of classes in the dataset.
        num_ftrs = model.classifier.in_features
        num_classes = model_args['num_classes']
        hidden_dim = 1024
        model.classifier = nn.Sequential(
            nn.Linear(num_ftrs, hidden_dim),
            nn.ReLU(inplace=True),
            # in_features must equal the previous layer's out_features,
            # otherwise the forward pass fails with a shape mismatch.
            nn.Linear(hidden_dim, num_classes)
        )

        return model

    def init_dependencies(self):
        """Return the import statements the training plan code relies on."""
        return [
            "import pandas as pd",
            "from PIL import Image",
            "from torchvision import datasets, transforms, models",
            "from torch.utils.data import Dataset",
            "import torch.optim as optim"
        ]

    def init_optimizer(self, optimizer_args):
        """Create the Adam optimizer.

        Args:
            optimizer_args: dict that must contain the learning rate under 'lr'.
        """
        return optim.Adam(self.model().parameters(), lr=optimizer_args["lr"])

    class CustomDatasetFedBioMed(Dataset):
        """Image dataset described by a CSV file.

        The CSV is read with its first column as index; of the remaining
        columns, the first holds the image file path and the second the
        class label.
        """

        def __init__(self, dataset_path: str, transform=None):
            """Load the CSV index and set up the image transform.

            Args:
                dataset_path: path to the CSV file.
                transform: optional callable applied to each PIL image; when
                    omitted, the default augmentation pipeline below is used.
            """
            self.dataframe = pd.read_csv(dataset_path, index_col=0)

            if transform is None:
                # Default pipeline: resize to 224x224, random augmentations,
                # then normalisation (presumably the ImageNet statistics
                # matching the pre-trained weights - TODO confirm).
                transform = transforms.Compose([
                    transforms.Resize((224, 224)),
                    transforms.RandomHorizontalFlip(p=0.5),
                    transforms.RandomVerticalFlip(p=0.5),
                    transforms.RandomRotation(30),
                    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
                    transforms.ToTensor(),
                    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
                ])
            self.transform = transform

        def __len__(self):
            """Return the number of rows (samples) in the CSV."""
            return len(self.dataframe)

        def __getitem__(self, idx):
            """Return the `(image, label)` pair for row `idx`."""
            img_path = self.dataframe.iloc[idx, 0]  # file path: first column after the index
            image = Image.open(img_path).convert("RGB")

            label = self.dataframe.iloc[idx, 1]  # class label: second column after the index

            image = self.transform(image)

            # Convert the label to a tensor of the integer type expected by the loss.
            label = torch.tensor(label, dtype=torch.long)

            return image, label

    def training_data(self):
        """Build the DataManager over the CSV-described dataset at `self.dataset_path`."""
        dataset = self.CustomDatasetFedBioMed(self.dataset_path)
        return DataManager(dataset)

    def training_step(self, data, target):
        """Compute the cross-entropy loss of the model on one batch.

        Args:
            data: batch of inputs fed to the model.
            target: class labels for the batch.

        Returns:
            The loss tensor for this batch.
        """
        # Call the module itself rather than `.forward()` so that any
        # registered hooks are executed.
        output = self.model()(data)
        loss_func = nn.CrossEntropyLoss()
        return loss_func(output, target)




In [None]:
from fedbiomed.common.metrics import MetricTypes

# Training settings passed to the Experiment.
training_args = {
    'loader_args': { 'batch_size': 32, },
    'optimizer_args': {'lr': 1e-4},
    'epochs': 1,
    'dry_run': False,
    'batch_maxnum': 100 # Fast pass for development : only use ( batch_maxnum * batch_size ) samples
}
# Arguments read by the training plan's `init_model` (number of output classes).
model_args={
    'num_classes': 6
}
# NOTE(review): this tag literally contains single quotes - confirm it matches
# the tag the dataset was registered with on the node.
tags =  ["'med'"]
rounds = 1

# This section trains the model's last layers, so the experiment must use
# MyTrainingPlan3 (the plan defined for it), not MyTrainingPlan2.
exp = Experiment(tags=tags,
                 training_plan_class=MyTrainingPlan3,
                 model_args=model_args,
                 training_args=training_args,
                 round_limit=rounds,
                 aggregator=FedAverage())

# NOTE(review): the test ratio is left unset here while testing on local
# updates is enabled - confirm this is intended.
#exp.set_test_ratio(.1)
exp.set_test_on_local_updates(True)
exp.set_test_metric(MetricTypes.ACCURACY)

# Kept as the last expression so its return value stays the cell output.
exp.set_tensorboard(True)
    

In [None]:
# Run the experiment configured in the previous cell.
exp.run()

In [None]:
# Look up the directory configured for TensorBoard results in the researcher
# environment (e.g. to point a TensorBoard instance at it).
from fedbiomed.researcher.environ import environ
tensorboard_dir = environ['TENSORBOARD_RESULTS_DIR']

In [None]:
# Save the experiment's model to a local file.
# NOTE(review): the file name says "training_plan2" - confirm it matches the
# training plan that `exp` was actually built with.
exp.training_plan().export_model('./training_plan2_densenet')

In [None]:
# Summarise which rounds were run and, for the last one, each node's timings.
print("\nList the training rounds : ", exp.training_replies().keys())

print("\nList the nodes for the last training round and their timings : ")
round_data = exp.training_replies()[rounds - 1]

# One entry per node. The four spaces before each "\n" are kept on purpose so
# the printed text stays exactly as before.
report_template = (
    "\t- {id} :    "
    "\n\t\trtime_training={rtraining:.2f} seconds    "
    "\n\t\tptime_training={ptraining:.2f} seconds    "
    "\n\t\trtime_total={rtotal:.2f} seconds"
)
for r in round_data.values():
    print(report_template.format(
        id=r['node_id'],
        rtraining=r['timing']['rtime_training'],
        ptraining=r['timing']['ptime_training'],
        rtotal=r['timing']['rtime_total'],
    ))
print('\n')
