MedNIST dataset using DenseNet Transfer Learning

In [1]:
import torch
import torch.nn as nn
from fedbiomed.common.training_plans import TorchTrainingPlan
from fedbiomed.common.data import DataManager
from torchvision import datasets, transforms
from torchvision.models import densenet121

from torch.utils.data import Dataset
from fedbiomed.researcher.experiment import Experiment
from fedbiomed.researcher.aggregators.fedavg import FedAverage

In [3]:
from fedbiomed.researcher.requests import Requests
# Create the researcher-side request handler and call its `list()` method.
# NOTE(review): presumably this lists the datasets shared by the connected
# nodes — confirm against the Fed-BioMed documentation.
# The output recorded below shows the researcher gRPC server failing to bind
# localhost:50051 ("Address already in use"), so this call ended with a
# FedbiomedCommunicationError in that run.
req  = Requests()
req.list()

2024-01-23 14:53:09,347 fedbiomed INFO - Starting researcher service...
2024-01-23 14:53:09,347 fedbiomed INFO - Waiting 3s for nodes to connect...
E0123 14:53:09.349416840  233303 chttp2_server.cc:1063]                UNKNOWN:No address added out of total 2 resolved for 'localhost:50051' {created_time:"2024-01-23T14:53:09.349372113+01:00", children:[UNKNOWN:Unable to configure socket {created_time:"2024-01-23T14:53:09.349332838+01:00", fd:70, children:[UNKNOWN:Address already in use {syscall:"bind", os_error:"Address already in use", errno:98, created_time:"2024-01-23T14:53:09.349313499+01:00"}]}, UNKNOWN:Unable to configure socket {created_time:"2024-01-23T14:53:09.349365522+01:00", fd:70, children:[UNKNOWN:Address already in use {syscall:"bind", os_error:"Address already in use", errno:98, created_time:"2024-01-23T14:53:09.349361896+01:00"}]}]}
2024-01-23 14:53:09,349 fedbiomed ERROR - Researcher gRPC server has stopped. Please try to restart: Failed to bind to address localhost:500

FedbiomedCommunicationError: ErrorNumbers.FB628: Communication client is not initialized.

## Change only the classification layer

In [None]:
class MyTrainingPlan2(TorchTrainingPlan):
    """Transfer-learning plan: pre-trained DenseNet-121 with a new classifier.

    Only the `classifier` module of the torchvision model is replaced. No
    parameter is frozen: `init_optimizer` hands every model parameter to Adam.
    """

    def init_model(self, model_args):
        """Build DenseNet-121 with a classification head sized for the dataset.

        Args:
            model_args: mapping that must contain the key 'num_classes'.

        Returns:
            The DenseNet-121 model whose `classifier` outputs one raw score
            (logit) per class.
        """
        class CustomClassifier(nn.Module):
            """Single linear layer mapping DenseNet features to class logits."""

            def __init__(self, in_features, num_classes):
                super().__init__()
                self.fc = nn.Linear(in_features, num_classes)

            def forward(self, x):
                # Return raw logits. nn.CrossEntropyLoss (see training_step)
                # expects unnormalized scores and applies log-softmax itself,
                # so a softmax here would be applied twice. Apply
                # torch.softmax(output, dim=1) at inference time if
                # probabilities are needed.
                return self.fc(x)

        # Load the pre-trained DenseNet model.
        # NOTE(review): `pretrained=True` is the legacy torchvision argument
        # (newer releases use `weights=`); kept as-is because the installed
        # torchvision version is not visible from this file.
        model = models.densenet121(pretrained=True)

        # Replace the classifier with one matching the number of classes.
        in_features = model.classifier.in_features
        num_classes = model_args['num_classes']
        model.classifier = CustomClassifier(in_features, num_classes)

        return model

    def init_dependencies(self):
        """Return the import statements this training plan relies on."""
        return [
            "import pandas as pd",
            "from PIL import Image",
            "from torchvision import datasets, transforms, models",
            "from torch.utils.data import Dataset",
            "import torch.optim as optim",
            "from torchvision.models import densenet121"
        ]

    def init_optimizer(self, optimizer_args):
        """Create an Adam optimizer over all model parameters.

        Args:
            optimizer_args: mapping that must contain the learning rate 'lr'.
        """
        return optim.Adam(self.model().parameters(), lr=optimizer_args["lr"])

    class CustomDatasetFedBioMed(Dataset):
        """Holder for the image transform pipeline.

        NOTE(review): this class defines neither `__len__` nor `__getitem__`
        and `training_data` below does not use it (it relies on
        `datasets.ImageFolder`); kept only so the attribute still exists.
        """

        def __init__(self, transform=None):
            # Honor a caller-supplied transform; otherwise fall back to the
            # default augmentation + ImageNet normalization pipeline.
            if transform is None:
                transform = transforms.Compose([
                    transforms.Resize((224, 224)),
                    transforms.RandomHorizontalFlip(p=0.5),
                    transforms.RandomVerticalFlip(p=0.5),
                    transforms.RandomRotation(30),
                    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
                    transforms.ToTensor(),
                    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
                ])
            self.transform = transform

    def training_data(self):
        """Build the training DataManager from an image folder.

        Reads images from `self.dataset_path` with `datasets.ImageFolder`,
        applying random augmentations and normalization, and requests
        shuffling from the data loader.
        """
        print("dataset path", self.dataset_path)
        preprocess = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomVerticalFlip(p=0.5),
            transforms.RandomRotation(30),
            transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

        train_data = datasets.ImageFolder(self.dataset_path, transform=preprocess)
        train_kwargs = {'shuffle': True}
        return DataManager(dataset=train_data, **train_kwargs)

    def training_step(self, data, target):
        """Compute the cross-entropy loss of the model on one batch.

        Args:
            data: batch of inputs passed to the model's forward pass.
            target: class indices for the batch.

        Returns:
            The loss tensor for the batch.
        """
        output = self.model().forward(data)
        # The loss lives in torch.nn, not on the training plan object.
        loss_func = nn.CrossEntropyLoss()
        loss = loss_func(output, target)
        return loss




In [None]:
# Settings handed to the experiment for each training round.
training_args = dict(
    loader_args=dict(batch_size=32),
    optimizer_args=dict(lr=1e-4),
    epochs=1,
    dry_run=False,
    # Fast pass for development: only use (batch_maxnum * batch_size) samples
    batch_maxnum=100,
)

# Settings read by the training plan's init_model (number of output classes).
model_args = dict(num_classes=6)

In [None]:


# Tags passed to the experiment together with the number of rounds to run.
# NOTE(review): the tag string itself contains single quotes ('med', quotes
# included) — confirm this matches the tag the dataset was registered with.
tags =  ["'med'"]
rounds = 1

# Build the federated experiment for MyTrainingPlan2 with FedAverage
# aggregation, limited to `rounds` rounds.
exp = Experiment(tags=tags,
                 training_plan_class=MyTrainingPlan2,
                 model_args=model_args,
                 training_args=training_args,
                 round_limit=rounds,
                 aggregator=FedAverage())

from fedbiomed.common.metrics import MetricTypes
# Test ratio is left at the experiment's default (call below is disabled).
#exp.set_test_ratio(.1)
# Turn on testing on local updates and use accuracy as the test metric.
exp.set_test_on_local_updates(True)
exp.set_test_metric(MetricTypes.ACCURACY)

# Turn on TensorBoard reporting for this experiment.
exp.set_tensorboard(True)


In [None]:
from fedbiomed.researcher.environ import environ
# Look up the TensorBoard results directory from the researcher environment
# (e.g. to point `tensorboard --logdir` at it).
tensorboard_dir = environ['TENSORBOARD_RESULTS_DIR']

In [None]:
# Launch the experiment configured above (round_limit was set to `rounds`).
exp.run()

In [None]:
# Export the model held by the experiment's training plan to
# './training_plan2_densenet'.
exp.training_plan().export_model('./training_plan2_densenet')

In [None]:
## Same as above, but also training the last layers

## Training the model's last layers

In [None]:
class MyTrainingPlan3(TorchTrainingPlan):
    """Transfer-learning plan that fine-tunes the last layers of DenseNet-121.

    Every module of `model.features` except its last two entries is frozen,
    and the classifier is replaced by a two-layer head.
    """

    def init_model(self, model_args):
        """Build the partially frozen DenseNet-121.

        Args:
            model_args: mapping that must contain the key 'num_classes'.

        Returns:
            The DenseNet-121 model whose `classifier` outputs one raw score
            (logit) per class.
        """
        # NOTE(review): `pretrained=True` is the legacy torchvision argument
        # (newer releases use `weights=`); kept as-is because the installed
        # torchvision version is not visible from this file.
        model = models.densenet121(pretrained=True)

        # Freeze all feature modules except the last two entries of
        # `model.features`, which remain trainable.
        for param in model.features[:-2].parameters():
            param.requires_grad = False

        # Replace the classifier with a head matching the number of classes.
        num_ftrs = model.classifier.in_features
        num_classes = model_args['num_classes']
        # No softmax at the end: training_step feeds these raw scores to
        # nn.CrossEntropyLoss, which expects unnormalized logits.
        model.classifier = nn.Sequential(
            nn.Linear(num_ftrs, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, num_classes),
        )

        return model

    def init_dependencies(self):
        """Return the import statements this training plan relies on."""
        return [
            "import pandas as pd",
            "from PIL import Image",
            "from torchvision import datasets, transforms, models",
            "from torch.utils.data import Dataset",
            "import torch.optim as optim"
        ]

    def init_optimizer(self, optimizer_args):
        """Create an Adam optimizer over the model parameters.

        Args:
            optimizer_args: mapping that must contain the learning rate 'lr'.
        """
        return optim.Adam(self.model().parameters(), lr=optimizer_args["lr"])

    class CustomDatasetFedBioMed(Dataset):
        """Image dataset described by a CSV file of (path, label) rows."""

        def __init__(self, dataset_path: str, transform=None):
            """Load the CSV index and set up the image transform.

            Args:
                dataset_path: path of the CSV file; its first column is used
                    as the DataFrame index.
                transform: optional callable applied to each PIL image. When
                    omitted, a default augmentation + ImageNet normalization
                    pipeline is used.
            """
            self.dataframe = pd.read_csv(dataset_path, index_col=0)

            # Honor a caller-supplied transform instead of discarding it.
            if transform is None:
                transform = transforms.Compose([
                    transforms.Resize((224, 224)),
                    transforms.RandomHorizontalFlip(p=0.5),
                    transforms.RandomVerticalFlip(p=0.5),
                    transforms.RandomRotation(30),
                    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
                    transforms.ToTensor(),
                    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
                ])
            self.transform = transform

        def __len__(self):
            """Return the number of rows (samples) in the CSV index."""
            return len(self.dataframe)

        def __getitem__(self, idx):
            """Return the transformed image and its label for row `idx`."""
            # Columns are counted after the index column (index_col=0):
            # column 0 holds the image path, column 1 the class label.
            img_path = self.dataframe.iloc[idx, 0]
            label = self.dataframe.iloc[idx, 1]

            image = Image.open(img_path).convert("RGB")
            image = self.transform(image)

            # Class indices must be int64 tensors for nn.CrossEntropyLoss.
            label = torch.tensor(label, dtype=torch.long)

            return image, label

    def training_data(self):
        """Build the training DataManager from the CSV at `self.dataset_path`."""
        dataset = self.CustomDatasetFedBioMed(self.dataset_path)
        return DataManager(dataset)

    def training_step(self, data, target):
        """Compute the cross-entropy loss of the model on one batch.

        Args:
            data: batch of inputs passed to the model's forward pass.
            target: class indices for the batch.

        Returns:
            The loss tensor for the batch.
        """
        output = self.model().forward(data)
        loss_func = nn.CrossEntropyLoss()
        loss = loss_func(output, target)
        return loss




In [None]:
# Settings handed to the experiment for each training round.
training_args = {
    'loader_args': {'batch_size': 32},
    'optimizer_args': {'lr': 1e-4},
    'epochs': 1,
    'dry_run': False,
    'batch_maxnum': 100,  # Fast pass for development: only use (batch_maxnum * batch_size) samples
}
# Settings read by the training plan's init_model (number of output classes).
model_args = {
    'num_classes': 6
}
# NOTE(review): the tag string itself contains single quotes ('med', quotes
# included) — confirm this matches the tag the dataset was registered with.
tags = ["'med'"]
rounds = 1

# This section trains the last layers of the network, so the experiment must
# use MyTrainingPlan3 (the plan defined in this section), not MyTrainingPlan2.
exp = Experiment(tags=tags,
                 training_plan_class=MyTrainingPlan3,
                 model_args=model_args,
                 training_args=training_args,
                 round_limit=rounds,
                 aggregator=FedAverage())

from fedbiomed.common.metrics import MetricTypes
# Test ratio is left at the experiment's default (call below is disabled).
#exp.set_test_ratio(.1)
# Turn on testing on local updates and use accuracy as the test metric.
exp.set_test_on_local_updates(True)
exp.set_test_metric(MetricTypes.ACCURACY)

# Turn on TensorBoard reporting for this experiment.
exp.set_tensorboard(True)
    

In [None]:
# Launch the experiment configured in the previous cell.
exp.run()

In [None]:
from fedbiomed.researcher.environ import environ
# Look up the TensorBoard results directory from the researcher environment
# (e.g. to point `tensorboard --logdir` at it).
tensorboard_dir = environ['TENSORBOARD_RESULTS_DIR']