In [None]:
# !pip install --force-reinstall torch torchvision pytorch-lightning nni medmnist

In [1]:
import nni
import torch
from torchvision import transforms

# We'll be adding a few data augmentations to the training loop
train_transformations = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),  
    transforms.ColorJitter(brightness=0.2), 
    transforms.ToTensor(),
    transforms.Normalize(0.5, 0.5), # We're training on GrayScale images so we'll standardise to 0.5
])


val_transformations = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(.5, .5),
])


In [2]:
from torch.utils.data import Dataset

# MedMNIST datasets outputs label of [batch_size, num_classes] shape, so we reduce 1 dimension for the CrossEntropy Loss
class CorrectDimensions(Dataset):
    def __init__(self, dataset, **kwargs):
        self.data = dataset(**kwargs)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image, label = self.data[idx]
        return image, label.squeeze()

In [None]:
from medmnist import PneumoniaMNIST, BreastMNIST

# Wrap MedMNIST datasets with our dataset class
pneu_train_data = CorrectDimensions(PneumoniaMNIST, split='train', transform=train_transformations, download=True)
pneu_val_data = CorrectDimensions(PneumoniaMNIST, split='val', transform=val_transformations, download=True)
pneu_test_data = CorrectDimensions(PneumoniaMNIST, split='test', transform=val_transformations, download=True)

breast_train_data = CorrectDimensions(BreastMNIST, split='train', transform=train_transformations, download=True)
breast_val_data = CorrectDimensions(BreastMNIST, split='val', transform=val_transformations, download=True)
breast_test_data = CorrectDimensions(BreastMNIST, split='test', transform=val_transformations, download=True)

In [4]:
import torch.nn as nn
import torch.nn.functional as F
from nni.nas.nn.pytorch import LayerChoice, ModelSpace, MutableLinear

# This will be our most basic building block with just 2 convolutions and 1 pooling layer
class SimpleConvolutionBlock(nn.Module):
    def __init__(self, input_channels):
        super().__init__()
        self.conv1 = nn.Conv2d(input_channels, 16, 3, 1, 1) # 28x28
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) # 14x14
        self.conv2 = nn.Conv2d(16, 32, 3, 1, 1) # 32x14x14

    def forward(self, x):
        x = F.relu(self.conv1(x))
        output = self.conv2(self.pool(x))
        return output
    

# We'll compare it with similar layer but a larger kernel for the first convolution layer
# We'll be using padding of 2 to keep the same dimensions as DART expects them to be equal
class BiggerKernelBlock(nn.Module):
    def __init__(self, input_channels):
        super().__init__()
        self.conv1 = nn.Conv2d(input_channels, 16, 5, 1, 2) # 28x28
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) # 14x14
        self.conv2 = nn.Conv2d(16, 32, 3, 1, 1) # 32x14x14

    def forward(self, x):
        x = F.relu(self.conv1(x))
        output = self.conv2(self.pool(x))
        return output

# Similarly to Dart model space we'll also test depths of 3 and 4 for our convolutions
class Depth3ConvolutionBlock(nn.Module):
    def __init__(self, input_channels):
        super().__init__()
        self.conv1 = nn.Conv2d(input_channels, 16, 3, 1, 1) # 28x28
        self.conv2 = nn.Conv2d(16, 32, 3, 1, 1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) # 14x14
        self.conv3 = nn.Conv2d(32, 32, 3, 1, 1) # 32x14x14

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        output = self.conv3(self.pool(x))
        return output
    
# Following a standard practise we'll be increased the width of the network as the depth of the network increases
# We'll reduce the width of the last convolution to match that of fully connected layers proceeding the block
class Depth4ConvolutionBlock(nn.Module):
    def __init__(self, input_channels):
        super().__init__()
        self.conv1 = nn.Conv2d(input_channels, 16, 3, 1, 1) # 28x28
        self.conv2 = nn.Conv2d(16, 32, 3, 1, 1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) # 14x14
        self.conv3 = nn.Conv2d(32, 64, 3, 1, 1) # 14x14
        self.conv4 = nn.Conv2d(64, 32, 3, 1, 1) # 32x14x14

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = F.relu(self.conv3(x))
        output = self.conv4(x)
        return output



class CNNModelSpace(ModelSpace):
    def __init__(self, input_channels = 1,  num_classes = 2):
        super().__init__()
        
        self.convolution_block = LayerChoice([
            SimpleConvolutionBlock(input_channels=input_channels),
            BiggerKernelBlock(input_channels=input_channels),
            Depth3ConvolutionBlock(input_channels=input_channels),
            Depth4ConvolutionBlock(input_channels=input_channels)
        ], label='convolution_block')

        # Additionally we'll also test different widths of the fully connected layers
        # Due to DartStrategy being one-shot we won't be using DropOut, instead we'll apply weight decay as regularization technique
        feature = nni.choice('feature', [64, 128])
        self.fc1 = MutableLinear(32*14*14, feature)
        self.fc2 = MutableLinear(feature, num_classes)

    def forward(self, x):
        x = self.convolution_block(x)
        x = torch.flatten(x, 1)
        output = self.fc2(F.relu(self.fc1(x)))
        return output # pl.Classification expects the output to be logits (not probabilties) so we won't be applying a Sigmoid
    

model_space = CNNModelSpace()
model_space

CNNModelSpace(
  (convolution_block): LayerChoice(
    label='convolution_block'
    (0): SimpleConvolutionBlock(
      (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    )
    (1): BiggerKernelBlock(
      (conv1): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    )
    (2): Depth3ConvolutionBlock(
      (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (conv3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1)

# PneumoniaMNIST Experiment

In [5]:
from nni.nas.evaluator import FunctionalEvaluator
from nni.nas.strategy import DARTS as DartsStrategy
import nni.nas.strategy as strategy
import nni.nas.evaluator.pytorch.lightning as pl

# Initiate our model space to search through
pneu_model_space = CNNModelSpace()

# Initiate our search strategy
pneu_search_strategy = DartsStrategy()

# Initiate our evaluator
pneu_evaluator = pl.Classification(
  num_classes=2,
  learning_rate=1e-3,
  weight_decay=1e-4,
  train_dataloaders=pl.DataLoader(pneu_train_data, batch_size=32),
  val_dataloaders=pl.DataLoader(pneu_val_data, batch_size=32),
  max_epochs=10,
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [6]:
from nni.nas.experiment import NasExperiment
pneu_exp = NasExperiment(pneu_model_space, pneu_evaluator, pneu_search_strategy)

pneu_exp.config.max_trial_number = 3   # for reasons of limited computing power we won't be doing more than 3 trials
pneu_exp.config.trial_concurrency = 1  # will run 1 trial concurrently
pneu_exp.config.trial_gpu_number = 0   # will not use GPU

[2024-03-06 11:43:56] [32mConfig is not provided. Will try to infer.[0m
[2024-03-06 11:43:56] [32mStrategy is found to be a one-shot strategy. Setting execution engine to "sequential" and format to "raw".[0m


In [7]:
pneu_exp.run(port = 8001)

[2024-03-06 11:44:00] [32mCheckpoint saved to /home/azureuser/nni-experiments/ap2ybetj/checkpoint.[0m
[2024-03-06 11:44:00] [32mExperiment initialized successfully. Starting exploration strategy...[0m


2024-03-06 11:44:06.928185: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-03-06 11:44:11.290363: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.

  | Name            | Type                 | Params
---------------------------------------------------------
0 | training_module | ClassificationModule | 868 K 
---------------------------------------------------------
868 K     Trainable params
0         Non-trainable params
868 K     Total params
3.475     Total estimated model params size (MB)


Training: |                                                                                                   …

  img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
`Trainer.fit` stopped: `max_epochs=10` reached.


[2024-03-06 11:48:35] [32mWaiting for models submitted to engine to finish...[0m
[2024-03-06 11:48:35] [32mExperiment is completed.[0m


True

## Training the best model for PneumoniaMNIST

In [8]:
pneu_model_dict = pneu_exp.export_top_models(formatter='dict')[0]

pneu_best_model = CNNModelSpace().freeze(pneu_model_dict)

print(f'Best model for Pneumonia dataset:\n {pneu_best_model}')

Best model for Pneumonia dataset:
 CNNModelSpace(
  (convolution_block): SimpleConvolutionBlock(
    (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  )
  (fc1): Linear(in_features=6272, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=2, bias=True)
)


In [10]:
pneu_evaluator = pl.Classification(
  num_classes=2,
  learning_rate=1e-3,
  weight_decay=1e-4,
  train_dataloaders=pl.DataLoader(pneu_train_data, batch_size=32),
  val_dataloaders=pl.DataLoader(pneu_val_data, batch_size=32),
  max_epochs=10,
)

pneu_evaluator.fit(pneu_best_model)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name      | Type             | Params
-----------------------------------------------
0 | criterion | CrossEntropyLoss | 0     
1 | metrics   | ModuleDict       | 0     
2 | _model    | CNNModelSpace    | 808 K 
-----------------------------------------------
808 K     Trainable params
0         Non-trainable params
808 K     Total params
3.232     Total estimated model params size (MB)


Sanity Checking: |                                                                                            …

Training: |                                                                                                   …

Validation: |                                                                                                 …

[2024-03-06 11:49:39] [32mIntermediate result: 0.9541984796524048  (Index 0)[0m


Validation: |                                                                                                 …

[2024-03-06 11:49:44] [32mIntermediate result: 0.9580152630805969  (Index 1)[0m


Validation: |                                                                                                 …

[2024-03-06 11:49:53] [32mIntermediate result: 0.9618320465087891  (Index 2)[0m


Validation: |                                                                                                 …

[2024-03-06 11:50:01] [32mIntermediate result: 0.9618320465087891  (Index 3)[0m


Validation: |                                                                                                 …

[2024-03-06 11:50:07] [32mIntermediate result: 0.9580152630805969  (Index 4)[0m


Validation: |                                                                                                 …

[2024-03-06 11:50:13] [32mIntermediate result: 0.9599236845970154  (Index 5)[0m


Validation: |                                                                                                 …

[2024-03-06 11:50:20] [32mIntermediate result: 0.9599236845970154  (Index 6)[0m


Validation: |                                                                                                 …

[2024-03-06 11:50:26] [32mIntermediate result: 0.9618320465087891  (Index 7)[0m


Validation: |                                                                                                 …

[2024-03-06 11:50:31] [32mIntermediate result: 0.9580152630805969  (Index 8)[0m


Validation: |                                                                                                 …

[2024-03-06 11:50:37] [32mIntermediate result: 0.9751908183097839  (Index 9)[0m


`Trainer.fit` stopped: `max_epochs=10` reached.


[2024-03-06 11:50:38] [32mFinal result: 0.9751908183097839[0m


# BreastMNIST Experiment

In [11]:
# Initiate our model space to search through
breast_model_space = CNNModelSpace()

# Initiate our search strategy
breast_search_strategy = DartsStrategy()

# Initiate our evaluator
breast_evaluator = pl.Classification(
  num_classes=2,
  learning_rate=1e-3,
  weight_decay=1e-5,
  train_dataloaders=pl.DataLoader(breast_train_data, batch_size=32, shuffle=True),
  val_dataloaders=pl.DataLoader(breast_val_data, batch_size=32, shuffle=True),
  max_epochs=10,
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [12]:
breast_exp = NasExperiment(breast_model_space, breast_evaluator, breast_search_strategy)

breast_exp.config.max_trial_number = 3   # spawn 3 trials at most
breast_exp.config.trial_concurrency = 1  # will run 1 trial concurrently
breast_exp.config.trial_gpu_number = 0   # will not use GPU

[2024-03-06 11:50:51] [32mConfig is not provided. Will try to infer.[0m
[2024-03-06 11:50:51] [32mStrategy is found to be a one-shot strategy. Setting execution engine to "sequential" and format to "raw".[0m


In [13]:
breast_exp.run(port = 8001)

[2024-03-06 11:50:53] [32mCheckpoint saved to /home/azureuser/nni-experiments/pvnwa492/checkpoint.[0m
[2024-03-06 11:50:53] [32mExperiment initialized successfully. Starting exploration strategy...[0m



  | Name            | Type                 | Params
---------------------------------------------------------
0 | training_module | ClassificationModule | 868 K 
---------------------------------------------------------
868 K     Trainable params
0         Non-trainable params
868 K     Total params
3.475     Total estimated model params size (MB)
/anaconda/envs/azureml_py38/lib/python3.8/site-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (18) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |                                                                                                   …

`Trainer.fit` stopped: `max_epochs=10` reached.


[2024-03-06 11:51:21] [32mWaiting for models submitted to engine to finish...[0m
[2024-03-06 11:51:21] [32mExperiment is completed.[0m


True

In [14]:
breast_model_dict = breast_exp.export_top_models(formatter='dict')[0]

breast_best_model = CNNModelSpace().freeze(breast_model_dict)

print(f'Best model for Breast dataset:\n {breast_best_model}')

Best model for Breast dataset:
 CNNModelSpace(
  (convolution_block): SimpleConvolutionBlock(
    (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  )
  (fc1): Linear(in_features=6272, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=2, bias=True)
)


In [15]:
breast_evaluator = pl.Classification(
  num_classes=2,
  learning_rate=1e-3,
  weight_decay=1e-5,
  train_dataloaders=pl.DataLoader(breast_train_data, batch_size=32, shuffle=True),
  val_dataloaders=pl.DataLoader(breast_val_data, batch_size=32, shuffle=True),
  max_epochs=10,
)

breast_evaluator.fit(breast_best_model)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name      | Type             | Params
-----------------------------------------------
0 | criterion | CrossEntropyLoss | 0     
1 | metrics   | ModuleDict       | 0     
2 | _model    | CNNModelSpace    | 808 K 
-----------------------------------------------
808 K     Trainable params
0         Non-trainable params
808 K     Total params
3.232     Total estimated model params size (MB)


Sanity Checking: |                                                                                            …

/anaconda/envs/azureml_py38/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:492: Your `val_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.


Training: |                                                                                                   …

Validation: |                                                                                                 …

[2024-03-06 11:51:54] [32mIntermediate result: 0.7307692170143127  (Index 10)[0m


Validation: |                                                                                                 …

[2024-03-06 11:51:55] [32mIntermediate result: 0.8205128312110901  (Index 11)[0m


Validation: |                                                                                                 …

[2024-03-06 11:51:57] [32mIntermediate result: 0.7564102411270142  (Index 12)[0m


Validation: |                                                                                                 …

[2024-03-06 11:51:58] [32mIntermediate result: 0.7692307829856873  (Index 13)[0m


Validation: |                                                                                                 …

[2024-03-06 11:51:59] [32mIntermediate result: 0.8205128312110901  (Index 14)[0m


Validation: |                                                                                                 …

[2024-03-06 11:52:00] [32mIntermediate result: 0.807692289352417  (Index 15)[0m


Validation: |                                                                                                 …

[2024-03-06 11:52:01] [32mIntermediate result: 0.807692289352417  (Index 16)[0m


Validation: |                                                                                                 …

[2024-03-06 11:52:02] [32mIntermediate result: 0.7435897588729858  (Index 17)[0m


Validation: |                                                                                                 …

[2024-03-06 11:52:03] [32mIntermediate result: 0.807692289352417  (Index 18)[0m


Validation: |                                                                                                 …

[2024-03-06 11:52:04] [32mIntermediate result: 0.8589743375778198  (Index 19)[0m


`Trainer.fit` stopped: `max_epochs=10` reached.


[2024-03-06 11:52:04] [32mFinal result: 0.8589743375778198[0m


# Analysing Results

In [16]:
pneu_test_dataloader = torch.utils.data.DataLoader(pneu_test_data, batch_size = len(pneu_test_data), shuffle = False)
breast_test_dataloader = torch.utils.data.DataLoader(breast_test_data, batch_size = len(breast_test_data), shuffle = False)

In [17]:
from sklearn.metrics import roc_auc_score, accuracy_score
import numpy as np

def evaluate_model(model, dataloader):
    model.eval()
    all_preds = []
    all_targets = []
    all_probabilities = []
    
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs
            labels = labels
            outputs = model(inputs)

            predictions = torch.argmax(outputs, dim=1).numpy() # Converting probabilities to 1's and 0's
            probs = torch.sigmoid(outputs)[:, 1].numpy() # Convert logits to probabilities
            
            all_probabilities.extend(probs)
            all_preds.extend(predictions)
            all_targets.extend(labels.numpy())
    
    
    # Calculate metrics
    accuracy = accuracy_score(all_targets, all_preds)
    auc_roc = roc_auc_score(all_targets, all_probabilities)  # Use probabilities of the positive class
    
    return accuracy, auc_roc



In [18]:
# Evaluate Pneumonia model on the test set
pneu_accuracy, pneu_auc_roc = evaluate_model(pneu_best_model, pneu_test_dataloader)
print(f"Pneumonia Model - Test Accuracy: {pneu_accuracy:.4f}, Test AUC-ROC: {pneu_auc_roc:.4f}")

# Evaluate Breast Cancer model on the test set
breast_accuracy, breast_auc_roc = evaluate_model(breast_best_model, breast_test_dataloader)
print(f"Breast Cancer Model - Test Accuracy: {breast_accuracy:.4f}, Test AUC-ROC: {breast_auc_roc:.4f}")

Pneumonia Model - Test Accuracy: 0.8510, Test AUC-ROC: 0.9340
Breast Cancer Model - Test Accuracy: 0.8013, Test AUC-ROC: 0.8283
