## Try out our model here.

We test our multi-modal Faster R-CNN with the MIMIC dataset here.

In [1]:
import pandas as pd
import numpy as np

import utils.print as print_f

from utils.coco_eval import get_eval_params_dict
from utils.engine import xami_train_one_epoch, xami_evaluate
from utils.plot import plot_loss, plot_train_val_evaluators, plot_evaluator

from models.setup import ModelSetup
from models.build import create_model_from_setup
from models.train import TrainingInfo
from utils.save import check_best, end_train
from data.load import get_datasets, get_dataloaders

from IPython.display import clear_output
from utils.eval import get_ar_ap
from utils.train import get_optimiser
from utils.init import reproducibility, clean_memory_get_device
from utils.constants import full_iou_thrs
from data.constants import DEFAULT_REFLACX_LABEL_COLS, XAMI_MIMIC_PATH
from  datetime import datetime


# Silence pandas' chained-assignment warning (the option's default is 'warn').
pd.set_option("mode.chained_assignment", None)

# Suppress warnings of category UserWarning.
import warnings
warnings.filterwarnings(action="ignore", category=UserWarning)

%matplotlib inline

  assert (


In [2]:
# Select the device the model and data are moved to later in the notebook.
# NOTE(review): going by its name, the helper also frees memory first — confirm in utils.init.
device = clean_memory_get_device()
# NOTE(review): presumably fixes the random seeds so runs are repeatable — confirm in utils.init.
reproducibility()

This notebook will running on device: [CUDA]


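### Define your MIMIC folder path and model setup here.

The MIMIC folder path is imported as `XAMI_MIMIC_PATH` from `data.constants`; the cell below configures the model.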

In [3]:
# Label for the overlap measure in use: "IoBB" when `use_iobb` is true, otherwise "IoU".
use_iobb = True
if use_iobb:
    io_type_str = "IoBB"
else:
    io_type_str = "IoU"

# Every tunable setting for this run lives in one ModelSetup object.
# Bracketed lists in the comments are the candidate values noted by the author.
model_setup = ModelSetup(
    # --- run identity and feature switches ---
    name="forward_testing",
    use_clinical=True,
    use_custom_model=True,
    use_early_stop_model=True,
    record_training_performance=True,
    # --- data ---
    dataset_mode="unified",
    image_size=256,
    batch_size=4,
    # --- backbone ---
    backbone="mobilenet_v3",  # [mobilenet_v3]
    pretrained=True,
    using_fpn=False,
    backbone_out_channels=16,  # shrink size test [16, 32]
    # --- detection heads ---
    representation_size=32,  # shrink size test [32, 64, 128]
    box_head_dropout_rate=0,  # [0, 0.1, 0.2, 0.3]
    use_mask=False,
    # mask_hidden_layers=64,
    # --- optimisation ---
    optimiser="sgd",
    lr=1e-2,
    weight_decay=1e-3,
    warmup_epochs=0,
    # --- learning-rate schedule ---
    lr_scheduler="ReduceLROnPlateau",  # [ReduceLROnPlateau, MultiStepLR]
    reduceLROnPlateau_factor=0.1,
    reduceLROnPlateau_patience=3,
    multiStepLR_milestones=[30, 50, 70, 90],
    multiStepLR_gamma=0.1,
    # --- clinical branch and fusion ---
    clinical_conv_channels=32,
    fuse_conv_channels=32,
    fuse_depth=4,
)



# Initialise datasets and dataloaders
The batch size is also defined in this section. For testing purposes, we only set it to 4.

In [4]:
# Disease labels used in this run; the same list is handed to the datasets
# here and to the model builder later in the notebook.
labels_cols = [
    "Enlarged cardiac silhouette",
    "Atelectasis",
    "Pleural abnormality",
    "Consolidation",
    "Pulmonary edema",
    #  'Groundglass opacity', # 6th disease.
]

# Dataset options; the clinical-data switch and dataset mode are taken from
# `model_setup` so the data pipeline and the model stay in agreement.
dataset_params_dict = {
    "XAMI_MIMIC_PATH": XAMI_MIMIC_PATH,
    "with_clinical": model_setup.use_clinical,
    "dataset_mode": model_setup.dataset_mode,
    "bbox_to_mask": True,
    "labels_cols": labels_cols,
}

detect_eval_dataset, train_dataset, val_dataset, test_dataset = get_datasets(
    dataset_params_dict=dataset_params_dict
)

# Reuse the batch size stored in `model_setup` instead of repeating the
# literal, so the recorded setup always matches what the loaders use.
train_dataloader, val_dataloader, test_dataloader = get_dataloaders(
    train_dataset,
    val_dataset,
    test_dataset,
    batch_size=model_setup.batch_size,
)

In [5]:
# Compare the number of dicom_id rows with the number of distinct dicom_ids.
dicom_ids = detect_eval_dataset.df.dicom_id
n_rows = len(dicom_ids)
n_distinct = len(dicom_ids.unique())
print(f"We used to have {n_rows}, after unifying, we will have {n_distinct}.")

We used to have 590, after unifying, we will have 590.


## Example instance from dataset
We show what's inside a single instance. It will provide:

- Images
- Clinical data
- Targets (Dictionary)

And, inside the target, there are:

- boxes (bounding boxes of abnormality)
- labels (disease index (Note: the class **0** means the background))
- image_id (idx to get that image)
- area (the areas that the bounding boxes contain)
- iscrowd (if it's a place with multiple bounding boxes; we assume none of the bounding boxes are crowd.)

In [6]:
# Show the first training instance; as the cell's last expression, its repr is rendered as the output.
train_dataset[0]

(tensor([[[0.5608, 0.5608, 0.5569,  ..., 0.0000, 0.0000, 0.0000],
          [0.5569, 0.5647, 0.5647,  ..., 0.0000, 0.0000, 0.0000],
          [0.5490, 0.5569, 0.5608,  ..., 0.0000, 0.0000, 0.0000],
          ...,
          [0.7686, 0.7686, 0.7647,  ..., 1.0000, 1.0000, 1.0000],
          [0.7608, 0.7647, 0.7608,  ..., 1.0000, 1.0000, 1.0000],
          [0.7529, 0.7569, 0.7569,  ..., 1.0000, 1.0000, 1.0000]],
 
         [[0.5608, 0.5608, 0.5569,  ..., 0.0000, 0.0000, 0.0000],
          [0.5569, 0.5647, 0.5647,  ..., 0.0000, 0.0000, 0.0000],
          [0.5490, 0.5569, 0.5608,  ..., 0.0000, 0.0000, 0.0000],
          ...,
          [0.7686, 0.7686, 0.7647,  ..., 1.0000, 1.0000, 1.0000],
          [0.7608, 0.7647, 0.7608,  ..., 1.0000, 1.0000, 1.0000],
          [0.7529, 0.7569, 0.7569,  ..., 1.0000, 1.0000, 1.0000]],
 
         [[0.5608, 0.5608, 0.5569,  ..., 0.0000, 0.0000, 0.0000],
          [0.5569, 0.5647, 0.5647,  ..., 0.0000, 0.0000, 0.0000],
          [0.5490, 0.5569, 0.5608,  ...,

## Define Model.

We define the models here. Two backbone examples are in the code section below. MobileNet is a lightweight network, and ResNet is heavier but usually performs better. In our case, computation cost is not the most important factor; therefore, we chose ResNet with a feature pyramid network (FPN) backbone. Note: the `model_setup` used in this notebook selects the `mobilenet_v3` backbone with `using_fpn=False` for forward testing.

In [7]:
# Thresholds and detection limits forwarded to the model builder.
detector_kwargs = dict(
    rpn_nms_thresh=0.3,
    rpn_score_thresh=0.0,
    box_nms_thresh=0.2,
    box_score_thresh=0.05,
    box_detections_per_img=10,
    # image_size=model_setup.image_size,
    # clinical_conv_channels=64,
)

# Build the model from the label list and the shared setup object.
model = create_model_from_setup(labels_cols, model_setup, **detector_kwargs)

# Move the model to the selected device, then switch to training mode;
# the final expression's return value is what the cell displays.
model.to(device)
model.train()


Load custom model
Using pretrained backbone. mobilenet_v3


MultimodalMaskRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): Sequential(
    (0): Sequential(
      (0): ConvNormActivation(
        (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
        (2): Hardswish()
      )
      (1): InvertedResidual(
        (block): Sequential(
          (0): ConvNormActivation(
            (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=16, bias=False)
            (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
            (2): ReLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(16, 8, kernel_size=(1, 1), stride=(1, 1))
    

In [8]:
# NOTE(review): this import would normally sit in the notebook's top import cell.
from utils.train import print_params_setup
# Prints one line for the model and for several of its sub-modules (see the output below);
# the figures are presumably parameter counts — confirm in utils.train.
print_params_setup(model)

[model]: 1,223,303
[model.backbone]: 1,009,968
[model.rpn]: 3,595
[model.roi_heads]: 27,166
[model.roi_heads.box_head]: 26,176
[model.roi_heads.box_head.fc6]: 25,120
[model.roi_heads.box_head.fc7]: 1,056
[model.roi_heads.box_predictor]: 990
[model.clinical_convs]: 41,904
[model.fuse_convs]: 32,592


## Prepare data to feed

We prepare three main inputs to test the model:

- CXR image
- Clinical data
- Target

And, for each of them, we adjust the format to what the model expects.

In [9]:
# Pull a single batch from the training loader ...
raw_batch = next(iter(train_dataloader))
# ... and let the dataset convert it into model inputs on the chosen device.
data = train_dataset.prepare_input_from_data(raw_batch, device)

# Test Feedforward (Training)

In [10]:
# labels = [gt_label[idxs] for gt_label, idxs in zip(gt_labels, mask_matched_idxs)]
# import torch

In [11]:
# Labels of the first target; the last element of `data` is what gets passed to the model as `targets`.
data[-1][0]['labels']

tensor([2, 1, 3, 3], device='cuda:0')

In [12]:
# Shape of the mask tensor stored in the first target.
data[-1][0]['masks'].shape

torch.Size([4, 2544, 3056])

In [13]:
# images, targets = data
# images, targets = model.transform(images, targets)

In [14]:
# Make sure the model is in training mode before the forward pass.
model.train()

# The last element of `data` goes in as `targets`; everything before it is
# passed positionally.
*model_inputs, batch_targets = data
loss_dict, outputs = model(*model_inputs, targets=batch_targets)

## Results we get.
Four different losses are given in the result; we will use these losses to optimise the network while training.

# Test Feedforward

### Detection.

A detection contains *boxes*, *labels*, and *scores*.

- *boxes*: All the bounding boxes for this image.
- *labels*: Labels corresponding to the bounding boxes.
- *scores*: Score (confidence) for each bounding box.

In [15]:
# Display the loss dictionary and the per-image detections returned by the forward pass.
loss_dict, outputs 

({'loss_classifier': tensor(1.8429, device='cuda:0', grad_fn=<NllLossBackward0>),
  'loss_box_reg': tensor(0.0648, device='cuda:0', grad_fn=<DivBackward0>),
  'loss_objectness': tensor(0.6932, device='cuda:0',
         grad_fn=<BinaryCrossEntropyWithLogitsBackward0>),
  'loss_rpn_box_reg': tensor(0.4561, device='cuda:0', dtype=torch.float64, grad_fn=<DivBackward0>)},
 [{'boxes': tensor([[ 921.3475,  916.9742, 1433.6675, 1630.9636],
           [   0.0000,    0.0000, 1082.6816,  457.8885],
           [   0.0000,    0.0000,  190.6875,  162.5804],
           [   0.0000,    0.0000, 3039.1206, 2544.0000],
           [ 485.3980,  921.4603,  865.1107, 1537.3938],
           [2363.5857, 1125.2233, 2666.8264, 1512.2043],
           [2361.2820, 1139.0974, 2669.3003, 1502.2478],
           [ 921.4305,  945.2888, 1434.4622, 1609.0178],
           [   0.0000,   10.6675, 1082.4666,  443.1554],
           [   0.0000,    3.7876,  190.6496,  157.3491]], device='cuda:0',
          grad_fn=<StackBackward0