## Try out our model here.

We test our multi-modal Faster R-CNN with the MIMIC dataset here.

In [4]:
!pip install mmcv
!pip install timm

Collecting timm
  Downloading timm-0.6.7-py3-none-any.whl (509 kB)
Installing collected packages: timm
Successfully installed timm-0.6.7


In [3]:
import pandas as pd
import numpy as np

from models.setup import ModelSetup
from models.build import create_model_from_setup
from data.load import get_datasets, get_dataloaders

from utils.init import reproducibility, clean_memory_get_device
from data.constants import DEFAULT_REFLACX_LABEL_COLS, XAMI_MIMIC_PATH

## Suppress the assignement warning from pandas.r
pd.options.mode.chained_assignment = None  # default='warn'

## Supress user warning
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

%matplotlib inline

  assert (


In [4]:
# Ask the project helper for the compute device to run on. The recorded cell
# output reports the chosen device (presumably printed by this helper —
# confirm in utils.init).
device = clean_memory_get_device()
# Project helper; presumably fixes the random seeds so runs are repeatable —
# TODO confirm in utils.init.
reproducibility()

This notebook will running on device: [CPU]


### Define your MIMIC folder path here.

In [17]:
# Metric label: "IoBB" when use_iobb is True, otherwise "IoU".
use_iobb = True
io_type_str = "IoBB" if use_iobb else "IoU"
# Label columns; passed to create_model_from_setup when the model is built.
labels_cols = DEFAULT_REFLACX_LABEL_COLS
# A single threshold of 0.5 — presumably the IoU/IoBB cut-off used at
# evaluation time; confirm where iou_thrs is consumed.
iou_thrs = np.array([0.5])


# Training / architecture options shared by every model variant; unpacked
# into ModelSetup as keyword arguments.
common_args = dict(
    use_custom_model=True,
    use_early_stop_model=True,
    optimiser="sgd",
    lr=1e-3,
    weight_decay=1e-5,
    pretrained=True,
    record_training_performance=True,
    dataset_mode="normal",
    image_size=512,
    batch_size=4,
    warmup_epochs=0,
    lr_scheduler="ReduceLROnPlateau",
    reduceLROnPlateau_factor=0.1,
    reduceLROnPlateau_patience=999,
    reduceLROnPlateau_full_stop=True,
    multiStepLR_milestones=100,
    multiStepLR_gamma=0.1,
    use_mask=True,
    clinical_num_len=9,
    gt_in_train_till=999,
    box_head_dropout_rate=0,
    spatialise_method="convs",  # [convs, repeat]
    normalise_clinical_num=False,
    measure_test=True,
)

# Fusion options: element-wise "add" strategy, depth 0, no residual connection.
fusion_add_args = dict(
    fuse_depth=0,
    fusion_residule=False,
    fusion_strategy="add",
)

# Channel / layer widths for the small model variant (all set to 64).
small_model_args = dict(
    mask_hidden_layers=64,
    fuse_conv_channels=64,
    clinical_input_channels=64,
    representation_size=64,  # 32
    clinical_conv_channels=64,
    clinical_expand_conv_channels=64,
    backbone_out_channels=64,
)

# Backbone choice: MobileNet v3 without a feature pyramid network.
mobilenet_args = dict(
    backbone="mobilenet_v3",
    using_fpn=False,
)

# Assemble the model configuration: the explicit flags below plus the four
# argument groups (backbone, model size, common training options, fusion),
# each unpacked as keyword arguments.
model_setup = ModelSetup(
        name="forward_testing_model",
        # Clinical inputs are switched off for this run; fixations are enabled.
        use_clinical=False,
        use_fixations=True,
        spatialise_clinical=False,
        add_clinical_to_roi_heads=False,
        **mobilenet_args,
        **small_model_args,
        **common_args,
        **fusion_add_args,
    )


# Initiate datasets and dataloaders
The batch size is also defined in this section. For testing purposes, we only set it to 4.

In [7]:
# Dataset options; the clinical / mode / mask switches are read from the
# model setup so the data matches the configured model.
dataset_params_dict = dict(
    XAMI_MIMIC_PATH=XAMI_MIMIC_PATH,
    with_clinical=model_setup.use_clinical,
    dataset_mode=model_setup.dataset_mode,
    bbox_to_mask=model_setup.use_mask,
    labels_cols=DEFAULT_REFLACX_LABEL_COLS,
)

# get_datasets returns four datasets, unpacked in this order.
(
    detect_eval_dataset,
    train_dataset,
    val_dataset,
    test_dataset,
) = get_datasets(dataset_params_dict=dataset_params_dict)

# Build the train / validation / test loaders. The batch size is read from
# the model setup (configured via common_args["batch_size"]) instead of being
# hard-coded a second time, so the two values cannot drift apart.
train_dataloader, val_dataloader, test_dataloader = get_dataloaders(
    train_dataset,
    val_dataset,
    test_dataset,
    batch_size=model_setup.batch_size,
)

In [8]:
# Report the number of rows versus the number of distinct dicom_ids in the
# detection-evaluation dataset.
print(
    f"We used to have {len(detect_eval_dataset.df.dicom_id)}, "
    f"after unifying, we will have {len(detect_eval_dataset.df.dicom_id.unique())}."
)

We used to have 670, after unifying, we will have 590.


## Example instance from dataset
We show what's inside a single instance. It will provide:

- Images
- Clinical data
- Targets (Dictionary)

Inside the target, there are:

- boxes (bounding boxes of the abnormalities)
- labels (disease index; note that class **0** means the background)
- image_id (index used to retrieve that image)
- area (the areas that the bounding boxes cover)
- iscrowd (whether a region contains a crowd of multiple bounding boxes; we assume that none of the bounding boxes are crowds.)

In [9]:
# Display the first training sample (a tuple whose first element is the image
# tensor, as the recorded output shows).
train_dataset[0]

(tensor([[[0.5608, 0.5608, 0.5569,  ..., 0.0000, 0.0000, 0.0000],
          [0.5569, 0.5647, 0.5647,  ..., 0.0000, 0.0000, 0.0000],
          [0.5490, 0.5569, 0.5608,  ..., 0.0000, 0.0000, 0.0000],
          ...,
          [0.7686, 0.7686, 0.7647,  ..., 1.0000, 1.0000, 1.0000],
          [0.7608, 0.7647, 0.7608,  ..., 1.0000, 1.0000, 1.0000],
          [0.7529, 0.7569, 0.7569,  ..., 1.0000, 1.0000, 1.0000]],
 
         [[0.5608, 0.5608, 0.5569,  ..., 0.0000, 0.0000, 0.0000],
          [0.5569, 0.5647, 0.5647,  ..., 0.0000, 0.0000, 0.0000],
          [0.5490, 0.5569, 0.5608,  ..., 0.0000, 0.0000, 0.0000],
          ...,
          [0.7686, 0.7686, 0.7647,  ..., 1.0000, 1.0000, 1.0000],
          [0.7608, 0.7647, 0.7608,  ..., 1.0000, 1.0000, 1.0000],
          [0.7529, 0.7569, 0.7569,  ..., 1.0000, 1.0000, 1.0000]],
 
         [[0.5608, 0.5608, 0.5569,  ..., 0.0000, 0.0000, 0.0000],
          [0.5569, 0.5647, 0.5647,  ..., 0.0000, 0.0000, 0.0000],
          [0.5490, 0.5569, 0.5608,  ...,

## Define Model.

We define the model here. Two backbone examples are in the code section below. MobileNet is a lightweight network, while ResNet is heavier but usually performs better. In our case, computation cost is not the most important factor; therefore, we chose ResNet with a feature pyramid network (FPN) backbone. (Note: the setup defined earlier in this notebook selects `mobilenet_v3` without FPN for this forward test.)

In [26]:
# Build the detector from the label columns and the model setup. The extra
# keyword arguments look like RPN / box-head NMS and score thresholds plus a
# per-image detection cap — NOTE(review): confirm how
# create_model_from_setup forwards them.
model = create_model_from_setup(
    labels_cols,
    model_setup,
    rpn_nms_thresh=0.3,
    box_detections_per_img=10,
    box_nms_thresh=0.2,
    rpn_score_thresh=0.0,
    box_score_thresh=0.05,
)

# NOTE(review): the recorded output of this cell ends with
# "TypeError: __init__() got an unexpected keyword argument 'fixations_backbone'",
# so the builder failed on that run (possibly related to use_fixations=True in
# the setup — verify against the model code).
# Move the model to the selected device and put it in training mode.
model.to(device)
model.train()


Load custom model
Using pretrained backbone. mobilenet_v3
Using pretrained backbone. mobilenet_v3


TypeError: __init__() got an unexpected keyword argument 'fixations_backbone'

## Prepare data to feed

We prepare three main inputs to test the model:

- CXR image
- Clinical data
- Target

For each of them, we adjust the format to what the model expects.

In [9]:
# Take one batch from the training loader and pass it, together with the
# target device, through the dataset's input-preparation helper.
data = train_dataset.prepare_input_from_data(next(iter(train_dataloader)), device)

# Test Feedforward (Training)

In [10]:
# Put the model in training mode before the forward pass.
model.train()
# Every element of `data` except the last is passed positionally; the last
# element is passed as `targets`. The call returns a pair that is unpacked
# into the loss dictionary and the outputs.
loss_dict, outputs = model(*data[:-1], targets=data[-1])

In [15]:
# Split the prepared batch into its four parts for step-by-step inspection.
images, clinical_num, clinical_cat, targets = data

In [16]:
# Record each image's last two dimensions before model.transform is applied.
original_image_sizes = []
for img in images:
    val = img.shape[-2:]
    assert len(val) == 2
    original_image_sizes.append((val[0], val[1]))

# NOTE: this rebinds `images` / `targets` to their transformed versions, so
# re-run the unpacking cell (`images, ... = data`) before executing this cell
# a second time.
images, targets = model.transform(images, targets)

img_features = model.backbone(images.tensors)

# The backbone may return a single feature map or a mapping of named feature
# maps (the case the previously commented-out loop was written for); print
# the shape(s) in either case.
if isinstance(img_features, dict):
    for k, v in img_features.items():
        print(f"[{k}]: {v.shape}")
else:
    print(img_features.shape)

torch.Size([4, 64, 16, 16])


## Results we get.
Five different losses are given in the result (classifier, box regression, mask, objectness, and RPN box regression); we will use these losses to optimise the network while training.

# Test Feedforward

### Detection.

A detection contains *boxes*, *labels*, and *scores*.

- *boxes*: All the bounding boxes for this image.
- *labels*: Labels corresponding to the bounding boxes.
- *scores*: Score (confidence) for each bounding box.

In [17]:
# Display the loss dictionary and the detections from the training-mode
# forward pass as this cell's output.
loss_dict, outputs 

({'loss_classifier': tensor(1.8703, grad_fn=<NllLossBackward0>),
  'loss_box_reg': tensor(0.0083, grad_fn=<DivBackward0>),
  'loss_mask': tensor(0.7661, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>),
  'loss_objectness': tensor(0.6923, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>),
  'loss_rpn_box_reg': tensor(0.0056, dtype=torch.float64, grad_fn=<DivBackward0>)},
 [{'boxes': tensor([[2.3629e+03, 1.0602e+02, 2.6215e+03, 2.2353e+02],
           [7.2340e+02, 1.9290e+02, 1.2145e+03, 1.1568e+03],
           [2.1753e+03, 8.1519e-01, 2.4317e+03, 5.7176e+01],
           [2.3989e+03, 7.1049e-01, 2.5735e+03, 8.7468e+01],
           [1.0604e+03, 8.0044e+01, 1.2399e+03, 2.5090e+02],
           [1.6428e+03, 8.4712e+02, 1.7817e+03, 1.0633e+03],
           [1.6446e+03, 1.0116e+03, 1.7859e+03, 1.2136e+03],
           [1.0849e+03, 1.1581e+03, 1.2075e+03, 1.4033e+03],
           [3.2304e+02, 1.7968e+03, 4.4830e+02, 2.0410e+03],
           [2.5567e+03, 3.8252e-01, 2.8163e+03, 6.0379e+01]],
       