In [1]:
import torch
import pandas as pd
import numpy as np

from models.setup import ModelSetup
from models.build import create_multimodal_rcnn_model

from utils.init import reproducibility, clean_memory_get_device
from data.constants import DEFAULT_REFLACX_LABEL_COLS, XAMI_MIMIC_PATH

## Suppress the assignement warning from pandas.r
pd.options.mode.chained_assignment = None  # default='warn'

## Supress user warning
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

%matplotlib inline

In [2]:
# clean gpu memory and tell pytorch to use deterministic algorithm.
device = clean_memory_get_device()
reproducibility()

This notebook will running on device: [CUDA]


In [3]:
use_iobb = True
io_type_str = "IoBB" if use_iobb else "IoU"
labels_cols = DEFAULT_REFLACX_LABEL_COLS
iou_thrs = np.array([0.5])

common_args = {
    "save_early_stop_model": True,
    "optimiser": "sgd",
    "lr": 1e-3,
    "weight_decay": 1e-5,
    "image_backbone_pretrained": True,
    "fixation_backbone_pretrained": True,
    "record_training_performance": True,
    "dataset_mode": "normal",
    "image_size": 512,
    "batch_size": 4,
    "warmup_epochs": 0,
    "lr_scheduler": "ReduceLROnPlateau",
    "reduceLROnPlateau_factor": 0.1,
    "reduceLROnPlateau_patience": 999,
    "reduceLROnPlateau_full_stop": True,
    "multiStepLR_milestones": 100,
    "multiStepLR_gamma": 0.1,
    "use_mask": True,
    "gt_in_train_till": 999,
    "box_head_dropout_rate": 0,
    "measure_test": True,
}

fusion_add_args = {
    "fuse_depth": 0,
    "fusion_residule": False,
    "fusion_strategy": "add", 
}

small_model_args = {
    "mask_hidden_layers": 64,
    "fuse_conv_channels": 64,
    "representation_size": 64, 
    "backbone_out_channels": 64,
}

mobilenet_args = {
    "backbone": "mobilenet_v3",
    "using_fpn": False,
}


# [TODO]: clean the model setup for fixation map.
model_setup = ModelSetup(
        name="forward_testing_model",
        use_fixations=True,
        **mobilenet_args,
        **small_model_args,
        **common_args,
        **fusion_add_args,
    )


In [4]:
model = create_multimodal_rcnn_model(
    labels_cols,
    model_setup,
    rpn_nms_thresh=0.3,
    box_detections_per_img=10,
    box_nms_thresh=0.2,
    rpn_score_thresh=0.0,
    box_score_thresh=0.05,
)

model.to(device)
model.eval()


Using pretrained backbone. mobilenet_v3
Using pretrained backbone. mobilenet_v3
forward_testing_model will use mask, [64] layers.


MultimodalMaskRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): Sequential(
    (0): Sequential(
      (0): ConvNormActivation(
        (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
        (2): Hardswish()
      )
      (1): InvertedResidual(
        (block): Sequential(
          (0): ConvNormActivation(
            (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=16, bias=False)
            (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
            (2): ReLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(16, 8, kernel_size=(1, 1), stride=(1, 1))
    

In [5]:
example_image_input = [torch.randn(( 3, 512, 512)).to(device)]
example_fixations_input = [torch.randn(( 3, 512, 512)).to(device)]

In [6]:
model(example_image_input, fixations=example_fixations_input)

({},
 [{'boxes': tensor([[1.1506e+02, 4.2518e-01, 2.0828e+02, 2.2571e+01],
           [1.7954e+02, 3.9759e-01, 2.7196e+02, 2.2632e+01],
           [4.4861e+02, 5.1162e-01, 5.1200e+02, 3.1637e+01],
           [0.0000e+00, 2.0548e-01, 2.3685e+01, 1.0992e+01],
           [2.7514e+02, 4.0179e-01, 3.6801e+02, 2.2652e+01],
           [4.5712e+02, 8.5348e+01, 5.0388e+02, 1.0680e+02],
           [3.3909e+02, 3.8297e-01, 4.3216e+02, 2.2621e+01],
           [3.0686e+02, 9.9899e+00, 4.0016e+02, 5.4583e+01],
           [4.5722e+02, 1.1745e+02, 5.0395e+02, 1.3876e+02],
           [6.8821e+01, 1.4529e+00, 2.5621e+02, 7.7376e+01]], device='cuda:0',
          grad_fn=<StackBackward0>),
   'labels': tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0'),
   'scores': tensor([0.2315, 0.2304, 0.2287, 0.2284, 0.2277, 0.2276, 0.2270, 0.2267, 0.2265,
           0.2262], device='cuda:0', grad_fn=<IndexBackward0>),
   'masks': tensor([[[[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
             [0.