# For Google Colab

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
!pip install mean_average_precision

!pip install pyyaml==5.1
!pip install torch==1.10.0+cu111 torchvision==0.11.0+cu111 torchaudio==0.10.0 -f https://download.pytorch.org/whl/torch_stable.html

import torch
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
# Install detectron2 that matches the above pytorch version
# See https://detectron2.readthedocs.io/tutorials/install.html for instructions
!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/$CUDA_VERSION/torch$TORCH_VERSION/index.html

#RESTART KERNEL AFTERWARDS!

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting mean_average_precision
  Downloading mean_average_precision-2021.4.26.0-py3-none-any.whl (14 kB)
Installing collected packages: mean-average-precision
Successfully installed mean-average-precision-2021.4.26.0
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pyyaml==5.1
  Downloading PyYAML-5.1.tar.gz (274 kB)
[K     |████████████████████████████████| 274 kB 7.6 MB/s 
[?25hBuilding wheels for collected packages: pyyaml
  Building wheel for pyyaml (setup.py) ... [?25l[?25hdone
  Created wheel for pyyaml: filename=PyYAML-5.1-cp37-cp37m-linux_x86_64.whl size=44092 sha256=b5485fec85c93e6611e6aa214dd88da7afac4bbb20bc1eded8258876ae1c3eda
  Stored in directory: /root/.cache/pip/wheels/77/f5/10/d00a2bd30928b972790053b5de0c703ca87324f3fead0f2fd9
Successfully built pyyaml
Installing collected packages: pyyaml
  Attempting un

In [2]:
%cd /content/drive/MyDrive/Github/ModelComparisons_MA

/content/drive/MyDrive/Github/ModelComparisons_MA


#Mask RCNN

In [3]:
from utils import DataSet
import utils.augmentation as aug
import utils.helpers as helpers

In [4]:
import torch, torchvision 
import albumentations as A
import cv2, copy
import numpy as np 
import pandas as pd
import os, random, time, json, math

from torch.utils.data import DataLoader
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from sklearn.model_selection import train_test_split
from mean_average_precision import MetricBuilder
from tqdm.auto import tqdm
from detectron2.utils.logger import setup_logger
from detectron2.evaluation import DatasetEvaluator

from ImageEnhancement import MSRCR, FUSION, CLAHE

In [5]:
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor, DefaultTrainer
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog, build_detection_train_loader
from detectron2.data import detection_utils as utils
import detectron2.data.transforms as T

In [15]:
BATCH_SIZE = 8 # increase / decrease according to GPU memeory
RESIZE_TO = 800 # resize the image for training and transforms
NUM_EPOCHS = 40 # number of epochs to train for
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# training images and XML files directory
SEED = 42
TEST_RATIO = 0.1 # for train/test split

MAPPING = {
    'DUMMY': 0,
    'Fish': 1,
    'Cnidaria':2   
}

# whether to visualize images after crearing the data loaders
VISUALIZE_TRANSFORMED_IMAGES = False
# location to save model and plots
IN_DIR = "/content/drive/MyDrive/ROV_ECIM/multimedia/ECIM_bruv_data/"
OUT_DIR = '/content/res/'
VALIDATION_SET_FREQUENCY = 10 # tests on validaiton set every n epochs

PREPARE_TEST_DATA = True

IMAGE_DIRECTORY = "images"

In [7]:

imgs, labels = helpers.image_and_label_paths(IN_DIR, image_dir = "images", label_dir = "labels")
inputs_train, inputs_valid, targets_train, targets_valid = train_test_split(imgs, labels, test_size=TEST_RATIO, random_state=SEED)


In [8]:
# Train DataSet:
train_dataset = DataSet(inputs_train, 
                            targets_train, 
                            use_cache          = False,
                            mapping            = MAPPING,
                            random_enhancement = False,
                            use_detectron      = True
                            )



# Validation DataSet:
validation_dataset = DataSet(inputs_valid, 
                                targets_valid, 
                                use_cache          = False,
                                mapping            = MAPPING, 
                                random_enhancement = False,
                                use_detectron      = True
                            )          


In [9]:
for d, data in zip(["train", "val"], [train_dataset, validation_dataset]):
    DatasetCatalog.register("fish_" + d, lambda f=data, d=d: f.get_data_dicts(name = d))
    MetadataCatalog.get("fish_" + d).set(thing_classes=list(MAPPING.keys()))

fish_metadata = MetadataCatalog.get("fish_train")


In [10]:
counter = 0
dataset_dicts = validation_dataset.get_data_dicts(name  = "val")
for d in random.sample(dataset_dicts, 4):
    img = cv2.imread(d["file_name"])
    visualizer = Visualizer(img[:, :, ::-1], metadata=fish_metadata, scale=0.5)
    out = visualizer.draw_dataset_dict(d)
    cv2.imwrite("./hi_{}.png".format(counter), out.get_image()[:, :, ::-1])
    counter += 1

  0%|          | 0/9 [00:00<?, ?it/s]

loading val dataset


In [11]:
class MYEvaluator(DatasetEvaluator):
  def __init__(
      self,
      datasetname,
      mapping = {"DUMMY":0,"Fish":1,"Cnidaria":2},   
      min_score = 0.6, 
      outpath = "./output/",
      valid_classes = None
      ):
    self._mapping = mapping
    self._minscore = min_score
    self._outpath = outpath
    self._dataset = DatasetCatalog.get(datasetname)
    if(valid_classes is None):
      self._valid_classes = list(mapping.values())
    else:
      self._valid_classes = []
      for elem in valid_classes:
          self._valid_classes.append(mapping[elem])
    self._metric_fn = MetricBuilder.build_evaluation_metric("map_2d", async_mode=True, num_classes=len(self._valid_classes))
  
    # if some classes are deemed irrelevant, the class labels for the mAP function need to be continuous from 0 to len(self._valid_classes)
    self.cls_remapping = np.zeros(len(mapping.keys()))
    free_slots = []
    for index, (key, val) in enumerate(mapping.items()):
      if (key in valid_classes and index < len(self._valid_classes)):
        self.cls_remapping[index] = index
      elif (key not in valid_classes and index < len(self._valid_classes)):
        self.cls_remapping[index] = None
        free_slots.append(index)
      elif (key not in valid_classes and index >= len(self._valid_classes)):
        self.cls_remapping[index] = None
      elif (key in valid_classes and index >= len(self._valid_classes)):
        self.cls_remapping[index] = free_slots[0]
        free_slots = free_slots[1:]

  def reset(self):
    self._metric_fn = MetricBuilder.build_evaluation_metric("map_2d", async_mode=True, num_classes=len(self._valid_classes))
    #self._false_positives = []
    #self._correctly_labled = []
    #self._mean_iou = []
    #self._total_score = []

  def process(self, inputs, outputs):
    for inputs, pred in zip(inputs, outputs):
      instances = pred["instances"].to("cpu")
      all_pred_scores = instances.scores
      all_pred_classes = instances.pred_classes
      all_pred_boxes = instances.pred_boxes
      gt = None
      for elem in self._dataset:    # pretty slow. Checks complete dataset for correct image to then load the gts as I cannot pass gts into the process function directly
        if(elem["image_id"] == inputs["image_id"]):
          gt = elem["annotations"].copy()
          break
      assert gt is not None, "Something went wrong in the mAP calculation for the validation dataset. Is there an image without any labels?"
      
      gt_formatted = []
      for elem in gt:
        if(elem["category_id"] not in self._valid_classes):
          continue
        gt_formatted.append(list(elem["bbox"]) + [self.cls_remapping[elem["category_id"]]] + [0,0])
      
      pred_formatted = []
      for box, cls, score in zip(all_pred_boxes, all_pred_classes, all_pred_scores):
        if cls not in self._valid_classes:
          continue
        if score < self._minscore:
          continue
        pred_formatted.append(list(box) + [self.cls_remapping[cls], score])
      
      self._metric_fn.add(np.array(pred_formatted), np.array(gt_formatted))

  def evaluate(self):
    # save self.count somewhere, or print it, or return it.
    print("evaluate")
    mAP = self._metric_fn.value(np.arange(0.5, 1.0, 0.05), recall_thresholds=np.arange(0., 1.01, 0.05), mpolicy='soft')['mAP']
    path = self._outpath+"_validation_values.csv"
    try:
      df = pd.read_csv(path, sep = ";", index_col = 0)
    except:
      df = pd.DataFrame(columns = ["mAP"])
    outl = [mAP]
    outs = pd.Series(outl, index=df.columns)
    
    df = df.append(outs, ignore_index=True)
    df.to_csv(path, sep = ";")
    return {k:v for k, v in zip(df.columns, outl)}

In [12]:
class CustomMapper():
    def __init__(self, transformlist, random_enhancement = False, enhancement_directories = ["images", "msrcr", "clahe", "fusion"]):
        self._transformlist = transformlist
        self.random_enhancemant = random_enhancement
        self.random_enhacement_directories = enhancement_directories

    def __call__(self, dataset_dict):
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    
        #HERE COMES THE STUFF FOR READING IMAGES WITH RANDOM ENHANCEMENTS(from different directories)
        if self.random_enhancemant:
            dataset_dict["file_name"] = dataset_dict["file_name"].replace("images", random.choice(self.random_enhacement_directories))
        image = utils.read_image(dataset_dict["file_name"], format="BGR")
        image, transforms = T.apply_transform_gens(self._transformlist, image)
        dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))

        annos = [
            utils.transform_instance_annotations(obj, transforms, image.shape[:2])
            for obj in dataset_dict.pop("annotations")
        ]
        instances = utils.annotations_to_instances(annos, image.shape[:2])
        dataset_dict["instances"] = utils.filter_empty_instances(instances)
        return dataset_dict

class MyTrainer(DefaultTrainer):
    @classmethod
    def build_train_loader(cls, cfg):
        transformlist = [T.Resize((cfg.RESIZE, cfg.RESIZE)),
                      T.RandomFlip(prob=0.5, horizontal=False, vertical=True),
                      T.RandomFlip(prob=0.5, horizontal=True, vertical=False), 
                      T.RandomBrightness(0.8,1.2),
                      T.RandomCrop("relative", (0.6,0.6))
                      ]
        return build_detection_train_loader(cfg, mapper=CustomMapper(transformlist, random_enhancement = cfg.RANDOM_ENHANCEMENT))

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        if output_folder is None:
          output_folder = os.path.join(cfg.OUTPUT_DIR)
        return MYEvaluator(dataset_name, outpath = output_folder, valid_classes = ["Fish", "Cnidaria"])

    #@classmethod
    #def build_evaluator(cls, cfg, dataset_name, output_folder=None):
    #    if output_folder is None:
    #        output_folder = os.path.join(cfg.OUTPUT_DIR)
    #    return mAPEvaluator(dataset_name, outpath = output_folder)


In [16]:
cfg = get_cfg()

cfg.OUTPUT_DIR = OUT_DIR
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("fish_train",)
cfg.DATASETS.TEST = ("fish_val",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")  # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = BATCH_SIZE
cfg.SOLVER.BASE_LR = 0.003  #LR
cfg.RESIZE = RESIZE_TO

cfg.SOLVER.MAX_ITER = NUM_EPOCHS * len(inputs_train) / BATCH_SIZE
cfg.TEST.EVAL_PERIOD = VALIDATION_SET_FREQUENCY * len(inputs_train) / BATCH_SIZE

cfg.SOLVER.STEPS = []        
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512   
cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(MAPPING.keys())
cfg.RANDOM_ENHANCEMENT = False

In [17]:
#iterations = np.arange(1, cfg.SOLVER.MAX_ITER, 1)
iterations = [cfg.SOLVER.MAX_ITER]
for it in iterations:
    print(it)
    cfg.SOLVER.MAX_ITER = int(it)
    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    trainer = MyTrainer(cfg) 
    trainer.resume_or_load(resume=True)
    trainer.train()

    xx = OUT_DIR+"model_final.pth"
    xx2 = OUT_DIR+"model_final_{}it.pth".format(it)
    !cp {xx} {xx2}

395.0
[32m[07/11 11:14:32 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
   

  0%|          | 0/79 [00:00<?, ?it/s]

loading train dataset
[32m[07/11 11:14:37 d2.data.build]: [0mRemoved 0 images with no usable annotations. 79 images left.
[32m[07/11 11:14:37 d2.data.build]: [0mUsing training sampler TrainingSampler
[32m[07/11 11:14:37 d2.data.common]: [0mSerializing 79 elements to byte tensors and concatenating them all ...
[32m[07/11 11:14:37 d2.data.common]: [0mSerialized dataset takes 0.09 MiB
[32m[07/11 11:14:37 d2.engine.hooks]: [0mLoading scheduler from state_dict ...
[32m[07/11 11:14:37 d2.engine.train_loop]: [0mStarting training from iteration 197


  tensor = torch.from_numpy(np.ascontiguousarray(img))
  tensor = torch.from_numpy(np.ascontiguousarray(img))
  max_size = (max_size + (stride - 1)) // stride * stride


  0%|          | 0/9 [00:00<?, ?it/s]

loading val dataset
[32m[07/11 11:14:42 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[32m[07/11 11:14:42 d2.data.common]: [0mSerializing 9 elements to byte tensors and concatenating them all ...
[32m[07/11 11:14:42 d2.data.common]: [0mSerialized dataset takes 0.01 MiB


  0%|          | 0/9 [00:00<?, ?it/s]

loading val dataset
[32m[07/11 11:14:42 d2.evaluation.evaluator]: [0mStart inference on 9 batches
[32m[07/11 11:14:45 d2.evaluation.evaluator]: [0mTotal inference time: 0:00:00.722099 (0.180525 s / iter per device, on 1 devices)
[32m[07/11 11:14:45 d2.evaluation.evaluator]: [0mTotal inference pure compute time: 0:00:00 (0.108939 s / iter per device, on 1 devices)
evaluate
[32m[07/11 11:14:45 d2.engine.defaults]: [0mEvaluation results for fish_val in csv format:
[32m[07/11 11:14:45 d2.evaluation.testing]: [0mcopypaste: mAP=0.16145125031471252
[32m[07/11 11:14:45 d2.utils.events]: [0m eta: 0:04:20  iter: 199  total_loss: 1.056  loss_cls: 0.1585  loss_box_reg: 0.2808  loss_mask: 0.4951  loss_rpn_cls: 0.04782  loss_rpn_loc: 0.07448  time: 1.3374  data_time: 0.6320  lr: 0.0015129  max_mem: 4456M
[32m[07/11 11:15:12 d2.utils.events]: [0m eta: 0:03:46  iter: 219  total_loss: 1.265  loss_cls: 0.2185  loss_box_reg: 0.4061  loss_mask: 0.4913  loss_rpn_cls: 0.06947  loss_rpn_loc: 0.

  0%|          | 0/9 [00:00<?, ?it/s]

loading val dataset
[32m[07/11 11:15:51 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[32m[07/11 11:15:51 d2.data.common]: [0mSerializing 9 elements to byte tensors and concatenating them all ...
[32m[07/11 11:15:51 d2.data.common]: [0mSerialized dataset takes 0.01 MiB


  0%|          | 0/9 [00:00<?, ?it/s]

loading val dataset
[32m[07/11 11:15:51 d2.evaluation.evaluator]: [0mStart inference on 9 batches
[32m[07/11 11:15:54 d2.evaluation.evaluator]: [0mTotal inference time: 0:00:00.727554 (0.181889 s / iter per device, on 1 devices)
[32m[07/11 11:15:54 d2.evaluation.evaluator]: [0mTotal inference pure compute time: 0:00:00 (0.108210 s / iter per device, on 1 devices)
evaluate
[32m[07/11 11:15:54 d2.engine.defaults]: [0mEvaluation results for fish_val in csv format:
[32m[07/11 11:15:54 d2.evaluation.testing]: [0mcopypaste: mAP=0.16100914776325226
[32m[07/11 11:16:06 d2.utils.events]: [0m eta: 0:02:50  iter: 259  total_loss: 1.236  loss_cls: 0.1819  loss_box_reg: 0.3465  loss_mask: 0.4771  loss_rpn_cls: 0.07585  loss_rpn_loc: 0.1203  time: 1.2886  data_time: 0.4132  lr: 0.0019681  max_mem: 5004M
[32m[07/11 11:16:32 d2.utils.events]: [0m eta: 0:02:26  iter: 279  total_loss: 1.277  loss_cls: 0.2081  loss_box_reg: 0.3681  loss_mask: 0.4756  loss_rpn_cls: 0.07079  loss_rpn_loc: 0.0

  0%|          | 0/9 [00:00<?, ?it/s]

loading val dataset
[32m[07/11 11:16:59 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[32m[07/11 11:16:59 d2.data.common]: [0mSerializing 9 elements to byte tensors and concatenating them all ...
[32m[07/11 11:16:59 d2.data.common]: [0mSerialized dataset takes 0.01 MiB


  0%|          | 0/9 [00:00<?, ?it/s]

loading val dataset
[32m[07/11 11:17:00 d2.evaluation.evaluator]: [0mStart inference on 9 batches
[32m[07/11 11:17:02 d2.evaluation.evaluator]: [0mTotal inference time: 0:00:00.776504 (0.194126 s / iter per device, on 1 devices)
[32m[07/11 11:17:02 d2.evaluation.evaluator]: [0mTotal inference pure compute time: 0:00:00 (0.110762 s / iter per device, on 1 devices)
evaluate
[32m[07/11 11:17:03 d2.engine.defaults]: [0mEvaluation results for fish_val in csv format:
[32m[07/11 11:17:03 d2.evaluation.testing]: [0mcopypaste: mAP=0.17556633055210114
[32m[07/11 11:17:03 d2.utils.events]: [0m eta: 0:02:00  iter: 299  total_loss: 1.11  loss_cls: 0.1753  loss_box_reg: 0.3294  loss_mask: 0.4682  loss_rpn_cls: 0.06282  loss_rpn_loc: 0.112  time: 1.2836  data_time: 0.4102  lr: 0.0022716  max_mem: 5004M
[32m[07/11 11:17:33 d2.utils.events]: [0m eta: 0:01:35  iter: 319  total_loss: 1.137  loss_cls: 0.1815  loss_box_reg: 0.3288  loss_mask: 0.4614  loss_rpn_cls: 0.05762  loss_rpn_loc: 0.109

  0%|          | 0/9 [00:00<?, ?it/s]

loading val dataset
[32m[07/11 11:18:17 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[32m[07/11 11:18:17 d2.data.common]: [0mSerializing 9 elements to byte tensors and concatenating them all ...
[32m[07/11 11:18:17 d2.data.common]: [0mSerialized dataset takes 0.01 MiB


  0%|          | 0/9 [00:00<?, ?it/s]

loading val dataset
[32m[07/11 11:18:18 d2.evaluation.evaluator]: [0mStart inference on 9 batches
[32m[07/11 11:18:20 d2.evaluation.evaluator]: [0mTotal inference time: 0:00:00.618073 (0.154518 s / iter per device, on 1 devices)
[32m[07/11 11:18:20 d2.evaluation.evaluator]: [0mTotal inference pure compute time: 0:00:00 (0.096721 s / iter per device, on 1 devices)
evaluate
[32m[07/11 11:18:20 d2.engine.defaults]: [0mEvaluation results for fish_val in csv format:
[32m[07/11 11:18:20 d2.evaluation.testing]: [0mcopypaste: mAP=0.17442122101783752
[32m[07/11 11:18:33 d2.utils.events]: [0m eta: 0:00:45  iter: 359  total_loss: 1.149  loss_cls: 0.1768  loss_box_reg: 0.348  loss_mask: 0.4205  loss_rpn_cls: 0.07131  loss_rpn_loc: 0.09524  time: 1.3395  data_time: 0.4588  lr: 0.0027269  max_mem: 5082M
[32m[07/11 11:18:59 d2.utils.events]: [0m eta: 0:00:19  iter: 379  total_loss: 1.058  loss_cls: 0.1675  loss_box_reg: 0.3249  loss_mask: 0.4103  loss_rpn_cls: 0.06073  loss_rpn_loc: 0.0

  0%|          | 0/9 [00:00<?, ?it/s]

loading val dataset
[32m[07/11 11:19:19 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[32m[07/11 11:19:19 d2.data.common]: [0mSerializing 9 elements to byte tensors and concatenating them all ...
[32m[07/11 11:19:19 d2.data.common]: [0mSerialized dataset takes 0.01 MiB


  0%|          | 0/9 [00:00<?, ?it/s]

loading val dataset
[32m[07/11 11:19:20 d2.evaluation.evaluator]: [0mStart inference on 9 batches
[32m[07/11 11:19:22 d2.evaluation.evaluator]: [0mTotal inference time: 0:00:00.662167 (0.165542 s / iter per device, on 1 devices)
[32m[07/11 11:19:22 d2.evaluation.evaluator]: [0mTotal inference pure compute time: 0:00:00 (0.100901 s / iter per device, on 1 devices)
evaluate
[32m[07/11 11:19:22 d2.engine.defaults]: [0mEvaluation results for fish_val in csv format:
[32m[07/11 11:19:22 d2.evaluation.testing]: [0mcopypaste: mAP=0.17976847290992737


#Testing the model

In [19]:
class resize_image_and_boxes():
  def __init__(self, width, height):
    self.width = width
    self.height = height
  
  def __call__(self, image, bboxes, labels):
      new_boxes = []
      xShape = image.shape
      for bb in bboxes:
        x1 = bb[0]/xShape[1]*self.width
        y1 = bb[1]/xShape[0]*self.height
        x2 = bb[2]/xShape[1]*self.width
        y2 = bb[3]/xShape[0]*self.height
        new_boxes.append([x1, y1, x2, y2])

      new_boxes = np.array(new_boxes)
      image = cv2.resize(image, (self.width, self.height))
      return image, new_boxes, labels 

In [18]:
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # path to the model to test
predictor = DefaultPredictor(cfg)

In [20]:
PATH_TEST = "/content/drive/MyDrive/ROV_ECIM/multimedia/ModelTestData/" #path to the dataset to test

inputs_test, targets_test = helpers.image_and_label_paths(PATH_TEST, image_dir = "images", label_dir = "labels")

test_dataset = DataSet(inputs_test, 
                            targets_test, 
                            use_cache          = False,
                            mapping            = MAPPING,
                            random_enhancement = False,
                            use_detectron      = True
                            )

In [21]:
for d, data in zip(["test"], [test_dataset]):
    DatasetCatalog.register("fish_" + d, lambda f=data, d=d: f.get_data_dicts(name = d))
    MetadataCatalog.get("fish_" + d).set(thing_classes=list(MAPPING.keys()))

In [26]:
dataset = DatasetCatalog.get("fish_test")
metric_fn = MetricBuilder.build_evaluation_metric("map_2d", async_mode=True, num_classes=2)
resizer = resize_image_and_boxes(RESIZE_TO, RESIZE_TO)

for dataEl in dataset:
    im = cv2.imread(dataEl["file_name"])   
    boxes = [el["bbox"] for el in dataEl["annotations"]]
    labels = [el["category_id"] for el in dataEl["annotations"]]
    im, boxes, labels = resizer(im, boxes, labels)

    outputs = predictor(im)["instances"].to("cpu")  # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format
    gts = []
    for b, l in zip(boxes, labels):
      if l != 1:
        continue
      gts.append(list(b)+[l-1]+[0,0])
      
    preds = []
    for b, l, s in zip(outputs.pred_boxes, outputs.pred_classes, outputs.scores):
      if l != 1:
        continue
      preds.append(list(b.numpy()) +  [max(l.item()-1, 0)] + [s.item()])
    metric_fn.add(np.array(preds), np.array(gts)) 
    
    
mmap = metric_fn.value(iou_thresholds=np.arange(0.5, 1.0, 0.05), recall_thresholds=np.arange(0., 1.01, 0.01), mpolicy='soft')['mAP']
print("mAP Score for the test dataset: ", mmap)


  0%|          | 0/35 [00:00<?, ?it/s]

loading test dataset


  max_size = (max_size + (stride - 1)) // stride * stride


0.21547599
