# Library

All the libraries needed to train and evaluate the model.

In [1]:
# Common library
import os
import cv2
from matplotlib import pyplot as plt
import random
from datetime import datetime
import sys
import copy
import torch

# Preparation of the dataset
from detectron2.data.datasets import register_coco_instances
from detectron2.data import DatasetCatalog, MetadataCatalog

# Visualisation of the segmentation 
from detectron2.utils.visualizer import Visualizer
from detectron2.utils.visualizer import ColorMode

# Configure the model
from detectron2 import model_zoo
from detectron2.config import get_cfg

# Data augmentation
from detectron2.data import detection_utils as d_utils
from detectron2.data import build_detection_train_loader
from detectron2.data import transforms as T
from detectron2.data import DatasetMapper


# Evaluate the model 
from detectron2.engine import DefaultPredictor
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader

# Train
from detectron2.engine import DefaultTrainer


import copy
import logging
import numpy as np
from typing import List, Optional, Union
import torch

from detectron2.config import configurable

# Preparation of the dataset

We first need to register the dataset. It is in COCO format here; for any other dataset format, a custom loading function could be registered instead.

**Replace these with the paths to your own images and JSON annotation files.**

In [2]:
# COCO-format annotation file and image folder for each split.
# These are relative paths, so they are resolved against the notebook's working directory.
TRAIN_JSON = "datasets/train/annotations/instances_default.json"
TRAIN_IMAGES = "datasets/train/images"
TEST_JSON = "datasets/test/annotations/instances_default.json"
TEST_IMAGES = "datasets/test/images"

In [3]:
# Register the train and test splits as COCO-format datasets.
#
# detectron2 refuses to register a name that is already in the catalog, so re-running
# this cell in a live kernel would fail. Any previous registration (and its metadata)
# is therefore dropped first, which also makes the cell pick up changed paths.
for dataset_name, json_file, image_root in (
    ("bloc_segmentation_train", TRAIN_JSON, TRAIN_IMAGES),
    ("bloc_segmentation_test", TEST_JSON, TEST_IMAGES),
):
    if dataset_name in DatasetCatalog.list():
        DatasetCatalog.remove(dataset_name)
    if dataset_name in MetadataCatalog.list():
        MetadataCatalog.remove(dataset_name)

    register_coco_instances(
        name=dataset_name,
        metadata={},
        json_file=json_file,
        image_root=image_root,
    )

# Training of the model

## Configuration of the model

**You can change any of the hyperparameters. The available architectures are listed [here](https://github.com/facebookresearch/detectron2/blob/main/MODEL_ZOO.md).**

In [4]:
# Hyperparameters

DATA_SET_NAME = "bloc_segmentation"  # first component of the output directory path
ARCHITECTURE = "mask_rcnn_R_101_FPN_3x"  # model zoo config name; also used in the output path
CONFIG_FILE_PATH = f"COCO-InstanceSegmentation/{ARCHITECTURE}.yaml"  # model zoo config file
MAX_ITER = 2000  # copied to cfg.SOLVER.MAX_ITER
EVAL_PERIOD = 100  # copied to cfg.TEST.EVAL_PERIOD
BASE_LR = 0.001  # copied to cfg.SOLVER.BASE_LR
NUM_CLASSES = 1  # copied to cfg.MODEL.ROI_HEADS.NUM_CLASSES
CHECKPOINT = 1000  # copied to cfg.SOLVER.CHECKPOINT_PERIOD

**Automatically create the folder where the models are going to be saved.**

In [5]:
# Output dir: <dataset name>/<architecture>/<timestamp of this cell's execution>.
# The timestamp is taken when the cell runs, so every execution yields a new folder.
OUTPUT_DIR_PATH = os.path.join(
    DATA_SET_NAME,
    ARCHITECTURE,
    f"{datetime.now():%Y-%m-%d-%H-%M-%S}",
)
os.makedirs(OUTPUT_DIR_PATH, exist_ok=True)

In [7]:
# Start from detectron2's default config and layer the model-zoo settings on top of it.
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(CONFIG_FILE_PATH)) # Architecture settings from the model zoo
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(CONFIG_FILE_PATH) # Pretrained weights for that architecture
cfg.DATASETS.TRAIN = ("bloc_segmentation_train",) # Must match the names used when registering the datasets
cfg.DATASETS.TEST = ("bloc_segmentation_test",)
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 64 # Number of region proposals (RoIs) sampled per image to train the ROI heads
cfg.TEST.EVAL_PERIOD = EVAL_PERIOD # Evaluation period, in iterations
cfg.DATALOADER.NUM_WORKERS = 2 # Number of data-loading workers
cfg.SOLVER.IMS_PER_BATCH = 1 # Number of images per training batch (one iteration)
cfg.INPUT.MASK_FORMAT='bitmask' # Format of the ground-truth segmentation masks
cfg.SOLVER.BASE_LR = BASE_LR # Learning rate
cfg.SOLVER.MAX_ITER = MAX_ITER # Total number of training iterations
cfg.MODEL.ROI_HEADS.NUM_CLASSES = NUM_CLASSES # Number of object classes in the dataset
cfg.OUTPUT_DIR = OUTPUT_DIR_PATH # Output dir
cfg.SOLVER.CHECKPOINT_PERIOD = CHECKPOINT # Checkpoint period, in iterations


## Data augmentation configuration

We override the default dataset mapper. The dataset mapper is what the data loader uses to turn each dataset record into a model input.

**You can modify the `custom_augmentations` list to change which data augmentations are applied.**

In [6]:

class CustomDatasetMapper(DatasetMapper):
    """DatasetMapper that appends extra random augmentations to the training pipeline.

    The extra augmentations (vertical flip, brightness, contrast, saturation) are
    added after the ones received in ``augmentations``. They are only added when
    ``is_train`` is true, so a mapper built for inference stays deterministic.
    """

    @configurable
    def __init__(
        self,
        is_train: bool,
        augmentations: List[Union[T.Augmentation, T.Transform]],
        image_format: str,
        use_instance_mask: bool = False,
        use_keypoint: bool = False,
        instance_mask_format: str = "polygon",
        keypoint_hflip_indices: Optional[np.ndarray] = None,
        precomputed_proposal_topk: Optional[int] = None,
        recompute_boxes: bool = False,
    ):
        """Forward all arguments to ``DatasetMapper`` with the custom augmentations appended.

        Args:
            is_train: whether the mapper is used for training.
            augmentations: base augmentations/transforms; this list is not modified.
            image_format: forwarded unchanged to ``DatasetMapper``.
            use_instance_mask: forwarded unchanged to ``DatasetMapper``.
            use_keypoint: forwarded unchanged to ``DatasetMapper``.
            instance_mask_format: forwarded unchanged to ``DatasetMapper``.
            keypoint_hflip_indices: forwarded unchanged to ``DatasetMapper``.
            precomputed_proposal_topk: forwarded unchanged to ``DatasetMapper``.
            recompute_boxes: forwarded unchanged to ``DatasetMapper``.
        """
        # Work on a copy so the list owned by the caller is never mutated.
        augmentations = list(augmentations)

        if is_train:
            # Each augmentation is applied with probability 0.5. The flip has its own
            # inner probability of 0.4, so it happens for about 0.5 * 0.4 = 20% of samples.
            custom_augmentations = [
                T.RandomApply(T.RandomFlip(prob=0.4, horizontal=False, vertical=True), prob=0.5),
                T.RandomApply(T.RandomBrightness(0.8, 1.2), prob=0.5),
                T.RandomApply(T.RandomContrast(0.6, 1.4), prob=0.5),
                T.RandomApply(T.RandomSaturation(0.8, 1.2), prob=0.5),
            ]
            augmentations.extend(custom_augmentations)

        super().__init__(
            is_train=is_train,
            augmentations=augmentations,
            image_format=image_format,
            use_instance_mask=use_instance_mask,
            use_keypoint=use_keypoint,
            instance_mask_format=instance_mask_format,
            keypoint_hflip_indices=keypoint_hflip_indices,
            precomputed_proposal_topk=precomputed_proposal_topk,
            recompute_boxes=recompute_boxes
        )

    @classmethod
    def from_config(cls, cfg, is_train: bool = True):
        """Translate a detectron2 config into the keyword arguments of ``__init__``.

        Args:
            cfg: detectron2 config node.
            is_train: whether the mapper is built for training.

        Returns:
            dict of keyword arguments for ``__init__``.
        """
        augs = d_utils.build_augmentation(cfg, is_train)
        if cfg.INPUT.CROP.ENABLED and is_train:
            # Cropping goes first in the pipeline; boxes are then recomputed only when
            # the model predicts masks.
            augs.insert(0, T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE))
            recompute_boxes = cfg.MODEL.MASK_ON
        else:
            recompute_boxes = False

        ret = {
            "is_train": is_train,
            "augmentations": augs,
            "image_format": cfg.INPUT.FORMAT,
            "use_instance_mask": cfg.MODEL.MASK_ON,
            "instance_mask_format": cfg.INPUT.MASK_FORMAT,
            "use_keypoint": cfg.MODEL.KEYPOINT_ON,
            "recompute_boxes": recompute_boxes,
        }

        if cfg.MODEL.KEYPOINT_ON:
            ret["keypoint_hflip_indices"] = d_utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN)

        if cfg.MODEL.LOAD_PROPOSALS:
            ret["precomputed_proposal_topk"] = (
                cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN
                if is_train
                else cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST
            )
        return ret

We also need to create a new trainer that builds its training data loader with this custom mapper.

In [7]:
class MyTrainer(DefaultTrainer):
    """DefaultTrainer variant whose training loader uses ``CustomDatasetMapper``."""

    @classmethod
    def build_train_loader(cls, cfg):
        """Return the training data loader built with the augmenting mapper."""
        augmenting_mapper = CustomDatasetMapper(cfg, is_train=True)
        return build_detection_train_loader(cfg, mapper=augmenting_mapper)

## Training part 

### Without data augmentation 

Let's start the training 

In [6]:
# Baseline run: DefaultTrainer is used as-is, so CustomDatasetMapper is not involved.
trainer = DefaultTrainer(cfg)
# resume=False: start from cfg.MODEL.WEIGHTS rather than continuing a previous run.
trainer.resume_or_load(resume=False)
trainer.train()

[32m[07/18 16:13:15 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (2, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (2,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (80, 256, 1, 1) in the checkpoint but (1, 256, 1, 1) in

[32m[07/18 16:13:15 d2.engine.train_loop]: [0mStarting training from iteration 0


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[32m[07/18 16:13:31 d2.utils.events]: [0m eta: 0:09:38  iter: 19  total_loss: 5.891  loss_cls: 0.6813  loss_box_reg: 0.6393  loss_mask: 0.696  loss_rpn_cls: 3.647  loss_rpn_loc: 0.2639    time: 0.5565  last_time: 0.2731  data_time: 0.5273  last_data_time: 0.0032   lr: 1.9981e-05  max_mem: 2366M
[32m[07/18 16:13:46 d2.utils.events]: [0m eta: 0:10:33  iter: 39  total_loss: 2.611  loss_cls: 0.5902  loss_box_reg: 0.7096  loss_mask: 0.6691  loss_rpn_cls: 0.4023  loss_rpn_loc: 0.156    time: 0.6623  last_time: 0.2725  data_time: 0.5090  last_data_time: 0.0013   lr: 3.9961e-05  max_mem: 2635M
[32m[07/18 16:14:01 d2.utils.events]: [0m eta: 0:10:53  iter: 59  total_loss: 2.149  loss_cls: 0.542  loss_box_reg: 0.6954  loss_mask: 0.6095  loss_rpn_cls: 0.1477  loss_rpn_loc: 0.1542    time: 0.6942  last_time: 1.3165  data_time: 0.5087  last_data_time: 1.0960   lr: 5.9941e-05  max_mem: 2898M
[32m[07/18 16:14:21 d2.utils.events]: [0m eta: 0:10:47  iter: 79  total_loss: 2.208  loss_cls: 0.5082 

### With data augmentation

In [8]:
# Release cached GPU memory that PyTorch is holding before building a new trainer.
torch.cuda.empty_cache()
# MyTrainer builds its training loader with CustomDatasetMapper (extra augmentations).
trainer = MyTrainer(cfg)
# resume=False: start from cfg.MODEL.WEIGHTS rather than continuing a previous run.
trainer.resume_or_load(resume=False)
trainer.train()

[32m[07/19 13:03:02 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (2, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (2,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (80, 256, 1, 1) in the checkpoint but (1, 256, 1, 1) in

[32m[07/19 13:03:03 d2.engine.train_loop]: [0mStarting training from iteration 0


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[32m[07/19 13:03:25 d2.utils.events]: [0m eta: 0:09:21  iter: 19  total_loss: 6.026  loss_cls: 0.6295  loss_box_reg: 0.4684  loss_mask: 0.6968  loss_rpn_cls: 3.982  loss_rpn_loc: 0.2776    time: 0.8257  last_time: 0.2116  data_time: 0.7822  last_data_time: 0.0165   lr: 1.9981e-05  max_mem: 3594M
[32m[07/19 13:03:39 d2.utils.events]: [0m eta: 0:09:17  iter: 39  total_loss: 2.766  loss_cls: 0.6103  loss_box_reg: 0.679  loss_mask: 0.6673  loss_rpn_cls: 0.6187  loss_rpn_loc: 0.2058    time: 0.7568  last_time: 0.2463  data_time: 0.4681  last_data_time: 0.0201   lr: 3.9961e-05  max_mem: 3594M
[32m[07/19 13:03:56 d2.utils.events]: [0m eta: 0:09:15  iter: 59  total_loss: 2.172  loss_cls: 0.5564  loss_box_reg: 0.6598  loss_mask: 0.6239  loss_rpn_cls: 0.195  loss_rpn_loc: 0.1637    time: 0.7917  last_time: 0.2682  data_time: 0.6015  last_data_time: 0.0066   lr: 5.9941e-05  max_mem: 4057M
[32m[07/19 13:04:12 d2.utils.events]: [0m eta: 0:09:18  iter: 79  total_loss: 2.011  loss_cls: 0.5043

### Resume training 

**If you need to keep training your model, you can use this part of the code. Set `NB_ITERATION` to the number of additional iterations you want.**

In [None]:
# Number of additional iterations added to MAX_ITER when resuming training.
NB_ITERATION = 2000

In [10]:
# Extend the iteration budget. Every execution of this cell adds NB_ITERATION again.
MAX_ITER += NB_ITERATION
cfg.SOLVER.MAX_ITER = MAX_ITER
cfg.MODEL.WEIGHTS = os.path.join(OUTPUT_DIR_PATH, 'model_final.pth')  # Path to the last checkpoint
# NOTE(review): this resumes with DefaultTrainer, i.e. without the custom augmentations,
# even when the run being continued was started with MyTrainer -- confirm this is intended.
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=True)
trainer.train()

[32m[07/19 14:32:42 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

# Evaluation of the model

**To evaluate the model, you can monitor the loss curves with TensorBoard.**

In [18]:
# Load the TensorBoard notebook extension. This only registers the magic; the dashboard
# is opened separately with `%tensorboard --logdir <dir>` (presumably pointing at
# OUTPUT_DIR_PATH, which is set as cfg.OUTPUT_DIR -- TODO confirm where the event files land).
%load_ext tensorboard

**We can evaluate the mean average precision (mAP).**

In [19]:
# Final weights of the model trained in this session.
# OUTPUT_DIR_PATH contains a timestamp taken when the output-dir cell ran, so after a
# kernel restart it points to a new, empty folder and this file will not be found.
cfg.MODEL.WEIGHTS = os.path.join(OUTPUT_DIR_PATH, 'model_final.pth')  # path to the model we just trained
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7   # set a custom testing threshold
predictor = DefaultPredictor(cfg)

AssertionError: Checkpoint bloc_segmentation\mask_rcnn_R_101_FPN_3x\2024-08-26-19-13-40\model_final.pth not found!

**The two parts below handle loading the model themselves, so you can use them after closing or restarting Jupyter once training is done.**

**Precision with data augmentation**

In [12]:
# Evaluation results go to <output dir>/evaluation/<MAX_ITER>_iter.
# MAX_ITER is the current notebook variable, so the folder name only matches the
# evaluated weights if they were trained with that iteration budget.
evaluation_path = os.path.join(OUTPUT_DIR_PATH, 'evaluation', f'{MAX_ITER}_iter')
os.makedirs(evaluation_path, exist_ok=True)
# COCO-style evaluation of the current predictor's model on the registered test split.
evaluator = COCOEvaluator("bloc_segmentation_test", output_dir=evaluation_path)
val_loader = build_detection_test_loader(cfg, "bloc_segmentation_test")
print(inference_on_dataset(predictor.model, val_loader, evaluator))

[32m[07/19 15:03:11 d2.data.datasets.coco]: [0mLoaded 28 images in COCO format from datasets/test/annotations/instances_default.json
[32m[07/19 15:03:11 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[32m[07/19 15:03:11 d2.data.common]: [0mSerializing the dataset using: <class 'detectron2.data.common._TorchSerializedList'>
[32m[07/19 15:03:11 d2.data.common]: [0mSerializing 28 elements to byte tensors and concatenating them all ...
[32m[07/19 15:03:11 d2.data.common]: [0mSerialized dataset takes 0.82 MiB
[32m[07/19 15:03:11 d2.evaluation.evaluator]: [0mStart inference on 28 batches
[32m[07/19 15:03:29 d2.evaluation.evaluator]: [0mInference done 1/28. Dataloading: 6.5550 s/iter. Inference: 5.8218 s/iter. Eval: 5.8628 s/iter. Total: 18.2406 s/iter. ETA=0:08:12
[32m[07/19 15:04:25 d2.evaluation.evaluator]: [0mInference done 2/28. Dataloading: 3.2778 s/iter

**Precision without data augmentation**

In [14]:
# Folder of a previous training run that contains model_final.pth.
# NOTE(review): machine-specific absolute path -- replace it with the run folder on your own machine.
WEIGHTS_PATH = r"C:\Users\Jalil\Desktop\PROJECTS\Vrak3D\training\bloc_segmentation\mask_rcnn_R_101_FPN_3x\2024-07-19-13-02-30"

In [15]:
# Load the final weights of the run stored in WEIGHTS_PATH, not this session's output dir.
cfg.MODEL.WEIGHTS = os.path.join(WEIGHTS_PATH, 'model_final.pth')
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7   # set a custom testing threshold
predictor = DefaultPredictor(cfg)

In [16]:
# Evaluation results go to <WEIGHTS_PATH>/evaluation/<MAX_ITER>_iter.
# MAX_ITER is the current notebook variable, so the folder name only matches the
# loaded weights if they were trained with that iteration budget.
evaluation_path = os.path.join(WEIGHTS_PATH, 'evaluation', f'{MAX_ITER}_iter')
os.makedirs(evaluation_path, exist_ok=True)
# COCO-style evaluation of the current predictor's model on the registered test split.
evaluator = COCOEvaluator("bloc_segmentation_test", output_dir=evaluation_path)
val_loader = build_detection_test_loader(cfg, "bloc_segmentation_test")
print(inference_on_dataset(predictor.model, val_loader, evaluator))

Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.552
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.707
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.647
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.081
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.584
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.012
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.114
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.591
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.085
 Average Recall     (AR) @[ IoU=0.50:0.