<a href="https://colab.research.google.com/github/Michal287/computer_vision/blob/main/Detectron2_MaskRCNN_RGBD.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive

import sys
import os
import distutils.core
import random
import tqdm
import cv2
import matplotlib.pyplot as plt
import numpy as np
from google.colab.patches import cv2_imshow
import copy
import torch

# Connect do google drive
drive.mount('/content/drive')

# Download syntetic data
!7z x /content/drive/MyDrive/final2.zip
!mkdir /content/321/depth_images
!cp -r /content/321/images/* /content/321/depth_images/
# Download real data
# !7z x /content/drive/MyDrive/!7z x /content/drive/MyDrive/final2.zip
# !cp -r /content/drive/MyDrive/real_data2 /content

# Copy models
!cp /content/drive/MyDrive/Model_Final_41/model_final_41.pth /content/model_final.pth
!cp /content/drive/MyDrive/Model_Final_41/config_41.yaml /content/config.yaml

# Download and install detectron
!pip install 'git+https://github.com/facebookresearch/detectron2'

from detectron2.config import get_cfg, LazyConfig
from detectron2.engine import DefaultPredictor, DefaultTrainer
from detectron2.utils.visualizer import Visualizer, ColorMode
from detectron2 import model_zoo
from detectron2.data.datasets import register_coco_instances
from detectron2.data import MetadataCatalog, build_detection_test_loader, build_detection_train_loader, DatasetCatalog
from detectron2.data import detection_utils as utils
import detectron2.data.transforms as T

import locale
def getpreferredencoding(do_setlocale=True):
    """Report UTF-8 as the preferred encoding, whatever the process locale is.

    ``do_setlocale`` is accepted so the call signature matches
    ``locale.getpreferredencoding``; it is not used.
    """
    forced_encoding = "UTF-8"
    return forced_encoding


# Replace the stdlib lookup with the stub above.
# NOTE(review): presumably a workaround for Colab shell commands that require a UTF-8 locale — confirm
setattr(locale, "getpreferredencoding", getpreferredencoding)

Mounted at /content/drive

7-Zip [64] 16.02 : Copyright (c) 1999-2016 Igor Pavlov : 2016-05-21
p7zip Version 16.02 (locale=en_US.UTF-8,Utf16=on,HugeFiles=on,64 bits,2 CPUs Intel(R) Xeon(R) CPU @ 2.20GHz (406F0),ASM,AES-NI)

Scanning the drive for archives:
  0M Scan /content/drive/MyDrive/                                 1 file, 698444441 bytes (667 MiB)

Extracting archive: /content/drive/MyDrive/final2.zip
--
Path = /content/drive/MyDrive/final2.zip
Type = zip
Physical Size = 698444441

  0%      1% 10 - 321/images/image_120268.png                                       2% 17 - 321/images/image_130202.png                                       4% 33 - 321/images/image_145290.png                                     

# Modify Model

In [None]:
from detectron2.modeling.backbone import build_resnet_backbone
from detectron2.modeling import Backbone, ShapeSpec, BACKBONE_REGISTRY
from detectron2.modeling.backbone.fpn import FPN, LastLevelMaxPool

@BACKBONE_REGISTRY.register()
def build_resnet_fpn_backbone8(cfg, input_shape: ShapeSpec):
    """
    Build a ResNet + FPN backbone whose ResNet is created for 4-channel input.

    Args:
        cfg: a detectron2 CfgNode. ``MODEL.FPN.IN_FEATURES``, ``OUT_CHANNELS``,
            ``NORM`` and ``FUSE_TYPE`` are read here; ``cfg`` is also passed on to
            ``build_resnet_backbone``.
        input_shape (ShapeSpec): accepted so the function matches the backbone
            builder signature, but ignored — the input channel count is fixed to 4.

    Returns:
        backbone (Backbone): the FPN wrapping the 4-channel ResNet, with
        ``LastLevelMaxPool`` as its top block.

    """
    # The channel count is hard-coded instead of taken from ``input_shape``
    four_channel_input = ShapeSpec(channels=4, height=None, width=None, stride=None)
    bottom_up = build_resnet_backbone(cfg, four_channel_input)
    backbone = FPN(
        bottom_up=bottom_up,
        in_features=cfg.MODEL.FPN.IN_FEATURES,
        out_channels=cfg.MODEL.FPN.OUT_CHANNELS,
        norm=cfg.MODEL.FPN.NORM,
        top_block=LastLevelMaxPool(),
        fuse_type=cfg.MODEL.FPN.FUSE_TYPE,
    )
    return backbone

# Load Data

In [None]:
# Register the COCO-format training set under the name "tulips_train".
# Registering a name that is already in the catalog raises, so the guard keeps
# this cell re-runnable without first calling DatasetCatalog.remove.
if "tulips_train" not in DatasetCatalog.list():
    register_coco_instances("tulips_train", {}, "data/labels.json", "data/images")
# DatasetCatalog.remove("tulips_train")
# tulips_train = DatasetCatalog.get("tulips_train")
# tulips_metadata_train = MetadataCatalog.get("tulips_train")

# Load Config

In [None]:
# Documentation https://detectron2.readthedocs.io/en/latest/modules/config.html?highlight=config#yaml-config-references

# Start from the detectron2 defaults and overlay the saved experiment config
cfg = get_cfg()
cfg.merge_from_file("config.yaml")

# Model: checkpoint to initialise from and the custom backbone builder
cfg.MODEL.WEIGHTS = "model_final.pth"
cfg.MODEL.BACKBONE.NAME = 'build_resnet_fpn_backbone8'
# cfg.MODEL.FPN.IN_FEATURES = ['conv3']
# cfg.MODEL.RPN.IN_FEATURES = ['conv3']
# cfg.MODEL.ROI_HEADS.IN_FEATURES = ['conv3']

# Normalisation constants carry a fourth entry for the extra input channel
cfg.MODEL.PIXEL_STD = [1, 1, 1, 1]
cfg.MODEL.PIXEL_MEAN = [103.53, 116.28, 123.675, 0.0]

# Solver settings
cfg.SOLVER.MAX_ITER = 1000  # Iterations
cfg.SOLVER.BASE_LR = 0.01  # Learning rate
cfg.SOLVER.IMS_PER_BATCH = 4  # Images per training batch

# Data: train on the registered set, no test set
cfg.DATALOADER.NUM_WORKERS = 2  # Number of data loading workers
cfg.DATASETS.TEST = ()
cfg.DATASETS.TRAIN = ("tulips_train",)

In [None]:
# DatasetCatalog.remove("tulips_train")

In [None]:
# Build one 4-channel sample by hand to check the resulting array layout
sample_image_path = 'data/images/image_102310.png'
image = cv2.imread(sample_image_path)
# cv2.imread returns None instead of raising when the file cannot be read
if image is None:
    raise FileNotFoundError(sample_image_path)
# Size the all-zero depth placeholder from the image rather than assuming 1024x1024
depth = np.zeros((image.shape[0], image.shape[1], 1), dtype=np.uint8)
img = np.concatenate([image, depth], axis=2)

In [None]:
# Show the shape of the concatenated array; the last axis holds the image channels plus the one depth channel
img.shape

(1024, 1024, 4)

# Augmentation

In [None]:
def rgb_filename_to_depth_filename(path):
    """Map an RGB image path to the matching path in a sibling ``depth_images`` directory.

    The directory that directly contains the file is replaced by ``depth_images``;
    everything above it and the file name are kept. This works for relative paths
    ("data/images/x.png"), absolute paths ("/content/321/images/x.png") and a bare
    file name ("x.png" -> "depth_images/x.png").

    Args:
        path (str): path of the RGB image.

    Returns:
        str: path of the corresponding depth image.
    """
    image_dir, file_name = os.path.split(path)
    # Go one level up from the image directory, then down into depth_images
    dataset_root = os.path.dirname(image_dir)
    return os.path.join(dataset_root, 'depth_images', file_name)

def custom_mapper(dataset_dict):
    """Turn one dataset record into a training sample with a 4-channel image.

    The image named by ``dataset_dict["file_name"]`` is read as BGR, randomly
    brightened (probability 0.2), extended with an all-zero fourth channel that
    stands in for depth, and stored as a float32 channel-first tensor under
    ``"image"``. When the record has annotations, the non-crowd ones are
    transformed with the same augmentations and stored as ``"instances"``.

    Args:
        dataset_dict (dict): one dataset record; must contain ``"file_name"`` and
            may contain ``"annotations"``.

    Returns:
        dict: a deep copy of the record with ``"image"`` added and, if annotations
        were present, ``"annotations"`` replaced by ``"instances"``.
    """
    # Work on a copy so the caller's record is left untouched
    dataset_dict = copy.deepcopy(dataset_dict)
    image = utils.read_image(dataset_dict["file_name"], format="BGR")

    # Load depth image path
    # depth_image_path = rgb_filename_to_depth_filename(dataset_dict["file_name"])
    # depth_image = utils.read_image(depth_image_path, format="BGR")

    transform_list = [
        T.RandomApply(T.RandomBrightness(0.6, 2), prob=0.2),

        # T.RandomApply(T.RandomSaturation(intensity_min=0.6, intensity_max=1.3), prob=0.2),
        # T.RandomApply(T.RandomContrast(intensity_min=0.7, intensity_max=1.7), prob=0.2),

        # T.RandomApply(T.RandomRotation(angle=[-10, 10], expand=False, sample_style='range'), prob=0.2),
        # T.RandomApply(T.RandomFlip(prob=0.2, horizontal=True, vertical=False), prob=0.2),
        # T.RandomApply(T.RandomFlip(prob=0.2, horizontal=False, vertical=True), prob=0.2),
    ]

    # Apply the transformations to the image and keep the applied transforms for the annotations
    image, transforms = T.apply_transform_gens(transform_list, image)
    # depth_image, transforms = T.apply_transform_gens(transform_list, depth_image)

    # Build the zero depth placeholder from the augmented image so the spatial
    # size still matches if a size-changing augmentation is added to the list
    depth = np.zeros((image.shape[0], image.shape[1], 1), dtype=np.uint8)

    # Stack to (H, W, 4), then convert to a channel-first float32 tensor
    # dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
    # dataset_dict["depth_images"] = torch.as_tensor(depth_image.transpose(2, 0, 1).astype("float32"))
    img = np.concatenate([image, depth], axis=2)
    dataset_dict["image"] = torch.as_tensor(img.transpose(2, 0, 1).astype("float32"))

    # Records without annotations are passed through with only the image
    if "annotations" in dataset_dict:
        # Apply the image transforms to every non-crowd annotation
        annos = [
            utils.transform_instance_annotations(obj, transforms, image.shape[:2])
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(annos, image.shape[:2])
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

    return dataset_dict


class CustomTrainer(DefaultTrainer):
    """DefaultTrainer variant whose training loader maps records with ``custom_mapper``."""

    @classmethod
    def build_train_loader(cls, cfg):
        """Return the detection train loader for ``cfg``, using ``custom_mapper`` as mapper."""
        train_loader = build_detection_train_loader(cfg, mapper=custom_mapper)
        return train_loader

In [None]:
from detectron2.modeling import BACKBONE_REGISTRY

# Print the registry table of backbone builder names and the objects registered under them
print(BACKBONE_REGISTRY)

Registry of BACKBONE:
╒═════════════════════════════════════╤══════════════════════════════════════════════════════════════════╕
│ Names                               │ Objects                                                          │
╞═════════════════════════════════════╪══════════════════════════════════════════════════════════════════╡
│ build_resnet_backbone               │ <function build_resnet_backbone at 0x7f664e1e0e50>               │
├─────────────────────────────────────┼──────────────────────────────────────────────────────────────────┤
│ build_resnet_fpn_backbone           │ <function build_resnet_fpn_backbone at 0x7f664e1e13f0>           │
├─────────────────────────────────────┼──────────────────────────────────────────────────────────────────┤
│ build_retinanet_resnet_fpn_backbone │ <function build_retinanet_resnet_fpn_backbone at 0x7f664e1e1480> │
├─────────────────────────────────────┼──────────────────────────────────────────────────────────────────┤
│ build_resnet_

# Training

In [None]:
# Create the output directory named by the config if it does not exist yet
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
# CustomTrainer builds its train loader with custom_mapper
trainer = CustomTrainer(cfg)
# NOTE(review): resume=False presumably starts from cfg.MODEL.WEIGHTS instead of the last checkpoint in the output directory — confirm
trainer.resume_or_load(resume=False)
trainer.train()

[05/10 00:33:03 d2.engine.defaults]: Model:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          4, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
      (res

backbone.bottom_up.stem.conv1.weight


[05/10 00:33:06 d2.engine.train_loop]: Starting training from iteration 0
[05/10 00:33:44 d2.utils.events]:  eta: 0:31:33  iter: 19  total_loss: 1.77  loss_cls: 0.6479  loss_box_reg: 0.501  loss_mask: 0.4223  loss_rpn_cls: 0.1089  loss_rpn_loc: 0.0778    time: 1.9334  last_time: 1.9241  data_time: 0.0892  last_data_time: 0.0688   lr: 0.00019981  max_mem: 7411M
[05/10 00:34:23 d2.utils.events]:  eta: 0:30:38  iter: 39  total_loss: 0.9947  loss_cls: 0.2917  loss_box_reg: 0.3541  loss_mask: 0.2449  loss_rpn_cls: 0.05125  loss_rpn_loc: 0.05063    time: 1.9225  last_time: 1.9037  data_time: 0.0711  last_data_time: 0.0669   lr: 0.00039961  max_mem: 7412M
[05/10 00:35:01 d2.utils.events]:  eta: 0:29:58  iter: 59  total_loss: 0.7942  loss_cls: 0.2213  loss_box_reg: 0.2894  loss_mask: 0.1911  loss_rpn_cls: 0.0192  loss_rpn_loc: 0.04678    time: 1.9248  last_time: 1.8779  data_time: 0.0704  last_data_time: 0.0622   lr: 0.00059941  max_mem: 7412M
[05/10 00:35:39 d2.utils.events]:  eta: 0:29:20  i

# Tensorboard

In [None]:
# Load the TensorBoard notebook extension and open it on the "output" log directory
%load_ext tensorboard
%tensorboard --logdir output

In [None]:
# Copy the Model_Final_41 checkpoint and config from Drive to /content under their "41" names.
# NOTE(review): no cell visible here reads model_final_41.pth or config41.yaml — confirm these copies are still needed
!cp /content/drive/MyDrive/Model_Final_41/model_final_41.pth /content/model_final_41.pth
!cp /content/drive/MyDrive/Model_Final_41/config_41.yaml /content/config41.yaml

# Predict

In [None]:
# Build the inference config from the same base file used for training
cfg = get_cfg()
cfg.merge_from_file("config.yaml")

# Repeat the 4-channel overrides that the training config applied on top of
# config.yaml; without them the model does not match the trained checkpoint
# or the 4-channel arrays passed to the predictor.
cfg.MODEL.BACKBONE.NAME = 'build_resnet_fpn_backbone8'
cfg.MODEL.PIXEL_MEAN = [103.53, 116.28, 123.675, 0.0]
cfg.MODEL.PIXEL_STD = [1, 1, 1, 1]

# Weights written by the training run, and the score threshold set for testing
cfg.MODEL.WEIGHTS = "output/model_final.pth"
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7
predictor = DefaultPredictor(cfg)

[05/10 01:45:29 d2.checkpoint.detection_checkpoint]: [DetectionCheckpointer] Loading from model_final.pth ...


backbone.bottom_up.stem.conv1.weight


In [None]:
def create_plot(output_name, image_dir, sample=3):
    """Plot predictions for randomly chosen images and save the figure.

    Each chosen image fills one row of three panels: the input image, the
    predicted masks drawn as soft masks, and the full instance predictions
    drawn by the Visualizer. Uses the notebook-level ``predictor``.

    Args:
        output_name (str): file the figure is saved to.
        image_dir (str): directory whose entries are sampled as images.
        sample (int): number of images to plot; capped at the number of
            entries in ``image_dir``.

    Raises:
        ValueError: if there is nothing to plot or a chosen file cannot be
            read as an image.
    """
    images = os.listdir(image_dir)

    # random.sample raises when asked for more items than exist, so cap the row count
    n_rows = min(sample, len(images))
    if n_rows < 1:
        raise ValueError(f"No images to plot from {image_dir!r} (sample={sample})")

    fig, axes = plt.subplots(n_rows, 3, figsize=(30, n_rows * 10), squeeze=False)

    for row, image_name in enumerate(random.sample(images, n_rows)):
        image_path = os.path.join(image_dir, image_name)
        img = cv2.imread(image_path)  # This model takes BGR input
        # cv2.imread returns None instead of raising for unreadable files
        if img is None:
            raise ValueError(f"Could not read image: {image_path}")
        img = cv2.resize(img, (1024, 1024))
        # matplotlib and the Visualizer both work in RGB
        img_rgb = img[:, :, ::-1]

        # Append the all-zero depth placeholder, sized from the image itself
        depth = np.zeros((img.shape[0], img.shape[1], 1), dtype=np.uint8)
        outputs = predictor(np.concatenate([img, depth], axis=2))
        instances = outputs["instances"].to("cpu")

        v = Visualizer(img_rgb,
                       scale=1,
                       instance_mode=ColorMode.IMAGE_BW)

        # Plot the native image
        axes[row, 0].set_title(f"Image-{image_name}")
        axes[row, 0].imshow(img_rgb)

        # Clear segmentation: masks only
        for mask in instances.pred_masks:
            v.draw_soft_mask(mask)

        # The Visualizer was given RGB, so its output is shown without a channel flip
        axes[row, 1].set_title("Clear Segmentation")
        axes[row, 1].imshow(v.get_output().get_image())

        # Segmentation with bounding boxes and probability
        out = v.draw_instance_predictions(instances)

        axes[row, 2].set_title("Segmentation")
        axes[row, 2].imshow(out.get_image())

    fig.savefig(output_name)

In [None]:
# Plot predictions for 5 randomly sampled images from /content/data/images and save the figure
create_plot('segment55ation.pdf', image_dir='/content/data/images', sample=5)