In [1]:
import torch
import torchvision
import cv2

In [1]:
import os
import numpy as np
import json
import random
import matplotlib.pyplot as plt
%matplotlib inline

# pip install 'git+https://github.com/facebookresearch/detectron2.git'
import sys
sys.path.append("detectron2-main/")
from detectron2.structures import BoxMode
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.utils.logger import setup_logger
from detectron2.data.datasets import register_coco_instances

from detectron2 import model_zoo
from detectron2.engine import DefaultTrainer, DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import ColorMode, Visualizer

from detectron2.utils.visualizer import Visualizer
from detectron2.utils.visualizer import ColorMode
import cv2
import pickle

In [11]:
#### Utils ####
def plot_samples(dataset_name, n=1):
    """Display ``n`` randomly chosen records of a registered dataset.

    Each selected record's image is read from its ``"file_name"`` entry and
    shown in its own 5x5 matplotlib figure with the record's annotations
    drawn on top.

    Args:
        dataset_name: Name under which the dataset was registered.
        n: How many records to sample (without replacement).
    """
    records = DatasetCatalog.get(dataset_name)
    metadata = MetadataCatalog.get(dataset_name)

    for record in random.sample(records, n):
        image = cv2.imread(record["file_name"])
        # Reverse the channel axis before handing the image to the Visualizer.
        flipped = image[:, :, ::-1]
        drawn = Visualizer(flipped, metadata=metadata, scale=1).draw_dataset_dict(record)

        plt.figure(figsize=(5, 5))
        plt.imshow(drawn.get_image())
        plt.show()

def get_train_cfg(config_file_path, checkpoint_url, train_dataset_name, test_dataset_name, num_classes, device, output_dir):
    """Build a Detectron2 training config from a model-zoo baseline.

    Args:
        config_file_path: Model-zoo relative path of the base YAML config.
        checkpoint_url: Model-zoo relative path used to resolve the
            pretrained checkpoint URL.
        train_dataset_name: Registered name of the training dataset.
        test_dataset_name: Registered name of the evaluation dataset.
        num_classes: Number of classes for the ROI heads.
        device: Device string the model should run on (e.g. "cuda", "cpu").
        output_dir: Directory assigned to ``cfg.OUTPUT_DIR``.

    Returns:
        The populated config node.
    """
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(config_file_path))
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(checkpoint_url)
    cfg.DATASETS.TRAIN = (train_dataset_name,)
    cfg.DATASETS.TEST = (test_dataset_name,)
    cfg.DATALOADER.NUM_WORKERS = 2
    cfg.SOLVER.IMS_PER_BATCH = 5
    cfg.SOLVER.BASE_LR = 0.00025
    cfg.SOLVER.MAX_ITER = 10000
    # Empty STEPS: no learning-rate decay milestones are configured.
    cfg.SOLVER.STEPS = []
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = num_classes
    # The key is MODEL.DEVICE. The previous misspelling ("deivce") only added
    # an unused key, so the requested device was never applied.
    cfg.MODEL.DEVICE = device
    cfg.OUTPUT_DIR = output_dir

    return cfg

def on_image(image_path, predictor):
    """Run ``predictor`` on one image file and show the predicted instances.

    The raw predictor output is printed, then the predictions are drawn on
    the image and displayed in a 5x5 matplotlib figure.

    Args:
        image_path: Path of the image file to read with OpenCV.
        predictor: Callable that takes the loaded image and returns a dict
            containing an ``"instances"`` entry.
    """
    image = cv2.imread(image_path)
    outputs = predictor(image)
    print(outputs)

    canvas = Visualizer(
        image[:, :, ::-1],  # channel axis reversed for the Visualizer
        metadata={},
        scale=1,
        instance_mode=ColorMode.SEGMENTATION,
    )
    # Predictions are moved to the CPU before drawing.
    rendered = canvas.draw_instance_predictions(outputs["instances"].to("cpu"))

    plt.figure(figsize=(5, 5))
    plt.imshow(rendered.get_image())
    plt.show()


In [4]:
## If doing instance segmentation:
# Model-zoo config and checkpoint both come from the same Mask R-CNN baseline.
config_file_path = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
checkpoint_url = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
output_dir = "./output"
num_classes = 5

device = "cuda"

train_dataset_name = "LP_train"
train_images_path = "train"
train_json_annot_path = "train/_annotations.coco.json"

# NOTE(review): this split is registered as "LP_valid" but reads from the
# "test" directory - confirm which split is intended.
test_dataset_name = "LP_valid"
test_images_path = "test"
test_json_annot_path = "test/_annotations.coco.json"

cfg_save_path = "IS_cfg.pickle"

# Registering a name twice fails, so only register datasets that are not in
# the catalog yet. This keeps the cell safe to re-run in a live kernel.
if train_dataset_name not in DatasetCatalog.list():
    register_coco_instances(name=train_dataset_name, metadata={}, json_file=train_json_annot_path, image_root=train_images_path)
if test_dataset_name not in DatasetCatalog.list():
    register_coco_instances(name=test_dataset_name, metadata={}, json_file=test_json_annot_path, image_root=test_images_path)

# plot_samples(train_dataset_name)

cfg = get_train_cfg(config_file_path, checkpoint_url, train_dataset_name, test_dataset_name, num_classes, device, output_dir)

# Persist the config so the inference cell can reload exactly what was trained.
with open(cfg_save_path, 'wb') as f:
    pickle.dump(cfg, f, protocol=pickle.HIGHEST_PROTOCOL)

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)



In [5]:
# Build the trainer from the config prepared in the setup cell.
trainer = DefaultTrainer(cfg)

# resume=True: presumably continues from the latest checkpoint in
# cfg.OUTPUT_DIR when one exists - confirm against the Detectron2 docs.
trainer.resume_or_load(resume=True)

trainer.train()

[32m[09/19 05:32:09 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[32m[09/19 05:32:45 d2.utils.events]: [0m eta: 3:56:50  iter: 2019  total_loss: 0.4494  loss_cls: 0.1049  loss_box_reg: 0.08401  loss_mask: 0.2481  loss_rpn_cls: 0.0006694  loss_rpn_loc: 0.01058  time: 1.5509  data_time: 0.0870  lr: 0.00025  max_mem: 3855M
[32m[09/19 05:33:04 d2.utils.events]: [0m eta: 3:05:11  iter: 2039  total_loss: 0.4275  loss_cls: 0.09861  loss_box_reg: 0.07769  loss_mask: 0.2288  loss_rpn_cls: 0.0007763  loss_rpn_loc: 0.01011  time: 1.2515  data_time: 0.0124  lr: 0.00025  max_mem: 3855M
[32m[09/19 05:33:23 d2.utils.events]: [0m eta: 1:33:49  iter: 2059  total_loss: 0.4386  loss_cls: 0.09588  loss_box_reg: 0.07568  loss_mask: 0.2352  loss_rpn_cls: 0.0007175  loss_rpn_loc: 0.01004  time: 1.1441  data_time: 0.0145  lr: 0.00025  max_mem: 3855M
[32m[09/19 05:33:42 d2.utils.events]: [0m eta: 1:25:58  iter: 2079  total_loss: 0.439  loss_cls: 0.105  loss_box_reg: 0.08237  loss_mask: 0.2392  loss_rpn_cls: 0.002165  loss_rpn_loc: 0.01022  time: 1.0895  data_time: 0

In [30]:

# Reload the configuration that the setup cell pickled.
cfg_save_path = "IS_cfg.pickle"
with open(cfg_save_path, "rb") as f:
    cfg = pickle.load(f)

# Switch to the weights written to the output directory, and set the score
# threshold used at test time.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.4

predictor = DefaultPredictor(cfg)

def test_output(dataset_name, n=1):
    """Run the notebook-level ``predictor`` on ``n`` random dataset images.

    Each sampled record's ``"file_name"`` is passed to ``on_image``, which
    prints the raw predictions and shows them drawn on the image.

    Args:
        dataset_name: Name under which the dataset was registered.
        n: How many records to sample (without replacement).
    """
    dataset_custom = DatasetCatalog.get(dataset_name)

    for s in random.sample(dataset_custom, n):
        on_image(s["file_name"], predictor)

# test_output(test_dataset_name, 5)
image_path = "1.jpg"
im = cv2.imread(image_path)
# cv2.imread returns None instead of raising when the file cannot be read.
if im is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

outputs = predictor(im)
pred_img = np.zeros(im.shape, dtype = np.uint8)
labels = outputs["instances"].pred_classes.cpu().numpy()
masks = outputs["instances"].pred_masks.cpu().numpy()

# Colour per predicted class id.
# NOTE(review): the tuples are written as-is by cv2.imwrite, which treats
# channels as BGR - confirm the intended colours.
label_map = {
    1: (255, 0, 0),
    2: (0, 255, 0),
    3: (0, 0, 255),
    4: (255, 255, 0),
    5: (0, 255, 255)
}
# Detectron2 class ids are 0-based, so an id can fall outside label_map
# (e.g. 0). Paint those white instead of failing with a KeyError.
unmapped_color = (255, 255, 255)

# Paint each instance with one boolean-mask assignment instead of visiting
# every pixel in Python. Later instances still overwrite earlier ones where
# they overlap.
for class_id, mask in zip(labels, masks):
    pred_img[mask.astype(bool)] = label_map.get(int(class_id), unmapped_color)

cv2.imwrite("out.png", pred_img)


True