In [None]:
import os

In [None]:
from tqdm import tqdm, tnrange
import shutil
import csv

# Data Loading 

In [None]:
# Dataset root directory, given relative to the notebook's working directory.
data_root = "../imaterialist-fashion-2020-fgvc7"

# exist_ok=True keeps this cell re-runnable: an already existing directory
# is left untouched instead of raising.
os.makedirs(name=data_root, exist_ok=True)

In [None]:
# Locations of the COCO-format annotation files inside the dataset root.
train_json_path, validation_json_path = (
    os.path.join(data_root, json_name)
    for json_name in ('train2.json', 'validation2.json')
)

# NOTE(review): the calls below are commented out, so the two json files are
# presumably produced elsewhere before this notebook runs — confirm.
# `generate_COCO_formatted_json` and its inputs are not defined in the visible
# part of this notebook; the calls are kept for reference only.

# train json in COCO format:
#generate_COCO_formatted_json(boxes=backpack_boxes, 
#                             mask_dir=backpack_mask_dir, 
#                             image_dir=backpack_image_dir, 
#                             image_list=train_images, 
#                             json_filepath=train_json_path)

# validation json in COCO format:
#generate_COCO_formatted_json(boxes=backpack_boxes, 
#                             mask_dir=backpack_mask_dir, 
#                             image_dir=backpack_image_dir, 
#                             image_list=val_images, 
#                             json_filepath=validation_json_path)

In [None]:
# Show the resolved training annotation path as the cell output.
train_json_path

# Train using Detectron2

In [None]:
import detectron2
from detectron2.utils.logger import setup_logger
# Set up detectron2's logger with its default arguments before any other
# detectron2 call is made.
setup_logger()

In [None]:
# import some common detectron2 utilities

# model_zoo provides many pre-trained models
from detectron2 import model_zoo

# DefaultTrainer is a class for training instance segmentation model and inference
from detectron2.engine import DefaultTrainer, DefaultPredictor

# detectron2 has its configuration format
from detectron2.config import get_cfg

# detectron2 has implemented Visualizer of object detection
from detectron2.utils.visualizer import Visualizer, ColorMode

# detectron2 gets a dataset from DatasetCatalog and the dataset's metadata from MetadataCatalog
from detectron2.data import DatasetCatalog, MetadataCatalog

# Registers COCO format datasets
from detectron2.data.datasets import register_coco_instances

# COCOEvaluator based on COCO evaluation metric, inference_on_dataset is used for evaluation for a given metric
from detectron2.evaluation import COCOEvaluator, inference_on_dataset

# build_detection_test_loader, used to create test loader for evaluation
from detectron2.data import build_detection_test_loader

In [None]:
# import some common libraries
import numpy as np
import cv2
import random
import os
import json
import matplotlib.pyplot as plt

### Register Dataset 
* As the dataset is already in COCO format, we can use the following method 
    > ```register_coco_instances(name, metadata, json_file, image_root)```

* This method internally calls the <b>load_coco_json</b> method, registers the dataset, and also adds its metadata.

In [None]:
# Both splits read their images from the same folder.
backpack_image_dir = os.path.join(data_root, 'train2')
train_data_name = 'backpack_train'
val_data_name = 'backpack_val'

# Registering a name that is already in DatasetCatalog raises, which made this
# cell fail whenever it was re-run in the same kernel. Only register names the
# catalog does not know yet.
# NOTE: if the json or image paths change, restart the kernel so the datasets
# are registered again with the new paths.
for _split_name, _split_json in ((train_data_name, train_json_path),
                                 (val_data_name, validation_json_path)):
    if _split_name not in DatasetCatalog.list():
        register_coco_instances(_split_name, {}, _split_json, backpack_image_dir)

In [None]:
# Sanity check of the registered training set: draw the dataset annotations
# of three randomly chosen images.
backpack_train_metadata = MetadataCatalog.get(train_data_name)
dataset_dicts = DatasetCatalog.get(name=train_data_name)

for sample in random.sample(dataset_dicts, 3):
    image_bgr = cv2.imread(sample["file_name"])
    # The channel axis is reversed before the image is handed to Visualizer
    # (OpenCV loads images as BGR).
    annotated = Visualizer(
        image_bgr[:, :, ::-1],
        metadata=backpack_train_metadata,
        scale=0.5,
    ).draw_dataset_dict(sample)
    plt.figure(figsize=(12, 12))
    plt.imshow(annotated.get_image())
    plt.show()

## Set Config and Import model files 
Get default configuration of detectron2 and update parameters according to our requirements.

Get more details of the configuration <a  href="https://detectron2.readthedocs.io/modules/config.html#">here</a>.

In [None]:
# Model-zoo entry used for both the architecture config and the initial weights.
mask_rcnn_config = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"

# Start from detectron2's default configuration and overlay the Mask R-CNN one.
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(mask_rcnn_config))

# Let training initialize from the model-zoo checkpoint of the same config.
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(mask_rcnn_config)

# Replace the default datasets with the ones registered in this notebook:
# the train split for training, the validation split as the test dataset.
cfg.DATASETS.TRAIN = (train_data_name,)
cfg.DATASETS.TEST = (val_data_name,)

# Data loader configuration.
cfg.DATALOADER.NUM_WORKERS = 2

# Solver: batch size, learning rate and training length.
# detectron2 takes the number of training iterations, not the number of epochs.
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.0005
cfg.SOLVER.MAX_ITER = 40000

# ROI heads.
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
# Number of output classes.
# NOTE(review): presumably this has to match the number of categories in the
# registered COCO json — confirm that 351 is correct for train2.json.
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 351

## Training 

In [None]:
# Create the output directory (cfg.OUTPUT_DIR) if it does not exist yet.
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

In [None]:
# Training

# Create a trainer instance with the configuration.
trainer = DefaultTrainer(cfg) 

# resume=False because there is no trained model yet: the weights are taken
# from cfg.MODEL.WEIGHTS (the model-zoo URL), downloaded and loaded.
trainer.resume_or_load(resume=False)

# Start training.
trainer.train()

## Inference using the trained model 

In [None]:
# Inference with the fine-tuned model.

# Metadata of the validation split; used when drawing results.
backpack_test_metadata = MetadataCatalog.get(val_data_name)

# The validation split is the test dataset.
cfg.DATASETS.TEST = (val_data_name,)

# Score threshold applied by the ROI heads at test time.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.9

# Load the fine-tuned weights instead of the model-zoo checkpoint.
# By default detectron2 saves the final model as model_final.pth in the
# output directory.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")

# Predictor built from the updated configuration; it runs prediction on a
# single image per call.
predictor = DefaultPredictor(cfg)

In [None]:
# Look at the model's predictions on three random validation images.
test_dataset_dicts = DatasetCatalog.get(val_data_name)

for d in random.sample(test_dataset_dicts, 3):
    print(d["file_name"])
    img = cv2.imread(d["file_name"])
    # Run the fine-tuned model on the image as loaded by OpenCV, the same way
    # inference_on_video feeds frames to the predictor.
    outputs = predictor(img)
    visualizer = Visualizer(img[:, :, ::-1], metadata=backpack_test_metadata, scale=0.5)
    # Draw the predicted instances. Drawing the dataset dict here would only
    # show the ground-truth annotations and never exercise the trained model.
    vis = visualizer.draw_instance_predictions(outputs["instances"].to("cpu"))
    plt.figure(figsize=(12, 12))
    plt.imshow(vis.get_image())
    plt.show()

## Inference on Video

In [None]:
def inference_on_video(video_path, out_video_path, frame_stride=5, fps=10):
    """Run the predictor on a video and save the annotated frames as a new video.

    Every `frame_stride`-th frame of the input is passed to the module-level
    `predictor`, the predicted instances are drawn with `Visualizer` (using
    `backpack_test_metadata`, scale 0.8) and the result is appended to the
    output video. Frames in between are skipped entirely, so the output
    contains only the processed frames.

    Args:
        video_path: path of the input video.
        out_video_path: path of the video to write (encoded with 'mp4v').
        frame_stride: process one frame out of this many; must be >= 1.
        fps: frame rate of the written video.

    Raises:
        ValueError: if `frame_stride` is smaller than 1.
        RuntimeError: if no frame could be read from `video_path`
            (missing, unreadable or empty video).
    """
    if frame_stride < 1:
        raise ValueError("frame_stride must be >= 1, got {!r}".format(frame_stride))

    cap = cv2.VideoCapture(video_path)
    out = None  # created lazily: the frame size is only known after the first drawn frame
    cnt = 0

    try:
        while True:
            ret, im = cap.read()

            if not ret:
                break

            if cnt % frame_stride == 0:
                outputs = predictor(im)
                v = Visualizer(im[:, :, ::-1],
                               metadata=backpack_test_metadata,
                               scale=0.8)
                v = v.draw_instance_predictions(outputs["instances"].to("cpu"))
                # Reverse the channel axis again so the frame is back in the
                # channel order OpenCV writes.
                frame = v.get_image()[:, :, ::-1]

                if out is None:
                    height, width, _ = frame.shape
                    out = cv2.VideoWriter(out_video_path,
                                          cv2.VideoWriter_fourcc(*'mp4v'),
                                          fps,
                                          (width, height))

                # Write each frame as soon as it is ready instead of keeping
                # the whole annotated video in memory.
                out.write(frame)

            cnt = cnt + 1
    finally:
        # Always free the capture and finalize the output file, even if the
        # predictor or the writer raises midway.
        cap.release()
        if out is not None:
            out.release()

    if out is None:
        raise RuntimeError("No frames could be read from {!r}".format(video_path))

In [None]:
# Run inference on bag_shopping.mp4 from the dataset root; the annotated video
# is written next to it as bag_shopping_output.mp4.
video_path = os.path.join(data_root, "bag_shopping.mp4")
output_path = os.path.join(data_root, "bag_shopping_output.mp4")

inference_on_video(video_path, output_path)