### Inspect and Clean up the Dataset
Procedures follow the example on the Mask_RCNN GitHub Page: https://github.com/matterport/Mask_RCNN/blob/master/samples/balloon/inspect_balloon_data.ipynb

In [5]:
import os
import sys
import itertools
import math
import logging
import json
import re
import random
from collections import OrderedDict
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.lines as lines
from matplotlib.patches import Polygon

"""
Need to Install mrcnn and detectron2
"""

import detectron2

# Root directory of the project
ROOT_DIR = os.path.abspath("../../")
# Import Mask RCNN
sys.path.append(ROOT_DIR)  # To find local version of the library
from mrcnn import utils
from mrcnn import visualize
from mrcnn.visualize import display_images
import mrcnn.model as modellib
from mrcnn.model import log

from samples.balloon import balloon

%matplotlib inline

ModuleNotFoundError: No module named 'mrcnn'

### Image Resize with Detectron
This is a built-in function in Detectron that can be used to resize the dataset. This resize likely does not use padding.

In [None]:
import detectron2.data.transforms as T

# Set dataloader to resize
from detectron2.data import DatasetMapper   # the default mapper
dataloader = build_detection_train_loader(cfg,
   mapper=DatasetMapper(cfg, is_train=True, augmentations=[
      T.Resize((800, 800))
   ]))

### Image Resize with MRCNN
This piece of code resize the image and the mask altogether

In [None]:
# Load random image and mask.
image_id = np.random.choice(dataset.image_ids, 1)[0]
image = dataset.load_image(image_id)
mask, class_ids = dataset.load_mask(image_id)
original_shape = image.shape
# Resize
image, window, scale, padding, _ = utils.resize_image(
    image, 
    min_dim=config.IMAGE_MIN_DIM, 
    max_dim=config.IMAGE_MAX_DIM,
    mode=config.IMAGE_RESIZE_MODE)
mask = utils.resize_mask(mask, scale, padding)
# Compute Bounding box
bbox = utils.extract_bboxes(mask)

# Display image and additional stats
print("image_id: ", image_id, dataset.image_reference(image_id))
print("Original shape: ", original_shape)
log("image", image)
log("mask", mask)
log("class_ids", class_ids)
log("bbox", bbox)
# Display image and instances
visualize.display_instances(image, bbox, mask, class_ids, dataset.class_names)

### Convert Our Data to Dictionary and Register to Detectron2
Procedrues follow the tutorial: https://wendeehsu.medium.com/instance-segmentation-with-detectron2-127fbe01b20b <br/>
We first convert our .json annotation files to Detectron2 dictionary.

In [None]:
def get_dicts(img_dir):
    """
    This function takes in our annotated json file and convert it to 
    the dictionary file type that is accepted by Detectron2.
    """
    json_file = os.path.join(img_dir, "via_export_json.json") ### Need to set out own directory and json filename here
    
    with open(json_file) as f:
        imgs_anns = json.load(f)
    dataset_dicts = []
    for idx, v in enumerate(imgs_anns.values()):
        record = {}
        
        filename = os.path.join(img_dir, v["filename"])
        height, width = cv2.imread(filename).shape[:2]
        
        record["file_name"] = filename
        record["image_id"] = idx
        record["height"] = height
        record["width"] = width
      
        annos = v["regions"]
        objs = []
        for anno in annos:
            anno = anno["shape_attributes"]
            px = anno["all_points_x"]
            py = anno["all_points_y"]
            poly = [(x + 0.5, y + 0.5) for x, y in zip(px, py)]
            poly = [p for x in poly for p in x]
            obj = {
                "bbox": [np.min(px), np.min(py), np.max(px), np.max(py)],
                "bbox_mode": BoxMode.XYXY_ABS,
                "segmentation": [poly],
                "category_id": 0,
                "iscrowd": 0
                }
            objs.append(obj)
            print(objs)
        record["annotations"] = objs
        dataset_dicts.append(record)
    return dataset_dicts

Then Register the voltage discharge curve and dqdv curve dataset with Detectron2

In [None]:
from detectron2.data import DatasetCatalog, MetadataCatalog
path = "/content/drive/.../wendee/images" ##### Need to set the path to our own image directory

for file in ["training_data_discharge_curve", "testing_data_discharge_curve"]:
    DatasetCatalog.register("voltage_discharge_curve_" + file, lambda file=file: get_dicts(path + "/" +  file))
    MetadataCatalog.get("voltage_discharge_curve_" + d).set(thing_classes=["DischargeCurve"])

for file in ["training_data_dqdv_curve", "testing_data_dqdv_curve"]:
    DatasetCatalog.register("dqdv_curve_" + file, lambda file=file: get_dicts(path + "/" +  file))
    MetadataCatalog.get("dqdv_curve_" + d).set(thing_classes=["DqdvCurve"])

Visualize the data to see if it's registered okay.

In [6]:
import random
from detectron2.utils.visualizer import Visualizer

dataset_dicts = get_dicts(path + "training_data_discharge_curve")
for data in random.sample(dataset_dicts, 3):
    img = cv2.imread(data["file_name"])
    visualizer = Visualizer(img[:, :, ::-1], metadata=MetadataCatalog.get("voltage_discharge_curve_training_data_discharge_curve"), scale=0.5)
    vis = visualizer.draw_dataset_dict(data)
    cv2_imshow(vis.get_image()[:, :, ::-1])

NameError: name 'd' is not defined