# 0. Import Library

In [1]:
import os
if not os.path.exists("./tfdet"):
    !git clone -q http://github.com/burf/tfdetection.git
    !mv ./tfdetection/tfdet ./tfdet
    !rm -rf ./tfdetection

In [2]:
# Silence warnings and log noise, then import TensorFlow and tfdet.
import warnings, os
# Ignore every Python-level warning for the rest of the session.
warnings.filterwarnings(action = "ignore")
# Set before `import tensorflow` below; presumably this lowers TensorFlow's
# native log verbosity and must be in place at import time -- TODO confirm.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
import tensorflow as tf
# Restrict the TF1-compat Python logger to the ERROR level.
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

# Imported from the ./tfdet directory; the first cell fetches it when it is missing.
import tfdet

# 1. Load Dataset

In [3]:
import os
path = "./balloon"
if not os.path.exists(path):
    !wget -qq --no-check-certificate http://github.com/matterport/Mask_RCNN/releases/download/v2.1/balloon_dataset.zip
    !unzip -qq balloon_dataset.zip
print(tfdet.dataset.util.tree_dir(path))

balloon/
    train/
        605521662_a470fef77f_b.jpg
        9330497995_4cf0438cb6_k.jpg
        5178670692_63a4365c9c_b.jpg
        ...
    val/
        4838031651_3e7b5ea5c7_b.jpg
        16335852991_f55de7958d_k.jpg
        24631331976_defa3bb61f_k.jpg
        ...


In [4]:
# Class names shipped with the balloon dataset module.
label = tfdet.dataset.balloon.LABEL

# Annotation files of the train / validation splits.
tr_path = os.path.join(path, "train/via_region_data.json")
te_path = os.path.join(path, "val/via_region_data.json")

# One raw pipeline per split, with masks requested.
tr_pipe = tfdet.dataset.balloon.load_pipe(tr_path, mask = True)
te_pipe = tfdet.dataset.balloon.load_pipe(te_path, mask = True)

# Pull a single element to inspect what the raw pipeline yields.
# The first three entries are always unpacked; a fourth entry, when present,
# is treated as the mask.
out = next(iter(tr_pipe))
image, y_true, bbox_true = out[:3]
mask_true = None
if len(out) > 3:
    mask_true = out[3]

# NOTE(review): the recorded output shows an empty shape for x_true here,
# presumably because the image has not been loaded yet -- confirm.
if mask_true is not None:
    print("x_true:{0}, y_true:{1}, bbox_true:{2}, mask_true:{3}".format(image.shape, y_true.shape, bbox_true.shape, mask_true.shape))
else:
    print("x_true:{0}, y_true:{1}, bbox_true:{2}".format(image.shape, y_true.shape, bbox_true.shape))

x_true:(), y_true:(1, 1), bbox_true:(1, 4), mask_true:(1, 1536, 2048, 1)


# 2. Default Pipeline

In [5]:
# Baseline training pipeline:
# load -> resize -> filter -> label encode -> normalize -> pad -> cast -> batch.
tr_pipe = tfdet.dataset.balloon.load_pipe(tr_path, mask = True)
tr_pipe = tfdet.dataset.pipeline.load(tr_pipe)
# Optional step, disabled here (original note: for train_model to object detection):
#tr_pipe = tfdet.dataset.pipeline.args2dict(tr_pipe)
tr_pipe = tfdet.dataset.pipeline.resize(
    tr_pipe,
    image_shape = [512, 512],
    keep_ratio = True,
)
tr_pipe = tfdet.dataset.pipeline.filter_annotation(
    tr_pipe,
    min_scale = 2,
    min_instance_area = 1,
)
tr_pipe = tfdet.dataset.pipeline.label_encode(tr_pipe, label = label)
tr_pipe = tfdet.dataset.pipeline.normalize(
    tr_pipe,
    mean = [123.675, 116.28, 103.53],
    std = [58.395, 57.12, 57.375],
    bbox_normalize = True,
)
tr_pipe = tfdet.dataset.pipeline.pad(
    tr_pipe,
    image_shape = [512, 512],
    max_pad_size = 100,
    mode = "both",
)
# Every output is cast to float32.
tr_pipe = tfdet.dataset.pipeline.cast(
    tr_pipe,
    map = dict.fromkeys(["x_true", "y_true", "bbox_true", "mask_true"], tf.float32),
)
tr_pipe = tr_pipe.batch(4)
tr_pipe = tr_pipe.prefetch(1)

# Inspect the shapes of the first batch; the mask is the optional fourth entry.
out = next(iter(tr_pipe))
image, y_true, bbox_true = out[:3]
mask_true = None
if len(out) > 3:
    mask_true = out[3]

if mask_true is not None:
    print("x_true:{0}, y_true:{1}, bbox_true:{2}, mask_true:{3}".format(image.shape, y_true.shape, bbox_true.shape, mask_true.shape))
else:
    print("x_true:{0}, y_true:{1}, bbox_true:{2}".format(image.shape, y_true.shape, bbox_true.shape))

x_true:(4, 512, 512, 3), y_true:(4, 100, 1), bbox_true:(4, 100, 4), mask_true:(4, 100, 512, 512, 1)


# 3. Augmentation

3-1. Default

In [6]:
# Baseline pipeline with tfdet's weak augmentation inserted right after loading.
tr_pipe = tfdet.dataset.balloon.load_pipe(tr_path, mask = True)
tr_pipe = tfdet.dataset.pipeline.load(tr_pipe)
# Optional step, disabled here (original note: for train_model to object detection):
#tr_pipe = tfdet.dataset.pipeline.args2dict(tr_pipe)
# If crop_shape is a shape or a ratio, random_crop is applied; it is None here.
tr_pipe = tfdet.dataset.pipeline.weak_augmentation(
    tr_pipe,
    crop_shape = None,
    p_flip = 0.5,
    min_area = 0.,
    min_visibility = 0.,
)
tr_pipe = tfdet.dataset.pipeline.resize(
    tr_pipe,
    image_shape = [512, 512],
    keep_ratio = True,
)
tr_pipe = tfdet.dataset.pipeline.filter_annotation(
    tr_pipe,
    min_scale = 2,
    min_instance_area = 1,
)
tr_pipe = tfdet.dataset.pipeline.label_encode(tr_pipe, label = label)
tr_pipe = tfdet.dataset.pipeline.normalize(
    tr_pipe,
    mean = [123.675, 116.28, 103.53],
    std = [58.395, 57.12, 57.375],
    bbox_normalize = True,
)
tr_pipe = tfdet.dataset.pipeline.pad(
    tr_pipe,
    image_shape = [512, 512],
    max_pad_size = 100,
    mode = "both",
)
# Every output is cast to float32.
tr_pipe = tfdet.dataset.pipeline.cast(
    tr_pipe,
    map = dict.fromkeys(["x_true", "y_true", "bbox_true", "mask_true"], tf.float32),
)
tr_pipe = tr_pipe.batch(4)
tr_pipe = tr_pipe.prefetch(1)

# Inspect the shapes of the first batch; the mask is the optional fourth entry.
out = next(iter(tr_pipe))
image, y_true, bbox_true = out[:3]
mask_true = None
if len(out) > 3:
    mask_true = out[3]

if mask_true is not None:
    print("x_true:{0}, y_true:{1}, bbox_true:{2}, mask_true:{3}".format(image.shape, y_true.shape, bbox_true.shape, mask_true.shape))
else:
    print("x_true:{0}, y_true:{1}, bbox_true:{2}".format(image.shape, y_true.shape, bbox_true.shape))

x_true:(4, 512, 512, 3), y_true:(4, 100, 1), bbox_true:(4, 100, 4), mask_true:(4, 100, 512, 512, 1)


3-2. Albumentations

In [7]:
import albumentations as A
import cv2

# Baseline pipeline with a list of Albumentations transforms applied right
# after loading. Each transform is given p = 0.1.
tr_pipe = tfdet.dataset.balloon.load_pipe(tr_path, mask = True)
tr_pipe = tfdet.dataset.pipeline.load(tr_pipe)
# Optional step, disabled here (original note: for train_model to object detection):
#tr_pipe = tfdet.dataset.pipeline.args2dict(tr_pipe)
tr_pipe = tfdet.dataset.pipeline.albumentations(
    tr_pipe,
    transform = [
        A.CLAHE(p = 0.1, clip_limit = 4., tile_grid_size = (8, 8)),
        A.RandomBrightnessContrast(p = 0.1, brightness_limit = 0.2, contrast_limit = 0.2),
        A.RandomGamma(p = 0.1, gamma_limit = [80, 120]),
        A.Blur(p = 0.1),
        A.MedianBlur(p = 0.1),
        A.ToGray(p = 0.1),
        A.RGBShift(p = 0.1, r_shift_limit = 10, g_shift_limit = 10, b_shift_limit = 10),
        A.HueSaturationValue(p = 0.1, hue_shift_limit = 10, sat_shift_limit = 40, val_shift_limit = 50),
        A.ChannelShuffle(p = 0.1),
        # Geometric transforms, left disabled:
        #A.ShiftScaleRotate(p = 0.1, rotate_limit = 30, shift_limit = 0.0625, scale_limit = 0.1, interpolation = cv2.INTER_LINEAR, border_mode = cv2.BORDER_CONSTANT),
        #A.RandomResizedCrop(p = 0.1, height = 512, width = 512, scale = [0.8, 1.0], ratio = [0.9, 1.11]),
        A.ImageCompression(p = 0.1, quality_lower = 75),
    ],
    min_area = 0.,
    min_visibility = 0.,
)
tr_pipe = tfdet.dataset.pipeline.resize(
    tr_pipe,
    image_shape = [512, 512],
    keep_ratio = True,
)
tr_pipe = tfdet.dataset.pipeline.filter_annotation(
    tr_pipe,
    min_scale = 2,
    min_instance_area = 1,
)
tr_pipe = tfdet.dataset.pipeline.label_encode(tr_pipe, label = label)
tr_pipe = tfdet.dataset.pipeline.normalize(
    tr_pipe,
    mean = [123.675, 116.28, 103.53],
    std = [58.395, 57.12, 57.375],
    bbox_normalize = True,
)
tr_pipe = tfdet.dataset.pipeline.pad(
    tr_pipe,
    image_shape = [512, 512],
    max_pad_size = 100,
    mode = "both",
)
# Every output is cast to float32.
tr_pipe = tfdet.dataset.pipeline.cast(
    tr_pipe,
    map = dict.fromkeys(["x_true", "y_true", "bbox_true", "mask_true"], tf.float32),
)
tr_pipe = tr_pipe.batch(4)
tr_pipe = tr_pipe.prefetch(1)

# Inspect the shapes of the first batch; the mask is the optional fourth entry.
out = next(iter(tr_pipe))
image, y_true, bbox_true = out[:3]
mask_true = None
if len(out) > 3:
    mask_true = out[3]

if mask_true is not None:
    print("x_true:{0}, y_true:{1}, bbox_true:{2}, mask_true:{3}".format(image.shape, y_true.shape, bbox_true.shape, mask_true.shape))
else:
    print("x_true:{0}, y_true:{1}, bbox_true:{2}".format(image.shape, y_true.shape, bbox_true.shape))

x_true:(4, 512, 512, 3), y_true:(4, 100, 1), bbox_true:(4, 100, 4), mask_true:(4, 100, 512, 512, 1)


3-3. MMDet

In [8]:
import albumentations as A
import cv2

# Pipeline using tfdet's mmdet_augmentation in place of the separate resize step.
tr_pipe = tfdet.dataset.balloon.load_pipe(tr_path, mask = True)
tr_pipe = tfdet.dataset.pipeline.load(tr_pipe)
# Optional steps, disabled here (original note on args2dict: for train_model to object detection):
#tr_pipe = tfdet.dataset.pipeline.args2dict(tr_pipe)
#tr_pipe = tfdet.dataset.pipeline.albumentations(tr_pipe)
# Alternative configuration (ranged resize followed by a 512x512 crop), disabled:
#tr_pipe = tfdet.dataset.pipeline.mmdet_augmentation(tr_pipe, image_shape = [int(512 * 0.1), int(512 * 2)], crop_shape = [512, 512], resize_mode = "range", keep_ratio = True, shape_divisor = 32, p_flip = 0.5)
tr_pipe = tfdet.dataset.pipeline.mmdet_augmentation(
    tr_pipe,
    image_shape = [512, 512],
    crop_shape = None,
    keep_ratio = True,
    shape_divisor = 32,
    p_flip = 0.5,
)
tr_pipe = tfdet.dataset.pipeline.filter_annotation(
    tr_pipe,
    min_scale = 2,
    min_instance_area = 1,
)
tr_pipe = tfdet.dataset.pipeline.label_encode(tr_pipe, label = label)
tr_pipe = tfdet.dataset.pipeline.normalize(
    tr_pipe,
    mean = [123.675, 116.28, 103.53],
    std = [58.395, 57.12, 57.375],
    bbox_normalize = True,
)
tr_pipe = tfdet.dataset.pipeline.pad(
    tr_pipe,
    image_shape = [512, 512],
    max_pad_size = 100,
    mode = "both",
)
# Every output is cast to float32.
tr_pipe = tfdet.dataset.pipeline.cast(
    tr_pipe,
    map = dict.fromkeys(["x_true", "y_true", "bbox_true", "mask_true"], tf.float32),
)
tr_pipe = tr_pipe.batch(4)
tr_pipe = tr_pipe.prefetch(1)

# Inspect the shapes of the first batch; the mask is the optional fourth entry.
out = next(iter(tr_pipe))
image, y_true, bbox_true = out[:3]
mask_true = None
if len(out) > 3:
    mask_true = out[3]

if mask_true is not None:
    print("x_true:{0}, y_true:{1}, bbox_true:{2}, mask_true:{3}".format(image.shape, y_true.shape, bbox_true.shape, mask_true.shape))
else:
    print("x_true:{0}, y_true:{1}, bbox_true:{2}".format(image.shape, y_true.shape, bbox_true.shape))

x_true:(4, 512, 512, 3), y_true:(4, 100, 1), bbox_true:(4, 100, 4), mask_true:(4, 100, 512, 512, 1)


3-4. Yolo

In [9]:
# Pipeline using tfdet's yolo_augmentation. The resized pipeline is passed both
# as the input and as the sample source (sample_x_true), with "./sample_cache"
# given as the sample cache path.
tr_pipe = tfdet.dataset.balloon.load_pipe(tr_path, mask = True)
tr_pipe = tfdet.dataset.pipeline.load(tr_pipe)
# Optional step, disabled here (original note: for train_model to object detection):
#tr_pipe = tfdet.dataset.pipeline.args2dict(tr_pipe)
tr_pipe = tfdet.dataset.pipeline.resize(
    tr_pipe,
    image_shape = [512, 512],
    keep_ratio = True,
)
# Alternative, disabled: cache the sample pipeline explicitly instead of passing sample_cache.
#tr_pipe = tfdet.dataset.pipeline.yolo_augmentation(tr_pipe, sample_x_true = tr_pipe.cache("./sample_cache"), image_shape = [512, 512], p_mix_up = 0.15, p_copy_paste = 0.0, scale = 0.5, min_area = 0., min_visibility = 0.)
tr_pipe = tfdet.dataset.pipeline.yolo_augmentation(
    tr_pipe,
    sample_x_true = tr_pipe,
    sample_cache = "./sample_cache",
    image_shape = [512, 512],
    p_mix_up = 0.15,
    p_copy_paste = 0.0,
    scale = 0.5,
    min_area = 0.,
    min_visibility = 0.,
)
tr_pipe = tfdet.dataset.pipeline.filter_annotation(
    tr_pipe,
    min_scale = 2,
    min_instance_area = 1,
)
tr_pipe = tfdet.dataset.pipeline.label_encode(tr_pipe, label = label)
tr_pipe = tfdet.dataset.pipeline.normalize(
    tr_pipe,
    mean = [123.675, 116.28, 103.53],
    std = [58.395, 57.12, 57.375],
    bbox_normalize = True,
)
# Pads to 200 annotation slots here (the other training cells use 100) and
# passes no explicit mode.
tr_pipe = tfdet.dataset.pipeline.pad(
    tr_pipe,
    image_shape = [512, 512],
    max_pad_size = 200,
)
# Every output is cast to float32.
tr_pipe = tfdet.dataset.pipeline.cast(
    tr_pipe,
    map = dict.fromkeys(["x_true", "y_true", "bbox_true", "mask_true"], tf.float32),
)
tr_pipe = tr_pipe.batch(4)
tr_pipe = tr_pipe.prefetch(1)

# Inspect the shapes of the first batch; the mask is the optional fourth entry.
out = next(iter(tr_pipe))
image, y_true, bbox_true = out[:3]
mask_true = None
if len(out) > 3:
    mask_true = out[3]

if mask_true is not None:
    print("x_true:{0}, y_true:{1}, bbox_true:{2}, mask_true:{3}".format(image.shape, y_true.shape, bbox_true.shape, mask_true.shape))
else:
    print("x_true:{0}, y_true:{1}, bbox_true:{2}".format(image.shape, y_true.shape, bbox_true.shape))
    
# Remove the cache files after training: delete every entry in the working
# directory whose name, minus its last extension, is "sample_cache".
# The extension itself is not checked.
import os
for filename in os.listdir("./"):
    stem = os.path.splitext(filename)[0]
    if stem == "sample_cache":
        os.remove(filename)

x_true:(4, 512, 512, 3), y_true:(4, 200, 1), bbox_true:(4, 200, 4), mask_true:(4, 200, 512, 512, 1)


# 4. Test Time Augmentation (TTA)

In [10]:
# Test-time augmentation on the validation split. No resize/pad/batch steps
# appear here; image shapes, padding size, batch size and prefetch are all
# passed to multi_scale_flip instead.
te_pipe = tfdet.dataset.balloon.load_pipe(te_path, mask = True)
te_pipe = tfdet.dataset.pipeline.load(te_pipe)
# Optional step, disabled here (original note: for train_model to object detection):
#te_pipe = tfdet.dataset.pipeline.args2dict(te_pipe)
te_pipe = tfdet.dataset.pipeline.filter_annotation(
    te_pipe,
    min_scale = 2,
    min_instance_area = 1,
)
te_pipe = tfdet.dataset.pipeline.label_encode(te_pipe, label = label)
te_pipe = tfdet.dataset.pipeline.normalize(
    te_pipe,
    mean = [123.675, 116.28, 103.53],
    std = [58.395, 57.12, 57.375],
    bbox_normalize = True,
)
# Every output is cast to float32.
te_pipe = tfdet.dataset.pipeline.cast(
    te_pipe,
    map = dict.fromkeys(["x_true", "y_true", "bbox_true", "mask_true"], tf.float32),
)
te_pipe = tfdet.dataset.pipeline.multi_scale_flip(
    te_pipe,
    image_shape = [[512, 512], [1024, 1024]],
    keep_ratio = True,
    flip_mode = ["horizontal"],
    max_pad_size = 100,
    batch_size = 4,
    prefetch = True,
)

# Walk the whole pipeline and print one shape line per batch; the mask is the
# optional fourth entry of each batch.
iter_data = iter(te_pipe)
for out in iter_data:
    image, y_true, bbox_true = out[:3]
    mask_true = None
    if len(out) > 3:
        mask_true = out[3]

    if mask_true is not None:
        print("x_true:{0}, y_true:{1}, bbox_true:{2}, mask_true:{3}".format(image.shape, y_true.shape, bbox_true.shape, mask_true.shape))
    else:
        print("x_true:{0}, y_true:{1}, bbox_true:{2}".format(image.shape, y_true.shape, bbox_true.shape))

x_true:(4, 512, 512, 3), y_true:(4, 100, 1), bbox_true:(4, 100, 4), mask_true:(4, 100, 512, 512, 1)
x_true:(4, 512, 512, 3), y_true:(4, 100, 1), bbox_true:(4, 100, 4), mask_true:(4, 100, 512, 512, 1)
x_true:(4, 512, 512, 3), y_true:(4, 100, 1), bbox_true:(4, 100, 4), mask_true:(4, 100, 512, 512, 1)
x_true:(1, 512, 512, 3), y_true:(1, 100, 1), bbox_true:(1, 100, 4), mask_true:(1, 100, 512, 512, 1)
x_true:(4, 512, 512, 3), y_true:(4, 100, 1), bbox_true:(4, 100, 4), mask_true:(4, 100, 512, 512, 1)
x_true:(4, 512, 512, 3), y_true:(4, 100, 1), bbox_true:(4, 100, 4), mask_true:(4, 100, 512, 512, 1)
x_true:(4, 512, 512, 3), y_true:(4, 100, 1), bbox_true:(4, 100, 4), mask_true:(4, 100, 512, 512, 1)
x_true:(1, 512, 512, 3), y_true:(1, 100, 1), bbox_true:(1, 100, 4), mask_true:(1, 100, 512, 512, 1)
x_true:(4, 1024, 1024, 3), y_true:(4, 100, 1), bbox_true:(4, 100, 4), mask_true:(4, 100, 1024, 1024, 1)
x_true:(4, 1024, 1024, 3), y_true:(4, 100, 1), bbox_true:(4, 100, 4), mask_true:(4, 100, 1024, 1