# 0. Import Libraries

In [1]:
import os
if not os.path.exists("./tfdet"):
    !git clone -q http://github.com/burf/tfdetection.git
    !mv ./tfdetection/tfdet ./tfdet
    !rm -rf ./tfdetection

In [2]:
# Silence Python warnings and TensorFlow logging before the libraries are used.
import warnings, os
# Ignore every Python warning raised from this point on.
warnings.filterwarnings(action = "ignore")
# Set ahead of the tensorflow import below -- presumably it has to be in the
# environment before TensorFlow loads to take effect; TODO confirm.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
import tensorflow as tf
# Limit the TF1-compat Python logger to ERROR level.
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

# NOTE(review): assumes the setup cell has already placed the package in ./tfdet.
import tfdet

# 1. Init Dataset

In [3]:
#sample dataset
import functools
import numpy as np

# Settings for the synthetic dataset built in this cell.
image_shape = [512, 512]  # leading two dims of every generated image and mask
n_class = 20 + 1  # 20 labels plus one background class
max_pad_size = 100  # rows of labels / boxes / masks generated per sample
total_data_size = 4  # samples produced by one pass over the generator
batch_size = 1  # batch size applied to both pipelines

def load(mask = False):
    """Create one random sample for the demo pipelines.

    Args:
        mask: when true, a random mask array is appended to the sample.

    Returns:
        tuple: ``(image, y_true, bbox_true)`` and, when ``mask`` is set, a
        fourth item ``mask_true``.
        ``image`` is float32 with shape ``(*image_shape, 3)``;
        ``y_true`` holds integers in ``[0, n_class)`` with shape ``(max_pad_size, 1)``;
        ``bbox_true`` stacks ``max_pad_size`` results of ``tfdet.core.bbox.random_bbox``;
        ``mask_true`` is float32 with shape ``(max_pad_size, *image_shape, 1)``.
    """
    # Draw order is image -> labels -> boxes -> mask.
    image = np.random.random((*image_shape, 3)).astype(np.float32)
    y_true = np.random.randint(n_class, size = (max_pad_size, 1))

    boxes = []
    for _ in range(max_pad_size):
        boxes.append(tfdet.core.bbox.random_bbox(image_shape = image_shape))
    bbox_true = np.array(boxes)

    sample = (image, y_true, bbox_true)
    if mask:
        mask_true = np.random.random((max_pad_size, *image_shape, 1)).astype(np.float32)
        sample = sample + (mask_true,)
    return sample

def generator(mask = False):
    """Yield ``total_data_size`` random samples, each produced by ``load(mask = mask)``."""
    yield from (load(mask = mask) for _ in range(total_data_size))
        
# Build the two tf.data pipelines: one without masks, one with masks.
mask_generator = functools.partial(generator, mask = True)
mask_genrator = mask_generator  # backward-compatible alias for the original misspelled name

# Declared element dtypes, in order: image, y_true, bbox_true (and mask_true for the mask pipeline).
bbox_pipe = tf.data.Dataset.from_generator(generator, (tf.float32, tf.int32, tf.int32))
mask_pipe = tf.data.Dataset.from_generator(mask_generator, (tf.float32, tf.int32, tf.int32, tf.float32))

# NOTE(review): args2dict presumably turns each tuple element into a named dict
# entry matching the model input names -- confirm against tfdet.
bbox_pipe = tfdet.dataset.pipeline.args2dict(bbox_pipe).batch(batch_size)
mask_pipe = tfdet.dataset.pipeline.args2dict(mask_pipe).batch(batch_size)

# 2. Build 1-Stage Object Detector

2-1. RetinaNet

In [4]:
# RetinaNet, training build: ResNet-50 features -> RetinaNet detector -> train wrapper.
x = tf.keras.layers.Input(shape = [*image_shape, 3], name = "x_true")
feature = tfdet.model.backbone.resnet50(x, weights = "imagenet")

out = tfdet.model.detector.retinanet(
    feature,
    image_shape = tf.shape(x)[1:3],
    n_class = n_class,
    scale = [32, 64, 128, 256, 512],
    ratio = [0.5, 1, 2],
    octave = 3,
)

model = tfdet.model.train.retina.train_model(
    x, *out,
    proposal_count = 100,
    iou_threshold = 0.5,
    score_threshold = 0.05,
)
# compile() receives no loss or optimizer here -- presumably the wrapper supplies
# its own losses; TODO confirm against tfdet.
model.compile()
model.evaluate(bbox_pipe)

# Displayed value of the cell: the shape of every prediction output.
[pred.shape for pred in model.predict(bbox_pipe, verbose = 0)]



[(4, 100, 21), (4, 100, 4)]

In [5]:
# RetinaNet, inference build: `x` and `out` are not redefined here, so this cell
# relies on the values left by the preceding training cell.
nms_out = tfdet.model.postprocess.retina.FilterDetection(
    proposal_count = 100,
    iou_threshold = 0.5,
    score_threshold = 0.05,
)(out)
model = tf.keras.Model(x, nms_out)

# Displayed value of the cell: the shape of every prediction output.
[pred.shape for pred in model.predict(bbox_pipe, verbose = 0)]

[(4, 100, 21), (4, 100, 4)]

2-2. EfficientDet

In [6]:
# EfficientDet (d4), training build: the detector is applied directly to the input tensor.
x = tf.keras.layers.Input(shape = [*image_shape, 3], name = "x_true")
out = tfdet.model.detector.effdet_d4(
    x,
    n_class = n_class,
    scale = [32, 64, 128, 256, 512],
    ratio = [0.5, 1, 2],
    octave = 3,
    weights = "imagenet",
)

model = tfdet.model.train.effdet.train_model(
    x, *out,
    proposal_count = 100,
    iou_threshold = 0.5,
    score_threshold = 0.05,
)
# compile() receives no loss or optimizer here -- presumably the wrapper supplies
# its own losses; TODO confirm against tfdet.
model.compile()
model.evaluate(bbox_pipe)

# Displayed value of the cell: the shape of every prediction output.
[pred.shape for pred in model.predict(bbox_pipe, verbose = 0)]



[(4, 100, 21), (4, 100, 4)]

In [7]:
# EfficientDet, inference build: `x` and `out` are not redefined here, so this cell
# relies on the values left by the preceding training cell.
nms_out = tfdet.model.postprocess.effdet.FilterDetection(
    proposal_count = 100,
    iou_threshold = 0.5,
    score_threshold = 0.05,
)(out)
model = tf.keras.Model(x, nms_out)

# Displayed value of the cell: the shape of every prediction output.
[pred.shape for pred in model.predict(bbox_pipe, verbose = 0)]

[(4, 100, 21), (4, 100, 4)]

2-3. EfficientDet-Lite

In [8]:
# EfficientDet-Lite (d4), training build: same train wrapper module as EfficientDet.
x = tf.keras.layers.Input(shape = [*image_shape, 3], name = "x_true")
out = tfdet.model.detector.effdet_lite_d4(
    x,
    n_class = n_class,
    scale = [32, 64, 128, 256, 512],
    ratio = [0.5, 1, 2],
    octave = 3,
    weights = "imagenet",
)

model = tfdet.model.train.effdet.train_model(
    x, *out,
    proposal_count = 100,
    iou_threshold = 0.5,
    score_threshold = 0.05,
)
# compile() receives no loss or optimizer here -- presumably the wrapper supplies
# its own losses; TODO confirm against tfdet.
model.compile()
model.evaluate(bbox_pipe)

# Displayed value of the cell: the shape of every prediction output.
[pred.shape for pred in model.predict(bbox_pipe, verbose = 0)]



[(4, 100, 21), (4, 100, 4)]

In [9]:
# EfficientDet-Lite, inference build: `x` and `out` are not redefined here, so this
# cell relies on the values left by the preceding training cell.
nms_out = tfdet.model.postprocess.effdet.FilterDetection(
    proposal_count = 100,
    iou_threshold = 0.5,
    score_threshold = 0.05,
)(out)
model = tf.keras.Model(x, nms_out)

# Displayed value of the cell: the shape of every prediction output.
[pred.shape for pred in model.predict(bbox_pipe, verbose = 0)]

[(4, 100, 21), (4, 100, 4)]

2-4. YOLOv4

In [10]:
# YOLO v4, training build with "darknet" weights.
x = tf.keras.layers.Input(shape = [*image_shape, 3], name = "x_true")
out = tfdet.model.detector.yolo_v4(
    x,
    n_class = n_class,
    # Nine [a, b] pairs -- presumably anchor box sizes; TODO confirm against tfdet.
    size = [
        [ 10, 13], [ 16,  30], [ 33,  23],
        [ 30, 61], [ 62,  45], [ 59, 119],
        [116, 90], [156, 198], [373, 326],
    ],
    weights = "darknet",
)

model = tfdet.model.train.yolo.train_model(
    x, *out,
    proposal_count = 100,
    iou_threshold = 0.5,
    score_threshold = 0.05,
)
# compile() receives no loss or optimizer here -- presumably the wrapper supplies
# its own losses; TODO confirm against tfdet.
model.compile()
model.evaluate(bbox_pipe)

# Displayed value of the cell: the shape of every prediction output.
[pred.shape for pred in model.predict(bbox_pipe, verbose = 0)]



[(4, 100, 21), (4, 100, 4)]

In [11]:
# YOLO, inference build: `x` and `out` are not redefined here, so this cell relies
# on the values left by the preceding training cell.
nms_out = tfdet.model.postprocess.yolo.FilterDetection(
    proposal_count = 100,
    iou_threshold = 0.5,
    score_threshold = 0.05,
)(out)
model = tf.keras.Model(x, nms_out)

# Displayed value of the cell: the shape of every prediction output.
[pred.shape for pred in model.predict(bbox_pipe, verbose = 0)]

[(4, 100, 21), (4, 100, 4)]

# 3. Build 2-Stage Object Detector

3-1. Faster R-CNN

In [12]:
# Faster R-CNN, training build: ResNet-50 features -> detector -> train wrapper.
x = tf.keras.layers.Input(shape = [*image_shape, 3], name = "x_true")
feature = tfdet.model.backbone.resnet50(x, weights = "imagenet")

out = tfdet.model.detector.faster_rcnn(
    feature,
    image_shape = tf.shape(x)[1:3],
    n_class = n_class,
    scale = [32, 64, 128, 256, 512],
    ratio = [0.5, 1, 2],
    proposal_count = 2000,
    # sampling arguments are passed for the training build only
    sampling_count = 256,
    sampling_positive_ratio = 0.25,
)

model = tfdet.model.train.rcnn.train_model(
    x, *out,
    rpn_positive_ratio = 0.5,
    proposal_count = 100,
    iou_threshold = 0.5,
    score_threshold = 0.05,
)
# compile() receives no loss or optimizer here -- presumably the wrapper supplies
# its own losses; TODO confirm against tfdet.
model.compile()
model.evaluate(bbox_pipe)

# Displayed value of the cell: the shape of every prediction output.
[pred.shape for pred in model.predict(bbox_pipe, verbose = 0)]



[(4, 100, 21), (4, 100, 4)]

In [13]:
# Faster R-CNN, inference build: the graph is rebuilt without the sampling
# arguments and with proposal_count = 1000, then filtered by FilterDetection.
x = tf.keras.layers.Input(shape = [*image_shape, 3], name = "x_true")
feature = tfdet.model.backbone.resnet50(x, weights = "imagenet")

out = tfdet.model.detector.faster_rcnn(
    feature,
    image_shape = tf.shape(x)[1:3],
    n_class = n_class,
    scale = [32, 64, 128, 256, 512],
    ratio = [0.5, 1, 2],
    proposal_count = 1000,
)
nms_out = tfdet.model.postprocess.rcnn.FilterDetection(
    proposal_count = 100,
    iou_threshold = 0.5,
    score_threshold = 0.05,
)(out)
model = tf.keras.Model(x, nms_out)

# Displayed value of the cell: the shape of every prediction output.
[pred.shape for pred in model.predict(bbox_pipe, verbose = 0)]

[(4, 100, 21), (4, 100, 4)]

3-2. Mask R-CNN

In [14]:
# Mask R-CNN, training build: evaluated and predicted on the mask pipeline.
x = tf.keras.layers.Input(shape = [*image_shape, 3], name = "x_true")
feature = tfdet.model.backbone.resnet50(x, weights = "imagenet")

out = tfdet.model.detector.mask_rcnn(
    feature,
    image_shape = tf.shape(x)[1:3],
    n_class = n_class,
    scale = [32, 64, 128, 256, 512],
    ratio = [0.5, 1, 2],
    proposal_count = 2000,
    # sampling arguments are passed for the training build only
    sampling_count = 256,
    sampling_positive_ratio = 0.25,
)

model = tfdet.model.train.rcnn.train_model(
    x, *out,
    rpn_positive_ratio = 0.5,
    proposal_count = 100,
    iou_threshold = 0.5,
    score_threshold = 0.05,
)
# compile() receives no loss or optimizer here -- presumably the wrapper supplies
# its own losses; TODO confirm against tfdet.
model.compile()
model.evaluate(mask_pipe)

# Displayed value of the cell: the shape of every prediction output.
[pred.shape for pred in model.predict(mask_pipe, verbose = 0)]



[(4, 100, 21), (4, 100, 4), (4, 100, 14, 14, 1)]

In [15]:
# Mask R-CNN, inference build: the graph is rebuilt without the sampling
# arguments and with proposal_count = 1000, then filtered by FilterDetection.
x = tf.keras.layers.Input(shape = [*image_shape, 3], name = "x_true")
feature = tfdet.model.backbone.resnet50(x, weights = "imagenet")

out = tfdet.model.detector.mask_rcnn(
    feature,
    image_shape = tf.shape(x)[1:3],
    n_class = n_class,
    scale = [32, 64, 128, 256, 512],
    ratio = [0.5, 1, 2],
    proposal_count = 1000,
)
nms_out = tfdet.model.postprocess.rcnn.FilterDetection(
    proposal_count = 100,
    iou_threshold = 0.5,
    score_threshold = 0.05,
)(out)
model = tf.keras.Model(x, nms_out)

# Displayed value of the cell: the shape of every prediction output.
[pred.shape for pred in model.predict(mask_pipe, verbose = 0)]

[(4, 100, 21), (4, 100, 4), (4, 100, 14, 14, 1)]

3-3. Cascade R-CNN

In [16]:
# Cascade R-CNN, training build: ResNet-50 features -> detector -> train wrapper.
x = tf.keras.layers.Input(shape = [*image_shape, 3], name = "x_true")
feature = tfdet.model.backbone.resnet50(x, weights = "imagenet")

out = tfdet.model.detector.cascade_rcnn(
    feature,
    image_shape = tf.shape(x)[1:3],
    n_class = n_class,
    scale = [32, 64, 128, 256, 512],
    ratio = [0.5, 1, 2],
    proposal_count = 2000,
    # sampling arguments are passed for the training build only
    sampling_count = 256,
    sampling_positive_ratio = 0.25,
)

model = tfdet.model.train.rcnn.train_model(
    x, *out,
    rpn_positive_ratio = 0.5,
    proposal_count = 100,
    iou_threshold = 0.5,
    score_threshold = 0.05,
)
# compile() receives no loss or optimizer here -- presumably the wrapper supplies
# its own losses; TODO confirm against tfdet.
model.compile()
model.evaluate(bbox_pipe)

# Displayed value of the cell: the shape of every prediction output.
[pred.shape for pred in model.predict(bbox_pipe, verbose = 0)]



[(4, 100, 21), (4, 100, 4)]

In [17]:
# Cascade R-CNN, inference build: the graph is rebuilt without the sampling
# arguments and with proposal_count = 1000, then filtered by FilterDetection.
x = tf.keras.layers.Input(shape = [*image_shape, 3], name = "x_true")
feature = tfdet.model.backbone.resnet50(x, weights = "imagenet")

out = tfdet.model.detector.cascade_rcnn(
    feature,
    image_shape = tf.shape(x)[1:3],
    n_class = n_class,
    scale = [32, 64, 128, 256, 512],
    ratio = [0.5, 1, 2],
    proposal_count = 1000,
)
nms_out = tfdet.model.postprocess.rcnn.FilterDetection(
    proposal_count = 100,
    iou_threshold = 0.5,
    score_threshold = 0.05,
)(out)
model = tf.keras.Model(x, nms_out)

# Displayed value of the cell: the shape of every prediction output.
[pred.shape for pred in model.predict(bbox_pipe, verbose = 0)]

[(4, 100, 21), (4, 100, 4)]

3-4. Hybrid Task Cascade

In [18]:
# Hybrid Task Cascade, training build: evaluated and predicted on the mask pipeline.
x = tf.keras.layers.Input(shape = [*image_shape, 3], name = "x_true")
feature = tfdet.model.backbone.resnet50(x, weights = "imagenet")

out = tfdet.model.detector.hybrid_task_cascade(
    feature,
    image_shape = tf.shape(x)[1:3],
    n_class = n_class,
    scale = [32, 64, 128, 256, 512],
    ratio = [0.5, 1, 2],
    proposal_count = 2000,
    # sampling arguments are passed for the training build only
    sampling_count = 256,
    sampling_positive_ratio = 0.25,
)

model = tfdet.model.train.rcnn.train_model(
    x, *out,
    rpn_positive_ratio = 0.5,
    proposal_count = 100,
    iou_threshold = 0.5,
    score_threshold = 0.05,
)
# compile() receives no loss or optimizer here -- presumably the wrapper supplies
# its own losses; TODO confirm against tfdet.
model.compile()
model.evaluate(mask_pipe)

# Displayed value of the cell: the shape of every prediction output.
[pred.shape for pred in model.predict(mask_pipe, verbose = 0)]



[(4, 100, 21), (4, 100, 4), (4, 100, 14, 14, 1)]

In [19]:
# Hybrid Task Cascade, inference build: the graph is rebuilt without the sampling
# arguments and with proposal_count = 1000, then filtered by FilterDetection.
x = tf.keras.layers.Input(shape = [*image_shape, 3], name = "x_true")
feature = tfdet.model.backbone.resnet50(x, weights = "imagenet")

out = tfdet.model.detector.hybrid_task_cascade(
    feature,
    image_shape = tf.shape(x)[1:3],
    n_class = n_class,
    scale = [32, 64, 128, 256, 512],
    ratio = [0.5, 1, 2],
    proposal_count = 1000,
)
nms_out = tfdet.model.postprocess.rcnn.FilterDetection(
    proposal_count = 100,
    iou_threshold = 0.5,
    score_threshold = 0.05,
)(out)
model = tf.keras.Model(x, nms_out)

# Displayed value of the cell: the shape of every prediction output.
[pred.shape for pred in model.predict(mask_pipe, verbose = 0)]

[(4, 100, 21), (4, 100, 4), (4, 100, 14, 14, 1)]