# Object Detection with DETR

In [None]:
!pip install --upgrade git+https://github.com/EmGarr/kerod.git

In [None]:
%load_ext autoreload

%autoreload 2

In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds
from kerod.core.standard_fields import BoxField, DatasetField
from kerod.dataset.preprocessing import preprocess_coco_example
import functools

# Number of examples per padded batch.
batch_size = 2

# Per-example shapes handed to `padded_batch` as (features, ground_truths).
# `None` marks a dimension that is padded to the largest example of the batch.
padded_shape = (
    {
        DatasetField.IMAGES: [None, None, 3],
        DatasetField.IMAGES_INFO: [2],
        DatasetField.IMAGES_PMASK: [None, None],
    },
    {
        BoxField.BOXES: [None, 4],
        BoxField.LABELS: [None],
        BoxField.NUM_BOXES: [1],
        BoxField.WEIGHTS: [None],
    },
)


def _build_pipeline(dataset, **preprocess_kwargs):
    """Preprocess, filter, pad-batch and prefetch a COCO split.

    Args:
        dataset: The `tf.data.Dataset` returned by `tfds.load`.
        **preprocess_kwargs: Extra keyword arguments forwarded to
            `preprocess_coco_example` on top of `bgr=True, padded_mask=True`.

    Returns:
        The batched and prefetched dataset.
    """
    preprocess = functools.partial(preprocess_coco_example,
                                   bgr=True,
                                   padded_mask=True,
                                   **preprocess_kwargs)
    dataset = dataset.map(preprocess, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    # Keep only the examples that still have more than one box after
    # preprocessing (the predicate is `> 1`).
    # NOTE(review): if the intent is only to drop examples without any box,
    # the threshold should be `> 0` -- confirm.
    dataset = dataset.filter(lambda x, y: tf.shape(y[BoxField.BOXES])[0] > 1)
    dataset = dataset.padded_batch(batch_size, padded_shape)
    return dataset.prefetch(tf.data.experimental.AUTOTUNE)


# Training split: shuffled input files, default preprocessing options.
ds_train, ds_info = tfds.load(name="coco/2017", split="train", shuffle_files=True, with_info=True)
ds_train = _build_pipeline(ds_train)

# Validation split: deterministic file order and no horizontal flip.
ds_val = tfds.load(name="coco/2017", split="validation", shuffle_files=False)
ds_val = _build_pipeline(ds_val, horizontal_flip=False)

## Create an iterator on a small subset of the dataset

In [None]:
# Number of batches kept for the overfitting experiment.
num_elem = 1

# Materialize the selected batches once; the generator below hands back this
# same list each time the dataset is iterated.
train_tf = list(ds_train.take(num_elem))

# dtypes of the (features, ground_truths) structure yielded by the generator.
subset_dtypes = (
    {
        "images": tf.float32,
        "images_information": tf.float32,
        "images_padding_mask": tf.int8,
    },
    {
        "bbox": tf.float32,
        "label": tf.int32,
        "num_boxes": tf.int32,
        "weights": tf.float32,
    },
)

# Static shapes of the same structure; `None` dimensions vary between batches.
subset_shapes = (
    {
        "images": tf.TensorShape((batch_size, None, None, 3)),
        "images_information": tf.TensorShape((batch_size, 2)),
        "images_padding_mask": tf.TensorShape((batch_size, None, None)),
    },
    {
        "bbox": tf.TensorShape((batch_size, None, 4)),
        "label": tf.TensorShape((batch_size, None)),
        "num_boxes": tf.TensorShape((batch_size, 1)),
        "weights": tf.TensorShape((batch_size, None)),
    },
)

# Replace the full training set by a dataset that only serves the cached batches.
ds_train = tf.data.Dataset.from_generator(
    lambda: train_tf,
    output_types=subset_dtypes,
    output_shapes=subset_shapes,
)


In [None]:
# Display the dataset metadata object returned by `tfds.load(..., with_info=True)`.
ds_info

## Visualize example

In [None]:
from kerod.utils.drawing import BoxDrawer, draw_bounding_boxes
    
# Draw the ground-truth boxes on the first image of each batch, for at most
# the first 11 batches of the training dataset.
for features, gt in ds_train.take(11):
    first_image = features['images'][0]
    first_boxes = gt['bbox'][0]
    draw_bounding_boxes(first_image, first_boxes, resize=False)

# Load and train the network

Perform an overfit

In [None]:
import tensorflow_addons as tfa

def downscale_backbone_grads(grads_and_vars):
    """Multiply the gradients of the backbone variables by 0.1.

    Meant to be used as a `gradient_transformers` entry of the optimizer so
    that the backbone is updated more slowly than the rest of the model, as
    done in the paper.

    NOTE(review): Adam-style optimizers normalize the update by a running
    gradient magnitude, so a constant gradient scaling may not act like a
    10x smaller learning rate -- confirm this has the intended effect.

    Args:
        grads_and_vars: Iterable of `(gradient, variable)` pairs.

    Returns:
        A list of `(gradient, variable)` pairs in the same order, where the
        gradients of backbone variables have been scaled by 0.1.
    """
    # `detr` is looked up when the function runs, so the model must already
    # exist as a global by the time the optimizer applies gradients.
    backbone_names = {var.name for var in detr.backbone.trainable_variables}
    return [
        (tf.multiply(grad, 0.1), var) if var.name in backbone_names else (grad, var)
        for grad, var in grads_and_vars
    ]

def clip_norm_grads(grads_and_vars):
    """Clip each gradient tensor independently with `tf.clip_by_norm(..., 0.1)`.

    The clipping is applied per tensor; no norm is computed across tensors.

    Args:
        grads_and_vars: Iterable of `(gradient, variable)` pairs.

    Returns:
        A list of `(clipped_gradient, variable)` pairs in the same order.
    """
    max_norm = 0.1
    return [
        (tf.clip_by_norm(grad, max_norm), var)
        for grad, var in grads_and_vars
    ]

# AdamW whose gradients go through the two transformers listed below
# (backbone down-scaling, then per-tensor norm clipping).
# NOTE(review): both hyper-parameters are 1e-3 (i.e. `10e-4`). If 1e-4 was
# intended, these values are ten times too large -- confirm.
optimizer = tfa.optimizers.AdamW(
    learning_rate=1e-3,
    weight_decay=1e-3,
    gradient_transformers=[
        downscale_backbone_grads,
        clip_norm_grads,
    ],
)

In [None]:
from kerod.core.standard_fields import BoxField
from kerod.model import factory
from kerod.model.factory import KerodModel
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint

# Class names and number of classes of the dataset described by `ds_info`
# (COCO 2017 in this notebook).
label_feature = ds_info.features['objects']['label']
classes = label_feature.names
num_classes = len(classes)

# Build the DETR model with a ResNet-50 backbone from the kerod factory.
# `loss=None`: presumably the model registers its own losses internally --
# TODO confirm against the kerod implementation.
detr = factory.build_model(
    num_classes,
    name=KerodModel.detr_resnet50,
)
detr.compile(optimizer=optimizer, loss=None)

## Graph mode 

In [None]:
# Train for 200 epochs on `ds_train` through the standard Keras `fit` loop.
detr.fit(ds_train, epochs=200)

## Eager mode 

In [None]:
# Run the training step eagerly so it can be stepped through and debugged.
detr.run_eagerly = True

# Only the first batch is ever trained on, so fetch just that one instead of
# materializing the whole dataset into a list.
first_batch = next(iter(ds_train))

# Repeat the training step 300 times on the same batch to overfit it.
for _ in range(300):
    detr.train_step(first_batch)

## Visualize overfit

In [None]:
from kerod.utils.drawing import BoxDrawer, draw_bounding_boxes

# Drawer for the predictions, built from the dataset class names.
drawer = BoxDrawer(classes)

for features, gt in ds_train:
    # Predictions of the model for the whole batch.
    boxes, scores, labels = detr.predict_on_batch(features)
    drawer(
        features['images'],
        features['images_information'],
        boxes,
        scores=scores,
        labels=labels,
        # One entry per image of the batch, each set to 10.
        num_valid_detections=[10] * batch_size,
    )
    # Ground-truth boxes of the first image of the batch, for comparison.
    draw_bounding_boxes(features['images'][0], gt['bbox'][0], resize=False)