## This notebook illustrates fine tuning a `RetinaNet` object detection model for object detection.

In [1]:
%matplotlib inline

In [2]:
import os
import io
import glob
import random
import imageio
import scipy.misc
import numpy as np
from six import BytesIO

import matplotlib
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw, ImageFont
from IPython.display import display, Javascript

import tensorflow as tf
from object_detection.utils import label_map_util, config_util, colab_utils
from object_detection.utils import visualization_utils as viz_utils
from object_detection.builders import model_builder

2022-11-05 23:12:11.783621: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-11-05 23:12:12.407336: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-11-05 23:12:12.407413: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2022-11-05 23:12:12.614280: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2022-11-05 23:12:16.854777: W tensorflow/stream_executor/platform/de

## Utilities.

In [3]:
def load_image_into_numpy_array(path):
    '''Load an image from a file into a numpy array.
    
    Args:
        path: A file path.
    Returns:
        uint8 numpy array with shape (im_height, im_width, 3)
    '''
    img_data = tf.io.gfile.GFile(path, 'rb').read()
    image = Image.open(BytesIO(img_data))
    (im_width, im_height) = image.size
    return np.array(image.getdata()).reshape(
        im_height, im_width, 3).astype(np.uint8)

In [4]:
def plot_detections(image_np, boxes, classes, 
                    scores, category_index, figsize = (12, 16), 
                    image_name = None):
    '''Wrapper function to visualize detections.
    Args:
        image_np: uint8 numpy array with shape (im_height, im_width, 3)
        boxes: A numpy array of shape [N, 4]
        classes: A numpy array of shape [N]
        scores: A numpy array of shape [N] or None. If scores = None, this 
            function assumes that the boxes plotted are groundtruth boxes and plot all boxes as black with no classes or scores.
            
        category_index: A dictionary containing category index and category names.
        figsize: Size of the figure.
        image_name: A name for the image file.
    '''
    image_np = np.array(image_np)
    image_np_with_annotations = image_np.copy()
    viz_utils.visualize_boxes_and_labels_on_image_array(
        image_np_with_annotations, boxes, 
        classes, scores, category_index, 
        use_normalized_coordinates=True, 
        min_score_thresh=0.8)
    
    if image_name:
        plt.imsave(image_name, image_np_with_annotations)
    else:
        plt.imshow(image_np_with_annotations)

## Load dataset.
### The dataset used is the `Rubber duck dataset`

In [5]:
train_image_dir = '../inputs/rubber_duck/train/'
train_images_np = []

for i in range(1, 6):
    image_path = os.path.join(train_image_dir, f'robertducky{i}.jpg')
    
    train_images_np.append(load_image_into_numpy_array(image_path))
    
plt.rcParams['axes.grid'] = False
plt.rcParams['xtick.labelsize'] = False
plt.rcParams['ytick.labelsize'] = False
plt.rcParams['xtick.top'] = False
plt.rcParams['xtick.bottom'] = False
plt.rcParams['ytick.left'] = False
plt.rcParams['ytick.right'] = False
plt.rcParams['figure.figsize'] = [14, 7]

for idx, train_image_np in enumerate(train_images_np):
    plt.subplot(2, 3, idx + 1)
    plt.imshow(train_image_np)
    
plt.show()

  plt.show()


In [6]:
## Annotate image with bounding boxes.

gt_boxes = [
            np.array([[0.436, 0.591, 0.629, 0.712]], dtype=np.float32),
            np.array([[0.539, 0.583, 0.73, 0.71]], dtype=np.float32),
            np.array([[0.464, 0.414, 0.626, 0.548]], dtype=np.float32),
            np.array([[0.313, 0.308, 0.648, 0.526]], dtype=np.float32),
            np.array([[0.256, 0.444, 0.484, 0.629]], dtype=np.float32)
]

## Prepare data for training.

In [7]:
duck_class_id = 1
num_classes = 1

category_index = {duck_class_id: {'id': duck_class_id, 
                                  'name': 'rubber ducky'}}

#Convert class labels to one-hot.

#The label_id_offset shifts all classes by a certain number of indices.
#We do this so that the mode receives one-hot labels where non-backgroud 
#classes start counting at the 0th index.

label_id_offset = 1
train_image_tensors = []
gt_classes_one_hot_tensors = []
gt_box_tensors = []

for (train_image_np, gt_box_np) in zip(train_images_np, gt_boxes):
    train_image_np = tf.expand_dims(tf.convert_to_tensor(train_image_np, dtype = tf.float32), axis = 0)
    train_image_tensors.append(train_image_np)
    
    gt_box_tensors.append(tf.convert_to_tensor(gt_box_np, dtype=tf.float32))
    
    zero_indexed_groundtruth_classes = tf.convert_to_tensor(
        np.ones(shape=[gt_box_np.shape[0]], dtype=np.int32) - label_id_offset)
    
    gt_classes_one_hot_tensors.append(tf.one_hot(zero_indexed_groundtruth_classes, num_classes))

2022-11-05 23:12:32.436179: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-11-05 23:12:32.436253: W tensorflow/stream_executor/cuda/cuda_driver.cc:263] failed call to cuInit: UNKNOWN ERROR (303)
2022-11-05 23:12:32.436301: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (debonair): /proc/driver/nvidia/version does not exist
2022-11-05 23:12:32.436889: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [8]:
dummy_scores = np.array([1.0], dtype = np.float32)

plt.figure(figsize = (30, 15))
for idx in range(5):
    plt.subplot(2, 3, idx + 1)
    plot_detections(train_images_np[idx], 
                    gt_boxes[idx], 
                    np.ones(shape = [gt_boxes[idx].shape[0]], dtype = np.int32),
                    dummy_scores, category_index)
    
plt.show()

  plt.show()


### Prepare model for training.

In [28]:
#Download model weights.
!wget 'http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_resnet50_v1_fpn_640x640_coco17_tpu-8.tar.gz'

!mv 'ssd_resnet50_v1_fpn_640x640_coco17_tpu-8.tar.gz' 'models/'

#Extract model weights.
!tar -xf 'models/ssd_resnet50_v1_fpn_640x640_coco17_tpu-8.tar.gz'

--2022-11-05 22:11:09--  http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_resnet50_v1_fpn_640x640_coco17_tpu-8.tar.gz
Resolving download.tensorflow.org (download.tensorflow.org)... 216.58.223.112, 2a00:1450:401a:805::2010
Connecting to download.tensorflow.org (download.tensorflow.org)|216.58.223.112|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 244817203 (233M) [application/x-tar]
Saving to: ‘ssd_resnet50_v1_fpn_640x640_coco17_tpu-8.tar.gz’


2022-11-05 22:14:38 (1.12 MB/s) - ‘ssd_resnet50_v1_fpn_640x640_coco17_tpu-8.tar.gz’ saved [244817203/244817203]



In [30]:
#Download config file.
!wget 'https://raw.githubusercontent.com/tensorflow/models/master/research/object_detection/configs/tf2/ssd_resnet50_v1_fpn_640x640_coco17_tpu-8.config'
!mv 'ssd_resnet50_v1_fpn_640x640_coco17_tpu-8.config' 'models/ssd_resnet50_v1_fpn_640x640_coco17_tpu-8/'

--2022-11-05 22:18:34--  https://raw.githubusercontent.com/tensorflow/models/master/research/object_detection/configs/tf2/ssd_resnet50_v1_fpn_640x640_coco17_tpu-8.config
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.110.133, 185.199.109.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4528 (4.4K) [text/plain]
Saving to: ‘ssd_resnet50_v1_fpn_640x640_coco17_tpu-8.config’


2022-11-05 22:18:36 (24.2 MB/s) - ‘ssd_resnet50_v1_fpn_640x640_coco17_tpu-8.config’ saved [4528/4528]



In [9]:
tf.keras.backend.clear_session()

print('Building model and restoring weights for fine-tuning...', flush = True)

num_classes = 1
pipeline_config = 'models/ssd_resnet50_v1_fpn_640x640_coco17_tpu-8/ssd_resnet50_v1_fpn_640x640_coco17_tpu-8.config'
checkpoint_path = 'models/ssd_resnet50_v1_fpn_640x640_coco17_tpu-8/checkpoint/ckpt-0'

configs = config_util.get_configs_from_pipeline_file(pipeline_config)
model_config = configs['model']
model_config.ssd.num_classes = num_classes
model_config.ssd.freeze_batchnorm = True

detection_model = model_builder.build(model_config = model_config, is_training = True)

Building model and restoring weights for fine-tuning...


In [10]:
fake_box_predictor = tf.compat.v2.train.Checkpoint(_base_tower_layers_for_heads = detection_model._box_predictor._base_tower_layers_for_heads,
                                                   _box_predictor_head = detection_model._box_predictor._box_prediction_head)

fake_model = tf.compat.v2.train.Checkpoint(_feature_extractor = detection_model._feature_extractor, 
                                           _box_predictor = fake_box_predictor)

ckpt = tf.compat.v2.train.Checkpoint(model = fake_model)
ckpt.restore(checkpoint_path).expect_partial()

#Run model through a dummy image so that variables are created.
image, shapes = detection_model.preprocess(tf.zeros([1, 640, 640, 3]))
predictions_dict = detection_model.predict(image, shapes)
_ = detection_model.postprocess(predictions_dict, shapes)
print('Weights restored')

Weights restored


### Eager mode custom training loop.

In [11]:
tf.keras.backend.set_learning_phase(True)

# These parameters can be tuned; since our training set has 5 images.
# It doesn't make sense to have a much larger batch size, though we could
# fit more examples in memory if we wanted to.

batch_size = 4
learning_rate = 0.01
num_batches = 100

#Select variables in top layers to fine-tune.
trainable_variables = detection_model.trainable_variables
to_fine_tune = []
prefixes_to_train = [
    'WeightSharedConvolutionalBoxPredictor/WeightSharedConvolutionalBoxHead',
  'WeightSharedConvolutionalBoxPredictor/WeightSharedConvolutionalClassHead']

for var in trainable_variables:
    if any([var.name.startswith(prefix) for prefix in prefixes_to_train]):
        to_fine_tune.append(var)
        
        
#Set up a forward + backward pass for a single train step.
def get_model_train_step_function(model, optimizer, vars_to_fine_tune):
    '''Get a tf.function for training step/'''
    
    @tf.function
    def train_step_fn(image_tensors, 
                      groundtruth_boxes_list, 
                      groundtruth_classes_list):
        '''A single training iteration
        Args:
            image_tensors: A list of [1, height, width, 3] tensors of type tf.float32.
            groundtruth_boxes_list: A list of tensors of shape [N_i, 4] with type tf.float32 representing groundtruth boxes for each image in the batch.
            groundtruth_classes_list: A list of tensors of shape [N_i, num_classes with type tf.float32 representing groundtruth boxes for each image in the batch.
        
        Returns:
            A scalar tensor representing the total loss for the input batch.
        '''
        
        shapes = tf.constant(batch_size * [[640, 640, 3]], dtype = tf.int32)
        model.provide_groundtruth(
            groundtruth_boxes_list = groundtruth_boxes_list,
            groundtruth_classes_list = groundtruth_classes_list)
        
        with tf.GradientTape() as tape:
            preprocessed_images = tf.concat(
                [detection_model.preprocess(image_tensor)[0] for image_tensor in image_tensors], axis = 0)
            
            prediction_dict = model.predict(preprocessed_images, shapes)
            losses_dict = model.loss(prediction_dict, shapes)
            total_loss = losses_dict['Loss/localization_loss'] + losses_dict['Loss/classification_loss']
            gradients = tape.gradient(total_loss, vars_to_fine_tune)
            optimizer.apply_gradients(zip(gradients, vars_to_fine_tune))
            
        return total_loss
    return train_step_fn



In [12]:
optimizer = tf.keras.optimizers.SGD(learning_rate = learning_rate, momentum = 0.9)
train_step_fn = get_model_train_step_function(detection_model, optimizer, to_fine_tune)

print('Start fine-tuning...', flush = True)
for idx in range(num_batches):
    #Grab keys for a random subset of examples.
    all_keys = list(range(len(train_images_np)))
    random.shuffle(all_keys)
    example_keys = all_keys[:batch_size]
    
    gt_boxes_list = [gt_box_tensors[key] for key in example_keys]
    gt_classes_list = [gt_classes_one_hot_tensors[key] for key in example_keys]
    image_tensors = [train_image_tensors[key] for key in example_keys]
    
    #Training step (forward pass + backward pass)
    total_loss = train_step_fn(image_tensors, gt_boxes_list, gt_classes_list)
    
    if idx % 10 == 0:
        print(f'Batch {idx} of {num_batches}, loss = {total_loss.numpy()}', flush = True)
        
print('Done fine-tuning.')

Start fine-tuning...
Batch 0 of 100, loss = 1.7429404258728027
Batch 10 of 100, loss = 0.2845860719680786
Batch 20 of 100, loss = 0.08820019662380219
Batch 30 of 100, loss = 0.047110289335250854
Batch 40 of 100, loss = 0.013916511088609695
Batch 50 of 100, loss = 0.0064419968985021114
Batch 60 of 100, loss = 0.004060050006955862
Batch 70 of 100, loss = 0.002770668128505349
Batch 80 of 100, loss = 0.001954255159944296
Batch 90 of 100, loss = 0.0023290542885661125
Done fine-tuning.


### Load test images to perform inference.

In [15]:
test_image_dir = '../inputs/rubber_duck/test/'
test_images_np = []

for item in range(1,10):
    image_path = os.path.join(test_image_dir, 'out' + str(item) + '.jpg')
    test_image = np.expand_dims(load_image_into_numpy_array(image_path), axis = 0)
    
@tf.function
def detect(input_tensor):
    '''Run detection on an input image.
    Args:
        input_tensor: A list with [1, height, width, 3] shape of type tf.float32.
    Returns:
        A dictionary containing 3 tensors (detection_boxes, detection_classes, detection_scores)
    '''
    preprocessed_image, shapes = detection_model.preprocess(input_tensor)
    prediction_dict = detection_model.predict(preprocessed_image, shapes)
    return detection_model.postprocess(prediction_dict, shapes)


for i in range(len(test_images_np)):
    input_tensor = tf.convert_to_tensor(test_images_np[i], dtype = tf.float32)
    detections = detect(input_tensor)
    
    plot_detections(
        test_images_np[i][0],
        detections['detection_boxes'][0].numpy(),
        detections['detection_classes'][0].numpy()/astype(np.uint32) + label_id_offset, 
        detections['detection_scores'][0].numpy(),
        category_index, figsize = (15, 20),
        image_name = "../inputs/gif_frame_" + ('%02d' % i) + ".jpg")

../inputs/rubber_duck/test/out1.jpg
../inputs/rubber_duck/test/out2.jpg
../inputs/rubber_duck/test/out3.jpg
../inputs/rubber_duck/test/out4.jpg
../inputs/rubber_duck/test/out5.jpg
../inputs/rubber_duck/test/out6.jpg
../inputs/rubber_duck/test/out7.jpg
../inputs/rubber_duck/test/out8.jpg
../inputs/rubber_duck/test/out9.jpg


In [None]:
!ls ../inputs/rubber_duck/test/

out1.jpg  out3.jpg  out5.jpg  out7.jpg	out9.jpg
out2.jpg  out4.jpg  out6.jpg  out8.jpg


In [14]:
imageio.plugins.freeimage.download()

anim_file = '../inputs/duckies_test.gif'
filenames = glob.glob('../inputs/gif_frame_*.jpg')
filenames = sorted(filenames)
last = -1
images = []

for filename in filenames:
    image = imageio.imread(filename)
    images.append(image)
    
imageio.mimsave(anim_file, images, 'GIF-FI', fps = 5)

display(IPyImage(open(anim_file, 'rb').read()))

RuntimeError: Zero images were written.