# 0.Install Tensorflow Object Detection API
This section follows the installation guide provided in the TensorFlow Object Detection API GitHub repository: https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf2.md

In [None]:
# Clone the tensorflow/models repository; the Object Detection API lives in its research/ folder
!git clone https://github.com/tensorflow/models

In [None]:
# Check out a fixed commit so that upstream changes made after this notebook was written do not affect it
!cd models && git checkout f08513d

In [None]:
# Move into models/research (the cells below run from there) and list its contents as a sanity check
%cd models/research
!dir

In [None]:
# Compile the Object Detection API .proto definitions into Python modules, written next to the sources
!protoc object_detection/protos/*.proto --python_out=.

In [None]:
# Source text of the setup.py used to install the object_detection package.
# The next cell writes this string to setup.py. Besides the unpinned requirements it pins
# tf-slim, tf-models-official, tensorflow_io and keras to specific versions.
setup_str = """
import os
from setuptools import find_packages
from setuptools import setup

REQUIRED_PACKAGES = [
    # Required for apache-beam with PY3
    'avro-python3',
    'apache-beam',
    'pillow',
    'lxml',
    'matplotlib',
    'Cython',
    'contextlib2',
    'tf-slim==1.1.0',
    'six',
    'pycocotools',
    'lvis',
    'scipy',
    'pandas',
    'tf-models-official==2.7.0',
    'tensorflow_io==0.23.1',
    'keras==2.7.0'
]

setup(
    name='object_detection',
    version='0.1',
    install_requires=REQUIRED_PACKAGES,
    include_package_data=True,
    packages=(
        [p for p in find_packages() if p.startswith('object_detection')] +
        find_packages(where=os.path.join('.', 'slim'))),
    package_dir={
        'datasets': os.path.join('slim', 'datasets'),
        'nets': os.path.join('slim', 'nets'),
        'preprocessing': os.path.join('slim', 'preprocessing'),
        'deployment': os.path.join('slim', 'deployment'),
        'scripts': os.path.join('slim', 'scripts'),
    },
    description='Tensorflow Object Detection Library',
    python_requires='>3.6',
)
"""

In [None]:
# Write the setup script text to setup.py in the current directory (models/research at this point)
with open('setup.py', 'w') as setup_file:
    setup_file.write(setup_str)

In [None]:
# Install the TensorFlow Object Detection API from the current directory,
# using the setup.py written by the previous cell.
# Alternative that is not used here: copy the setup.py shipped with the API instead.
#!copy .\object_detection\packages\tf2\setup.py
!python -m pip install .

In [None]:
# Test the installation by running the API's TF2 model builder test script.
!python object_detection/builders/model_builder_tf2_test.py

In [None]:
# Go back two levels, from models/research to the directory the notebook started in, and list it
%cd ../..
!dir

In [None]:
# Kill the current kernel process so the runtime restarts; this avoids a bug when importing
# tensorflow right after installing it. All variables are lost, so continue with the cells below.
import os
os.kill(os.getpid(), 9)

### Imports

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import cv2
import io
import os
import ast
import tensorflow as tf

In [None]:
# Report the TensorFlow version and whether TensorFlow can see a GPU.
print(tf.__version__)
gpu_devices = tf.config.list_physical_devices('GPU')
# tf.test.is_gpu_available() is deprecated in TF2; an empty device list means no GPU is visible.
print(bool(gpu_devices))
print(gpu_devices)

# 1 - Load the data

In [None]:
# Paths: train.csv, test.csv and the train_images folder are read from the current working directory
INPUT_PATH = os.getcwd()
IMAGES_PATH = os.path.join(INPUT_PATH, "train_images")
print("Images path: ", IMAGES_PATH)
# Load the frame-level metadata tables
df_train = pd.read_csv(os.path.join(INPUT_PATH,'train.csv'))
df_test = pd.read_csv(os.path.join(INPUT_PATH,'test.csv'))

In [None]:
os.path.join(IMAGES_PATH, "video_") 

In [None]:
# Each frame is stored as <IMAGES_PATH>/video_<video_id>/<video_frame>.jpg
video_folder = IMAGES_PATH + "/video_" + df_train["video_id"].astype(str)
frame_file = df_train["video_frame"].astype(str) + ".jpg"
df_train["image_path"] = video_folder + "/" + frame_file

In [None]:
# The "annotations" column is read from the CSV as text (for example "[]"), so it is parsed
# with ast.literal_eval into a real Python list.
# The isinstance guard keeps this cell re-runnable: values that were already parsed on a
# previous run are left untouched instead of making literal_eval raise.
df_train["annotations"] = df_train["annotations"].apply(
    lambda raw: ast.literal_eval(raw) if isinstance(raw, str) else raw)
# With real lists available, the list length is the number of bounding boxes in the frame.
df_train["num_bboxes"] = df_train["annotations"].apply(len)

In [None]:
df_train

### Load annotated image into numpy array and show using matplotlib

In [None]:
# Path of the first frame that has at least one bounding box
annotaded_image_path = df_train[df_train["num_bboxes"]>0]["image_path"].values[0]
image = cv2.imread(annotaded_image_path)
# Convert to RGB: cv2 creates a BGR image by default
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
print("Image shape: ", image.shape)
plt.imshow(image)
plt.show()

### Load image using PIL.Image

In [None]:
from PIL import Image
Image.open(annotaded_image_path)

In [None]:
# Annotations of the first annotated frame; the corner coordinates of its first bounding box
# are computed as start = (x, y) and end = (x + width, y + height).
annotation_dict = df_train[df_train["num_bboxes"]>0].head(1)["annotations"].values[0]
first_bbox = annotation_dict[0]
print(first_bbox)
x_start = first_bbox["x"]
x_end = x_start + first_bbox["width"]
y_start = first_bbox["y"]
y_end = y_start + first_bbox["height"]

# Each value is printed under its own label (the y values were previously labelled "x_start").
print("bbox x_start: ", x_start)
print("bbox x_end: ", x_end)
print("bbox y_start: ", y_start)
print("bbox y_end: ", y_end)

In [None]:
# Draw the bounding box computed in the previous cell on the image
image = cv2.imread(annotaded_image_path)
start_point = (x_start, y_start)
end_point = (x_end, y_end)
# Red color in BGR (the channel order cv2 uses)
color = (0, 0, 255)
# Line thickness of 2 px
thickness = 2
image = cv2.rectangle(image, start_point, end_point, color, thickness)
# Convert from BGR to RGB before handing the array to PIL, which displays the image
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
Image.fromarray(image, mode="RGB")

In [None]:
image.shape

In [None]:
# function to draw the bounding boxes in any image
def show_image_with_bboxes(df_row):
    """Draw every annotated bounding box of a frame and return it as a PIL image.

    Args:
        df_row: mapping (for example a dict built from a ``df_train`` row) with an
            "image_path" entry and an "annotations" entry. Each annotation is a
            dict with "x", "y", "width" and "height" keys.

    Returns:
        A PIL image in RGB mode with one red, 2 px wide rectangle per annotation.

    Raises:
        FileNotFoundError: if the image at "image_path" cannot be read.
    """
    annotations = df_row["annotations"]
    image = cv2.imread(df_row["image_path"])
    if image is None:
        # cv2.imread reports a missing/unreadable file by returning None instead of raising
        raise FileNotFoundError("Could not read image: {}".format(df_row["image_path"]))
    # Red color in BGR (the channel order cv2 uses), line thickness of 2 px
    color = (0, 0, 255)
    thickness = 2
    for annotation in annotations:
        x_start = annotation["x"]
        y_start = annotation["y"]
        start_point = (x_start, y_start)
        end_point = (x_start + annotation["width"], y_start + annotation["height"])
        image = cv2.rectangle(image, start_point, end_point, color, thickness)
    # Convert from BGR to RGB before building the PIL image
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return Image.fromarray(image, mode="RGB")

In [None]:
# Take the first frame with more than 9 bounding boxes, turn its row into a dict and draw it
row = df_train[df_train["num_bboxes"]>9].head(1)
example = row.to_dict(orient='records')[0]
show_image_with_bboxes(example)

# 2 - Details of the training Data
-Frames per video
<br>
-Sequences per video
<br>
-Frames per sequence
<br>
-Annotations

In [None]:
# Per-video summary: number of distinct sequences and number of frames
info_videos = (
    df_train.groupby("video_id")
    .agg({"sequence": pd.Series.nunique, "video_frame": "count"})
    .rename(columns={"sequence": "total_sequences",
                     "video_frame": "total_video_frames"})
)
print("Number of sequences and frames in each video: \n")
info_videos

In [None]:
# Dataset-wide totals, obtained by adding up the per-video counts
total_frames = info_videos["total_video_frames"].sum()
total_sequences = info_videos["total_sequences"].sum()
print("Total number of frames (number of images) = ", total_frames)
print("Total number of sequences = ", total_sequences)

In [None]:
# Share of all frames that belongs to each video
info_videos["ratio_video_frame"] = (info_videos["total_video_frames"]/total_frames)
print("Ratio of the total frames in each video")
# Turn video_id from the index back into a regular column; later cells read info_videos["video_id"]
info_videos = info_videos.reset_index()
info_videos

In [None]:
# Per-sequence summary: number of frames in each (video_id, sequence) pair
info_sequences = (
    df_train.groupby(["video_id", "sequence"])["sequence_frame"]
    .count()
    .to_frame()
    .rename(columns={"sequence_frame": "total_sequence_frames"})
)
print("Number of frames in each sequence: \n")
info_sequences

In [None]:
# Share of all frames (counted over every video) that belongs to each sequence
info_sequences["ratio_sequence_frame"] = (info_sequences["total_sequence_frames"]/total_frames)
print("Ratio of the total frames in each sequence")
info_sequences

In [None]:
# Annotation totals over the whole training table
has_bbox = df_train["num_bboxes"] > 0
total_bounding_boxes = df_train["num_bboxes"].sum()
total_frames_with_bbox = int(has_bbox.sum())
print("Total number of bounding boxes = ", total_bounding_boxes)
print("Total number of frames with at least a bounding box = ", total_frames_with_bbox)
print("Percentage of images with bounding boxes = ", (total_frames_with_bbox/total_frames)*100, "%")

In [None]:
# Bounding-box statistics per video.
# Only the needed column is aggregated; summing/counting every column of df_train would also
# process the list-valued "annotations" and the string columns for no benefit.
bboxes_per_video = df_train.groupby("video_id")["num_bboxes"].sum()
frames_with_bbox_per_video = (
    df_train[df_train["num_bboxes"] > 0].groupby("video_id")["video_frame"].count()
)
# info_videos carries video_id as a regular column at this point, so the per-video values are
# looked up by video_id instead of relying on the positional index matching the ids.
info_videos["num_bboxes"] = info_videos["video_id"].map(bboxes_per_video)
info_videos["ratio_video_bboxes"] = info_videos["num_bboxes"] / total_bounding_boxes
info_videos["frames_with_bbox"] = info_videos["video_id"].map(frames_with_bbox_per_video)
info_videos["ratio_video_frames_with_bboxes"] = info_videos["frames_with_bbox"] / total_frames_with_bbox
info_videos

In [None]:
# Bar chart: number of sequences in each video
fig, ax = plt.subplots()
video_labels = "video_" + info_videos["video_id"].astype(str)
ax.bar(video_labels, info_videos["total_sequences"])
ax.set_ylabel("sequences")
plt.show()

In [None]:
# Bar chart: number of frames in each video
fig, ax = plt.subplots()
video_labels = "video_" + info_videos["video_id"].astype(str)
ax.bar(video_labels, info_videos["total_video_frames"])
ax.set_ylabel("frames")
plt.show()

In [None]:
# Bar chart: number of frames with at least one bounding box in each video
fig, ax = plt.subplots()
video_labels = "video_" + info_videos["video_id"].astype(str)
ax.bar(video_labels, info_videos["frames_with_bbox"])
ax.set_ylabel("frames with bbox")
plt.show()

In [None]:
# Bar chart: number of bounding boxes in each video
fig, ax = plt.subplots()
video_labels = "video_" + info_videos["video_id"].astype(str)
ax.bar(video_labels, info_videos["num_bboxes"])
ax.set_ylabel("num bounding boxes")
plt.show()

In [None]:
# Bounding-box statistics per (video_id, sequence).
# Only the needed column is aggregated; summing/counting every column of df_train would also
# process the list-valued "annotations" and the string columns for no benefit.
sequence_keys = ["video_id", "sequence"]
info_sequences["num_bboxes"] = df_train.groupby(sequence_keys)["num_bboxes"].sum()
info_sequences["ratio_sequence_bboxes"] = info_sequences["num_bboxes"] / total_bounding_boxes
info_sequences["frames_with_bbox"] = (
    df_train[df_train["num_bboxes"] > 0].groupby(sequence_keys)["video_frame"].count()
)
# Sequences without any annotated frame are missing from the filtered count: treat them as 0
info_sequences["frames_with_bbox"] = info_sequences["frames_with_bbox"].fillna(0).astype(int)
info_sequences["ratio_sequence_frames_with_bboxes"] = info_sequences["frames_with_bbox"] / total_frames_with_bbox
# Turn video_id and sequence back into regular columns; later cells read them as columns
info_sequences = info_sequences.reset_index()
info_sequences

In [None]:
#info_sequences.loc[[info_sequences["num_bboxes"].idxmax()]]

In [None]:
# Bar chart: number of frames in each sequence
fig, ax = plt.subplots()
sequence_labels = info_sequences["sequence"].astype(str)
ax.bar(sequence_labels, info_sequences["total_sequence_frames"])
ax.set_ylabel("frames")
ax.set_xlabel("sequences")
plt.xticks(rotation=90)
plt.show()

In [None]:
# Bar chart: number of frames with at least one bounding box in each sequence
fig, ax = plt.subplots()
sequence_labels = info_sequences["sequence"].astype(str)
ax.bar(sequence_labels, info_sequences["frames_with_bbox"])
ax.set_ylabel("frames with bbox")
ax.set_xlabel("sequences")
plt.xticks(rotation=90)
plt.show()

In [None]:
# Bar chart: number of bounding boxes in each sequence
fig, ax = plt.subplots()
sequence_labels = info_sequences["sequence"].astype(str)
ax.bar(sequence_labels, info_sequences["num_bboxes"])
ax.set_ylabel("Bounding boxes")
ax.set_xlabel("sequences")
plt.xticks(rotation=90)
plt.show()

# 3.Train - Eval split
I choose to split train and evaluation sets the following way:
- A single sequence can't be split between training and evaluation set, as images are similar and would leak information
- Keeping around 75% to 85% of the total images with at least a bounding box in the training set
- Keeping around 75% to 85% of the total number of images in the training set

Looking at info_videos and info_sequences dataframes, I've chosen to keep all images from video 0 and video 1 and images from the sequence 37114 of the video 2 in the training set. That way, the training set has around 86% of the images with bounding boxes and around 75% of the total number of images. It will have around 79% of the total number of bounding boxes as well.

In [None]:
# placeholder, finish in future versions to obtain random splits
from sklearn.model_selection import train_test_split

def split_sequences(sequences_info_df, train_ratio):
    """Randomly split the sequence ids into a training and an evaluation group.

    Whole sequences are assigned to one group, so frames of a single sequence
    never end up on both sides of the split.

    Args:
        sequences_info_df: DataFrame with a "sequence" column holding the ids.
        train_ratio: share of the sequences to put in the training group.

    Returns:
        Tuple ``(train, test)`` with the sequence ids of each group.
    """
    sequence_list = sequences_info_df["sequence"].values
    # train_ratio is the training share, so it must be passed as train_size;
    # passing it as test_size would invert the split.
    train, test = train_test_split(sequence_list, train_size=train_ratio)
    return (train, test)

In [None]:
# Training sequences: everything from videos 0 and 1, plus sequence 37114
from_train_videos = info_sequences["video_id"].isin([0, 1])
is_extra_train_sequence = info_sequences["sequence"] == 37114
sequences_train_split = info_sequences[from_train_videos | is_extra_train_sequence]
sequences_train_split

In [None]:
# Report how much of the data the chosen training sequences cover.
# Each ratio column is a share of the dataset-wide total, so summing it over the selected
# sequences gives the share that ends up in the training split.
print("Training set split:")
print("percentage of images with bounding boxes: ", sum(sequences_train_split["ratio_sequence_frames_with_bboxes"])*100, "%")
print("percentage of total images: ", sum(sequences_train_split["ratio_sequence_frame"])*100, "%")
print("percentage of bounding boxes: ", sum(sequences_train_split["ratio_sequence_bboxes"])*100, "%")
total_split_train_images = sum(sequences_train_split["total_sequence_frames"])
# Every frame that is not in a training sequence goes to the evaluation split
total_split_eval_images = len(df_train) - total_split_train_images
print("Total images in training split: ", total_split_train_images)
print("Total images in evaluation split: ", total_split_eval_images)

In [None]:
# Training split: every frame whose sequence was selected for training
in_train_sequences = df_train["sequence"].isin(sequences_train_split["sequence"])
df_train_split = df_train[in_train_sequences]
df_train_split

In [None]:
# Evaluation split: every frame whose sequence was not selected for training
not_in_train_sequences = ~df_train["sequence"].isin(sequences_train_split["sequence"])
df_eval_split = df_train[not_in_train_sequences].reset_index()
df_eval_split

# 5.Create TF records
This section follows the official guide "Bringing in your own dataset" from TF object detection github: https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md

In [None]:
from object_detection.utils import dataset_util, label_map_util

def create_tf_example(example):
    """Build the tf.train.Example of one frame for the TF Object Detection API.

    Args:
        example: mapping (for example a DataFrame row) with an "image_path"
            entry and an "annotations" entry. Each annotation is a dict with
            "x", "y", "width" and "height" keys given in pixels.

    Returns:
        tf.train.Example holding the encoded image, its size and file name, and
        per bounding box the coordinates normalized by the image size, the
        class name "COT" and the class id 1.
    """
    # Store the file's bytes exactly as they are on disk: decoding and saving the image again
    # would recompress it, and the file handles are released as soon as each block ends.
    with open(example["image_path"], 'rb') as image_file:
        encoded_image_data = image_file.read()  # Encoded image bytes
    # The image is only opened to read its size
    with Image.open(io.BytesIO(encoded_image_data)) as image:
        width, height = image.size
    filename = example["image_path"].encode('utf8') # Filename of the image. Empty if image is not from file
    # NOTE(review): the format is fixed to jpg; this assumes every file in image_path is a JPEG - confirm
    image_format = b'jpg'
    class_name = "COT".encode('utf8')

    xmins = [] # List of normalized left x coordinates in bounding box (1 per box)
    xmaxs = [] # List of normalized right x coordinates in bounding box
               # (1 per box)
    ymins = [] # List of normalized top y coordinates in bounding box (1 per box)
    ymaxs = [] # List of normalized bottom y coordinates in bounding box
               # (1 per box)
    classes_text = [] # List of string class name of bounding box (1 per box)
    classes = [] # List of integer class id of bounding box (1 per box)

    for annotation in example["annotations"]:
        xmins.append(annotation["x"]/width)
        xmaxs.append((annotation["x"]+annotation["width"])/width)
        ymins.append(annotation["y"]/height)
        ymaxs.append((annotation["y"]+annotation["height"])/height)
        classes_text.append(class_name)
        classes.append(1)

    tf_example = tf.train.Example(features=tf.train.Features(feature={
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(filename),
      'image/source_id': dataset_util.bytes_feature(filename),
      'image/encoded': dataset_util.bytes_feature(encoded_image_data),
      'image/format': dataset_util.bytes_feature(image_format),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example


In [None]:
def create_tf_record(df, record_path):
    """Write every row of ``df`` as a tf.train.Example into one TFRecord file.

    Args:
        df: DataFrame whose rows are accepted by ``create_tf_example``.
        record_path: path of the TFRecord file to create.
    """
    # tf.python_io is the TF1 module name; in TF2 the writer lives in tf.io.
    # The context manager closes the file even when an example fails to convert.
    with tf.io.TFRecordWriter(record_path) as writer:
        for _, example in df.iterrows():
            tf_example = create_tf_example(example)
            writer.write(tf_example.SerializeToString())
    print('Successfully created the TFRecord file: {}'.format(record_path))

In [None]:
import contextlib2
from object_detection.dataset_tools import tf_record_creation_util

def create_tf_record_shards(df, output_filebase, num_shards=10):
    """Write the rows of ``df`` as tf.train.Examples spread over sharded TFRecord files.

    Args:
        df: DataFrame whose rows are accepted by ``create_tf_example``.
        output_filebase: base path of the shard files, which are opened through
            ``tf_record_creation_util.open_sharded_output_tfrecords``.
        num_shards: number of shard files to distribute the examples over.
    """
    with contextlib2.ExitStack() as tf_record_close_stack:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
              tf_record_close_stack, output_filebase, num_shards)
        # Shards are assigned round-robin by row position, not by index label: a filtered
        # DataFrame keeps gaps in its index (unbalanced shards) and a non-integer index
        # cannot be used with the modulo at all.
        for position, (_, example) in enumerate(df.iterrows()):
            tf_example = create_tf_example(example)
            output_shard_index = position % num_shards
            output_tfrecords[output_shard_index].write(tf_example.SerializeToString())

In [None]:
# Create the training directory; exist_ok keeps the cell re-runnable when it already exists
os.makedirs("training", exist_ok=True)

In [None]:
# Paths of the training directory and of the TFRecord file bases for both splits,
# all located under the current working directory
CURRENT_DIRECTORY = os.getcwd()
TRAINING_DIRECTORY = os.path.join(CURRENT_DIRECTORY, "training")
TRAIN_RECORD_PATH = os.path.join(TRAINING_DIRECTORY, "train.record")
EVAL_RECORD_PATH = os.path.join(TRAINING_DIRECTORY, "eval.record")

#### Create training and evaluation tfrecords

In [None]:
# Training TFRecords: write the training split as sharded files based on TRAIN_RECORD_PATH
# (num_shards is left at its default of 10)
create_tf_record_shards(df_train_split, TRAIN_RECORD_PATH)

In [None]:
# Evaluation TFRecords: write the evaluation split as sharded files based on EVAL_RECORD_PATH
# (num_shards is left at its default of 10)
create_tf_record_shards(df_eval_split, EVAL_RECORD_PATH)

In [None]:
# Sanity check: parse and print the first three examples of the last training shard
last_shard_path = TRAIN_RECORD_PATH + "-00009-of-00010"
raw_dataset = tf.data.TFRecordDataset(last_shard_path)
for raw_record in raw_dataset.take(3):
    example = tf.train.Example.FromString(raw_record.numpy())
    print(example)

In [None]:
 # Show the image with bounding boxes for one of the examples printed above; the video id (0) and frame id (29) are taken from its filename
row = df_train[(df_train["video_id"]==0) & (df_train["video_frame"]==29)]
example = row.to_dict(orient='records')[0]
show_image_with_bboxes(example)

#### Create the label map

In [None]:
# Location of the label map file inside the training directory
LABELMAP_PATH = os.path.join(TRAINING_DIRECTORY, "label_map.pbtxt")

In [None]:
# Create the label map: a single class with id 1 and name 'COT', the same id and name
# that create_tf_example stores for every bounding box
label_map_str = """item {
  id: 1
  name: 'COT'
}"""
# Write the label map to LABELMAP_PATH
with open(LABELMAP_PATH, 'w') as writefile:
    writefile.write(label_map_str)
# Show the written file as a check
!more {LABELMAP_PATH}

# 6.Training the model
This section follows the official guide "Training and Evaluation with TensorFlow 2" from the TF Object Detection API GitHub repository: https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf2_training_and_evaluation.md

In [None]:
# Download the pretrained EfficientDet-D0 model and unpack it in the working directory;
# the pipeline config below reads its checkpoint from efficientdet_d0_coco17_tpu-32/checkpoint
!wget http://download.tensorflow.org/models/object_detection/tf2/20200711/efficientdet_d0_coco17_tpu-32.tar.gz
!tar -xvzf efficientdet_d0_coco17_tpu-32.tar.gz

In [None]:
# Create the model directory; exist_ok keeps the cell re-runnable when it already exists
os.makedirs("cot_model", exist_ok=True)

In [None]:
# Model directory and the pipeline config file that is written into it
COT_MODEL_DIRECTORY = os.path.join(CURRENT_DIRECTORY, "cot_model")
PIPELINE_CONFIG_PATH = os.path.join(COT_MODEL_DIRECTORY, "cot_model.config")

In [None]:
# Text of the pipeline config (EfficientDet-D0, one class) that the next cell writes to
# PIPELINE_CONFIG_PATH. The checkpoint, label map and TFRecord paths inside it are relative,
# so the training/evaluation commands have to be started from the notebook's working directory.
config_txt= """
# SSD with EfficientNet-b0 + BiFPN feature extractor,
# shared box predictor and focal loss (a.k.a EfficientDet-d0).
# See EfficientDet, Tan et al, https://arxiv.org/abs/1911.09070
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from an EfficientNet-b0 checkpoint.
#
# Train on TPU-8

model {
  ssd {
    inplace_batchnorm_update: true
    freeze_batchnorm: false
    num_classes: 1
    add_background_class: false
    box_coder {
      faster_rcnn_box_coder {
        y_scale: 10.0
        x_scale: 10.0
        height_scale: 5.0
        width_scale: 5.0
      }
    }
    matcher {
      argmax_matcher {
        matched_threshold: 0.5
        unmatched_threshold: 0.5
        ignore_thresholds: false
        negatives_lower_than_unmatched: true
        force_match_for_each_row: true
        use_matmul_gather: true
      }
    }
    similarity_calculator {
      iou_similarity {
      }
    }
    encode_background_as_zeros: true
    anchor_generator {
      multiscale_anchor_generator {
        min_level: 3
        max_level: 7
        anchor_scale: 4.0
        aspect_ratios: [1.0, 2.0, 0.5]
        scales_per_octave: 3
      }
    }
    image_resizer {
      keep_aspect_ratio_resizer {
        min_dimension: 512
        max_dimension: 512
        pad_to_max_dimension: true
        }
    }
    box_predictor {
      weight_shared_convolutional_box_predictor {
        depth: 64
        class_prediction_bias_init: -4.6
        conv_hyperparams {
          force_use_bias: true
          activation: SWISH
          regularizer {
            l2_regularizer {
              weight: 0.00004
            }
          }
          initializer {
            random_normal_initializer {
              stddev: 0.01
              mean: 0.0
            }
          }
          batch_norm {
            scale: true
            decay: 0.99
            epsilon: 0.001
          }
        }
        num_layers_before_predictor: 3
        kernel_size: 3
        use_depthwise: true
      }
    }
    feature_extractor {
      type: 'ssd_efficientnet-b0_bifpn_keras'
      bifpn {
        min_level: 3
        max_level: 7
        num_iterations: 3
        num_filters: 64
      }
      conv_hyperparams {
        force_use_bias: true
        activation: SWISH
        regularizer {
          l2_regularizer {
            weight: 0.00004
          }
        }
        initializer {
          truncated_normal_initializer {
            stddev: 0.03
            mean: 0.0
          }
        }
        batch_norm {
          scale: true,
          decay: 0.99,
          epsilon: 0.001,
        }
      }
    }
    loss {
      classification_loss {
        weighted_sigmoid_focal {
          alpha: 0.25
          gamma: 1.5
        }
      }
      localization_loss {
        weighted_smooth_l1 {
        }
      }
      classification_weight: 1.0
      localization_weight: 1.0
    }
    normalize_loss_by_num_matches: true
    normalize_loc_loss_by_codesize: true
    post_processing {
      batch_non_max_suppression {
        score_threshold: 1e-8
        iou_threshold: 0.5
        max_detections_per_class: 100
        max_total_detections: 100
      }
      score_converter: SIGMOID
    }
  }
}

train_config: {
  fine_tune_checkpoint: "efficientdet_d0_coco17_tpu-32/checkpoint/ckpt-0"
  fine_tune_checkpoint_version: V2
  fine_tune_checkpoint_type: "detection"
  batch_size: 2
  sync_replicas: true
  startup_delay_steps: 0
  replicas_to_aggregate: 1
  use_bfloat16: true
  num_steps: 10000
  data_augmentation_options {
    random_horizontal_flip {
    }
  }
  data_augmentation_options {
    random_scale_crop_and_pad_to_square {
      output_size: 512
      scale_min: 0.1
      scale_max: 2.0
    }
  }
  optimizer {
    momentum_optimizer: {
      learning_rate: {
        cosine_decay_learning_rate {
          learning_rate_base: 8e-2
          total_steps: 10000
          warmup_learning_rate: .001
          warmup_steps: 1000
        }
      }
      momentum_optimizer_value: 0.9
    }
    use_moving_average: false
  }
  max_number_of_boxes: 100
  unpad_groundtruth_tensors: false
}

train_input_reader: {
  label_map_path: "training/label_map.pbtxt"
  tf_record_input_reader {
    input_path: "training/train.record-?????-of-00010"
  }
}

eval_config: {
  metrics_set: "coco_detection_metrics"
  use_moving_averages: false
  batch_size: 1;
}

eval_input_reader: {
  label_map_path: "training/label_map.pbtxt"
  shuffle: false
  num_epochs: 1
  tf_record_input_reader {
    input_path: "training/eval.record-?????-of-00010"
  }
}"""

In [None]:
# Write the pipeline config text to its file inside the model directory
with open(PIPELINE_CONFIG_PATH, 'w') as pipeline_config_file:
    pipeline_config_file.write(config_txt)

In [None]:
# Show the written config file as a check
!more {PIPELINE_CONFIG_PATH}

#### Training the model (preferably run these commands from a command-line terminal instead of the notebook)

In [None]:
# train model
!python models/research/object_detection/model_main_tf2.py \
  --model_dir=squeal_model \
  --pipeline_config_path=squeal_model/squeal_model.config \
  --alsologtostderr

In [None]:
# Evaluate the model using the checkpoints stored in cot_model (--checkpoint_dir)
!python models/research/object_detection/model_main_tf2.py \
  --model_dir=cot_model \
  --pipeline_config_path=cot_model/cot_model.config \
  --checkpoint_dir=cot_model \
  --eval_timeout=0 \
  --alsologtostderr

#### Export model to get saved model

In [None]:
# Create the directory for exported models; exist_ok keeps the cell re-runnable
os.makedirs("saved_models", exist_ok=True)

In [None]:
# Create saved_models/cot_model without changing the working directory: if a %cd into
# saved_models failed, the following "%cd .." would leave the notebook one level too high.
# exist_ok keeps the cell re-runnable.
os.makedirs(os.path.join("saved_models", "cot_model"), exist_ok=True)

In [None]:
# Export the trained checkpoint from cot_model into saved_models/cot_model, with image tensors as input type
!python models/research/object_detection/exporter_main_v2.py --input_type image_tensor \
  --pipeline_config_path=cot_model/cot_model.config \
  --trained_checkpoint_dir=cot_model \
  --output_directory=saved_models/cot_model

In [None]:
# Package the exported model directory as saved_models/cot_model.zip, then return to the previous directory
# NOTE(review): tar.exe suggests this cell was written for Windows - confirm before running elsewhere
%cd saved_models
!tar.exe -a -c -f cot_model.zip cot_model
%cd ..

In [None]:
# Unpack a saved-model archive that was copied into saved_models/ (only needed for imported models)
# NOTE(review): the archive name cot_model_highres.zip differs from the cot_model.zip created above - confirm which file is meant
%cd saved_models
!tar -xvzf cot_model_highres.zip
%cd ..

Evaluate saved model on console:

python models/research/object_detection/model_main_tf2.py --model_dir=saved_models/cot_model/checkpoint --pipeline_config_path=saved_models/cot_model_v4/pipeline.config --checkpoint_dir=saved_models/cot_model_v4/checkpoint --eval_timeout=0 --alsologtostderr


# Inference

In [None]:
# Directory that holds the exported models
SAVED_MODEL_DIRECTORY = os.path.join(CURRENT_DIRECTORY, "saved_models")

In [None]:
# Load the exported COTS detection model (saved_models/cot_model_v4/saved_model) into memory.
tf.keras.backend.clear_session()
saved_model_path = os.path.join(SAVED_MODEL_DIRECTORY, 'cot_model_v4', 'saved_model')
detect_fn_tf_odt = tf.saved_model.load(saved_model_path)

In [None]:
from six import BytesIO
def load_image_into_numpy_array(path):
    """Load an image from file into a numpy array.

    Puts image into numpy array to feed into tensorflow graph.
    Note that by convention we put it into a numpy array with shape
    (height, width, channels), where channels=3 for RGB.

    Args:
    path: the file path to the image

    Returns:
    uint8 numpy array with shape (img_height, img_width, 3)
    """
    # Read through a context manager so the file handle is closed right away
    with tf.io.gfile.GFile(path, 'rb') as image_file:
        img_data = image_file.read()
    with Image.open(BytesIO(img_data)) as image:
        # convert("RGB") guarantees 3 channels even for grayscale, palette or RGBA files,
        # and converting the image directly avoids building a Python list of every pixel.
        return np.array(image.convert("RGB"), dtype=np.uint8)


In [None]:
# First frame of the training split that has at least one bounding box, as a dict for the helper functions
row = df_train_split[df_train_split["num_bboxes"] >0].head(1)
example = row.to_dict(orient='records')[0]
row

In [None]:
# Run the detector on the example frame
image_path = example["image_path"]
image_np = load_image_into_numpy_array(image_path)
# Add a leading batch axis: a single image becomes a batch of one
input_tensor = np.expand_dims(image_np, 0)
detections = detect_fn_tf_odt(input_tensor)

In [None]:
detections

In [None]:
from object_detection.utils import visualization_utils as viz_utils

def show_image_with_detected_bboxes(example, min_score_thresh=.10, max_boxes_to_draw=200):
    """Run the detection model on a frame and return the image with the detections drawn.

    Args:
        example: mapping with an "image_path" entry pointing to the image file.
        min_score_thresh: detections scoring below this value are not drawn.
        max_boxes_to_draw: upper limit on the number of boxes drawn.

    Returns:
        A PIL image in RGB mode with the detected boxes and their labels drawn on it.
    """
    image_path = example["image_path"]
    image_np = load_image_into_numpy_array(image_path)
    # Add a leading batch axis: a single image becomes a batch of one
    input_tensor = np.expand_dims(image_np, 0)
    detections = detect_fn_tf_odt(input_tensor)
    category_index = label_map_util.create_category_index_from_labelmap(LABELMAP_PATH,
                                                                    use_display_name=True)
    # All outputs are batches tensors.
    # Convert to numpy arrays, and take index [0] to remove the batch dimension.
    # We're only interested in the first num_detections.
    num_detections = int(detections.pop('num_detections'))
    detections = {key: value[0, :num_detections].numpy()
                   for key, value in detections.items()}
    detections['num_detections'] = num_detections

    # detection_classes should be ints.
    detections['detection_classes'] = detections['detection_classes'].astype(np.int64)

    # Draw on a copy so the loaded image array stays untouched
    image_np_with_detections = image_np.copy()

    viz_utils.visualize_boxes_and_labels_on_image_array(
          image_np_with_detections,
          detections['detection_boxes'],
          detections['detection_classes'],
          detections['detection_scores'],
          category_index,
          use_normalized_coordinates=True,
          max_boxes_to_draw=max_boxes_to_draw,
          min_score_thresh=min_score_thresh,
          agnostic_mode=False)
    return Image.fromarray(image_np_with_detections, mode="RGB")

In [None]:
show_image_with_detected_bboxes(example)

In [None]:
show_image_with_bboxes(example)

In [None]:
# First frame of the evaluation split that has more than one bounding box, as a dict for the helper functions
row = df_eval_split[df_eval_split["num_bboxes"] >1].head(1)
example = row.to_dict(orient='records')[0]
row

In [None]:
# Run the detector on the evaluation example and display the raw detections
image_path = example["image_path"]
image_np = load_image_into_numpy_array(image_path)
# Add a leading batch axis: a single image becomes a batch of one
input_tensor = np.expand_dims(image_np, 0)
detections = detect_fn_tf_odt(input_tensor)
detections

In [None]:
show_image_with_detected_bboxes(example)

In [None]:
show_image_with_bboxes(example)