# Intro to Object Detection Colab

Welcome to the object detection colab!  This demo will take you through the steps of running an "out-of-the-box" detection model on a collection of images.

## Imports and Setup

In [1]:
import os
import pathlib
import matplotlib
import matplotlib.pyplot as plt

import io
import scipy.misc
import numpy as np
from six import BytesIO
from PIL import Image, ImageDraw, ImageFont

import tensorflow as tf
import pandas as pd
import time
from tqdm.notebook import tqdm
import gc

from object_detection.utils import label_map_util
from object_detection.utils import config_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.builders import model_builder

%matplotlib inline

2024-12-13 16:15:09.345637: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-12-13 16:15:09.368468: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Utilities

In [2]:
def load_image_into_numpy_array(path):
  """Load an image from file into a numpy array.

  Puts image into numpy array to feed into tensorflow graph.
  Note that by convention we put it into a numpy array with shape
  (height, width, channels), where channels=3 for RGB.

  Args:
    path: the file path to the image

  Returns:
    uint8 numpy array with shape (img_height, img_width, 3)
  """
  img_data = tf.io.gfile.GFile(path, 'rb').read()
  image = Image.open(BytesIO(img_data))
  (im_width, im_height) = image.size
  return np.array(image.getdata()).reshape(
      (im_height, im_width, 3)).astype(np.uint8)

def get_keypoint_tuples(eval_config):
  """Return a tuple list of keypoint edges from the eval config.
  
  Args:
    eval_config: an eval config containing the keypoint edges
  
  Returns:
    a list of edge tuples, each in the format (start, end)
  """
  tuple_list = []
  kp_list = eval_config.keypoint_edge
  for edge in kp_list:
    tuple_list.append((edge.start, edge.end))
  return tuple_list

### Build a detection model and load pre-trained model weights

This sometimes takes a little while, please be patient!

In [3]:
pipeline_config = "/home/ttran/projects/TFmodels2/models/research/object_detection/training/ssd_resnet152_v1_fpn_640x640_coco17_tpu-8.config"
model_dir = "/home/ttran/projects/TFmodels2/models/research/object_detection/ssd_resnet152_v1_fpn_640x640_coco17_tpu-8_3"

# Load pipeline config and build a detection model
configs = config_util.get_configs_from_pipeline_file(pipeline_config)
model_config = configs['model']
detection_model = model_builder.build(
      model_config=model_config, is_training=False)

# Restore checkpoint
ckpt = tf.compat.v2.train.Checkpoint(
      model=detection_model)
ckpt.restore(os.path.join(model_dir, 'ckpt-26')).expect_partial()

def get_model_detection_function(model):
  """Get a tf.function for detection."""

  @tf.function
  def detect_fn(image):
    """Detect objects in image."""

    image, shapes = model.preprocess(image)
    prediction_dict = model.predict(image, shapes)
    detections = model.postprocess(prediction_dict, shapes)

    return detections, prediction_dict, tf.reshape(shapes, [-1])

  return detect_fn

detect_fn = get_model_detection_function(detection_model)

2024-12-13 16:15:10.929535: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-12-13 16:15:10.931106: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-12-13 16:15:10.951172: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

# Load label map data (for plotting).

Label maps correspond index numbers to category names, so that when our convolution network predicts `5`, we know that this corresponds to `airplane`.  Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine.

In [4]:
label_map_path = configs['eval_input_config'].label_map_path
label_map = label_map_util.load_labelmap(label_map_path)
categories = label_map_util.convert_label_map_to_categories(
    label_map,
    max_num_classes=label_map_util.get_max_label_map_index(label_map),
    use_display_name=True)
category_index = label_map_util.create_category_index(categories)
label_map_dict = label_map_util.get_label_map_dict(label_map, use_display_name=True)

### Putting everything together!

Run the below code which loads an image, runs it through the detection model and visualizes the detection results, including the keypoints.

Note that this will take a long time (several minutes) the first time you run this code due to tf.function's trace-compilation --- on subsequent runs (e.g. on new images), things will be faster.

Here are some simple things to try out if you are curious:
* Try running inference on your own images (local paths work)
* Modify some of the input images and see if detection still works.  Some simple things to try out here (just uncomment the relevant portions of code) include flipping the image horizontally, or converting to grayscale (note that we still expect the input image to have 3 channels).
* Print out `detections['detection_boxes']` and try to match the box locations to the boxes in the image.  Notice that coordinates are given in normalized form (i.e., in the interval [0, 1]).
* Set min_score_thresh to other values (between 0 and 1) to allow more detections in or to filter out more detections.

Note that you can run this cell repeatedly without rerunning earlier cells.


In [5]:
#image_dir = "/mnt/sda1/Backup/heif_lite"
#image_dir = "/mnt/sda1/Backup/jpg_lite"
image_dir = "/mnt/sda1/Backup/heif_filtered/Ni_nodate"
#image_dir = "/home/ttran/projects/TFmodels2/models/research/object_detection/test_images"
#image_path = os.path.join(image_dir, '0a00b11de9ad098befcd6543625b311a9a90ca80_lite.jpg')

#result = []
label_id_offset = 1
timestamp = time.time()

file_list = os.listdir(image_dir)

def split_list_into_chunks(lst, n):
    """Splits a list into chunks of size n."""
    for i in range(0, len(lst), n):
        yield lst[i:i + n]
chunk_size = 1000

list_of_list = list(split_list_into_chunks(file_list, chunk_size))

def run_inference(directory, list_of_list):
    count = 0
    start_point = 0
    # Make sure result dir exist
    dest_path = os.path.join("/mnt/sda1/Backup/result", "result_" + str(timestamp))
    os.makedirs(dest_path, exist_ok=True)
    for chunk in range(len(list_of_list)):
        result = []
        print(f"Current count: {count}")
        for filename in tqdm(list_of_list[chunk+start_point], desc="Processing"):
            try:
                print(f"Working on {filename}") 
                if "_skip" in filename:
                    continue
                _result = {}
                file_path = os.path.join(directory, filename) 
                start_time = time.time()
                image_np = load_image_into_numpy_array(file_path)
                input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
                detections, predictions_dict, shapes = detect_fn(input_tensor)
                # Class result
                pred_class = (detections['detection_classes'][0].numpy() + label_id_offset).astype(int)
                pred_score = detections['detection_scores'][0].numpy()
                end_time = time.time()
                _result['name'] = filename
                _result['timestamp'] = timestamp
                _result['inferred'] = "x"
                _result['time(ms)'] = round((end_time - start_time)*1000,2)
                for i in range(len(pred_class)):
                    _result['pred_' + str(i+1)] = category_index[pred_class[i]]['name']
                    _result['pred_' + str(i+1) + '_score'] = pred_score[i]
                print(f"Finished {filename} in {_result['time(ms)']} ms")
                result.append(_result)
            except Exception as e:
                print(f"Can't process {filename}")
                new_name = filename + "_skip"
                new_file_path = os.path.join(directory, new_name)
                os.rename(file_path, new_file_path)
                print(f"Renamed {filename} into {new_name}")
        result_df = pd.DataFrame(result)
        result_df.to_parquet(f"{dest_path}/chunk_{chunk+start_point}.parquet")
        gc.collect()
        print(f"Finished chunk {chunk}")
        count = count + 1
        # Stop the code after processing 3000 photos to avoid OOM issue. Update start_point above to match the next batch. 
        if count == 2:
            break
    #return result

result = run_inference(image_dir, list_of_list)
result_df = pd.DataFrame(result)
#result_df.to_parquet(f"/mnt/sda1/Backup/result/result_{timestamp}.parquet")
result_df

Current count: 0


Processing:   0%|          | 0/469 [00:00<?, ?it/s]

Working on 2fac51a36dcac97b1021d4599fed1797c9a6db92.jpg


2024-12-13 16:15:22.194333: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8902
2024-12-13 16:15:22.233799: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2024-12-13 16:15:22.296737: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:606] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Finished 2fac51a36dcac97b1021d4599fed1797c9a6db92.jpg in 11662.76 ms
Working on 2fbe50f43f4bcb1c0c6f8e7c000dd866fbc0e510.jpg
Finished 2fbe50f43f4bcb1c0c6f8e7c000dd866fbc0e510.jpg in 4781.43 ms
Working on 47962a8fb7b19f404befd8b3847a10a0a59b0ae9.jpg
Finished 47962a8fb7b19f404befd8b3847a10a0a59b0ae9.jpg in 2725.83 ms
Working on 5f53ffa689f141b0b946dc4f4979123607f13ede.jpg
Finished 5f53ffa689f141b0b946dc4f4979123607f13ede.jpg in 4485.16 ms
Working on b14e02d7cb3f522b575a2ca4f836761b4dfeac17.jpg
Finished b14e02d7cb3f522b575a2ca4f836761b4dfeac17.jpg in 3361.89 ms
Working on 24daea6231f95a09d04d6c7fbd12fab247f2d2c9.jpg
Finished 24daea6231f95a09d04d6c7fbd12fab247f2d2c9.jpg in 2744.96 ms
Working on d1c1585dc37c29a96cd8600aa4723872d1f10692.jpg
Finished d1c1585dc37c29a96cd8600aa4723872d1f10692.jpg in 2684.9 ms
Working on fd88360a9fc698fc9b5e18de2698377e5a90c9dd.jpg
Finished fd88360a9fc698fc9b5e18de2698377e5a90c9dd.jpg in 2733.7 ms
Working on 04d14633779b92df5e3e36c70165fccc9757c3c2.jpg
Finished 



Finished 29e56d6b439ae3c486370ca0723a4ca9be2c5c00.jpg in 3015.64 ms
Working on cad3e1de3979868e181a54c121057c74f22aa013.jpg
Finished cad3e1de3979868e181a54c121057c74f22aa013.jpg in 2818.03 ms
Working on eecaaa8be7d11942778d83b847816fded1cedf1a.jpg
Finished eecaaa8be7d11942778d83b847816fded1cedf1a.jpg in 1617.81 ms
Working on 7eec9788db79b9a14f36a80a1b25a950f83f6a0e.jpg
Finished 7eec9788db79b9a14f36a80a1b25a950f83f6a0e.jpg in 2716.07 ms
Working on b8c4319809af8c979393795a3d087f40991ec50f.jpg
Finished b8c4319809af8c979393795a3d087f40991ec50f.jpg in 2713.81 ms
Working on 5575c005d21a20e4d1d7e43db112711c91e47b10.jpg
Finished 5575c005d21a20e4d1d7e43db112711c91e47b10.jpg in 2722.84 ms
Working on ae9a8b83d3e9b14d97b3e30de3367341dc5c3e8c.jpg
Finished ae9a8b83d3e9b14d97b3e30de3367341dc5c3e8c.jpg in 2847.6 ms
Working on b306f94198e432badecdd36b4da7ce67f8fac2e7.jpg
Finished b306f94198e432badecdd36b4da7ce67f8fac2e7.jpg in 3780.91 ms
Working on 85882b1e0c66116c7ed765a32b5f8d12d1a23899.jpg
Finished 