# Intro to Object Detection Colab

Welcome to the object detection colab!  This demo will take you through the steps of running an "out-of-the-box" detection model on a collection of images.

## Imports and Setup

In [1]:
import os
import pathlib
import matplotlib
import matplotlib.pyplot as plt

import io
import scipy.misc
import numpy as np
from six import BytesIO
from PIL import Image, ImageDraw, ImageFont

import tensorflow as tf
import pandas as pd
import time
from tqdm.notebook import tqdm
import gc

from object_detection.utils import label_map_util
from object_detection.utils import config_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.builders import model_builder

%matplotlib inline

2024-12-12 17:25:13.399810: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-12-12 17:25:13.426165: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Utilities

In [2]:
def load_image_into_numpy_array(path):
  """Load an image from file into a numpy array.

  Puts image into numpy array to feed into tensorflow graph.
  Note that by convention we put it into a numpy array with shape
  (height, width, channels), where channels=3 for RGB.

  Args:
    path: the file path to the image

  Returns:
    uint8 numpy array with shape (img_height, img_width, 3)
  """
  img_data = tf.io.gfile.GFile(path, 'rb').read()
  image = Image.open(BytesIO(img_data))
  (im_width, im_height) = image.size
  return np.array(image.getdata()).reshape(
      (im_height, im_width, 3)).astype(np.uint8)

def get_keypoint_tuples(eval_config):
  """Return a tuple list of keypoint edges from the eval config.
  
  Args:
    eval_config: an eval config containing the keypoint edges
  
  Returns:
    a list of edge tuples, each in the format (start, end)
  """
  tuple_list = []
  kp_list = eval_config.keypoint_edge
  for edge in kp_list:
    tuple_list.append((edge.start, edge.end))
  return tuple_list

### Build a detection model and load pre-trained model weights

This sometimes takes a little while, please be patient!

In [3]:
pipeline_config = "/home/ttran/projects/TFmodels2/models/research/object_detection/training/ssd_resnet152_v1_fpn_640x640_coco17_tpu-8.config"
model_dir = "/home/ttran/projects/TFmodels2/models/research/object_detection/ssd_resnet152_v1_fpn_640x640_coco17_tpu-8_3"

# Load pipeline config and build a detection model
configs = config_util.get_configs_from_pipeline_file(pipeline_config)
model_config = configs['model']
detection_model = model_builder.build(
      model_config=model_config, is_training=False)

# Restore checkpoint
ckpt = tf.compat.v2.train.Checkpoint(
      model=detection_model)
ckpt.restore(os.path.join(model_dir, 'ckpt-26')).expect_partial()

def get_model_detection_function(model):
  """Get a tf.function for detection."""

  @tf.function
  def detect_fn(image):
    """Detect objects in image."""

    image, shapes = model.preprocess(image)
    prediction_dict = model.predict(image, shapes)
    detections = model.postprocess(prediction_dict, shapes)

    return detections, prediction_dict, tf.reshape(shapes, [-1])

  return detect_fn

detect_fn = get_model_detection_function(detection_model)

2024-12-12 17:25:14.876535: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-12-12 17:25:14.878133: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-12-12 17:25:14.898939: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

# Load label map data (for plotting).

Label maps correspond index numbers to category names, so that when our convolution network predicts `5`, we know that this corresponds to `airplane`.  Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine.

In [4]:
label_map_path = configs['eval_input_config'].label_map_path
label_map = label_map_util.load_labelmap(label_map_path)
categories = label_map_util.convert_label_map_to_categories(
    label_map,
    max_num_classes=label_map_util.get_max_label_map_index(label_map),
    use_display_name=True)
category_index = label_map_util.create_category_index(categories)
label_map_dict = label_map_util.get_label_map_dict(label_map, use_display_name=True)

### Putting everything together!

Run the below code which loads an image, runs it through the detection model and visualizes the detection results, including the keypoints.

Note that this will take a long time (several minutes) the first time you run this code due to tf.function's trace-compilation --- on subsequent runs (e.g. on new images), things will be faster.

Here are some simple things to try out if you are curious:
* Try running inference on your own images (local paths work)
* Modify some of the input images and see if detection still works.  Some simple things to try out here (just uncomment the relevant portions of code) include flipping the image horizontally, or converting to grayscale (note that we still expect the input image to have 3 channels).
* Print out `detections['detection_boxes']` and try to match the box locations to the boxes in the image.  Notice that coordinates are given in normalized form (i.e., in the interval [0, 1]).
* Set min_score_thresh to other values (between 0 and 1) to allow more detections in or to filter out more detections.

Note that you can run this cell repeatedly without rerunning earlier cells.


In [5]:
#image_dir = "/mnt/sda1/Backup/heif_lite"
image_dir = "/mnt/sda1/Backup/jpg_lite"
#image_dir = "/home/ttran/projects/TFmodels2/models/research/object_detection/test_images"
#image_path = os.path.join(image_dir, '0a00b11de9ad098befcd6543625b311a9a90ca80_lite.jpg')

#result = []
label_id_offset = 1
timestamp = time.time()

file_list = os.listdir(image_dir)

def split_list_into_chunks(lst, n):
    """Splits a list into chunks of size n."""
    for i in range(0, len(lst), n):
        yield lst[i:i + n]
chunk_size = 1000

list_of_list = list(split_list_into_chunks(file_list, chunk_size))

def run_inference(directory, list_of_list):
    count = 0
    start_point = 37
    for chunk in range(len(list_of_list)):
        result = []
        print(f"Current count: {count}")
        for filename in tqdm(list_of_list[chunk+start_point], desc="Processing"):
            try:
                print(f"Working on {filename}") 
                if "_skip" in filename:
                    continue
                _result = {}
                file_path = os.path.join(directory, filename) 
                start_time = time.time()
                image_np = load_image_into_numpy_array(file_path)
                input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
                detections, predictions_dict, shapes = detect_fn(input_tensor)
                # Class result
                pred_class = (detections['detection_classes'][0].numpy() + label_id_offset).astype(int)
                pred_score = detections['detection_scores'][0].numpy()
                end_time = time.time()
                _result['name'] = filename
                _result['timestamp'] = timestamp
                _result['inferred'] = "x"
                _result['time(ms)'] = round((end_time - start_time)*1000,2)
                for i in range(len(pred_class)):
                    _result['pred_' + str(i+1)] = category_index[pred_class[i]]['name']
                    _result['pred_' + str(i+1) + '_score'] = pred_score[i]
                print(f"Finished {filename} in {_result['time(ms)']} ms")
                result.append(_result)
            except Exception as e:
                print(f"Can't process {filename}")
                new_name = filename + "_skip"
                new_file_path = os.path.join(directory, new_name)
                os.rename(file_path, new_file_path)
                print(f"Renamed {filename} into {new_name}")
        result_df = pd.DataFrame(result)
        result_df.to_parquet(f"/mnt/sda1/Backup/result/result_jpg_chunk_{chunk+start_point}.parquet")
        gc.collect()
        print(f"Finished chunk {chunk}")
        count = count + 1
        # Stop the code after processing 3000 photos to avoid OOM issue. Update start_point above to match the next batch. 
        if count == 2:
            break
    #return result

result = run_inference(image_dir, list_of_list)
result_df = pd.DataFrame(result)
result_df.to_parquet(f"/mnt/sda1/Backup/result/result_{timestamp}.parquet")
result_df

Current count: 0


Processing:   0%|          | 0/845 [00:00<?, ?it/s]

Working on 7f3b10835d9e684c98fc141fc7c8f11793fb8430.jpg


2024-12-12 17:25:24.086501: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8902
2024-12-12 17:25:24.127359: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2024-12-12 17:25:24.196207: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:606] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Finished 7f3b10835d9e684c98fc141fc7c8f11793fb8430.jpg in 9615.07 ms
Working on 400a820f33cdb395bcdf23741e0fb5533d0b3c74.jpg
Finished 400a820f33cdb395bcdf23741e0fb5533d0b3c74.jpg in 1977.15 ms
Working on 5bbc97ab8ed474c37fccc506b515065850b963b1.jpg
Finished 5bbc97ab8ed474c37fccc506b515065850b963b1.jpg in 1997.09 ms
Working on 6a3587dfcc6a1eeed8dba89fb1ab5be9152a6481.jpg
Finished 6a3587dfcc6a1eeed8dba89fb1ab5be9152a6481.jpg in 138.26 ms
Working on 5326b33b75c5cb5e4ee72b00bb48699ee08161be.jpg
Finished 5326b33b75c5cb5e4ee72b00bb48699ee08161be.jpg in 1834.96 ms
Working on f98ee05999fc1801a06d90bd0a7d56631d9b77d1.jpg
Finished f98ee05999fc1801a06d90bd0a7d56631d9b77d1.jpg in 73.28 ms
Working on 12a405534d91a38b3eeef2e83f989f13a62bf542.jpg




Finished 12a405534d91a38b3eeef2e83f989f13a62bf542.jpg in 1847.51 ms
Working on 9c7cfd01b4afe68935810000de510c1a21d79311.jpg
Finished 9c7cfd01b4afe68935810000de510c1a21d79311.jpg in 55.34 ms
Working on 40652bab7d29683b498cb33c4415c6db1d175735.jpg
Finished 40652bab7d29683b498cb33c4415c6db1d175735.jpg in 72.59 ms
Working on f20aa96a9a18755391d19fd2554ab637bc6ff0d7.jpg




Finished f20aa96a9a18755391d19fd2554ab637bc6ff0d7.jpg in 2103.06 ms
Working on 066b7b1bd70f87e55dd2c6f9ec74b790524464f8.jpg
Finished 066b7b1bd70f87e55dd2c6f9ec74b790524464f8.jpg in 73.6 ms
Working on 7acc2c3ded5fa5168a9cf6820903a6de45791db9.jpg
Finished 7acc2c3ded5fa5168a9cf6820903a6de45791db9.jpg in 71.72 ms
Working on 2ef8fe1a57e9368efed7232f377716e64e5dd776.jpg
Finished 2ef8fe1a57e9368efed7232f377716e64e5dd776.jpg in 72.28 ms
Working on 71acc16f9c38f7a0a4a01cd9412d6de61eb167ee.jpg
Finished 71acc16f9c38f7a0a4a01cd9412d6de61eb167ee.jpg in 71.72 ms
Working on a401bbd724d6138066501f149884b8bf564d319a.jpg
Finished a401bbd724d6138066501f149884b8bf564d319a.jpg in 52.73 ms
Working on bcba10ef2300f658e63020af7846ca80530d25ae.jpg
Finished bcba10ef2300f658e63020af7846ca80530d25ae.jpg in 70.0 ms
Working on c0d308b0c915ba565ffb0b4e55de01a36082c0ac.jpg
Finished c0d308b0c915ba565ffb0b4e55de01a36082c0ac.jpg in 1889.6 ms
Working on 0f5705389ffa783f1ffb3c79bef466818cb19fbc.jpg
Finished 0f5705389ffa78

IndexError: list index out of range

In [6]:
result

In [7]:
num_pred = 100
list_of_pred = []
list_of_score=[]
col_to_drop = []
for i in range(num_pred):
    pred_col = "pred_" + str(i + 1)
    list_of_pred.append(pred_col)
    list_of_score = "pred_" + str(i + 1) + "_score"

In [8]:
view_result = pd.read_parquet("/mnt/sda1/Backup/result/result_1733953814.3274329.parquet")
#view_result.sort_values(by=['pred_1_score'], ascending=False).head(20)
view_result

FileNotFoundError: [Errno 2] No such file or directory: '/mnt/sda1/Backup/result/result_1733953814.3274329.parquet'

In [None]:
final_result = view_result[['name', 'timestamp', 'inferred', 'time(ms)', 'pred_1', 'pred_1_score']]
final_result[final_result.pred_1_score > 0.3]

Unnamed: 0,name,timestamp,inferred,time(ms),pred_1,pred_1_score
1,95ae1c23f4c925b3db84d921de5adee8e4740805.jpg,1.733954e+09,x,138.23,may22,0.524855
2,9c60590711e39ba8472dff3fc95c114c57daa957.jpg,1.733954e+09,x,136.82,jun22,0.922711
4,e258a1c1a3b6be4351d0f255bf26fca74c6a2715.jpg,1.733954e+09,x,140.64,jun22,0.928089
5,20d648a9a07b7fc45519aae30ce568fa5e5f9726.jpg,1.733954e+09,x,138.44,jun22,0.939674
7,9ffc935535b464c009194044503636b95b534cdb.jpg,1.733954e+09,x,137.25,aug22,0.972110
...,...,...,...,...,...,...
14595,10f89358e3d5d8358cadd022395092868c529ce3.jpg,1.733954e+09,x,131.16,apr22,0.967149
14596,f92d0f66d74dff89ec7f68917fa2006ce90f42cb.jpg,1.733954e+09,x,116.14,jun22,0.783865
14597,e2c38ceedf02dc8c48a90221aa465391154f5ff4.jpg,1.733954e+09,x,132.56,mar22,0.967408
14605,3a8fa2940ce31916b4545754ceba279206c759bd.jpg,1.733954e+09,x,132.18,aug22,0.520826
