In [1]:
# Clone Tensorflow Models Repository
!git clone https://github.com/tensorflow/models.git

Cloning into 'models'...
remote: Enumerating objects: 87254, done.[K
remote: Counting objects: 100% (1008/1008), done.[K
remote: Compressing objects: 100% (485/485), done.[K
remote: Total 87254 (delta 587), reused 899 (delta 512), pack-reused 86246[K
Receiving objects: 100% (87254/87254), 599.26 MiB | 25.71 MiB/s, done.
Resolving deltas: 100% (62446/62446), done.


In [2]:
# Install Protobuf and compile the Object Detection API protos
# Installing Protobuf yourself is not needed on Colab; the protoc command at the bottom of this cell is still executed.

# If you're doing this locally, download the latest Protobuf release for your OS from https://github.com/google/protobuf/releases

# The filename should look like "protoc-<version>-<platform>.zip".

# Assuming you've unzipped this zip file to <path>, the next step is to add <path>/bin to your PATH environment variable (on Linux or Mac).

# Once Protobuf has been successfully installed, you can run the following command. It must be run from the models/research folder (hence the `cd`); --python_out=. writes the generated Python code under that folder.

!cd models/research/ && protoc object_detection/protos/*.proto --python_out=.

In [3]:
# Install the Object Detection API
# Copies the TF2 setup.py into models/research and pip-installs that directory
# as a package. The steps are chained with `&&`, so a failing step stops the rest.
!cd models/research && \
    cp object_detection/packages/tf2/setup.py . && \
    python -m pip install .

Processing /content/models/research
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting avro-python3 (from object-detection==0.1)
  Downloading avro-python3-1.10.2.tar.gz (38 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting apache-beam (from object-detection==0.1)
  Downloading apache_beam-2.49.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (14.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.6/14.6 MB[0m [31m99.7 MB/s[0m eta [36m0:00:00[0m
Collecting lvis (from object-detection==0.1)
  Downloading lvis-0.5.3-py3-none-any.whl (14 kB)
Collecting tf-models-official>=2.5.1 (from object-detection==0.1)
  Downloading tf_models_official-2.13.1-py2.py3-none-any.whl (2.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.6/2.6 MB[0m [31m88.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tensorflow_io (from object-detection==0.1)
  Downloading tensorflow_io-0.33.0-cp310-cp310-manylinux_2_12_x86_64.ma

In [4]:
# Test Installation (Optional)
# Runs the TF2 model-builder test script from inside models/research as a
# sanity check that the Object Detection API can be imported and used.
!cd models/research && python object_detection/builders/model_builder_tf2_test.py

2023-08-21 21:42:31.910204: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-08-21 21:42:43.168401: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-08-21 21:42:43.727516: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-08-

In [5]:
# Import Libraries
import os
import imageio
import pathlib
import tensorflow as tf

import time
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils

import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

%matplotlib inline

In [6]:
# Enable GPU dynamic memory allocation
# With memory growth switched on, TensorFlow allocates GPU memory as it is
# needed rather than reserving it up front.
# `tf.config.list_physical_devices` is the stable name of the older
# `tf.config.experimental.list_physical_devices` alias.
# When no GPU is visible the list is empty and the loop does nothing.
gpus = tf.config.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

In [7]:
# Download Videos
# -nc ("no clobber") skips the download when cnn_class2_videos.zip already
# exists, so this cell can safely be re-run.
!wget -nc https://lazyprogrammer.me/cnn_class2_videos.zip

--2023-08-21 21:44:20--  https://lazyprogrammer.me/cnn_class2_videos.zip
Resolving lazyprogrammer.me (lazyprogrammer.me)... 172.67.213.166, 104.21.23.210, 2606:4700:3030::ac43:d5a6, ...
Connecting to lazyprogrammer.me (lazyprogrammer.me)|172.67.213.166|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2073140 (2.0M) [application/zip]
Saving to: ‘cnn_class2_videos.zip’


2023-08-21 21:44:21 (2.85 MB/s) - ‘cnn_class2_videos.zip’ saved [2073140/2073140]



In [8]:
!unzip cnn_class2_videos.zip

Archive:  cnn_class2_videos.zip
  inflating: catdog.mp4              
  inflating: safari.mp4              
  inflating: traffic.mp4             


In [9]:
# List the working directory to confirm the videos were extracted.
!ls

catdog.mp4  cnn_class2_videos.zip  models  safari.mp4  sample_data  traffic.mp4


In [10]:
# Base names of the videos to annotate; the ".mp4" extension is appended
# where the files are opened.
INPUT_VIDEOS = [
    'catdog',
    'safari',
    'traffic',
]

In [11]:
# Download and extract model files
# Get URLs from the "Object Detection Zoo": https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf2_detection_zoo.md

# Archive URL, split over two adjacent string literals for readability
# (Python joins them into the same single string).
url = (
    'http://download.tensorflow.org/models/object_detection/tf2/20200711/'
    'ssd_resnet101_v1_fpn_640x640_coco17_tpu-8.tar.gz'
)

# get_file fetches the archive, unpacks it (untar=True) and returns a local
# path, which is kept as the model directory.
PATH_TO_MODEL_DIR = tf.keras.utils.get_file(
    origin=url,
    fname='ssd_resnet101_v1_fpn_640x640_coco17_tpu-8',
    untar=True,
)

Downloading data from http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_resnet101_v1_fpn_640x640_coco17_tpu-8.tar.gz


In [12]:
# Display the local path of the extracted model directory.
PATH_TO_MODEL_DIR

'/root/.keras/datasets/ssd_resnet101_v1_fpn_640x640_coco17_tpu-8'

In [13]:
# Download Labels File
# Label files can be found here: https://github.com/tensorflow/models/tree/master/research/object_detection/data

# You probably won't need any of the other label files, since the Object Detection Zoo contains only models trained on COCO.

# Label-map URL, split over two adjacent string literals for readability
# (Python joins them into the same single string).
url = (
    'https://raw.githubusercontent.com/tensorflow/models/master/'
    'research/object_detection/data/mscoco_label_map.pbtxt'
)

# The label map is a plain text file, so nothing has to be unpacked.
PATH_TO_LABELS = tf.keras.utils.get_file(
    origin=url,
    fname='mscoco_label_map.pbtxt',
    untar=False,
)



Downloading data from https://raw.githubusercontent.com/tensorflow/models/master/research/object_detection/data/mscoco_label_map.pbtxt


In [14]:
# Display the local path of the downloaded label map.
PATH_TO_LABELS

'/root/.keras/datasets/mscoco_label_map.pbtxt'

In [15]:
# Peek at the first lines of the label map; IPython substitutes the Python
# variable inside the braces before running the shell command.
!head {PATH_TO_LABELS}

item {
  name: "/m/01g317"
  id: 1
  display_name: "person"
}
item {
  name: "/m/0199g"
  id: 2
  display_name: "bicycle"
}


In [16]:
# Load in the model
# The SavedModel is read from the "saved_model" subfolder of the model directory.
PATH_TO_SAVED_MODEL = PATH_TO_MODEL_DIR + "/saved_model"

# end='' keeps the output on one line so that "Done! ..." is appended to it.
print('Loading model...', end='')
start_time = time.time()

# Load saved model and build the detection function
detect_fn = tf.saved_model.load(PATH_TO_SAVED_MODEL)

# Report how long the load took, in wall-clock seconds.
end_time = time.time()
elapsed_time = end_time - start_time
print('Done! Took {} seconds'.format(elapsed_time))

Loading model...Done! Took 23.19679093360901 seconds


In [17]:
# Load in the labels
# Builds the lookup from class id to category that is later handed to the
# visualization helper. With use_display_name=True the category name is taken
# from each item's display_name field (e.g. "person").
category_index = label_map_util.create_category_index_from_labelmap(
    PATH_TO_LABELS,
    use_display_name=True)

In [18]:
# Inspect the mapping: class id -> {'id': ..., 'name': ...}.
category_index

{1: {'id': 1, 'name': 'person'},
 2: {'id': 2, 'name': 'bicycle'},
 3: {'id': 3, 'name': 'car'},
 4: {'id': 4, 'name': 'motorcycle'},
 5: {'id': 5, 'name': 'airplane'},
 6: {'id': 6, 'name': 'bus'},
 7: {'id': 7, 'name': 'train'},
 8: {'id': 8, 'name': 'truck'},
 9: {'id': 9, 'name': 'boat'},
 10: {'id': 10, 'name': 'traffic light'},
 11: {'id': 11, 'name': 'fire hydrant'},
 13: {'id': 13, 'name': 'stop sign'},
 14: {'id': 14, 'name': 'parking meter'},
 15: {'id': 15, 'name': 'bench'},
 16: {'id': 16, 'name': 'bird'},
 17: {'id': 17, 'name': 'cat'},
 18: {'id': 18, 'name': 'dog'},
 19: {'id': 19, 'name': 'horse'},
 20: {'id': 20, 'name': 'sheep'},
 21: {'id': 21, 'name': 'cow'},
 22: {'id': 22, 'name': 'elephant'},
 23: {'id': 23, 'name': 'bear'},
 24: {'id': 24, 'name': 'zebra'},
 25: {'id': 25, 'name': 'giraffe'},
 27: {'id': 27, 'name': 'backpack'},
 28: {'id': 28, 'name': 'umbrella'},
 31: {'id': 31, 'name': 'handbag'},
 32: {'id': 32, 'name': 'tie'},
 33: {'id': 33, 'name': 'suitc

In [19]:
# Do some object detection
def detect_objects_in_image(image_np, min_score_thresh=.30, max_boxes_to_draw=200):
    """Run the detector on one image and return an annotated copy of it.

    Args:
        image_np: The image as a NumPy array. It is converted to a tensor and
            given a leading batch axis before being passed to `detect_fn`.
            NOTE(review): presumably an (H, W, 3) uint8 frame -- confirm
            against the model's expected input.
        min_score_thresh: Detections scoring below this value are not drawn.
            Defaults to 0.30, the value that used to be hard-coded.
        max_boxes_to_draw: Upper limit on the number of boxes drawn.
            Defaults to 200, the value that used to be hard-coded.

    Returns:
        A copy of `image_np` with the detection boxes and labels drawn on it.
        The input array itself is left untouched.
    """
    # The input needs to be a tensor, convert it using `tf.convert_to_tensor`.
    input_tensor = tf.convert_to_tensor(image_np)

    # The model expects a batch of images, so add an axis with `tf.newaxis`.
    input_tensor = input_tensor[tf.newaxis, ...]

    detections = detect_fn(input_tensor)

    # All outputs are batched tensors.
    # Convert to numpy arrays, and take index [0] to remove the batch dimension.
    # We're only interested in the first num_detections.
    num_detections = int(detections.pop('num_detections'))
    detections = {key: value[0, :num_detections].numpy()
                  for key, value in detections.items()}
    detections['num_detections'] = num_detections

    # detection_classes should be ints.
    detections['detection_classes'] = detections['detection_classes'].astype(np.int64)

    # Draw on a copy so that the caller's array is not modified.
    image_np_with_detections = image_np.copy()

    viz_utils.visualize_boxes_and_labels_on_image_array(
          image_np_with_detections,
          detections['detection_boxes'],
          detections['detection_classes'],
          detections['detection_scores'],
          category_index,
          use_normalized_coordinates=True,
          max_boxes_to_draw=max_boxes_to_draw,
          min_score_thresh=min_score_thresh,
          agnostic_mode=False)
    return image_np_with_detections


def detect_objects_in_video(input_video, fps=10):
    """Annotate every frame of a video and save the result as a new video.

    Reads `<input_video>.mp4`, runs `detect_objects_in_image` on each frame and
    writes the annotated frames to `<input_video>_annotated.mp4`. Progress and
    the measured processing speed are printed.

    Args:
        input_video: Base name of the video file, without the ".mp4" extension.
        fps: Frame rate of the written video. Defaults to 10, the value that
            used to be hard-coded.

    Returns:
        None. The annotated video is written to disk.
    """
    print(f'Running inference for {input_video}.mp4... ', end='')

    video_reader = imageio.get_reader(f'{input_video}.mp4')
    try:
        video_writer = imageio.get_writer(f'{input_video}_annotated.mp4', fps=fps)
        try:
            # loop through and process each frame
            t0 = time.time()
            n_frames = 0
            for frame in video_reader:
                n_frames += 1
                new_frame = detect_objects_in_image(frame)

                # instead of plotting image, we write the frame to video
                video_writer.append_data(new_frame)

            # Guard against a zero elapsed time (e.g. an empty video on a
            # coarse clock) so the speed report cannot divide by zero.
            elapsed = time.time() - t0
            speed = n_frames / elapsed if elapsed > 0 else 0.0
            print("Frames processed: %s, Speed: %s fps" % (n_frames, speed))
        finally:
            # Always finalize the output file, even if inference fails midway.
            video_writer.close()
    finally:
        # The reader was previously never closed; release it in every case.
        video_reader.close()

In [20]:
# Annotate the first video ('catdog'); the result is written to catdog_annotated.mp4.
detect_objects_in_video(INPUT_VIDEOS[0])

Running inference for catdog.mp4... Frames processed: 50, Speed: 1.3324873128699466 fps


In [21]:
# Annotate the second video ('safari'); the result is written to safari_annotated.mp4.
detect_objects_in_video(INPUT_VIDEOS[1])

Running inference for safari.mp4... 



Frames processed: 100, Speed: 2.8000518606254436 fps


In [22]:
# Annotate the third video ('traffic'); the result is written to traffic_annotated.mp4.
detect_objects_in_video(INPUT_VIDEOS[2])

Running inference for traffic.mp4... Frames processed: 70, Speed: 2.5832447201844775 fps
