In [None]:
# Fetch the TensorFlow models repository, which contains the Object Detection API sources
!git clone https://github.com/tensorflow/models.git

# Compile protos
# (protoc writes the generated Python modules next to the .proto files under models/research)
!apt-get install -y protobuf-compiler
!cd models/research && protoc object_detection/protos/*.proto --python_out=.

# Install the Object Detection API
# (the TF2 setup.py is copied into models/research so that pip installs the package from there)
!cd models/research && cp object_detection/packages/tf2/setup.py . && python -m pip install .


In [None]:
# Downgrade protobuf for compatibility with other libraries
# (pins exact protobuf/grpcio versions; --force-reinstall replaces whatever is
# already installed. NOTE(review): presumably the runtime must be restarted
# afterwards for the pinned versions to be picked up — confirm on your environment.)
!pip install protobuf==3.20.3 grpcio==1.51.1 --force-reinstall


In [1]:
import os
import pathlib
import tensorflow as tf

import time
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils

import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

%matplotlib inline

# Let TensorFlow allocate GPU memory on demand for every visible GPU
gpus = tf.config.experimental.list_physical_devices('GPU')
for device in gpus:
    tf.config.experimental.set_memory_growth(device, enable=True)


In [None]:
# Download the sample videos archive; -nc skips the download if the file is already present
!wget -nc https://lazyprogrammer.me/course_files/cnn_class2_videos.zip

In [None]:
!unzip cnn_class2_videos.zip

In [4]:
!ls

catdog.mp4  cnn_class2_videos.zip  models  safari.mp4  sample_data  traffic.mp4


In [5]:
# Base names (without the .mp4 extension) of the sample videos to annotate
INPUT_VIDEOS = [
    'catdog',
    'safari',
    'traffic',
]

In [None]:
#Download and extract model files
import tensorflow as tf

url = 'http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_resnet101_v1_fpn_640x640_coco17_tpu-8.tar.gz'

!wget -nc http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_resnet101_v1_fpn_640x640_coco17_tpu-8.tar.gz
!tar -xvf ssd_resnet101_v1_fpn_640x640_coco17_tpu-8.tar.gz

# Set path to the extracted folder
PATH_TO_MODEL_DIR = 'ssd_resnet101_v1_fpn_640x640_coco17_tpu-8'


In [13]:
# The exported SavedModel sits in the `saved_model` sub-folder of the model directory
PATH_TO_SAVED_MODEL = f"{PATH_TO_MODEL_DIR}/saved_model"

print('Loading model...', end='')
start_time = time.time()
detect_fn = tf.saved_model.load(PATH_TO_SAVED_MODEL)  # callable used for inference in later cells
end_time = time.time()

# Report how long loading took (wall-clock seconds)
elapsed = end_time - start_time
print('Done! Took {} seconds'.format(elapsed))


Loading model...Done! Took 92.07883763313293 seconds


In [7]:
PATH_TO_MODEL_DIR

'/root/.keras/datasets/ssd_resnet101_v1_fpn_640x640_coco17_tpu-8'

In [None]:
#Download and Load Labels File
url = 'https://raw.githubusercontent.com/tensorflow/models/master/research/object_detection/data/mscoco_label_map.pbtxt'

PATH_TO_LABELS = tf.keras.utils.get_file(
    fname='mscoco_label_map.pbtxt',
    origin=url,
    untar=False)


!head {PATH_TO_LABELS}

# Load in the labels
from object_detection.utils import label_map_util
category_index = label_map_util.create_category_index_from_labelmap(
    PATH_TO_LABELS,
    use_display_name=True)


item {
  name: "/m/01g317"
  id: 1
  display_name: "person"
}
item {
  name: "/m/0199g"
  id: 2
  display_name: "bicycle"
}


In [9]:
PATH_TO_LABELS

'/root/.keras/datasets/mscoco_label_map.pbtxt'

In [17]:
category_index

{1: {'id': 1, 'name': 'person'},
 2: {'id': 2, 'name': 'bicycle'},
 3: {'id': 3, 'name': 'car'},
 4: {'id': 4, 'name': 'motorcycle'},
 5: {'id': 5, 'name': 'airplane'},
 6: {'id': 6, 'name': 'bus'},
 7: {'id': 7, 'name': 'train'},
 8: {'id': 8, 'name': 'truck'},
 9: {'id': 9, 'name': 'boat'},
 10: {'id': 10, 'name': 'traffic light'},
 11: {'id': 11, 'name': 'fire hydrant'},
 13: {'id': 13, 'name': 'stop sign'},
 14: {'id': 14, 'name': 'parking meter'},
 15: {'id': 15, 'name': 'bench'},
 16: {'id': 16, 'name': 'bird'},
 17: {'id': 17, 'name': 'cat'},
 18: {'id': 18, 'name': 'dog'},
 19: {'id': 19, 'name': 'horse'},
 20: {'id': 20, 'name': 'sheep'},
 21: {'id': 21, 'name': 'cow'},
 22: {'id': 22, 'name': 'elephant'},
 23: {'id': 23, 'name': 'bear'},
 24: {'id': 24, 'name': 'zebra'},
 25: {'id': 25, 'name': 'giraffe'},
 27: {'id': 27, 'name': 'backpack'},
 28: {'id': 28, 'name': 'umbrella'},
 31: {'id': 31, 'name': 'handbag'},
 32: {'id': 32, 'name': 'tie'},
 33: {'id': 33, 'name': 'suitc

In [None]:
#object detection
import imageio
def detect_objects_in_image(image_np, min_score_thresh=0.30, max_boxes_to_draw=200):
    """Run the detector on one image and return an annotated copy of it.

    Relies on the notebook globals `detect_fn` (the loaded SavedModel) and
    `category_index` (class id -> category lookup).

    Args:
        image_np: Image as an array. Presumably (height, width, 3) uint8, as
            produced by the video reader — confirm against the model signature.
        min_score_thresh: Score threshold forwarded to the drawing helper;
            detections scoring below it are not drawn.
        max_boxes_to_draw: Maximum number of boxes forwarded to the drawing helper.

    Returns:
        A copy of `image_np` with the detections drawn on it. The input array
        itself is not modified.
    """
    # The model expects a batch of images, so add a leading batch axis.
    input_tensor = tf.convert_to_tensor(image_np)[tf.newaxis, ...]
    detections = detect_fn(input_tensor)

    # Outputs are batched: take the first (only) image, keep the first
    # `num_detections` entries of each output and convert them to NumPy.
    num_detections = int(detections.pop('num_detections'))
    detections = {key: value[0, :num_detections].numpy()
                  for key, value in detections.items()}
    detections['num_detections'] = num_detections

    # Cast class ids to integers so they match the integer keys of category_index.
    detections['detection_classes'] = detections['detection_classes'].astype(np.int64)

    # Draw on a copy so the caller's frame stays untouched.
    image_np_with_detections = image_np.copy()

    viz_utils.visualize_boxes_and_labels_on_image_array(
          image_np_with_detections,
          detections['detection_boxes'],
          detections['detection_classes'],
          detections['detection_scores'],
          category_index,
          use_normalized_coordinates=True,
          max_boxes_to_draw=max_boxes_to_draw,
          min_score_thresh=min_score_thresh,
          agnostic_mode=False)
    return image_np_with_detections


def detect_objects_in_video(input_video, output_fps=10):
    """Annotate every frame of a video with detections and save the result.

    Reads `{input_video}.mp4`, runs `detect_objects_in_image` on each frame and
    writes the annotated frames to `{input_video}_annotated.mp4`. Prints the
    number of frames processed and the processing speed.

    Args:
        input_video: Base name of the video file, without the `.mp4` extension.
        output_fps: Frame rate of the written video (the source frame rate is
            not consulted).
    """
    print(f'Running inference for {input_video}.mp4... ', end='')

    video_reader = imageio.get_reader(f'{input_video}.mp4')
    try:
        video_writer = imageio.get_writer(f'{input_video}_annotated.mp4', fps=output_fps)
        try:
            # loop through and process each frame
            t0 = time.time()
            n_frames = 0
            for frame in video_reader:
                n_frames += 1
                new_frame = detect_objects_in_image(frame)
                video_writer.append_data(new_frame)
            processing_fps = n_frames / (time.time() - t0)
        finally:
            # Always finalize the output file, even if inference fails part-way.
            video_writer.close()
    finally:
        # Release the input file handle (the original never closed the reader).
        video_reader.close()

    print("Frames processed: %s, Speed: %s fps" % (n_frames, processing_fps))

In [23]:
detect_objects_in_video(INPUT_VIDEOS[0])

Running inference for catdog.mp4... Frames processed: 50, Speed: 0.20182779286327174 fps


In [25]:
from IPython.display import HTML
from base64 import b64encode

def play_video(filename):
    """Return an inline HTML video player for the MP4 file at `filename`.

    The file's bytes are base64-encoded into a data URL, so the whole video is
    embedded in the returned HTML.
    """
    # Context manager closes the file handle even if the read fails.
    with open(filename, 'rb') as video_file:
        mp4 = video_file.read()
    data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
    return HTML(f"""
      <video width=640 controls>
            <source src="{data_url}" type="video/mp4">
      </video>""")

play_video('catdog_annotated.mp4')

In [26]:
detect_objects_in_video(INPUT_VIDEOS[2])

Running inference for traffic.mp4... Frames processed: 70, Speed: 0.20581698715799993 fps


In [None]:
from IPython.display import HTML
from base64 import b64encode

# NOTE: this repeats the play_video definition from an earlier cell; it is kept
# so that this cell can run on its own.
def play_video(filename):
    """Return an inline HTML video player for the MP4 file at `filename`.

    The file's bytes are base64-encoded into a data URL, so the whole video is
    embedded in the returned HTML.
    """
    # Context manager closes the file handle even if the read fails.
    with open(filename, 'rb') as video_file:
        mp4 = video_file.read()
    data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
    return HTML(f"""
      <video width=640 controls>
            <source src="{data_url}" type="video/mp4">
      </video>""")

play_video('traffic_annotated.mp4')  