## Tensorflow Object Detection Setup For Tensorflow 2.0

In [1]:
import tensorflow as tf
print(tf.__version__)

2.10.0


In [2]:
%matplotlib inline


# Detect Objects Using Your Webcam or from a video strem


This demo will take you through the steps of running an "out-of-the-box" detection model to
detect objects in the video stream extracted from your camera.



## Create the data directory
The snippet shown below will create the ``data`` directory where all our data will be stored. The
code will create a directory structure as shown bellow:

.. code-block:: bash

    data
    └── models

where the ``models`` folder will will contain the downloaded models.



In [4]:
import os

DATA_DIR = os.path.join(os.getcwd(), 'data')
MODELS_DIR = os.path.join(DATA_DIR, 'models')
for dir in [DATA_DIR, MODELS_DIR]:
    if not os.path.exists(dir):
        os.mkdir(dir)

## Download the model
The code snippet shown below is used to download the object detection model checkpoint file,
as well as the labels file (.pbtxt) which contains a list of strings used to add the correct
label to each detection (e.g. person).

The particular detection algorithm we will use is the `SSD ResNet101 V1 FPN 640x640`. More
models can be found in the `TensorFlow 2 Detection Model Zoo <https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf2_detection_zoo.md>`_.
To use a different model you will need the URL name of the specific model. This can be done as
follows:

1. Right click on the `Model name` of the model you would like to use;
2. Click on `Copy link address` to copy the download link of the model;
3. Paste the link in a text editor of your choice. You should observe a link similar to ``download.tensorflow.org/models/object_detection/tf2/YYYYYYYY/XXXXXXXXX.tar.gz``;
4. Copy the ``XXXXXXXXX`` part of the link and use it to replace the value of the ``MODEL_NAME`` variable in the code shown below;
5. Copy the ``YYYYYYYY`` part of the link and use it to replace the value of the ``MODEL_DATE`` variable in the code shown below.

For example, the download link for the model used below is: ``download.tensorflow.org/models/object_detection/tf2/20200711/ssd_resnet101_v1_fpn_640x640_coco17_tpu-8.tar.gz``



In [5]:
import tarfile
import urllib.request

# Download and extract model
MODEL_DATE = '20200711'
MODEL_NAME = 'ssd_mobilenet_v2_320x320_coco17_tpu-8'
MODEL_TAR_FILENAME = MODEL_NAME + '.tar.gz'
MODELS_DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/tf2/'
MODEL_DOWNLOAD_LINK = MODELS_DOWNLOAD_BASE + MODEL_DATE + '/' + MODEL_TAR_FILENAME
PATH_TO_MODEL_TAR = os.path.join(MODELS_DIR, MODEL_TAR_FILENAME)
PATH_TO_CKPT = os.path.join(MODELS_DIR, os.path.join(MODEL_NAME, 'checkpoint/'))
PATH_TO_CFG = os.path.join(MODELS_DIR, os.path.join(MODEL_NAME, 'pipeline.config'))
if not os.path.exists(PATH_TO_CKPT):
    print('Downloading model. This may take a while... ', end='')
    urllib.request.urlretrieve(MODEL_DOWNLOAD_LINK, PATH_TO_MODEL_TAR)
    tar_file = tarfile.open(PATH_TO_MODEL_TAR)
    tar_file.extractall(MODELS_DIR)
    tar_file.close()
    os.remove(PATH_TO_MODEL_TAR)
    print('Done')

# Download labels file
LABEL_FILENAME = 'mscoco_label_map.pbtxt'
LABELS_DOWNLOAD_BASE = \
    'https://raw.githubusercontent.com/tensorflow/models/master/research/object_detection/data/'
PATH_TO_LABELS = os.path.join(MODELS_DIR, os.path.join(MODEL_NAME, LABEL_FILENAME))
if not os.path.exists(PATH_TO_LABELS):
    print('Downloading label file... ', end='')
    urllib.request.urlretrieve(LABELS_DOWNLOAD_BASE + LABEL_FILENAME, PATH_TO_LABELS)
    print('Done')

## Load the model
Next we load the downloaded model



In [6]:
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'    # Suppress TensorFlow logging
import tensorflow as tf
from object_detection.utils import label_map_util
from object_detection.utils import config_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.builders import model_builder

tf.get_logger().setLevel('ERROR')           # Suppress TensorFlow logging (2)

# Enable GPU dynamic memory allocation
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

# Load pipeline config and build a detection model
configs = config_util.get_configs_from_pipeline_file(PATH_TO_CFG)
model_config = configs['model']
detection_model = model_builder.build(model_config=model_config, is_training=False)

# Restore checkpoint
ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)
ckpt.restore(os.path.join(PATH_TO_CKPT, 'ckpt-0')).expect_partial()

@tf.function
def detect_fn(image):
    """Detect objects in image."""

    image, shapes = detection_model.preprocess(image)
    prediction_dict = detection_model.predict(image, shapes)
    detections = detection_model.postprocess(prediction_dict, shapes)

    return detections, prediction_dict, tf.reshape(shapes, [-1])

## Load label map data (for plotting)
Label maps correspond index numbers to category names, so that when our convolution network
predicts `5`, we know that this corresponds to `airplane`.  Here we use internal utility
functions, but anything that returns a dictionary mapping integers to appropriate string labels
would be fine.



In [7]:
category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS,
                                                                    use_display_name=True)

## Define the video stream
We will use `OpenCV <https://pypi.org/project/opencv-python/>`_ to capture the video stream
generated by our webcam. For more information you can refer to the `OpenCV-Python Tutorials <https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_gui/py_video_display/py_video_display.html#capture-video-from-camera>`_



In [8]:
import cv2

cap = cv2.VideoCapture(0)

## Putting everything together
The code shown below loads an image, runs it through the detection model and visualizes the
detection results, including the keypoints.

Note that this will take a long time (several minutes) the first time you run this code due to
tf.function's trace-compilation --- on subsequent runs (e.g. on new images), things will be
faster.

Here are some simple things to try out if you are curious:

* Modify some of the input images and see if detection still works. Some simple things to try out here (just uncomment the relevant portions of code) include flipping the image horizontally, or converting to grayscale (note that we still expect the input image to have 3 channels).
* Print out `detections['detection_boxes']` and try to match the box locations to the boxes in the image.  Notice that coordinates are given in normalized form (i.e., in the interval [0, 1]).
* Set ``min_score_thresh`` to other values (between 0 and 1) to allow more detections in or to filter out more detections.



In [8]:
#REAL TIME object detection using videocapture 
import numpy as np
import pandas as pd

while True:
    # Read frame from camera
    ret, image_np = cap.read()

    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
    image_np_expanded = np.expand_dims(image_np, axis=0)

    # Things to try:
    # Flip horizontally
    # image_np = np.fliplr(image_np).copy()

    # Convert image to grayscale
    # image_np = np.tile(
    #     np.mean(image_np, 2, keepdims=True), (1, 1, 3)).astype(np.uint8)

    input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
    detections, predictions_dict, shapes = detect_fn(input_tensor)
    label_id_offset = 1
    image_np_with_detections = image_np.copy()
    
    viz_utils.visualize_boxes_and_labels_on_image_array(
          image_np_with_detections,
          detections['detection_boxes'][0].numpy(),
          (detections['detection_classes'][0].numpy() + label_id_offset).astype(int),
          detections['detection_scores'][0].numpy(),
          category_index,
          use_normalized_coordinates=True,
          max_boxes_to_draw=200,
          min_score_thresh=0.6,
          agnostic_mode=False)

    # Display output
    cv2.imshow('object detection', cv2.resize(image_np_with_detections, (800, 600)))

    if cv2.waitKey(25) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

KeyboardInterrupt: 

In [14]:
#preprocessing of the video strem -----
import numpy as np
import cv2
import tensorflow as tf
from object_detection.utils import visualization_utils as viz_utils
from object_detection.utils import label_map_util

# Path to the video file
video_path = r'C:\Users\sudar\compVision\models\research\video.mp4'  # Replace with the actual path to your video file

# Initialize video capture
cap = cv2.VideoCapture(r'C:\Users\sudar\compVision\models\research\video.mp4')

if not cap.isOpened():
    print(f"Error: Unable to open video file {video_path}")
    exit()

# Load detection function and category index
detect_fn = tf.saved_model.load(r'C:\Users\sudar\compVision\models\research\TFOD\data\models\ssd_mobilenet_v2_320x320_coco17_tpu-8\saved_model') # Replace with your object detection function

# Load the label map and category index
# Replace 'label_map.pbtxt' with the path to your label map file
label_map_path = r'C:\Users\sudar\compVision\models\research\TFOD\data\models\ssd_mobilenet_v2_320x320_coco17_tpu-8\mscoco_label_map.pbtxt'
category_index = label_map_util.create_category_index_from_labelmap(label_map_path, use_display_name=True)

while True:
    ret, image_np = cap.read()
    if not ret:
        print("End of video or error reading frame.")
        break

    # Preprocess frame for model input
    #input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)

    #message part
    print("pre-processing starting \n")
    
    # Preprocess frame for model input
    input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.uint8)  # Ensure dtype matches model expectation

    #message part
    print("starting to print detection keys \n")
    
    # Run object detection
    detections = detect_fn(input_tensor)

    # Print the available keys for verification
    print(detections.keys())  # Inspect the keys in the dictionary

    #message part
    print("detection keys are printed \n ")
    
    # Access the relevant values from the dictionary
    detection_boxes = detections['detection_boxes']
    detection_scores = detections['detection_scores']
    detection_classes = detections['detection_classes']
    num_detections = detections['num_detections']

    # Create a copy of the original image for visualization
    image_np_with_detections = np.copy(image_np)

    # Visualize the results
    viz_utils.visualize_boxes_and_labels_on_image_array(
        image_np_with_detections,  # Use the copied image for visualization
        detection_boxes[0].numpy(),
        (detection_classes[0].numpy() + 1).astype(int),  # Class IDs
        detection_scores[0].numpy(),
        category_index,
        use_normalized_coordinates=True,
        max_boxes_to_draw=200,
        min_score_thresh=0.6,
        agnostic_mode=False,
            )


    # Display the processed frame
    cv2.imshow('Object Detection', cv2.resize(image_np_with_detections, (800, 600)))

    # Exit on pressing 'q'
    if cv2.waitKey(25) & 0xFF == ord('q'):
        break
        
print("the code is done relequishing control \n")
# Release resources
cap.release()
cv2.destroyAllWindows()
#------------------preprocessing output---------


pre-processing starting 

starting to print detection keys 

dict_keys(['detection_scores', 'detection_anchor_indices', 'detection_multiclass_scores', 'detection_classes', 'raw_detection_scores', 'raw_detection_boxes', 'num_detections', 'detection_boxes'])
detection keys are printed 
 
pre-processing starting 

starting to print detection keys 

dict_keys(['detection_scores', 'detection_anchor_indices', 'detection_multiclass_scores', 'detection_classes', 'raw_detection_scores', 'raw_detection_boxes', 'num_detections', 'detection_boxes'])
detection keys are printed 
 
pre-processing starting 

starting to print detection keys 

dict_keys(['detection_scores', 'detection_anchor_indices', 'detection_multiclass_scores', 'detection_classes', 'raw_detection_scores', 'raw_detection_boxes', 'num_detections', 'detection_boxes'])
detection keys are printed 
 
pre-processing starting 

starting to print detection keys 

dict_keys(['detection_scores', 'detection_anchor_indices', 'detection_multicla

In [15]:
#this is the webCam version , so i have inserted a new block for the video.mp4 file RUN THAT 
import numpy as np
import cv2
import cv2

# Using OpenCV to initialize the webcam
cap = cv2.VideoCapture('video.mp4')



while cap.isOpened():
    ret, image_np = cap.read()
    input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
    detections, predictions_dict, shapes = detect_fn(input_tensor)

    label_id_offset = 1
    image_np_with_detections = image_np.copy()

    viz_utils.visualize_boxes_and_labels_on_image_array(
          image_np_with_detections,
          detections['detection_boxes'][0].numpy(),
          (detections['detection_classes'][0].numpy() + label_id_offset).astype(int),
          detections['detection_scores'][0].numpy(),
          category_index,
          use_normalized_coordinates=True,
          max_boxes_to_draw=200,
          min_score_thresh=.50,
          line_thickness=1,
          agnostic_mode=False)

    cv2.imshow('ssd_mobilenet', image_np_with_detections)
    if cv2.waitKey(1) == 13: #13 is the Enter Key
        break
            
# Release camera and close windows
cap.release()
cv2.destroyAllWindows()  

In [4]:
#Object detection in a video stream i.e an mp4 file ------------------------------------
import numpy as np
import cv2
import tensorflow as tf
from object_detection.utils import visualization_utils as viz_utils

# Load the pre-trained object detection model
model_dir = r'C:\Users\sudar\compVision\models\research\TFOD\data\models\ssd_mobilenet_v2_320x320_coco17_tpu-8\saved_model'  # Provide the correct path to your model
detect_fn = tf.saved_model.load(model_dir).signatures['serving_default']

# Initialize video capture
cap = cv2.VideoCapture(r'C:\Users\sudar\compVision\models\research\video.mp4')  # Make sure the path to the video is correct

# Ensure the video file is opened correctly
if not cap.isOpened():
    print("Error: Could not open video file.")
    exit()

# Define category index (example)
category_index = {1: {'id': 1, 'name': 'person'}, 2: {'id': 2, 'name': 'car'}}  # Example labels

while cap.isOpened():
    ret, image_np = cap.read()
    
    if not ret:
        break
    
    # Convert image to uint8 before passing to the model
    image_np = np.asarray(image_np, dtype=np.uint8)

    # Preprocess the frame for the model
    input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.uint8)
    
    # Run object detection
    detections = detect_fn(input_tensor)

    #label_id_offset = 1
    image_np_with_detections = np.copy(image_np)

    # Visualize detections on the frame
    viz_utils.visualize_boxes_and_labels_on_image_array(
          image_np_with_detections,
          detections['detection_boxes'][0].numpy(),
          (detections['detection_classes'][0].numpy() ).astype(int),
          detections['detection_scores'][0].numpy(),
          category_index,
          use_normalized_coordinates=True,
          max_boxes_to_draw=200,
          min_score_thresh=0.50,
          line_thickness=1,
          agnostic_mode=False)

    # Display the processed frame
    cv2.imshow('Object Detection - SSD Mobilenet', image_np_with_detections)
    
    # Exit if the Enter key is pressed
    if cv2.waitKey(1) == 13:  # 13 is the Enter key
        break

# Release the video capture and close OpenCV windows
cap.release()
cv2.destroyAllWindows()


In [9]:
import numpy as np
import cv2

cap = cv2.VideoCapture('video.mp4')

while(cap.isOpened()):
    ret, frame = cap.read()

    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    cv2.imshow('frame',gray)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

In [4]:
#this is the code that i have changed 
import numpy as np
import cv2
import tensorflow as tf
from object_detection.utils import visualization_utils as viz_utils
from object_detection.utils import label_map_util

# Load the detection model and category index
detect_fn = ...  # Replace with the actual function to load your model
category_index = ...  # Replace with the category index dictionary

# Video file source
video_source = 'C:\Users\sudar\compVision\models\research\video.mp4'  # Replace with the path to your video file
cap = cv2.VideoCapture(video_source)

if not cap.isOpened():
    print(f"Error: Unable to open video file {video_source}")
    exit()

while cap.isOpened():
    ret, image_np = cap.read()
    if not ret:
        print("End of video or cannot read frame.")
        break

    # Preprocess and detect objects
    input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
    detections = detect_fn(input_tensor)

    # Copy frame for visualization
    image_np_with_detections = image_np.copy()
    viz_utils.visualize_boxes_and_labels_on_image_array(
        image_np_with_detections,
        detections['detection_boxes'][0].numpy(),
        (detections['detection_classes'][0].numpy() + 1).astype(int),
        detections['detection_scores'][0].numpy(),
        category_index,
        use_normalized_coordinates=True,
        max_boxes_to_draw=200,
        min_score_thresh=0.50,
        line_thickness=2,
        agnostic_mode=False,
    )

    # Display results
    cv2.imshow('Object Detection', image_np_with_detections)
    if cv2.waitKey(1) == 13:  # Press Enter to exit
        break

# Release resources
cap.release()
cv2.destroyAllWindows()


SyntaxError: (unicode error) 'unicodeescape' codec can't decode bytes in position 2-3: truncated \UXXXXXXXX escape (3999147310.py, line 13)