<a href="https://colab.research.google.com/github/HollowMike8/object-tracking-dlib/blob/main/single_object_tracking.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# NOTE(review): a bare `git` only prints the usage text captured in this cell's output
!git
# create (or reinitialize) an empty Git repository in the current directory
!git init

usage: git [--version] [--help] [-C <path>] [-c <name>=<value>]
           [--exec-path[=<path>]] [--html-path] [--man-path] [--info-path]
           [-p | --paginate | --no-pager] [--no-replace-objects] [--bare]
           [--git-dir=<path>] [--work-tree=<path>] [--namespace=<name>]
           <command> [<args>]

These are common Git commands used in various situations:

start a working area (see also: git help tutorial)
   clone      Clone a repository into a new directory
   init       Create an empty Git repository or reinitialize an existing one

work on the current change (see also: git help everyday)
   add        Add file contents to the index
   mv         Move or rename a file, a directory, or a symlink
   reset      Reset current HEAD to the specified state
   rm         Remove files from the working tree and from the index

examine the history and state (see also: git help revisions)
   bisect     Use binary search to find the commit that introduced a bug
   grep       Prin

In [2]:
# clone the project repository; later cells import its helper modules from this clone
!git clone https://github.com/HollowMike8/object-tracking-dlib.git

Cloning into 'object-tracking-dlib'...
remote: Enumerating objects: 68, done.[K
remote: Counting objects: 100% (68/68), done.[K
remote: Compressing objects: 100% (61/61), done.[K
remote: Total 68 (delta 36), reused 23 (delta 6), pack-reused 0[K
Unpacking objects: 100% (68/68), done.


In [3]:
!pip install --upgrade imutils



In [4]:
# make the cloned repository the working directory for the rest of the notebook
%cd object-tracking-dlib/

/content/object-tracking-dlib


In [5]:
import os
import cv2
import dlib
import imutils
import datetime
import numpy as np
from google.colab.patches import cv2_imshow

In [6]:
import sys

# directory of the cloned repository that holds the helper modules imported below
path_dir: str = r"/content/object-tracking-dlib"

# make the repository importable; the membership check keeps repeated runs of
# this cell from stacking duplicate entries at the front of sys.path
if path_dir not in sys.path:
  sys.path.insert(0, path_dir)

import single_object_config as soc
from centroidtracker import CentroidTracker

In [7]:
# class labels mobilenet_ssd was trained on; the list position is the class id
# that is used to look a label up from a detection
CLASSES = [
    "background",   # 0
    "aeroplane",    # 1
    "bicycle",      # 2
    "bird",         # 3
    "boat",         # 4
    "bottle",       # 5
    "bus",          # 6
    "car",          # 7
    "cat",          # 8
    "chair",        # 9
    "cow",          # 10
    "diningtable",  # 11
    "dog",          # 12
    "horse",        # 13
    "motorbike",    # 14
    "person",       # 15
    "pottedplant",  # 16
    "sheep",        # 17
    "sofa",         # 18
    "train",        # 19
    "tvmonitor",    # 20
]

In [8]:
# locate the mobilenet_ssd caffe network definition and its trained weights
prototxt_path, model_path = (
    os.path.join(soc.cnn_caffe_dir, file_name)
    for file_name in ("MobileNetSSD_deploy.prototxt",
                      "MobileNetSSD_deploy.caffemodel")
)

# read the caffe model through OpenCV's dnn module
net = cv2.dnn.readNetFromCaffe(prototxt_path, model_path)

In [27]:
# load the input video (race.mp4) and initialize the writer and tracker
video_path = os.path.join(soc.input_dir, "race.mp4")
vs = cv2.VideoCapture(video_path)

# cv2.VideoCapture does not raise when the file cannot be opened; fail loudly
# here instead of letting the processing loop silently handle zero frames
if not vs.isOpened():
  raise IOError("Could not open input video: {}".format(video_path))

# both are created later, inside the frame-processing loop
tracker = None
writer = None

# initialize the CentroidTracker and the objects it reports
ct = CentroidTracker(maxDisappeared=40, maxDistance=40)
objects = None

# refresh rate for object detection (object detection every N frames)
refresh_rate = 60

# number of frames processed so far
totalFrames = 0

In [None]:
# Main processing loop: every `refresh_rate` frames the detector is run and, if
# the target class is found with enough confidence, a fresh dlib correlation
# tracker is started on it; on all other frames the existing tracker is updated.
# Every frame (annotated when a box is available) is written to the output video.
start_time = datetime.datetime.now()

# bounding box coordinates most recently handed to the centroid tracker
rects = []

try:
  # loop over the frames in the input video
  while True:
    (grab, frame) = vs.read()

    # break out of the loop once no further frame can be read
    if not grab:
      break

    # resize, then convert from BGR to RGB for the dlib tracker
    frame = imutils.resize(frame, width=600)
    img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # create the video writer on the first frame, once the frame size is known
    if writer is None:
      fourcc = cv2.VideoWriter_fourcc(*"MJPG")
      writer = cv2.VideoWriter(os.path.join(soc.output_dir, "race_dlib.avi"),
                               fourcc, 30, (frame.shape[1], frame.shape[0]), True)

    # object detection (every N frames)
    if totalFrames % refresh_rate == 0:
      (h, w) = frame.shape[:2]
      blob = cv2.dnn.blobFromImage(frame, 0.007843, (w, h), 127.5)
      net.setInput(blob)
      detections = net.forward()
      num_detections = detections.shape[2]

      if num_detections == 0:
        # nothing to evaluate: make the acceptance test below fail rather than
        # calling np.argmax on an empty array or reusing values of an earlier pass
        conf, label = 0.0, None

      # first detection: take the candidate with the largest confidence (single obj)
      elif objects is None:
        i = np.argmax(detections[0, 0, :, 2])
        conf = detections[0, 0, i, 2]
        label = CLASSES[int(detections[0, 0, i, 1])]

      # later detections: look for the previously existing single obj
      else:
        for i in range(0, num_detections):
          conf = detections[0, 0, i, 2]
          label = CLASSES[int(detections[0, 0, i, 1])]

          temp = detections[0, 0, i, 3:7]*np.array([w, h, w, h])
          (startX, startY, endX, endY) = temp.astype("int")

          if label == soc.label:
            rects = []
            objects_old = objects.copy()
            rects.append((startX, startY, endX, endY))
            objects = ct.update(rects)

            # compare the entry stored under ID 0 before and after the update:
            # unchanged -> look at the next detection, changed -> keep this one
            # NOTE(review): assumes ID 0 is still registered in the centroid
            # tracker; a KeyError is raised here otherwise - TODO confirm
            if (objects_old[0] == objects[0]).all():
              continue
            else:
              break

          # a 'background' detection is relabelled as the target class, so only
          # the confidence test below decides whether it is accepted
          elif label == 'background':
            label = soc.label

      if conf > soc.thres_confidence and label == soc.label:
        # compute the bounding box coordinates
        box = detections[0, 0, i, 3:7]*np.array([w, h, w, h])
        (startX, startY, endX, endY) = box.astype("int")

        # construct the dlib correlation tracker using bounding box coordinates
        tracker = dlib.correlation_tracker()
        rect = dlib.rectangle(startX, startY, endX, endY)
        tracker.start_track(img, rect)

        # draw the bounding box rectangle and label in the frame
        cv2.rectangle(frame, (startX, startY), (endX, endY), (0, 255, 0), 2)
        cv2.putText(frame, label, (startX, startY-15), cv2.FONT_HERSHEY_SIMPLEX,
                    0.45, (0, 255, 0), 2)

        # empty the rect list and update the centroid/centroids
        rects = []
        rects.append((startX, startY, endX, endY))
        objects = ct.update(rects)

    # object tracking; skipped until a detection pass has started a tracker,
    # in which case the frame is written without a box
    elif tracker is not None:
      tracker.update(img)
      pos = tracker.get_position()

      # unpack the position object
      startX = int(pos.left())
      startY = int(pos.top())
      endX = int(pos.right())
      endY = int(pos.bottom())

      # draw the bounding box rectangle and label in the frame; a tracker is only
      # ever started for soc.label, whereas `label` may hold a different class
      # left over from the last detection pass
      cv2.rectangle(frame, (startX, startY), (endX, endY), (0, 255, 0), 2)
      cv2.putText(frame, soc.label, (startX, startY-15), cv2.FONT_HERSHEY_SIMPLEX,
                  0.45, (0, 255, 0), 2)

      # empty the rect list and update the centroid/centroids
      rects = []
      rects.append((startX, startY, endX, endY))
      objects = ct.update(rects)

    # write the sketched frame
    if writer is not None:
      writer.write(frame)

    # show the output frame
    cv2_imshow(frame)
    key = cv2.waitKey(1) & 0xFF

    # if the `q` key was pressed, break from the loop
    if key == ord("q"):
      break

    # update the totalFrames processed
    totalFrames += 1

  end_time = datetime.datetime.now()
  elapsed_time = (end_time-start_time).total_seconds()
  print("Elapsed time: {:.2f}".format(elapsed_time))
  print("Approx. FPS: {:.2f}".format(totalFrames/elapsed_time))

finally:
  # release the writer and the capture even if the loop raised or the cell was
  # interrupted, so the output file is closed properly
  if writer is not None:
    writer.release()

  # do a bit of cleanup
  cv2.destroyAllWindows()
  vs.release()

# **Notes**
1. Object detection is performed once every 60 frames.
2. mobilenet_ssd fails at some detection steps because of occlusion.
3. Unsuccessful detection steps are skipped, and tracking continues as before.
4. Next steps: try a better detection algorithm that handles occlusions.