<a href="https://colab.research.google.com/github/HollowMike8/object-tracking/blob/main/single_object_tracking_yolo_dlib_kalman.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# shell commands run from the notebook's current working directory
# NOTE(review): the clone in the next cell does not appear to need either of
# these commands — confirm before removing them
!git
# initialise a git repository in the current working directory
!git init

In [2]:
# fetch the project repository into ./object-tracking; later cells change into
# this folder and read project modules and the yolo-coco files from it
!git clone https://github.com/HollowMike8/object-tracking.git

Cloning into 'object-tracking'...
remote: Enumerating objects: 129, done.[K
remote: Counting objects: 100% (129/129), done.[K
remote: Compressing objects: 100% (109/109), done.[K
remote: Total 129 (delta 66), reused 54 (delta 16), pack-reused 0[K
Receiving objects: 100% (129/129), 61.53 MiB | 36.36 MiB/s, done.
Resolving deltas: 100% (66/66), done.


In [None]:
!pip install --upgrade imutils
!pip install filterpy

In [4]:
# make the cloned repository the working directory for the remaining cells
%cd object-tracking/

/content/object-tracking


In [5]:
import os
import cv2
import imutils
import datetime
import numpy as np
from google.colab.patches import cv2_imshow

In [6]:
# root directory of the cloned repository; later cells also join it with
# "yolo-coco" to locate the class labels, weights and config files
path_dir: str = r"/content/object-tracking"

import sys
# put the repository root first on the module search path so that the
# project-local modules imported next can be found there
sys.path.insert(0,path_dir)
# project-local modules (not installed packages)
import single_object_config as soc
from centroidtracker import CentroidTracker
from correlation_tracker import CorrelationTracker
from first_order_kalman_tracker import KalmanBoxTracker

In [46]:
# load input video (race.mp4), initialize the writer, tracker
video_path = os.path.join(soc.input_dir, "race.mp4")
vs = cv2.VideoCapture(video_path)

# cv2.VideoCapture does not raise when the file is missing or unreadable; it
# just never yields a frame, so the main loop would finish silently with no
# output. Fail early with a clear message instead.
if not vs.isOpened():
  raise IOError("could not open input video: %s" % video_path)

tracker = None
writer = None

# initialize the CentroidTracker, objects
ct = CentroidTracker(maxDisappeared=40, maxDistance=30)
objects = None

# refresh rate for object detection (object detection every N frames)
refresh_rate = 60

# initiate totalFrames processed
totalFrames = 0

# initialize the dictionaries to capture detections and trackings
# (both are keyed by frame index and hold [startX, startY, endX, endY])
dets_dict = {}
trks_dict = {}

#### **Import yolo model and class labels**

In [None]:
# download the files if required

# change the dir to location where downloads save
%cd /content/object-tracking/yolo-coco/

!wget "https://pjreddie.com/media/files/yolov3.weights"

!wget "https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3.cfg"

# !wget "https://raw.githubusercontent.com/pjreddie/darknet/master/data/coco.names"

#change the dir back to root dir
%cd /content/object-tracking/

In [9]:
# load the COCO class labels for YOLO model (one label per line)
labels_path = os.path.join(path_dir, "yolo-coco", "coco.names")
# use a context manager so the file handle is closed as soon as it is read
with open(labels_path, "r") as labels_file:
  labels = labels_file.read().strip().split("\n")

# assign random colours to all COCO class labels; the fixed seed keeps the
# colour of each class identical between runs
np.random.seed(42)
colors = np.random.randint(0, 255, size=(len(labels), 3), dtype = "uint8")

In [10]:
# build the paths of the darknet config and weights inside the yolo-coco folder
configPath, weightsPath = [
    os.path.join(path_dir, "yolo-coco", file_name)
    for file_name in ("yolov3.cfg", "yolov3.weights")
]

# create the yolov3 network from the darknet config and weights files
net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)

#### **Main loop: YOLO detection with dlib and Kalman tracking**

In [None]:
# Single-object tracking loop.
#
# Every `refresh_rate` frames (and on every frame until a first target has been
# found) YOLO is run and one detection of class `soc.label` is selected; on all
# other frames the target is followed by the correlation tracker (tracker_1)
# and the kalman tracker (tracker_2). Each frame is annotated, written to the
# output video and displayed inline.

# initialize list to capture the bounding box coordinates
rects = []

# class label of the tracked object, drawn on every annotated frame
label = soc.label

# the trackers are built from the first successful detection
tracker_1 = None  # correlation tracker
tracker_2 = None  # kalman tracker

# determine only *output* layer names we need from yolo (3 output layers).
# This does not change between frames, so it is resolved once. Depending on the
# OpenCV version getUnconnectedOutLayers() returns an Nx1 or a flat array of
# 1-based indices; flattening handles both.
all_layer_names = net.getLayerNames()
layer_names = [all_layer_names[i - 1]
               for i in np.array(net.getUnconnectedOutLayers()).flatten()]

try:
  # loop over the frames in the input video
  while True:
    (grab, frame) = vs.read()

    # to break out of loop at the end of video
    if not grab:
      break

    # resize, and keep an RGB copy for the trackers (frame itself stays BGR)
    frame = imutils.resize(frame, width=600)
    img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # writing the video (the writer is created lazily because it needs the
    # size of the resized frame)
    if writer is None:
      fourcc = cv2.VideoWriter_fourcc(*"MJPG")
      writer = cv2.VideoWriter(os.path.join(soc.output_dir, "race_yolo_dlib_kalman.avi"),
                               fourcc, 30, (frame.shape[1], frame.shape[0]), True)

    # object detection (for every N frames)
    # initialize lists to append the bounding boxes, confidences and class IDs
    boxes = []        # corner format [startX, startY, endX, endY]
    nms_boxes = []    # [x, y, width, height], the format cv2.dnn.NMSBoxes expects
    confidences = []
    classIDs = []

    # detect on the refresh schedule, and keep retrying on every frame for as
    # long as no target has been found to initialise the trackers with
    if totalFrames % refresh_rate == 0 or tracker_1 is None:
      (h,w) = frame.shape[:2]

      # construct a blob from the input image and then perform a forward pass
      blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True,
                                   crop=False)
      net.setInput(blob)
      layerOutputs = net.forward(layer_names)

      # loop over each layer of the outputs (3)
      for output in layerOutputs:
        # loop over the detections in each output
        for detection in output:
          scores = detection[5:]
          classID = np.argmax(scores)
          confidence = scores[classID]

          # consider only predictions with confidence > threshold
          if confidence > soc.yolo_thres_confidence:
            # scale the bounding box parameters
            box = detection[0:4] * np.array([w, h, w, h])
            (centerX, centerY, width, height) = box.astype("int")

            # corner points of the candidate; kept in separate names so that
            # the currently tracked box (startX, ...) is not overwritten by
            # raw candidates of arbitrary classes
            candStartX = int(centerX - (width/2))
            candStartY = int(centerY - (height/2))
            candEndX = int(centerX + (width/2))
            candEndY = int(centerY + (height/2))

            boxes.append([candStartX, candStartY, candEndX, candEndY])
            nms_boxes.append([candStartX, candStartY, int(width), int(height)])
            confidences.append(float(confidence))
            classIDs.append(classID)

      # apply non-max suppression with threshold IoU= 0.3 and
      # threshold confidence=soc.yolo_thres_confidence
      idxs = cv2.dnn.NMSBoxes(nms_boxes, confidences, soc.yolo_thres_confidence,
                              0.3)

      # capture all the detections with class label soc.label, keyed by their
      # confidence; starts empty so a pass without detections never reuses the
      # result of an earlier pass
      person_list = {}
      if len(idxs) > 0:
        person_list = {confidences[i]:boxes[i]
                       for i in np.array(idxs).flatten()
                       if labels[classIDs[i]]==soc.label}

      if tracker_1 is None and not person_list:
        # no target yet and nothing detected: the frame is written without
        # annotations and detection is retried on the next frame
        pass
      else:
        # find the first time detection with the largest confidence (single obj)
        if tracker_1 is None:
          max_conf = max(person_list.keys())
          (startX, startY, endX, endY) = person_list[max_conf]

        # find the index of the previously existing single obj detection
        elif person_list:
          rects_old = rects

          for conf in person_list.keys():
            (startX, startY, endX, endY) = person_list[conf]

            rects = []
            objects_old = objects.copy()
            rects.append((startX, startY, endX, endY))
            objects = ct.update(rects)

            # check the new rect is already detected/tracked single obj
            # NOTE(review): assumes the centroid tracker still reports object
            # id 0 in both results — confirm against CentroidTracker
            if (objects_old[0] == objects[0]).all():
              (startX, startY, endX, endY) = rects_old[0]
              continue
            else:
              break

        # otherwise nothing of class soc.label was detected in this pass: the
        # box from the previous frame (startX, ...) is carried forward

        # capture the detections in the dictionary
        bbox = [startX, startY, endX, endY]
        dets_dict[totalFrames] = bbox

        # construct the dlib correlation tracker using bounding box coordinates
        if tracker_1 is None:
          tracker_1 = CorrelationTracker(bbox, img)
        else:
          tracker_1.update(bbox, img)

        # construct the kalman tracker using bounding box coordinates
        if tracker_2 is None:
          tracker_2 = KalmanBoxTracker(bbox=bbox)

        # draw the bounding box rectangle and label in the frame
        cv2.rectangle(frame, (startX, startY), (endX, endY), (0, 255, 0), 2)
        cv2.putText(frame, label, (int((startX + endX)/2), int((startY + endY)/2)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 255, 0), 2)

        # write the detection step in the frame
        cv2.putText(frame, "DETECTION", (420, 30), cv2.FONT_HERSHEY_SIMPLEX, 1,
                    (0, 0, 255), 2)

        # empty the rect list and update the centroid/centroids
        rects = []
        rects.append((startX, startY, endX, endY))
        objects = ct.update(rects)

    # object tracking
    else:
      # predict using dlib correlation tracker
      (startX, startY, endX, endY) = tracker_1.predict(img)

      bbox_dlib = [startX, startY, endX, endY]

      # predict and update using kalman tracker
      trks = tracker_2.predict(img)
      pos_2 = [int(coor) for coor in trks]

      # capture the kalman tracking in the dictionary
      trks_dict[totalFrames] = pos_2

      if totalFrames % 5 == 0:
        # update/correct kalman prediction using bounding box from dlib tracker
        tracker_2.update(bbox_dlib, img)

      # find the distance between bounding box centers from dlib and kalman
      center_d = np.array([(startX + endX)/2, (startY + endY)/2])
      center_k = np.array([(pos_2[0] + pos_2[2])/2, (pos_2[1] + pos_2[3])/2])
      dist = np.linalg.norm(center_d-center_k)

      # draw bounding box rectangle (for correlation tracker)
      cv2.rectangle(frame, (startX, startY), (endX, endY), (0, 255, 255), 2)
      cv2.putText(frame, "dlib", (startX, startY-15), cv2.FONT_HERSHEY_SIMPLEX,
                  0.45, (0, 255, 255), 2)

      # draw bounding box rectangle (for kalman tracker)
      cv2.rectangle(frame,(pos_2[0],pos_2[1]),(pos_2[2],pos_2[3]), (0, 0, 255), 2)
      cv2.putText(frame,"kalman",(pos_2[0], pos_2[3]+15),cv2.FONT_HERSHEY_SIMPLEX,
                  0.45, (0, 0, 255), 2)

      # write the tracking step and label in the frame
      cv2.putText(frame, label, (int((startX + endX)/2), int((startY + endY)/2)),
                  cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 255, 0), 2)
      cv2.putText(frame, "TRACKING", (420, 30), cv2.FONT_HERSHEY_SIMPLEX, 1,
                  (0, 0, 255), 2)

      # empty the rect list and update the centroid/centroids
      rects = []
      # prefer the kalman box when the two centers are more than 40 px apart
      # while each kalman/dlib center coordinate ratio stays within (0.8, 1.2).
      # The bounds are tested element-wise *before* reducing with .all();
      # chaining the comparison around .all() would compare a bool to them.
      ratio = np.divide(center_k, center_d)
      if dist > 40 and ((ratio > 0.8) & (ratio < 1.2)).all():
        (startX, startY, endX, endY) = pos_2
      rects.append((startX, startY, endX, endY))
      objects = ct.update(rects)

    # write the sketched frame
    if writer is not None:
      writer.write(frame)

    # show the output frame
    cv2_imshow(frame)
    key = cv2.waitKey(1) & 0xFF

    # if the `q` key was pressed, break from the loop
    if key == ord("q"):
      break

    # update the totalFrames processed
    totalFrames += 1

finally:
  # release the writer and the capture even when the loop is interrupted or
  # raises, so that the output file is finalised
  if writer is not None:
    writer.release()
  vs.release()

  # do a bit of cleanup
  cv2.destroyAllWindows()