# MeanShift Tracking With OpenCV

In [None]:
import numpy as np
import cv2

In [None]:
# MeanShift face tracking demo.
#
# Flow: open the webcam, detect a face once in the first frame with a Haar
# cascade, build a hue histogram of that face region, then follow the region
# in every subsequent frame with cv2.meanShift. Press ESC to stop.
print(F'cv2 version = {cv2.__version__}')
# output: cv2 version = 4.5.0

videoCaptureApi = cv2.CAP_ANY  # autodetect default API

# this works if conda-forge::opencv=4.5.0 is installed in the local environment
cap = cv2.VideoCapture("/dev/video0", videoCaptureApi)

if not cap.isOpened():
    raise RuntimeError("ERROR! Unable to open camera")

# Everything below runs inside try/finally so the camera and the preview
# window are always released, even when setup or tracking raises.
try:
    ret, frame = cap.read()
    if not ret:
        raise RuntimeError("ERROR! Unable to read the first frame from camera")

    # Setting up the initial tracking window.
    # Previously we tracked the 10 best corners detected in the first frame.
    # Here we track a face instead: detect it once in the first frame, then
    # hand that set of pixels to the MeanShift algorithm to follow.
    face_cascade = cv2.CascadeClassifier('../data/haarcascades/haarcascade_frontalface_default.xml')
    if face_cascade.empty():
        # A wrong path does not raise on construction; it yields an empty
        # classifier, so check explicitly.
        raise RuntimeError("ERROR! Unable to load the Haar cascade file")

    # one (x, y, w, h) rectangle per detected face
    face_rects = face_cascade.detectMultiScale(frame)

    # len() is used rather than truthiness because the result may be a NumPy
    # array, whose truth value is ambiguous when it holds several elements.
    if len(face_rects) == 0:
        raise RuntimeError("ERROR! No face detected in the first frame")

    # We only want to track a single face, so grab the first one.
    # Plain Python ints are used so the window is a simple tuple of integers.
    (face_x, face_y, w, h) = tuple(int(v) for v in face_rects[0])

    # tracking window (the rectangle where the face was detected in the first frame)
    track_window = (face_x, face_y, w, h)

    # region of interest: the face pixels of the first frame
    roi = frame[face_y:face_y + h, face_x:face_x + w]

    # using HSV color mapping
    hsv_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)

    # histogram of channel 0 (hue) with 180 bins over [0, 180)
    roi_hist = cv2.calcHist([hsv_roi], [0], None, [180], [0, 180])

    # rescale the histogram in place to the 0..255 range
    cv2.normalize(roi_hist, roi_hist, 0, 255, cv2.NORM_MINMAX)

    # termination criteria: at most 10 iterations, or epsilon of 1
    term_crit = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1)

    while True:
        ret, frame = cap.read()
        if not ret:
            # no more frames (camera unplugged or stream ended)
            break

        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)

        # back-project the face hue histogram onto the current frame
        dst = cv2.calcBackProject([hsv], [0], roi_hist, [0, 180], 1)

        # The first return value is not needed here; keeping it out of `ret`
        # avoids clobbering the frame-read flag.
        _, track_window = cv2.meanShift(dst, track_window, term_crit)

        x, y, w, h = track_window
        img2 = cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 0, 255), 5)
        cv2.imshow('img', img2)

        # ESC (key code 27) ends the demo
        k = cv2.waitKey(1) & 0xFF
        if k == 27:
            break
finally:
    cv2.destroyAllWindows()
    cap.release()

Here are three screenshots of the video output for three different positions of my face.

![](../img/MeanShift-1.png)
![](../img/MeanShift-2.png)
![](../img/MeanShift-3.png)

Note that the red square keeps the same size even when I move my face closer to the camera. Its size is fixed to that of the face detected in the first frame and does not change later to match the apparent size of the face.

# CamShift Tracking with OpenCV

In [None]:
# CamShift face tracking demo.
#
# Flow: open the webcam, detect a face once in the first frame with a Haar
# cascade, build a hue histogram of that face region, then follow the region
# in every subsequent frame with cv2.CamShift, drawing the rotated box it
# returns. Press ESC to stop.
print(F'cv2 version = {cv2.__version__}')
# output: cv2 version = 4.5.0

videoCaptureApi = cv2.CAP_ANY  # autodetect default API

# this works if conda-forge::opencv=4.5.0 is installed in the local environment
cap = cv2.VideoCapture("/dev/video0", videoCaptureApi)

if not cap.isOpened():
    raise RuntimeError("ERROR! Unable to open camera")

# Everything below runs inside try/finally so the camera and the preview
# window are always released, even when setup or tracking raises.
try:
    ret, frame = cap.read()
    if not ret:
        raise RuntimeError("ERROR! Unable to read the first frame from camera")

    # Setting up the initial tracking window.
    # Previously we tracked the 10 best corners detected in the first frame.
    # Here we track a face instead: detect it once in the first frame, then
    # hand that set of pixels to the CamShift algorithm to follow.
    face_cascade = cv2.CascadeClassifier('../data/haarcascades/haarcascade_frontalface_default.xml')
    if face_cascade.empty():
        # A wrong path does not raise on construction; it yields an empty
        # classifier, so check explicitly.
        raise RuntimeError("ERROR! Unable to load the Haar cascade file")

    # one (x, y, w, h) rectangle per detected face
    face_rects = face_cascade.detectMultiScale(frame)

    # len() is used rather than truthiness because the result may be a NumPy
    # array, whose truth value is ambiguous when it holds several elements.
    if len(face_rects) == 0:
        raise RuntimeError("ERROR! No face detected in the first frame")

    # We only want to track a single face, so grab the first one.
    # Plain Python ints are used so the window is a simple tuple of integers.
    (face_x, face_y, w, h) = tuple(int(v) for v in face_rects[0])

    # tracking window (the rectangle where the face was detected in the first frame)
    track_window = (face_x, face_y, w, h)

    # region of interest: the face pixels of the first frame
    roi = frame[face_y:face_y + h, face_x:face_x + w]

    # using HSV color mapping
    hsv_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)

    # histogram of channel 0 (hue) with 180 bins over [0, 180)
    roi_hist = cv2.calcHist([hsv_roi], [0], None, [180], [0, 180])

    # rescale the histogram in place to the 0..255 range
    cv2.normalize(roi_hist, roi_hist, 0, 255, cv2.NORM_MINMAX)

    # termination criteria: at most 10 iterations, or epsilon of 1
    term_crit = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1)

    while True:
        ret, frame = cap.read()
        if not ret:
            # no more frames (camera unplugged or stream ended)
            break

        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)

        # back-project the face hue histogram onto the current frame
        dst = cv2.calcBackProject([hsv], [0], roi_hist, [0, 180], 1)

        # CamShift returns a rotated rectangle plus the updated search window.
        # The rectangle gets its own name so it does not clobber the
        # frame-read flag `ret`.
        rot_rect, track_window = cv2.CamShift(dst, track_window, term_crit)

        # four corner points of the rotated rectangle (floating point)
        pts = cv2.boxPoints(rot_rect)

        # Turn the floating-point corners into integers for drawing.
        # np.int0 was removed in NumPy 2.0, so cast explicitly instead.
        pts = pts.astype(np.int32)
        img2 = cv2.polylines(frame, [pts], True, (0, 0, 255), 5)

        cv2.imshow('img', img2)

        # ESC (key code 27) ends the demo
        k = cv2.waitKey(1) & 0xFF
        if k == 27:
            break
finally:
    cv2.destroyAllWindows()
    cap.release()

The red box is now resized to follow the size of the face.
It's important to make sure that the face is looking straight at the camera when the first video frame is captured, because the face is detected only once, in that frame.