In [1]:
import numpy as np
import matplotlib.pyplot as plt
import cv2
import math
from PIL import Image
import numpy.linalg as linalg
from copy import deepcopy


In [2]:
def get_iou(boxA, boxB):
    """
    Get the IoU between two rectangles.
    """
    
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])

    intersect_Area = abs(xB - xA) * abs(yB - yA)

    boxAArea = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
    boxBArea = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])
    
    iou = intersect_Area / float(boxAArea + boxBArea - intersect_Area)

    return iou

Method 1

In [4]:

input_video_path = 'KylianMbappe.mp4'
# input_video_path = 'zoom_2.mp4'

cap = cv2.VideoCapture(input_video_path) # read mp4

# capture one frame
ret,frame = cap.read()

# detect a face on the first frame
face_detector = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml') 
face_boxes = face_detector.detectMultiScale(frame) 
print(face_boxes)

if len(face_boxes)==0:
    print('no face detected')
    assert(False)

# initialize the tracing window around the (first) detected face
(x,y,w,h) = tuple(face_boxes[0]) 
track_window = (x,y,w,h)

#  region of interest for tracking
roi = frame[y:y+h, x:x+w]
print("roi", roi.shape)

# convert the roi to HSV so we can construct a histogram of Hue 
hsv_roi =  cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
print("hsv_roi", hsv_roi.shape)

# why do we need this mask? (remember the cone?)
# read the description for Figure 3 in the original Cam Shift paper: http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.14.7673 
mask = cv2.inRange(hsv_roi, np.array((0., 60.,32.)), np.array((180.,255.,255.)))
print("mask",mask.shape)


# form histogram of hue in the roi
roi_hist = cv2.calcHist([hsv_roi],[0],mask,[180],[0,180])
print("roi_hist", roi_hist.shape)

# normalize the histogram array values so they are in the min=0 to max=255 range
cv2.normalize(roi_hist,roi_hist,0,255,cv2.NORM_MINMAX)

# termination criteria for mean shift: 10 iteration or shift less than 1 pixel
term_crit = ( cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1 )

IoU_list = []
keep_img = []
count = 0

while True:
    count += 1
    
    # grab a frame
    ret ,frame = cap.read() 
    
    if ret == True: 
  
        # convert to HSV
        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
        
        # histogram back projection using roi_hist 
        dst = cv2.calcBackProject([hsv],[0],roi_hist,[0,180],1)
        
        # use meanshift to shift the tracking window
        ret, track_window = cv2.meanShift(dst, track_window, term_crit)
        
        # display tracked window
        x1,y1,w1,h1 = track_window
        img = cv2.rectangle(frame, (x1,y1), (x1+w1,y1+h1), (0,0,255),5)
        
        # default detector
        temp_face_boxes = face_detector.detectMultiScale(frame)
        if len(temp_face_boxes)> 1:
            print("ite ", count, " len: ", len(temp_face_boxes))
            for temp_face in temp_face_boxes:
                (x2,y2,w2,h2) = tuple(temp_face_boxes[0]) 
                temp_track_window = (x2,y2,w2,h2)
                img = cv2.rectangle(frame, (x2,y2), (x2+w2,y2+h2), (0,255,0),5)
                
                box_tracked = [x1, y1, x1+w1, y1+h1]
                box_bounded = [x2, y2, x2+w2, y2+h2]
                IoU = get_iou(box_tracked, box_bounded)
            IoU_list.append(IoU)
        
        if IoU > 0.73:
            keep_img.append(img)
        
        cv2.imshow('mean shift tracking demo',img)
        
        if cv2.waitKey(33) & 0xFF == 27: # wait a bit and exit is ESC is pressed
            break
        
    else:
        break
        
cv2.waitKey(1)
cv2.destroyAllWindows()
cap.release()

[[329  72 110 110]]
roi (110, 110, 3)
hsv_roi (110, 110, 3)
mask (110, 110)
roi_hist (180, 1)
ite  26  len:  2
ite  30  len:  2
ite  31  len:  2
ite  32  len:  2
ite  33  len:  2
ite  43  len:  2
ite  44  len:  2
ite  46  len:  2
ite  49  len:  2
ite  50  len:  2
ite  51  len:  2
ite  53  len:  2
ite  54  len:  2
ite  64  len:  2
ite  67  len:  2
ite  68  len:  3
ite  70  len:  2
ite  71  len:  2
ite  72  len:  2
ite  73  len:  2
ite  74  len:  2
ite  75  len:  2
ite  80  len:  2
ite  81  len:  2
ite  85  len:  2
ite  86  len:  2
ite  91  len:  2


In [30]:
n = len(IoU_list)
print(max(IoU_list))
print(len(keep_img))

m = 0
for item in IoU_list:
    if item > 0.7:
        m+= 1
print(m/n)

# plot sample image

cv2.imshow("image with low IoU", keep_img[0])
cv2.waitKey(1)
cv2.destroyAllWindows()

# plt.plot(list(range(2,n+2)), IoU_list)
# plt.xlabel("time")
# plt.ylabel("IoU")
# plt.title("IoU curve under method of gradient")

0.7356562745611737
4
0.052083333333333336


Method 2

In [29]:
input_video_path = 'KylianMbappe.mp4'

cap = cv2.VideoCapture(input_video_path) # read mp4

# capture one frame
ret,frame = cap.read()

# detect a face on the first frame
face_detector = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml') 
face_boxes = face_detector.detectMultiScale(frame) 

if len(face_boxes)==0:
    print('no face detected')
    assert(False)

# initialize the tracing window around the (first) detected face
(x,y,w,h) = tuple(face_boxes[0]) 
track_window = (x,y,w,h)

#  region of interest for tracking
roi = frame[y:y+h, x:x+w]

# convert the roi to gray so we can construct a histogram of Hue 
hsv_roi =  cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)

Ix = cv2.Sobel(hsv_roi, cv2.CV_64F, 1,0, ksize = 5)
Iy = cv2.Sobel(hsv_roi, cv2.CV_64F, 0,1, ksize = 5)

mag, ang = cv2.cartToPolar(Ix,Iy,angleInDegrees=True)
ang = np.array([ang], dtype=np.uint8)

mask = cv2.inRange(mag, np.array([np.max(mag)/10]), np.array([np.max(mag)]))

# # form histogram of hue in the roi
# roi_hist = cv2.calcHist([hsv_roi],[0],mask,[180],[0,180])
roi_hist = cv2.calcHist(ang,[0],mask,[24],[0,360])

# normalize the histogram array values so they are in the min=0 to max=360 range
cv2.normalize(roi_hist,roi_hist,0,360,cv2.NORM_MINMAX)

# termination criteria for mean shift: 10 iteration or shift less than 1 pixel
term_crit = ( cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1 )

IoU_list = []
keep_img = []

box_bounded = [x,y,x+w, y+h]

while True:
    
    # grab a frame
    ret ,frame = cap.read() 
    
    if ret == True: 
  
        # convert to HSV
        gray_image = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV) 
        
        # histogram back projection using roi_hist 
        dst = cv2.calcBackProject([gray_image],[0],roi_hist,[0,360],1)
        
        # use meanshift to shift the tracking window
        ret, track_window = cv2.meanShift(dst, track_window, term_crit)
        
        frame2 = deepcopy(frame)
        
        # display tracked window
        x1,y1,w1,h1 = track_window
        img = cv2.rectangle(frame, (x1,y1), (x1+w1,y1+h1), (0,0,255),5)

        # default detector
        temp_face_boxes = face_detector.detectMultiScale(frame)
        
        find_max = -1
        box_tracked = [x1, y1, x1+w1, y1+h1]
        box_bounded_final = temp_face_boxes[0]
        for temp_face in temp_face_boxes:
            (x2,y2,w2,h2) = tuple(temp_face) 
            box_bounded_temp = [x2, y2, x2+w2, y2+h2]
            if get_iou(box_bounded_temp, box_bounded)> find_max:
                box_bounded_final = box_bounded_temp
                find_max = get_iou(box_bounded_temp, box_bounded)
        img = cv2.rectangle(frame, (box_bounded_final[0],box_bounded_final[1]), (box_bounded_final[2],box_bounded_final[3]), (0,255,0),5)
        
        box_bounded = box_bounded_final
        IoU = get_iou(box_tracked, box_bounded)
        IoU_list.append(IoU)
        
        if IoU < 0.57:
            keep_img.append(img)
        
        cv2.imshow('mean shift tracking demo',img)
        
        if cv2.waitKey(33) & 0xFF == 27: # wait a bit and exit is ESC is pressed
            break
        
    else:
        break
        
cv2.waitKey(1)
cv2.destroyAllWindows()
cap.release()
