## Computer Vision
### Lab 6
#### Project 2  - Tracking

In [1]:
import numpy as np
import cv2 as cv
import os
import glob
import matplotlib.pyplot as plt

### Template matching for detecting the snooker balls

In [2]:
# read the templates balls
templates = []
base_folder_matching = 'template_matching'
images_names = glob.glob(os.path.join(base_folder_matching, "*.jpg")) 
for image_name in images_names:      
    template = cv.imread(image_name) 
    templates.append(template) 
    cv.imshow("template", template)
    cv.waitKey(2000)
    cv.destroyAllWindows()
    
color_dict = {0: "black",
              1: "blue",
              2: "brown",
              3: "green",
              4: "pink",
              5: "red",
              6: "white",
              7: "yellow"}

In [3]:
# read the first frame from a video
video_path = os.path.join('videos_table', "4_table.mp4")

cap = cv.VideoCapture(video_path)
assert cap.isOpened() is True

ret, first_frame = cap.read()
cap.release()

In [4]:
# run template matching using a threshold
frame = first_frame.copy() 
idx = -1
for template in templates:    
    idx = idx + 1
    template_gray = cv.cvtColor(template, cv.COLOR_BGR2GRAY)    
    w, h = template_gray.shape[::-1]
    frame_gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
    
    res = cv.matchTemplate(frame_gray, template_gray, cv.TM_CCOEFF_NORMED)
    cv.imshow("Map", res)
    threshold = 0.75
    loc = np.where( res >= threshold)
    frame_draw = first_frame.copy() 
    for pt in zip(*loc[::-1]):
        cv.rectangle(frame_draw, pt, (pt[0] + w, pt[1] + h), (0,0,255), 1)
        
    print(color_dict[idx])
    cv.imshow("Template_matching " + color_dict[idx], frame_draw)
    cv.waitKey(0)
    cv.destroyAllWindows()

(array([130, 131, 131, 131, 131, 132, 132, 132, 132, 132, 458, 458, 458,
       458, 458, 459, 459, 459, 459, 459, 459, 460, 460, 460]), array([516, 515, 516, 517, 518, 515, 516, 517, 518, 519, 345, 346, 347,
       348, 349, 345, 346, 347, 348, 349, 350, 348, 349, 350]))
black
(array([132, 132, 132, 132, 132, 132, 133, 133, 133, 133, 133, 133, 133,
       134, 134, 134, 134, 134, 283, 283, 283, 283, 284, 284, 284, 284,
       460, 460, 460, 460, 461, 461, 461]), array([628, 629, 630, 631, 741, 742, 628, 629, 630, 631, 740, 741, 742,
       629, 630, 740, 741, 742, 628, 629, 630, 631, 628, 629, 630, 631,
       349, 350, 351, 352, 349, 350, 351]))
blue
(array([133, 133, 133, 134, 134, 134, 134, 134, 135, 284, 284, 284, 285,
       285, 536]), array([629, 630, 741, 629, 630, 740, 741, 742, 741, 628, 629, 630, 629,
       630, 630]))
brown
(array([535, 536]), array([627, 627]))
green
(array([132, 133, 133, 133, 133, 133, 134, 134, 134, 134, 134, 134, 135,
       135, 284, 284, 284, 285, 

In [12]:
# run template matching using different methods and min/max value
frame = first_frame.copy()

# All the 6 methods for comparison in a list
methods = ['cv.TM_SQDIFF', 'cv.TM_SQDIFF_NORMED','cv.TM_CCORR',
            'cv.TM_CCORR_NORMED', 'cv.TM_CCOEFF', 'cv.TM_CCOEFF_NORMED' ]

# methods = ['cv.TM_CCOEFF_NORMED']

idx = -1
for template in templates:                
    idx = idx + 1
    template_gray = cv.cvtColor(template, cv.COLOR_BGR2GRAY)    
    w, h = template_gray.shape[::-1]
    frame_gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
       
    for meth in methods:        
        method = eval(meth)
        frame = first_frame.copy()
        
        # apply template Matching
        res = cv.matchTemplate(frame_gray, template_gray,method)
         
        min_val, max_val, min_loc, max_loc = cv.minMaxLoc(res)

        # If the method is TM_SQDIFF or TM_SQDIFF_NORMED, take minimum
        if method in [cv.TM_SQDIFF, cv.TM_SQDIFF_NORMED]:
            top_left = min_loc                      
        else:
            top_left = max_loc
            
        bottom_right = (top_left[0] + w, top_left[1] + h)

        cv.rectangle(frame,top_left, bottom_right, 255, 2)
        
        cv.imshow("Template_matching " + color_dict[idx] +' ' + meth, frame)
        cv.waitKey(0)  
        cv.destroyAllWindows()

### Histrogram of colors for detecting the snooker balls

In [6]:
# define a method to compute the histogram of a window in the quantized BGR color space
def compute_hist(img, bins_0, bins_1, bins_2):
    histogram = np.zeros((bins_0, bins_1, bins_2))
    length_0 = 256 // bins_0
    length_1 = 256 // bins_1
    length_2 = 256 // bins_2
    for i in range(img.shape[0]):
        for j in range(img.shape[1]):
            pixel = img[i][j]
            interval_0 = pixel[0] // length_0
            interval_1 = pixel[1] // length_1
            interval_2 = pixel[2] // length_2
            histogram[interval_0, interval_1, interval_2] += 1            
    return histogram

In [7]:
# Select ROI
frame = first_frame.copy()
x_min, y_min, w, h = cv.selectROI(frame)
x_max = x_min + w
y_max = y_min + h

# Crop image
img_crop = frame[y_min:y_max, x_min:x_max]

# Display cropped image 
cv.imshow("Image", img_crop)
cv.waitKey(0)
cv.destroyAllWindows()

In [8]:
# compute the color histogram of the window in the quantized BGR color space
# use our function
histogram_img = compute_hist(img_crop, 4, 4, 4)
print(histogram_img)
print(histogram_img.sum())
print(img_crop.shape[0] * img_crop.shape[1])

[[[318.   0.   0.   0.]
  [127.   1.   0.   0.]
  [ 55.   0.   0.   0.]
  [  0.   0.   0.   0.]]

 [[  0.   0.   0.   0.]
  [  0.   5.   0.   0.]
  [  0.   7.   0.   0.]
  [  0.   0.   0.   0.]]

 [[  0.   0.   0.   0.]
  [  0.   0.   0.   0.]
  [  0.   0.   2.   0.]
  [  0.   0.   5.   5.]]

 [[  0.   0.   0.   0.]
  [  0.   0.   0.   0.]
  [  0.   0.   0.   0.]
  [  0.   0.   0.   4.]]]
529.0
529


In [9]:
# compute the color histogram of the window in the quantized BGR color space
# use the function provided by OpenCV
hist_img = cv.calcHist([img_crop], [0, 1, 2], None, [4, 4, 4], [0, 256, 0, 256, 0, 256]) 
print(hist_img)

[[[318.   0.   0.   0.]
  [127.   1.   0.   0.]
  [ 55.   0.   0.   0.]
  [  0.   0.   0.   0.]]

 [[  0.   0.   0.   0.]
  [  0.   5.   0.   0.]
  [  0.   7.   0.   0.]
  [  0.   0.   0.   0.]]

 [[  0.   0.   0.   0.]
  [  0.   0.   0.   0.]
  [  0.   0.   2.   0.]
  [  0.   0.   5.   5.]]

 [[  0.   0.   0.   0.]
  [  0.   0.   0.   0.]
  [  0.   0.   0.   0.]
  [  0.   0.   0.   4.]]]


In [10]:
# now, we are going to compute the histograms for our templates
hist_templates = []
for template in templates:
    template_hist = cv.calcHist([template], [0, 1, 2], None, [4, 4, 4], [0, 256, 0, 256, 0, 256])
    hist_templates.append(template_hist)

In [6]:
# Select ROI
frame = first_frame.copy()
x_min, y_min, w, h = cv.selectROI(frame)
x_max = x_min + w
y_max = y_min + h

# Crop image
img_crop = frame[y_min:y_max, x_min:x_max]

# Display cropped image 
cv.imshow("Image", img_crop)
cv.waitKey(0)
cv.destroyAllWindows()

hist_img = cv.calcHist([img_crop], [0, 1, 2], None, [4, 4, 4], [0, 256, 0, 256, 0, 256]) 

# do normalization for each histogram
distances = []
for i in range(len(templates)):
    hist_img_norm = hist_img / (hist_img.sum())
    hist_template_norm = hist_templates[i] / (hist_templates[i].sum())    
    # use one of the possible distances between histograms - see function cv.compareHist
    dist = cv.compareHist(hist_img_norm, hist_template_norm, cv.HISTCMP_CHISQR_ALT)
    distances.append(dist)
print(distances)
print(color_dict)
print(color_dict[np.argmin(distances)])

NameError: name 'hist_templates' is not defined

### Tracking

In [5]:
# tracking
# change the path to match on your machine
base_folder = 'videos'
path_video1 = os.path.join(base_folder, "3.mp4")
path_video2 = os.path.join(base_folder, "3_annotated.mp4")

#sanity check - run the video
cap = cv.VideoCapture(path_video2)  
current_frame = 0
max_number_of_frame_to_run = 750

while(cap.isOpened()): 
    ret, frame = cap.read() # Read the frame
    if ret is True: 
        current_frame = current_frame + 1 
        
        cv.imshow("Frame",frame)
        
        if current_frame > max_number_of_frame_to_run:
            break
            
        if cv.waitKey(25) & 0xFF == ord('q'):
            break
    else:
        break

# after playing the video, release the video capture    
cap.release()
# close all the frames
cv.destroyAllWindows()

In [6]:
# load the ground-truth file
red_ball_gt = np.loadtxt(os.path.join(base_folder, 'video_8_red_1.txt'))
white_ball_gt = np.loadtxt(os.path.join(base_folder, 'video_8_white.txt'))

In [7]:
# the first line contains the lenght (number of frames) of the video (followed by -1 in order to keep the dimension of the array)
red_ball_gt[0]

array([314.,  -1.,  -1.,  -1.,  -1.])

In [8]:
cv.imshow("Frame", first_frame)
cv.waitKey(0)
cv.destroyAllWindows()

In [9]:
# the other lines contains the frame index and the coordinates of the bounding box
black_ball_gt[1]
# frame_idx, x_min, y_min, x_max, y_max

NameError: name 'black_ball_gt' is not defined

In [10]:
def track_ball_using_hist_of_colors(video_path):
    
    bboxes = []
    
    cap = cv.VideoCapture(video_path)
    ret, first_frame = cap.read() # Read the first frame 
    
    x, y, w, h = cv.selectROI(first_frame) 
    track_window = (x, y, w, h)
    
    roi = first_frame[y: y + h, x: x + w]
    annotated_frame = cv.rectangle(first_frame, (x, y), (x+w,y+h), 255, 2)
 
    cv.imshow('First frame initialization', annotated_frame)
    cv.waitKey(10000)
    
    
    roi_hist = cv.calcHist([roi], [0 ,1, 2], None, [4, 4, 4], [0, 256, 0, 256, 0, 256]) 
    roi_hist_norm = roi_hist / roi_hist.sum()

    roi_gray = cv.cvtColor(roi, cv.COLOR_BGR2GRAY)
    
    frame_idx = 0
    while cap.isOpened():
        frame_idx += 1
        ret, frame = cap.read()

        if ret is True: 
            frame_gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
            mask1 = np.int8(np.zeros(frame_gray.shape))
            center = (y + h//2, x + h//2)

            y_min = np.max((0, center[0] - (2*h)))
            y_max = np.min((frame.shape[0], center[0] + (2*h)))
            x_min = np.max((0, center[1] - (2*w)))
            x_max = np.min((frame.shape[1], center[1] + (2*w)))
            
            mask1[y_min: y_max, x_min: x_max] = 255

            frame_gray_mask = cv.bitwise_and(frame_gray,frame_gray,mask=mask1)
            cv.imshow('frame gray mask', frame_gray_mask)
            cv.waitKey(500)

            res = cv.matchTemplate(frame_gray_mask, roi_gray, cv.TM_CCOEFF_NORMED)        
            min_val, max_val, min_loc, max_loc = cv.minMaxLoc(res)

            y = max_loc[1]
            x = max_loc[0]
            bboxes.append([frame_idx, x, y, x + w, y + h])
            img2 = cv.rectangle(frame, (x, y), (x + w, y + h), 255, 2)
            cv.imshow('img2', img2)

            if cv.waitKey(1) & 0xFF == ord('q'):
                break
                
        else:
            break
    # after playing the video, release the video capture    
    cap.release()
    # close all the frames
    cv.destroyAllWindows()
    return bboxes

In [11]:
video_name = "8_table.mp4"
bboxes = track_ball_using_hist_of_colors(os.path.join("videos_table", video_name))

In [25]:
def bb_intersection_over_union(boxA, boxB):
    # determine the (x, y)-coordinates of the intersection rectangle
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])
    # compute the area of intersection rectangle
    interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
    # compute the area of both the prediction and ground-truth
    # rectangles
    boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
    boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)
    # compute the intersection over union by taking the intersection
    # area and dividing it by the sum of prediction + ground-truth
    # areas - the intersection area
    iou = interArea / float(boxAArea + boxBArea - interArea)
    # return the intersection over union value
    return iou

In [26]:
def compute_percentage_tracking(gt_bboxes, predicted_bboxes, num_frames):
    num_frames = int(num_frames)
    
    tp = 0
    fp = 0
    
    gt_dict = {}
    for gt_box in gt_bboxes:
        gt_dict[gt_box[0]] = gt_box[1:]
    
    pred_dict = {}
    for pred_bbox in predicted_bboxes:
        pred_dict[pred_bbox[0]] = pred_bbox[1:]
        
    for i in range(num_frames):
        if gt_dict.get(i, None) is None and pred_dict.get(i, None) is None: # the ball is not on the table
            tp += 1 
        
        elif gt_dict.get(i, None) is not None and pred_dict.get(i, None) is None: # the ball is not detected
            fp += 1
            
        elif gt_dict.get(i, None) is None and pred_dict.get(i, None) is not None: # the ball is not on the table, but it is 'detected'
            fp += 1
            
        elif gt_dict.get(i, None) is not None and pred_dict.get(i, None) is not None: # the ball is on the table and it is detected
            
            iou = bb_intersection_over_union(gt_dict[i], pred_dict[i])
            if iou >= 0.2:
                tp += 1
            else:
                fp += 1 
             
            
    print(tp, fp)
    assert tp + fp == num_frames
    perc = tp / (tp + fp)
    
    return perc

In [28]:
compute_percentage_tracking(black_ball_gt[1:], bboxes, black_ball_gt[0][0])

76 120


0.3877551020408163