## Computer Vision
### Lab 7
#### Project 2  - Tracking

In [1]:
import numpy as np
import cv2 as cv
import os
import glob
import matplotlib.pyplot as plt
from numpy.random import uniform
import pdb

### Non-maxima suppresion for detecting snooker balls

In [2]:
# Malisiewicz et al.
# non-maxima suppresion: remove all the bounding boxes considered as detections
# if they overlap with a higher scored bounding box
# each box has format [xmin, ymin, xmax, ymax, score]
def non_max_suppression_fast(boxes, overlapThresh):
    # if there are no boxes, return an empty list
    if len(boxes) == 0:
        return []

    # if the bounding boxes integers, convert them to floats --
    # this is important since we'll be doing a bunch of divisions
    if boxes.dtype.kind == "i":
        boxes = boxes.astype("float")

    # initialize the list of picked indexes	
    pick = []

    # grab the coordinates of the bounding boxes
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    scores = boxes[:, 4]
    
    # compute the area of the bounding boxes and sort the bounding
    # boxes by the bottom-right y-coordinate of the bounding box
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(scores)

    # keep looping while some indexes still remain in the indexes
    # list
    while len(idxs) > 0:
        # grab the last index in the indexes list and add the
        # index value to the list of picked indexes
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)

        # find the largest (x, y) coordinates for the start of
        # the bounding box and the smallest (x, y) coordinates
        # for the end of the bounding box
        xx1 = np.maximum(x1[i], x1[idxs[:last]])
        yy1 = np.maximum(y1[i], y1[idxs[:last]])
        xx2 = np.minimum(x2[i], x2[idxs[:last]])
        yy2 = np.minimum(y2[i], y2[idxs[:last]])

        # compute the width and height of the bounding box
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)

        # compute the ratio of overlap
        overlap = (w * h) / area[idxs[:last]]
        
        # delete all indexes from the index list that have higher overlap
        idxs = np.delete(idxs, np.concatenate(([last],
            np.where(overlap > overlapThresh)[0])))

    # return only the bounding boxes that were picked    
    return boxes[pick]

In [3]:
# read the templates balls
templates = []
base_folder_matching = 'template_matching'
images_names = glob.glob(os.path.join(base_folder_matching, "*.jpg")) 
for image_name in images_names:      
    template = cv.imread(image_name) 
    templates.append(template) 
    print(image_name,template.shape)
    cv.imshow("template", template)
    cv.waitKey(2000)
    cv.destroyAllWindows()
    
color_dict = {0: "black",
              1: "blue",
              2: "brown",
              3: "green",
              4: "pink",
              5: "red",
              6: "white",
              7: "yellow"}

template_matching/yellow.jpg (18, 19, 3)
template_matching/brown.jpg (18, 16, 3)
template_matching/blue.jpg (18, 16, 3)
template_matching/black.jpg (23, 23, 3)
template_matching/green.jpg (16, 15, 3)
template_matching/white.jpg (25, 27, 3)
template_matching/pink.jpg (22, 21, 3)
template_matching/red.jpg (19, 16, 3)


In [4]:
# read the first frame from a video
video_path = os.path.join('videos_table', "4_table.mp4")

cap = cv.VideoCapture(video_path)
assert cap.isOpened() is True

ret, first_frame = cap.read()
cap.release()

In [5]:
# run template matching using a threshold
frame = first_frame.copy() 
idx = -1
for template in templates:    
    idx = idx + 1
    template_gray = cv.cvtColor(template, cv.COLOR_BGR2GRAY)    
    w, h = template_gray.shape[::-1]
    frame_gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
    
    res = cv.matchTemplate(frame_gray, template_gray, cv.TM_CCOEFF_NORMED)
    
    cv.imshow("Map", res)
    threshold = 0.75
    loc = np.where( res >= threshold)
    frame_draw = first_frame.copy() 
    for pt in zip(*loc[::-1]):
        cv.rectangle(frame_draw, pt, (pt[0] + w, pt[1] + h), (0,0,255), 1)
    
    cv.imshow("Template_matching " + color_dict[idx], frame_draw)
    boxes = np.float32(np.zeros((len(loc[0]),5)))
    for i in range(len(loc[0])):
        boxes[i,0] = loc[1][i]
        boxes[i,1] = loc[0][i]
        boxes[i,2] = loc[1][i] + w - 1
        boxes[i,3] = loc[0][i] + h - 1
        boxes[i,4] = res[loc[0][i],loc[1][i]]
    overlapThresh = 0.4
    bb = non_max_suppression_fast(boxes, overlapThresh)
    print("Number of initial detections", boxes.shape[0])
    print("After NMS detections", bb.shape[0])
    frame_draw_2 = first_frame.copy()
    for i in range(len(bb)):
        cv.rectangle(frame_draw_2, (bb[i,0], bb[i,1]), (bb[i,2], bb[i,3]), (0,0,255), 1)
    
    print(color_dict[idx])
    cv.imshow("Template_matching " + color_dict[idx] + "_nms", frame_draw_2)
    cv.waitKey(0)
    cv.destroyAllWindows()

Number of initial detections 24
After NMS detections 2
black
Number of initial detections 33
After NMS detections 4
blue
Number of initial detections 15
After NMS detections 4
brown
Number of initial detections 2
After NMS detections 1
green
Number of initial detections 19
After NMS detections 3
pink
Number of initial detections 90
After NMS detections 4
red
Number of initial detections 51
After NMS detections 4
white
Number of initial detections 24
After NMS detections 5
yellow


### Background subtraction using frame difference

In [6]:
# do frame difference
# change the path to match on your machine
base_folder = 'videos'
path_video = os.path.join(base_folder, "8.mp4")

cap = cv.VideoCapture(path_video)  
current_frame = 0
max_number_of_frame_to_run = 750

ret, frame = cap.read() # Read the frame
frame_gray = cv.cvtColor(frame,cv.COLOR_BGR2GRAY)
old_frame_gray = frame_gray

while(cap.isOpened()): 
    ret, frame = cap.read() # Read the frame
    if ret is True: 
        current_frame = current_frame + 1 
        frame_gray = cv.cvtColor(frame,cv.COLOR_BGR2GRAY)
                    
        # compute frame diff            
        temp_1 = np.float16(frame_gray) - np.float16(old_frame_gray)           
        temp_2 = np.abs(temp_1)
        diff_frame = np.uint8(temp_2)
        
        cv.imshow("Frame diff",diff_frame)
        
        old_frame_gray = frame_gray
        
        if current_frame > max_number_of_frame_to_run:
            break
            
        if cv.waitKey(25) & 0xFF == ord('q'):
            break
    else:
        break

# after playing the video, release the video capture    
cap.release()
# close all the frames
cv.destroyAllWindows()

### Particle filter

In [7]:
# a particle is a bounding box, represented by the top left corner 
# and fixed width and hieght
def create_uniform_particles(x_range, y_range, N):
    particles = np.empty((N, 2))
    particles[:, 0] = uniform(x_range[0], x_range[1], size=N)
    particles[:, 1] = uniform(y_range[0], y_range[1], size=N) 
    return particles

In [8]:
# predict where the particles will be at the nex frame by applying some dynamics
# take into account velocity and some random noise

def predict(particles, velocity, std, frame, w, h):
    
    N = len(particles)    
    
    noise = np.random.randn(N) * std[0]  
    for i in range(N):
        particles[i, 0] = particles[i, 0] + velocity[0] + noise[i]
        # check that the particle is not outside of the image
        if(particles[i, 0] > frame.shape[1] -w):
            particles[i, 0] = frame.shape[1] - w
        if(particles[i, 0] < 0):
            particles[i, 0] = 0
            
    noise = np.random.randn(N) * std[1]
    for i in range(N):
        particles[i, 1] = particles[i, 1] + velocity[1] + noise[i]
        if(particles[i, 1] > frame.shape[0] - h):
            particles[i, 1] = frame.shape[0] - h
        if(particles[i, 1] < 0):
            particles[i, 1] = 0 
    return particles

In [6]:
# update the weight of each particle based on how similar is to the target window
# use a simple color histogram model
# essential step: how to update the weights
def update(particles, frame, hist_roi_norm, w, h):
    particles = np.int32(particles)   
    weights = np.zeros((particles.shape[0],1))
    for i in range(particles.shape[0]):
        img_particle = frame[particles[i, 1]: particles[i, 1] + h - 1, particles[i, 0]:particles[i, 0] + w - 1].copy()
        particle_hist = cv.calcHist([img_particle], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256]) 
        particle_hist_norm = particle_hist/ particle_hist.sum()
        distance = cv.compareHist(hist_roi_norm, particle_hist_norm, cv.HISTCMP_CHISQR_ALT)
        # higher chi-square distance is bad, smaller chi-square distance is better
        weights[i] = 1 / (distance)
    weights += 1.e-10 # avoid round-off to zero
    prints(weigths.min())
    prints(weigths.max())
    # normalize the wights such that we have a probability distribution
    weights /= sum(weights)
    return weights

In [7]:
# estimate the center of the cloud of particles
def estimate(particles, weights):    
    mean = np.float64(np.array([0,0]))
    N = particles.shape[0]  
    for i in range(N): 
        mean += weights[i] * particles[i, :]   
    return mean

In [8]:
# resample particles based on their weight
def resample(weights):
    w = weights.flatten()
    N = len(w)    
    tries = np.random.multinomial(N, w) 
    indexes = np.zeros(N, 'i')
    cumsum_vector = np.cumsum(tries)
    pos = -1 
    for i in range(len(tries)):
        for j in range(tries[i]):            
            pos = pos + 1
            indexes[pos] = i
            
    return indexes

def resample_from_index(particles, weights, indexes):
    particles[:] = particles[indexes]
    weights[:] = weights[indexes]
    weights /= np.sum(weights)
    return particles, weights

In [9]:
# change the path to match on your machine
base_folder = 'videos'
path_video = os.path.join(base_folder, "11.mp4")

cap = cv.VideoCapture(path_video)  
current_frame = 0
max_number_of_frame_to_run = 750

ret, frame = cap.read() # Read the frame
frame_gray = cv.cvtColor(frame,cv.COLOR_BGR2GRAY)
old_frame_gray = frame_gray

ret, first_frame = cap.read() # Read the first frame         
x, y, w, h = cv.selectROI(first_frame) 
img_roi = frame[y: y + h, x: x + w].copy()
hist_roi = cv.calcHist([img_roi], [0 ,1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256]) 
hist_roi_norm = hist_roi/ hist_roi.sum()
bb = np.array([x, y, x + w - 1, y + h - 1])

N = 1000
particles = create_uniform_particles([x, x], [y, y], N)
velocity = [0, 0]
std = [25, 25] 

while(cap.isOpened()): 
    ret, frame = cap.read() # Read the frame   
    if ret is True: 
        current_frame = current_frame + 1 
        print("current_frame", current_frame)
        frame_gray = cv.cvtColor(frame,cv.COLOR_BGR2GRAY)
                    
        
        particles = predict(particles, velocity, std, frame, w, h)        
        
        weights = update(particles, frame, hist_roi_norm, w, h) 
        
        obj = np.int32(estimate(particles, weights))       
       
        velocity[0] = obj[0] - bb[0]
        velocity[1] = obj[1] - bb[1]            
        print('velocity = ', velocity)
        bb = obj.copy()
        
        indexes = resample(weights)
        particles, weights = resample_from_index(particles, weights, indexes)               
        
        for i in range(N):            
            img2 = cv.rectangle(frame, (np.int32(particles[i,0]), np.int32(particles[i,1])), (np.int32(particles[i,0]) + w, np.int32(particles[i,1]) + h), (0,255,0), 1)
        
        img2 = cv.rectangle(frame, (obj[0], obj[1]), (obj[0] + w - 1, obj[1] + h - 1), (0, 255, 255), 4)
        cv.imshow('img2', img2)   
        cv.waitKey(100)
        if current_frame > max_number_of_frame_to_run:
            break
            
        if cv.waitKey(25) & 0xFF == ord('q'):
            break
    else:
        break

# after playing the video, release the video capture    
cap.release()                
# close all the frames
cv.destroyAllWindows()

NameError: name 'create_uniform_particles' is not defined

In [79]:
cv.destroyAllWindows()