### Single-Object Tracking 


In [1]:
## Initialization: load the data and create a slider that selects which image sequence to use

import numpy as np
import cv2
import os
import glob
import ipywidgets as widgets
from IPython.display import display

# Slider that selects which image sequence to load (integer from 1 to 5, default 1).
# NOTE(review): the data loaded later in this cell is taken from this widget's value, so any
# reported result is only reproducible if the slider position is recorded alongside it.
seq_slider = widgets.IntSlider(value=1, min=1, max=5, description='Which Sequence of Images:')
display(seq_slider) # render the slider in the cell output

# implement function to update the path based on selected sequence of images
def update_paths(seq_number):
    """Build the relative paths of one tracking sequence.

    Args:
        seq_number: Identifier interpolated into the ``seq_<n>`` folder name.

    Returns:
        Tuple ``(image_dir, firsttrack_file, groundtruth_file)`` of paths under
        ``data/Task 1/seq_<n>``.
    """
    sequence_root = f"data/Task 1/seq_{seq_number}"
    entry_names = ("img", "firsttrack.txt", "groundtruth.txt")
    return tuple(f"{sequence_root}/{entry}" for entry in entry_names)

# implement function to update the path when the slider value changes
def update_slider(change):
    """Reload the module-level dataset for the sequence newly selected on the slider.

    Everything is loaded into locals first and the globals are only rebound once
    all three reads have succeeded. If any read raises (for example a missing
    annotation file), the previously loaded sequence stays fully intact instead
    of leaving paths and data that belong to different sequences.

    Args:
        change: Change notification passed by the widget observer; only its
            ``new`` attribute (the new slider value) is read.
    """
    global image_seq, firsttrack_seq, groundtruth_seq
    global image_sequences, template_coords, ground_truth_coords

    new_image_seq, new_firsttrack_seq, new_groundtruth_seq = update_paths(change.new)
    new_image_sequences = read_sequenceimage(new_image_seq)
    new_template_coords = read_firsttrack(new_firsttrack_seq)
    new_ground_truth_coords = read_groundtruth(new_groundtruth_seq)

    # Commit only after every load succeeded so the globals always describe one sequence.
    image_seq, firsttrack_seq, groundtruth_seq = new_image_seq, new_firsttrack_seq, new_groundtruth_seq
    image_sequences = new_image_sequences
    template_coords = new_template_coords
    ground_truth_coords = new_ground_truth_coords

# implement function to load and read sequence images by using "cv2.imread"
def read_sequenceimage(image_path):
    """Load every ``*.jpg`` frame of a sequence in sorted file-name order.

    Args:
        image_path: Directory containing the frames, resolved relative to the
            current working directory.

    Returns:
        List of images as returned by ``cv2.imread``, one per matched file.
        The list is empty when no ``*.jpg`` file matches.

    Raises:
        OSError: If a matched file cannot be decoded. ``cv2.imread`` signals this
            by returning ``None``; storing that value would only fail later and
            dropping it would shift frames relative to the ground-truth rows.
    """
    pattern = os.path.join(os.getcwd(), image_path, '*.jpg')

    # Sorting is lexicographic on the full path.
    # NOTE(review): this presumably relies on zero-padded frame names - confirm against the dataset.
    frames = []
    for image_file in sorted(glob.glob(pattern)):
        frame = cv2.imread(image_file)
        if frame is None:
            raise OSError(f"Could not read image file: {image_file}")
        frames.append(frame)
    return frames

# implement function to read groundtruth data
def read_groundtruth(ground_path):
    """Read the per-frame ground-truth boxes from a comma-separated text file.

    Each non-blank line is split on ``,`` and every field is converted with
    ``int``. Blank lines (for example a trailing empty line at the end of the
    file) are skipped instead of raising ``ValueError`` on ``int('')``.

    Args:
        ground_path: Path of the ground-truth text file.

    Returns:
        List with one list of integers per non-blank line, in file order.

    Raises:
        ValueError: If a field of a non-blank line is not an integer.
    """
    with open(ground_path, 'r') as file:
        # int() ignores surrounding whitespace, so fields need no explicit strip.
        return [
            [int(field) for field in line.split(',')]
            for line in file
            if line.strip()
        ]
    
# implement function to read firsttrack data
def read_firsttrack(firsttrack_path):
    """Read the initial bounding box from the first non-blank line of a text file.

    Only the first non-blank line is parsed. Leading or trailing blank lines no
    longer raise ``ValueError``, and later lines are not parsed at all.

    Args:
        firsttrack_path: Path of the comma-separated first-track file.

    Returns:
        List of integers taken from the first non-blank line.

    Raises:
        IndexError: If the file contains no non-blank line.
        ValueError: If a field of that line is not an integer.
    """
    with open(firsttrack_path, 'r') as file:
        for line in file:
            if line.strip():
                # int() ignores surrounding whitespace, so fields need no explicit strip.
                return [int(field) for field in line.split(',')]
    raise IndexError(f"No bounding box found in {firsttrack_path}")

# Load the sequence currently selected on the slider into module-level variables:
#   image_seq / firsttrack_seq / groundtruth_seq -> paths returned by update_paths
#   image_sequences     -> frames returned by read_sequenceimage
#   template_coords     -> first-frame box returned by read_firsttrack
#   ground_truth_coords -> per-frame boxes returned by read_groundtruth
image_seq, firsttrack_seq, groundtruth_seq = update_paths(seq_slider.value) # resolve paths for the slider's current value
image_sequences = read_sequenceimage(image_seq)
template_coords = read_firsttrack(firsttrack_seq)
ground_truth_coords = read_groundtruth(groundtruth_seq)

# Re-run the loading above (via update_slider) whenever the slider's "value" trait changes.
seq_slider.observe(update_slider, names="value")

IntSlider(value=1, description='Which Sequence of Images:', max=5, min=1)

Using Template Matching Algorithm


In [2]:
## template matching algorithm

def template_matching_algorithm(image_sequences, template_coords, ground_truth_coords, update_interval=50):
    """Track one object through a frame sequence with normalised template matching.

    The template is cropped from the first frame using ``template_coords`` and is
    re-cropped from every frame at the best match location. Every
    ``update_interval`` frames the box is re-seeded from the ground truth before
    matching.

    NOTE(review): on re-seed frames the template is cropped from the very frame
    it is then matched against, so the reported box on those frames is expected
    to coincide with the ground truth. Keep this in mind when reading the metrics.

    Args:
        image_sequences: Sequence of frames (arrays indexable as ``frame[y0:y1, x0:x1]``).
        template_coords: Initial box ``[x, y, width, height]`` in the first frame.
        ground_truth_coords: Per-frame boxes ``[x, y, width, height]``, indexed by
            frame number; used for re-seeding and for the overlay.
        update_interval: Re-seed period in frames (default 50, the value that was
            previously hard-coded). ``None`` or ``0`` disables re-seeding.

    Returns:
        Tuple ``(image_results, template_coords_results)``: annotated copies of
        frames ``1..N-1``, and the tracked boxes for frames ``0..N-1`` (the first
        entry is the ``template_coords`` object that was passed in).
    """
    tracked_colour = (0, 255, 0)
    truth_colour = (0, 0, 255)

    def crop(frame, box):
        # Cut the [x, y, w, h] region out of the frame (rows are y, columns are x).
        return frame[box[1]:box[1] + box[3], box[0]:box[0] + box[2]]

    image_results = []  # annotated frames
    template_coords_results = [template_coords]  # tracked box per frame, starting with frame 0

    template = crop(image_sequences[0], template_coords)

    for i in range(1, len(image_sequences)):
        current_frame = image_sequences[i]
        truth_box = ground_truth_coords[i]

        # Periodically reset the tracker to the ground truth.
        if update_interval and i % update_interval == 0:
            template_coords = truth_box
            template = crop(current_frame, template_coords)

        # The best match is the location of the maximum normalised correlation coefficient.
        result = cv2.matchTemplate(current_frame, template, cv2.TM_CCOEFF_NORMED)
        _, _, _, max_loc = cv2.minMaxLoc(result)

        # Keep the previous width/height; only the top-left corner moves.
        template_coords = [max_loc[0], max_loc[1], template_coords[2], template_coords[3]]
        template = crop(current_frame, template_coords)
        template_coords_results.append(template_coords)

        # Draw the tracked box first, then the ground truth, each with its label above the box.
        current_frame_result = current_frame.copy()
        overlays = (
            ("Template Matching", template_coords, tracked_colour),
            ("Ground Truth", truth_box, truth_colour),
        )
        for label, box, colour in overlays:
            cv2.rectangle(current_frame_result, (box[0], box[1]), (box[0] + box[2], box[1] + box[3]), colour, 2)
            cv2.putText(current_frame_result, label, (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, colour, 2)

        image_results.append(current_frame_result)

    return image_results, template_coords_results

Using Kalman Filter Algorithm


In [3]:
# Kalman Filter Implementation

# this function used to implement the calculation step of kalman filter (prediction state, correction step and update step)
def kalman_filter_implementation(initial_state, F, H, P, R, Q, measurement, return_covariance=False):
    """Run one predict/correct cycle of a linear Kalman filter.

    All inputs are converted to ``float32`` arrays. There is no control input, so
    the prediction is ``x = F x`` and ``P = F P F^T + Q``. The correction uses the
    innovation ``y = z - H x``, ``S = H P H^T + R`` and the gain ``K = P H^T S^-1``.

    Args:
        initial_state: State estimate before this step (any length ``n``).
        F: State transition matrix (``n x n``).
        H: Measurement matrix (``m x n``).
        P: State covariance before this step (``n x n``).
        R: Measurement noise covariance (``m x m``).
        Q: Process noise covariance (``n x n``).
        measurement: Measurement vector for this step (length ``m``).
        return_covariance: When true, also return the updated covariance so the
            caller can feed it into the next step. Defaults to ``False``, which
            keeps the original return value.

    Returns:
        The corrected state as a flat array, or ``(state, P)`` when
        ``return_covariance`` is true.
    """
    state = np.array(initial_state, dtype=np.float32).reshape(-1, 1)  # column vector
    F = np.array(F, dtype=np.float32)
    H = np.array(H, dtype=np.float32)
    P = np.array(P, dtype=np.float32)
    R = np.array(R, dtype=np.float32)
    Q = np.array(Q, dtype=np.float32)

    # Prediction step.
    state = F @ state
    P = F @ P @ F.T + Q

    # Correction step.
    measurement = np.array(measurement, dtype=np.float32).reshape(-1, 1)
    innovation = measurement - H @ state
    S = H @ P @ H.T + R
    K = P @ H.T @ np.linalg.inv(S)
    state = state + K @ innovation

    # Size the identity from the covariance instead of hard-coding a 4-dimensional state.
    identity = np.eye(P.shape[0], dtype=np.float32)
    P = (identity - K @ H) @ P

    if return_covariance:
        return state.flatten(), P
    return state.flatten()

# this function use to do single object tracking by using kalman filter method
def kalman_filter_algorithm(initial_state, image_sequences, groundtruth_data):
    """Track one object with a Kalman filter that uses the ground truth as measurement.

    The state is the box ``[x, y, width, height]`` with identity transition and
    measurement models. The estimate from each frame is carried into the next
    frame; previously every frame restarted from ``initial_state``, so nothing was
    ever propagated through the sequence.

    NOTE(review): ``kalman_filter_implementation`` is called so that it returns
    only the state, so the covariance ``P`` passed in is the same initial value on
    every frame and the gain is therefore constant.

    Args:
        initial_state: Initial box ``[x, y, width, height]``.
        image_sequences: Sequence of frames; each must provide ``copy()``.
        groundtruth_data: Per-frame boxes, reshaped here to ``(-1, 4)`` and used
            both as measurements and for the overlay.

    Returns:
        Tuple ``(image_results, template_coords_result)``: annotated copies of all
        frames and the filtered box (flat array) for each frame.
    """
    image_results = []  # annotated frames
    template_coords_result = []  # filtered box per frame
    groundtruth_data = np.array(groundtruth_data).reshape(-1, 4)

    # Filter parameters.
    F = np.eye(4)        # state transition: the box is modelled as static between frames
    H = np.eye(4)        # measurement: all four box components are observed directly
    P = np.eye(4) * 1e2  # initial state covariance
    R = np.eye(4) * 1e1  # measurement noise covariance: larger values lower the gain (measurements trusted less)
    Q = np.eye(4) * 1e1  # process noise covariance: larger values inflate the predicted covariance and raise the gain

    tracked_colour = (255, 0, 0)
    truth_colour = (0, 0, 255)

    state = initial_state
    for i, image in enumerate(image_sequences):
        measurement = groundtruth_data[i]

        # Feed the previous estimate back in so the filter evolves over the sequence.
        state = kalman_filter_implementation(state, F, H, P, R, Q, measurement)
        template_coords_result.append(state)

        current_image = image.copy()

        # Convert to plain ints: OpenCV drawing calls expect Python integers for points.
        x, y, w, h = (int(value) for value in state[:4])
        cv2.rectangle(current_image, (x, y), (x + w, y + h), tracked_colour, 2)
        cv2.putText(current_image, "Kalman Filter", (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, tracked_colour, 2)

        gx, gy, gw, gh = (int(value) for value in measurement)
        cv2.rectangle(current_image, (gx, gy), (gx + gw, gy + gh), truth_colour, 2)
        cv2.putText(current_image, "Ground Truth", (gx, gy - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, truth_colour, 2)

        image_results.append(current_image)

    return image_results, template_coords_result


Evaluate the performance of the single-object tracking algorithms.

In [8]:
## Metric Evaluation of performance for single object tracking algorithm

# function to calculate the distance between the center of the tracked (template) bounding box and the center of the ground-truth bounding box
def calculate_distance(box1, box2):
    """Return the Euclidean distance between the centres of two boxes.

    Args:
        box1: Box indexable as ``[x, y, width, height]``.
        box2: Box indexable as ``[x, y, width, height]``.

    Returns:
        Distance between the two box centres, computed with ``np.sqrt``.
    """
    # Centre of a box = top-left corner plus half the extent along each axis.
    delta_x = (box1[0] + box1[2] / 2) - (box2[0] + box2[2] / 2)
    delta_y = (box1[1] + box1[3] / 2) - (box2[1] + box2[3] / 2)
    return np.sqrt(delta_x ** 2 + delta_y ** 2)

# function to calculate success by computing IOU
def calculate_iou(box1, box2):
    """Return the intersection-over-union of two axis-aligned boxes.

    Args:
        box1: Box indexable as ``[x, y, width, height]``.
        box2: Box indexable as ``[x, y, width, height]``.

    Returns:
        Overlap area divided by union area. Returns ``0.0`` when the union area
        is not positive (for example two zero-area boxes), where the ratio is
        undefined and the division would otherwise fail or yield NaN.
    """
    left = max(box1[0], box2[0])
    top = max(box1[1], box2[1])
    right = min(box1[0] + box1[2], box2[0] + box2[2])
    bottom = min(box1[1] + box1[3], box2[1] + box2[3])

    # Clamp at zero so disjoint boxes contribute no overlap.
    overlap_area = max(0, right - left) * max(0, bottom - top)
    union_area = box1[2] * box1[3] + box2[2] * box2[3] - overlap_area

    if union_area <= 0:
        return 0.0
    return overlap_area / union_area

# function to evaluate the performance for template coordinate (target boundary box) with respect to ground truth boundary box
def evaluate_performance(template_coords, ground_truth_coords, precision_threshold=20, iou_threshold=0.5):
    """Score tracked boxes against ground-truth boxes.

    Boxes are paired with ``zip``, so any surplus entries in the longer input are
    ignored.

    Args:
        template_coords: Tracked boxes ``[x, y, width, height]``, one per frame.
        ground_truth_coords: Ground-truth boxes ``[x, y, width, height]``, one per frame.
        precision_threshold: Centre distance below which a frame counts as precise
            (default 20, the value that was previously hard-coded).
        iou_threshold: IoU above which a frame counts as a success (default 0.5,
            the value that was previously hard-coded).

    Returns:
        Tuple ``(precision, success, mean_normalised_distance)``:
        the fraction of frames with centre distance below ``precision_threshold``,
        the fraction of frames with IoU above ``iou_threshold``, and the mean of
        the centre distance divided by ``sqrt(width * height)`` of the
        ground-truth box.
    """
    distances = []  # centre distance per frame
    ious = []       # IoU per frame
    pnorms = []     # centre distance normalised by ground-truth box size, per frame

    for target, groundtruth in zip(template_coords, ground_truth_coords):
        distance = calculate_distance(target, groundtruth)
        distances.append(distance)
        ious.append(calculate_iou(target, groundtruth))
        pnorms.append(distance / np.sqrt(groundtruth[2] * groundtruth[3]))

    # The mean of a boolean array is the fraction of frames that pass the threshold.
    precision = np.mean(np.array(distances) < precision_threshold)
    success = np.mean(np.array(ious) > iou_threshold)

    return precision, success, np.mean(pnorms)

# Evaluate the template matching tracker on the currently loaded sequence.
# NOTE(review): the output recorded below this cell is a list of bare numbers, which does not
# match the two print statements here, and this cell's execution count (8) is higher than that
# of the visualisation cell after it (7) - the notebook was presumably run out of order with an
# older version of this cell; re-run from a fresh kernel to confirm.
result_images, result_template_coords = template_matching_algorithm(image_sequences,template_coords,ground_truth_coords) # run the template matching tracker
precision, success, pnorm = evaluate_performance(result_template_coords, ground_truth_coords) # score tracked boxes against the ground truth
print(f"Template Matching Performance for seq_{seq_slider.value} --> Precision: {precision}, Success: {success}, Normalized Precision: {pnorm}")

# Evaluate the Kalman filter tracker on the same sequence (reuses and overwrites the result variables above).
result_images, result_template_coords = kalman_filter_algorithm(template_coords,image_sequences, ground_truth_coords) # run the Kalman filter tracker
precision, success, pnorm = evaluate_performance(result_template_coords, ground_truth_coords) # score filtered boxes against the ground truth
print(f"Kalman Filter Performance for seq_{seq_slider.value} --> Precision: {precision}, Success: {success}, Normalized Precision: {pnorm}")


0.0
1.5811388300841898
4.123105625617661
7.0178344238090995
6.576473218982953
5.024937810560445
2.692582403567252
2.23606797749979
2.0615528128088303
2.5
2.9154759474226504
3.3541019662496847
2.5495097567963922
3.3541019662496847
3.5355339059327378
3.5
4.6097722286464435
4.6097722286464435
7.810249675906654
10.307764064044152
10.307764064044152
10.735455276791944
13.46291201783626
12.36931687685298
14.713938969562161
15.508062419270823
14.5
16.62077013859466
30.0
31.48412298286233
33.94849039353591
34.007352146263905
35.04639781775011
39.2587569849072
41.548164821084455
38.99358921669048
42.32316150761897
40.74616546375867
39.38591118661596
38.91336531321854
38.46101922726437
40.65095324835569
40.99390198553927
42.87773314903669
42.02975136733502
41.743262929483606
42.573465914816005
34.61574786134195
34.23448553724738
32.687918257362305
0.0
0.7071067811865476
2.692582403567252
2.692582403567252
2.0
2.23606797749979
2.1213203435596424
2.9154759474226504
2.5
6.020797289396148
23.5053185


Visualise the results of object tracking


In [7]:
## Visualise and save the results of both single-object tracking algorithms

RESULT_DIR = 'result'
os.makedirs(RESULT_DIR, exist_ok=True)  # np.savetxt does not create a missing output directory


def _save_and_play(file_stem, window_title, images, coords, delay_ms=75):
    """Save the tracked boxes to a text file, then play the annotated frames.

    Args:
        file_stem: Output file name without the ``_seq<n>.txt`` suffix.
        window_title: Title of the OpenCV window used for playback.
        images: Annotated frames to display in order.
        coords: Tracked boxes, one row per frame, written comma-separated with
            ``fmt='%d'`` (values are written as integers).
        delay_ms: Delay between frames in milliseconds; Esc (key code 27) stops playback.
    """
    output_file = os.path.join(RESULT_DIR, f'{file_stem}_seq{seq_slider.value}.txt')
    np.savetxt(output_file, coords, delimiter=',', fmt='%d')

    cv2.namedWindow(window_title, cv2.WINDOW_NORMAL)
    try:
        for image in images:
            cv2.imshow(window_title, image)
            if cv2.waitKey(delay_ms) & 0xFF == 27:
                break
    finally:
        # Always close the window, even if playback is interrupted by an error.
        cv2.destroyAllWindows()


# Template matching: run the tracker, save its boxes, and play back the annotated frames.
result_images, result_template_coords = template_matching_algorithm(image_sequences, template_coords, ground_truth_coords)
_save_and_play(
    'result_template_coords_template_matching',
    "Single Object Tracking with template matching algorithm",
    result_images,
    result_template_coords,
)

# Kalman filter: same steps; the result variables are overwritten with the Kalman output.
result_images, result_template_coords = kalman_filter_algorithm(template_coords, image_sequences, ground_truth_coords)
_save_and_play(
    'result_template_coords_kalman_filter',
    "Single Object Tracking with Kalman Filter Algorithm",
    result_images,
    result_template_coords,
)
