In [1]:
#!pip install ultralytics

# Video creation from images
import cv2
import numpy as np
import glob
from os import listdir
from os.path import isfile, join

In [3]:
# Uncomment and run once to create the .avi movie file
"""
img_array = []
path = r'data\frames'
files = listdir(path)
sorted_files = sorted(files, key=lambda x: int(x.split('.')[0][5:]))

for filename in sorted_files:
    img = cv2.imread(join(path,filename))
    height, width, layers = img.shape
    size = (width,height)
    img_array.append(img)
    
out = cv2.VideoWriter('video.avi',cv2.VideoWriter_fourcc(*'DIVX'), 15, size)

for i in range(len(img_array)):
   out.write(img_array[i])
out.release()
"""

In [None]:
# Object traccking
from ultralytics import YOLO

# Configure the tracking parameters and run the tracker
model = YOLO('yolov8x.pt')

# Perform tracking
#source = 'data\MOT20-01-raw.webm'#https://youtu.be/LNwODJXcvt4'
source = 'video.avi'
results = model.track(source, show=True, tracker="bytetrack.yaml", save_txt = False)
#results = model.track(source="https://youtu.be/LNwODJXcvt4", conf=0.3, iou=0.5, show=True)

In [None]:
# Extract cup ids and box info

# INF6804 submission format
frame_id = 0
with open('tracking_results_submission.txt', 'w') as file:
    for r in results:
        frame_id += 1
        
        # Extract cup indexes in list of detected objects
        class_ids = r.boxes.cls
        indices = []
        
        for i in range(len(class_ids)):
            if class_ids[i] == 41.:
                indices.append(i)
        
        # Extract cup ids    
        object_ids = r.boxes.id
        ids = []
        boxes = [] # format (x1,y1,w,h)
        for i in indices:
            ids.append(object_ids[i])
            boxes.append(r.boxes.xywh[i])
            
            file.write(f"{frame_id} {int(object_ids[i])} {int(r.boxes.xywh[i][0])} {int(r.boxes.xywh[i][1])} {int(r.boxes.xywh[i][2])} {int(r.boxes.xywh[i][3])}\n")
"""  

# HOTA metric evaluation format
frame_id = 0
with open('tracking_results.txt', 'w') as file:
    for r in results:
        frame_id += 1
        
        # Extract cup indexes in list of detected objects
        class_ids = r.boxes.cls
        
        indices = []
        
        # TODO: Update class label depending on the dataset
        
        for i in range(len(class_ids)):
            if class_ids[i] == 0.: # class 0 for person in MOT17, clas 41 for cups in INF6904 video   
                indices.append(i)
        
        # Extract cup ids    
        object_ids = r.boxes.id
        ids = []
        boxes = [] # format (x1,y1,w,h)
        for i in indices:
            ids.append(object_ids[i])
            boxes.append(r.boxes.xywh[i])
            
            file.write(f"{frame_id}, {int(object_ids[i])}, {int(r.boxes.xywh[i][0])}, {int(r.boxes.xywh[i][1])}, {int(r.boxes.xywh[i][2])}, {int(r.boxes.xywh[i][3])}, -1, -1, -1, -1\n")
        
        if frame_id == 1:
            print(f"{frame_id}, {int(object_ids[i])}, {int(r.boxes.xywh[i][0])}, {int(r.boxes.xywh[i][1])}, {int(r.boxes.xywh[i][2])}, {int(r.boxes.xywh[i][3])}, -1, -1, -1, -1\n")
"""      

# Hota score

In [36]:
import numpy as np

def calculate_iou(box1, box2):
    """
    Calculate Intersection over Union (IoU) of two bounding boxes.
    """
    x1, y1, w1, h1 = box1
    x2, y2, w2, h2 = box2
    
    intersection_x = max(0, min(x1 + w1, x2 + w2) - max(x1, x2))
    intersection_y = max(0, min(y1 + h1, y2 + h2) - max(y1, y2))
    intersection_area = intersection_x * intersection_y
    
    area1 = w1 * h1
    area2 = w2 * h2
    
    union_area = area1 + area2 - intersection_area
    
    iou = intersection_area / union_area if union_area > 0 else 0
    return iou

# Example usage:
# Read ground truth and predictions from file
# gt_boxes = [[x1, y1, w1, h1], [x2, y2, w2, h2], ...]
# pred_boxes = [[x1, y1, w1, h1], [x2, y2, w2, h2], ...]
# hota = calculate_hota(gt_boxes, pred_boxes)

def group_by_frame(data):
    """
    Group the data by frame ID.
    """
    grouped_data = {}
    for item in data:
        frame_id = item[0]
        if frame_id not in grouped_data:
            grouped_data[frame_id] = []
        grouped_data[frame_id].append(item)
    return grouped_data

def read_detection_file(file_path):
    """
    Read the detection file and return a list of tuples containing frame ID, object ID,
    bounding box coordinates (x, y, w, h) for each detected object.
    """
    detections = []
    with open(file_path, 'r') as file:
        for line in file:
            data = line.strip().split(',')
            frame_id = int(data[0])
            object_id = int(data[1])
            x, y, w, h = map(int, data[2:6])
            detections.append((frame_id, object_id, x, y, w, h))
    return detections

def calculate_hota(gt_data, pred_data, threshold=0.05):
    """
    Calculate Higher Order Tracking Accuracy (HOTA) metric.
    """
    # Group data by frame ID
    grouped_gt = group_by_frame(gt_data)
    grouped_pred = group_by_frame(pred_data)
    
    # Calculate HOTA score
    true_positives = 0
    false_positives = 0
    false_negatives = 0
 
    for frame_id in sorted(grouped_gt):
        #print(frame_id)
        gt_boxes = grouped_gt[frame_id]
        pred_boxes = grouped_pred.get(frame_id, [])
    
        # Calculate IoU matrix and assign matches
        matches = []
        for gt_box in gt_boxes:
            max_iou = 0
            best_match_idx = 0
            for j, pred_box in enumerate(pred_boxes):
                iou = calculate_iou(gt_box[2:], pred_box[2:])
                
                if iou > max_iou:
                    max_iou = iou
                    best_match_idx = j
            if max_iou > threshold:
                matches.append(best_match_idx)
        
        true_positives += len(matches)
        false_positives += len(pred_boxes) - len(matches)
        false_negatives += len(gt_boxes) - len(matches)
        
    if true_positives == 0:
        return 0.0
    
    hota_score = true_positives / (true_positives + 0.5 * (false_positives + false_negatives))
    return hota_score

In [37]:
# Read ground truth and detection files
detection_file = 'tracking_results_HOTA_MOT20-01.txt'  # Update with the actual filename
ground_truth_file = 'MOT20-01gt.txt'  # Update with the actual filename

ground_truth = read_detection_file(ground_truth_file)
detections = read_detection_file(detection_file)

# Calculate HOTA score
hota_score = calculate_hota(ground_truth, detections)
print("HOTA Score:", hota_score)

HOTA Score: 0.16956045674956985
