Project By: Jordan Anderson

# Section 1: Imports and Installs

In [None]:
# Mount Google Drive at /content/drive so the paths below resolve.
from google.colab import drive
drive.mount('/content/drive')

# Project root on the mounted Drive; the dataset and output paths in this
# notebook are built from it.
BASE_PATH = "/content/drive/My Drive/Colab Notebooks/Computer Vision/Data Project/"

In [None]:
!pip install ultralytics opencv-python mediapipe

In [None]:
!pip install deep-sort-realtime

In [None]:
import cv2
import time
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort
import numpy as np
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import time
import json

In [None]:
# Project root on the mounted Drive (same value as in the mount cell).
BASE_PATH = "/content/drive/My Drive/Colab Notebooks/Computer Vision/Data Project/"
# Folder the MOT17 archive is downloaded into and unzipped in.
DATA_MOT   = BASE_PATH + "MOT17/"

In [None]:
!mkdir -p "$DATA_MOT"

In [None]:
# MOT17 Direct Download (Train set)
# Total: ~2.3 GB

!wget -P "$DATA_MOT" https://motchallenge.net/data/MOT17.zip
!unzip "$DATA_MOT/MOT17.zip" -d "$DATA_MOT"

In [None]:
"""
    Run Every Time
"""
# Path constants used by the cells below; re-declared here so they are
# available without re-running the download cells.
BASE_PATH = "/content/drive/My Drive/Colab Notebooks/Computer Vision/Data Project/"
# Holds the unzipped dataset and the per-sequence .mp4 files.
DATA_MOT = BASE_PATH + "MOT17/"
# Training sequences; each <sequence>/ contains img1/ (frames) and gt/gt.txt.
GT_PATH = DATA_MOT + "MOT17/train/"

# Section 2: Tracking Methods

In [None]:
"""
    Frame skipping
"""
def run_yolo_track(video_path, gt_init_box, frame_step=1):
    """Follow one target through a video by re-detecting it with YOLOv8.

    Every ``frame_step``-th frame (0-based indices 0, frame_step, ...) is
    passed to the detector. Among the detections, the box with the highest
    non-zero IoU against the previous prediction is kept; if nothing is
    detected or nothing overlaps, the previous box is carried forward.

    Args:
        video_path: Path of the video opened with ``cv2.VideoCapture``.
        gt_init_box: Starting box ``[x1, y1, x2, y2]``; used as the reference
            for the first processed frame.
        frame_step: Process one frame out of every ``frame_step``.

    Returns:
        ``(preds, times)``: one box and one detector latency (seconds) per
        processed frame. Both are empty if no frame could be read.
    """
    model = YOLO("yolov8n.pt")
    cap = cv2.VideoCapture(video_path)

    preds, times = [], []
    prev_box = gt_init_box
    frame_idx = 0

    try:
        while True:
            # Skipped frames are still decoded; decoding is not timed.
            ret, frame = cap.read()
            if not ret:
                break

            if frame_idx % frame_step != 0:
                frame_idx += 1
                continue

            # perf_counter is monotonic, so wall-clock adjustments cannot
            # distort the measured latency.
            start = time.perf_counter()
            results = model.predict(frame, conf=0.5)[0]
            times.append(time.perf_counter() - start)

            best_box = None
            best_iou = 0
            boxes = results.boxes
            if boxes is not None and len(boxes) > 0:
                # One host copy for all detections instead of one per box.
                for x1, y1, x2, y2 in boxes.xyxy.cpu().numpy():
                    candidate = [x1, y1, x2, y2]
                    iou = compute_iou(prev_box, candidate)
                    if iou > best_iou:
                        best_iou = iou
                        best_box = candidate

            preds.append(best_box if best_box is not None else prev_box)
            prev_box = preds[-1]
            frame_idx += 1
    finally:
        # Release the capture even if detection raises part-way through.
        cap.release()

    return preds, times


In [None]:
"""
    Frame Skipping
"""
def run_tld(video_path, gt_init_box, frame_step=1):
    """Follow one target through a video with OpenCV's legacy TLD tracker.

    The tracker is initialised on the first frame with ``gt_init_box``.
    After that, only every ``frame_step``-th frame is handed to
    ``tracker.update``; frames in between are decoded and discarded. When an
    update reports failure, the previous box is carried forward.

    Args:
        video_path: Path of the video opened with ``cv2.VideoCapture``.
        gt_init_box: Starting box ``[x1, y1, x2, y2]``.
        frame_step: Process one frame out of every ``frame_step``.

    Returns:
        ``(preds, times)``: one ``[x1, y1, x2, y2]`` box and one update
        latency (seconds) per processed frame. The first entry is the
        initial box with a latency of 0. Both lists are empty if the first
        frame cannot be read.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        ret, frame = cap.read()
        if not ret:
            return [], []

        # OpenCV expects (x, y, width, height) rather than corner coordinates.
        x1, y1, x2, y2 = gt_init_box
        init_bb = (x1, y1, x2 - x1, y2 - y1)

        tracker = cv2.legacy.TrackerTLD_create()
        tracker.init(frame, init_bb)

        preds = [gt_init_box]
        times = [0]

        prev_box = gt_init_box
        frame_idx = 1  # frame 0 was consumed by the initialisation above

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if frame_idx % frame_step != 0:
                frame_idx += 1
                continue

            # perf_counter is monotonic, so wall-clock adjustments cannot
            # distort the measured latency.
            start = time.perf_counter()
            ok, bb = tracker.update(frame)
            times.append(time.perf_counter() - start)

            if ok:
                x, y, w, h = bb
                pred_box = [x, y, x + w, y + h]
            else:
                pred_box = prev_box

            preds.append(pred_box)
            prev_box = pred_box
            frame_idx += 1
    finally:
        # Runs on the early return and on errors too, so the capture never leaks.
        cap.release()

    return preds, times


In [None]:
def run_fairmot(video_path, gt_init_box, frame_step=1):
    """Follow one target using YOLOv8 detections associated by DeepSORT.

    NOTE: despite the name, no FairMOT model is used here; the pipeline is
    ``YOLO("yolov8n.pt")`` detections fed to ``DeepSort(max_age=30)``.

    Every ``frame_step``-th frame is detected and passed to the tracker.
    Among the confirmed tracks, the one with the highest non-zero IoU against
    the previous prediction is kept; otherwise the previous box is carried
    forward.

    Args:
        video_path: Path of the video opened with ``cv2.VideoCapture``.
        gt_init_box: Starting box ``[x1, y1, x2, y2]``; used as the reference
            for the first processed frame.
        frame_step: Process one frame out of every ``frame_step``.

    Returns:
        ``(preds, times)``: one box and one latency (seconds, detection plus
        tracker update) per processed frame.
    """
    model = YOLO("yolov8n.pt")
    tracker = DeepSort(max_age=30)
    cap = cv2.VideoCapture(video_path)

    preds, times = [], []
    prev_box = gt_init_box
    frame_idx = 0

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if frame_idx % frame_step != 0:
                frame_idx += 1
                continue

            # The timed region covers detection, detection formatting and the
            # DeepSORT update. perf_counter is monotonic.
            start = time.perf_counter()

            # Uses the detector's default confidence threshold
            # (run_yolo_track passes conf=0.5 instead).
            result = model.predict(frame)[0]

            # DeepSORT takes ([left, top, width, height], confidence, class).
            detections = []
            for b in result.boxes:
                x1, y1, x2, y2 = b.xyxy[0].cpu().numpy()
                detections.append(
                    ([x1, y1, x2 - x1, y2 - y1], b.conf.item(), b.cls.item())
                )

            tracks = tracker.update_tracks(detections, frame=frame)
            times.append(time.perf_counter() - start)

            # Pick the confirmed track that best overlaps the previous box.
            best_box = None
            best_iou = 0
            for t in tracks:
                if not t.is_confirmed():
                    continue
                x1, y1, x2, y2 = t.to_ltrb()
                candidate = [x1, y1, x2, y2]
                iou = compute_iou(prev_box, candidate)
                if iou > best_iou:
                    best_iou = iou
                    best_box = candidate

            preds.append(best_box if best_box is not None else prev_box)
            prev_box = preds[-1]
            frame_idx += 1
    finally:
        # Release the capture even if detection or tracking raises.
        cap.release()

    return preds, times


# Section 3: Helper Methods and Cleaning

## Helper Functions

In [None]:
def compute_iou(boxA, boxB):
    """Return the intersection-over-union of two ``[x1, y1, x2, y2]`` boxes.

    Returns 0 when either box is ``None`` or when the union has no area.
    """
    if boxA is None or boxB is None:
        return 0

    a_left, a_top, a_right, a_bottom = boxA
    b_left, b_top, b_right, b_bottom = boxB

    # Width and height of the overlap, clamped at zero for disjoint boxes.
    overlap_w = max(0, min(a_right, b_right) - max(a_left, b_left))
    overlap_h = max(0, min(a_bottom, b_bottom) - max(a_top, b_top))
    overlap = overlap_w * overlap_h

    size_a = max(0, a_right - a_left) * max(0, a_bottom - a_top)
    size_b = max(0, b_right - b_left) * max(0, b_bottom - b_top)

    combined = size_a + size_b - overlap
    return 0 if combined <= 0 else overlap / combined


In [None]:
def load_mot17_gt(gt_file, target_id=1):
    """Load the ground-truth boxes of one object from a MOT17 ``gt.txt``.

    Each comma-separated row starts with ``frame, id, x, y, w, h``; any
    further columns are ignored.

    Args:
        gt_file: Path to the ground-truth text file.
        target_id: Object id to keep (defaults to 1, the id tracked
            throughout this notebook).

    Returns:
        Dict mapping the frame number (``int``, as written in the file) to
        the box ``[x1, y1, x2, y2]`` of ``target_id`` in that frame.
    """
    # ndmin=2 keeps a one-row file two-dimensional; without it loadtxt
    # returns a 1-D array and iterating it yields scalars instead of rows.
    data = np.loadtxt(gt_file, delimiter=",", ndmin=2)
    gt = {}

    for row in data:
        frame, obj_id, x, y, w, h = row[:6]
        if int(obj_id) != target_id:
            continue

        # Convert (x, y, width, height) to corner coordinates.
        gt[int(frame)] = [x, y, x + w, y + h]

    return gt


In [None]:
# ------------------ Plotting ------------------
def plot_iou_per_video(result):
    """Plot the IoU curve of each tracker for one video result dict.

    The x-axis is the position in the result lists, sized from the YOLO
    series.
    """
    iou_series = result["iou"]
    positions = range(len(iou_series["yolo"]))

    plt.figure(figsize=(10,5))
    for key, legend_name in (("yolo", "YOLOv8"), ("fair", "FairMOT"), ("tld", "TLD")):
        plt.plot(positions, iou_series[key], label=legend_name)

    plt.xlabel("Frame")
    plt.ylabel("IoU")
    plt.title(f"IoU per Frame – {result['video']}")
    plt.legend()
    plt.show()

def plot_avg_iou_all_videos(results):
    """Plot the mean IoU of each tracker across all videos, one line per tracker."""
    tracker_keys = ["yolo","fair","tld"]
    video_names = [entry["video"] for entry in results]

    # Per tracker: mean IoU of every video, in the order of `results`.
    mean_iou = {
        key: [np.mean(entry["iou"][key]) for entry in results]
        for key in tracker_keys
    }

    plt.figure(figsize=(10,5))
    for key in tracker_keys:
        plt.plot(video_names, mean_iou[key], marker="o", label=key)
    plt.ylabel("Average IoU")
    plt.title("Average IoU per Video")
    plt.xticks(rotation=45)
    plt.legend()
    plt.show()

def plot_time_per_video(result):
    """Plot the per-frame processing time of each tracker for one video result dict.

    The x-axis is the position in the result lists, sized from the YOLO
    series.
    """
    time_series = result["time"]
    positions = range(len(time_series["yolo"]))

    plt.figure(figsize=(10,5))
    for key, legend_name in (("yolo", "YOLOv8"), ("fair", "FairMOT"), ("tld", "TLD")):
        plt.plot(positions, time_series[key], label=legend_name)

    plt.xlabel("Frame")
    plt.ylabel("Time per Frame (s)")
    plt.title(f"Time per Frame – {result['video']}")
    plt.legend()
    plt.show()

"""
    Plot to show average time.
"""
def plot_avg_time_all_videos(results):
    """Plot the stored average per-frame time of each tracker across all videos."""
    tracker_keys = ["yolo", "fair", "tld"]
    video_names = [entry["video"] for entry in results]

    # Per tracker: the precomputed "avg_time" of every video, in the order of `results`.
    mean_time = {
        key: [entry["avg_time"][key] for entry in results]
        for key in tracker_keys
    }

    plt.figure(figsize=(10,5))
    for key in tracker_keys:
        plt.plot(video_names, mean_time[key], marker="o", label=key)

    plt.ylabel("Average Time per Frame (s)")
    plt.title("Average Processing Time per Video")
    plt.xticks(rotation=45)
    plt.legend()
    plt.tight_layout()
    plt.show()

In [None]:
"""
    Save the individual plots for later observation.
"""
def save_all_stored_plots(plots, folder="saved_plots", base_path="/content/drive/My Drive/Colab Notebooks/Computer Vision/Data Project/"):
    """Write every stored figure to ``<base_path>/<folder>/<category>/<video>.png``.

    ``plots`` maps a category name to a dict of ``{video_name: figure}``; each
    figure is written with its ``savefig`` method.
    """
    root_dir = os.path.join(base_path, folder)
    # Created up front so the folder exists even when `plots` is empty.
    os.makedirs(root_dir, exist_ok=True)

    for category, figures in plots.items():
        target_dir = os.path.join(root_dir, category)
        os.makedirs(target_dir, exist_ok=True)

        for video_name, figure in figures.items():
            destination = os.path.join(target_dir, f"{video_name}.png")
            figure.savefig(destination)
            print(f"Saved: {destination}")


In [None]:
"""
      Display all of the individual plots.
"""
def show_all_stored_plots(plots):
    """Call ``show()`` on every stored figure, printing a heading for each one.

    ``plots`` maps a category name to a dict of ``{video_name: figure}``.
    """
    for category in plots:
        print(f"\nShowing plots in category: {category}\n")
        figures = plots[category]
        for video_name in figures:
            print(f"Showing: {category} – {video_name}")
            figures[video_name].show()


In [None]:
"""
    Save Results to a specified txt file.
"""
def save_results_to_txt(results, filename="results.txt", base_path="/content/drive/My Drive/Colab Notebooks/Computer Vision/Data Project/"):
    """Save the results structure to a JSON-formatted text file.

    NumPy scalars and arrays inside ``results`` (for example ``np.float32``
    box coordinates or ``np.int64`` counters) are converted to plain Python
    values, since the ``json`` module cannot encode them on its own.

    Args:
        results: JSON-compatible structure, optionally containing NumPy
            scalars or arrays.
        filename: Name of the file to write.
        base_path: Directory the file is stored in; created if missing.

    Raises:
        TypeError: If ``results`` contains a value that is neither
            JSON-compatible nor a NumPy scalar/array.
    """
    def _to_builtin(value):
        # Called by json only for objects it cannot encode itself.
        if isinstance(value, np.generic):
            return value.item()
        if isinstance(value, np.ndarray):
            return value.tolist()
        raise TypeError(
            f"Object of type {type(value).__name__} is not JSON serializable"
        )

    # Ensure base path exists
    if base_path:
        os.makedirs(base_path, exist_ok=True)

    filepath = os.path.join(base_path, filename)

    # Encode fully before opening the file so a serialization error cannot
    # leave a truncated file behind.
    payload = json.dumps(results, indent=4, default=_to_builtin)

    with open(filepath, "w", encoding="utf-8") as f:
        f.write(payload)

    print(f"Saved results to: {filepath}")


In [None]:
"""
    Load results from a txt file.
"""
def load_results_from_txt(filename="results.txt", base_path="/content/drive/My Drive/Colab Notebooks/Computer Vision/Data Project/"):
    """Read a results structure back from a JSON-formatted text file.

    The file is looked up as ``base_path`` joined with ``filename``.

    Raises:
        FileNotFoundError: If that path does not exist.
    """
    filepath = os.path.join(base_path, filename)

    if os.path.exists(filepath):
        with open(filepath, "r") as f:
            results = json.loads(f.read())
        print(f"Loaded results from: {filepath}")
        return results

    raise FileNotFoundError(f"File does not exist: {filepath}")

## Clean Data

In [None]:
def folder_to_mp4(input_folder, output_file, fps=30):
    """Encode the ``.jpg`` images of a folder, in sorted name order, as an mp4.

    Does nothing when the folder contains no ``.jpg`` file.

    Args:
        input_folder: Directory holding the frame images.
        output_file: Path of the video file to write.
        fps: Frame rate stored in the video.

    Raises:
        ValueError: If an image cannot be decoded. Skipping it would shift
            every later frame index relative to the image numbering.
        OSError: If the video writer cannot be opened for ``output_file``.
    """
    images = sorted(name for name in os.listdir(input_folder) if name.endswith(".jpg"))
    if not images:
        return

    # The first image fixes the frame size of the whole video.
    first_path = os.path.join(input_folder, images[0])
    first_image = cv2.imread(first_path)
    if first_image is None:
        raise ValueError(f"Could not read image: {first_path}")
    h, w = first_image.shape[:2]

    out = cv2.VideoWriter(output_file, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
    try:
        if not out.isOpened():
            raise OSError(f"Could not open video writer for: {output_file}")

        for img in images:
            img_path = os.path.join(input_folder, img)
            frame = cv2.imread(img_path)
            if frame is None:
                raise ValueError(f"Could not read image: {img_path}")
            out.write(frame)
    finally:
        # Release the writer on errors as well.
        out.release()

    print("Saved:", output_file)


In [None]:
# Convert all MOT17 sequences
# Each training sequence folder holds its frames under img1/; the frames are
# encoded to "<sequence>.mp4" directly inside DATA_MOT, which is where main()
# later looks for .mp4 files.
for seq in os.listdir(DATA_MOT + "MOT17/train/"):
    img_folder = DATA_MOT + f"MOT17/train/{seq}/img1/"
    # Skip entries that are not sequence folders with an img1/ directory.
    if not os.path.isdir(img_folder): continue

    output_mp4 = DATA_MOT + f"{seq}.mp4"
    # Uses folder_to_mp4's default frame rate (fps=30) for every sequence.
    folder_to_mp4(img_folder, output_mp4)


# Section 4: Processing Main Method

In [None]:
"""
    Frame Skipping
    Process only the x frame.
"""
def process_video(video_path, frame_step=1):
    """Run the three trackers on one video and score them against ground truth.

    The ground truth is read from ``<GT_PATH><sequence>/gt/gt.txt``, where
    ``<sequence>`` is the video file name without its extension. The box of
    frame 1 initialises every tracker.

    Args:
        video_path: Path of the sequence video.
        frame_step: Process one frame out of every ``frame_step``.

    Returns:
        Dict with the keys ``video`` (file name), ``frames`` (1-based frame
        numbers that were evaluated), ``iou`` and ``time`` (per-tracker
        lists under ``yolo``/``fair``/``tld``) and ``avg_time`` (per-tracker
        mean latency in seconds, ``nan`` if nothing was processed).

    Raises:
        KeyError: If the ground truth has no box for the target in frame 1.
    """
    video_name = os.path.basename(video_path)
    # splitext removes only the trailing extension, whereas
    # str.replace(".mp4", "") would also alter a name containing ".mp4".
    seq_name = os.path.splitext(video_name)[0]

    gt_file = f"{GT_PATH}{seq_name}/gt/gt.txt"
    gt = load_mot17_gt(gt_file)
    if 1 not in gt:
        raise KeyError(f"No ground-truth box for the target in frame 1 of {gt_file}")
    init_box = gt[1]

    yolo_preds, yolo_times = run_yolo_track(video_path, init_box, frame_step)
    fair_preds, fair_times = run_fairmot(video_path, init_box, frame_step)
    tld_preds, tld_times = run_tld(video_path, init_box, frame_step)

    yolo_iou, fair_iou, tld_iou = [], [], []

    # Only evaluate GT at frames we actually processed. The trackers count
    # frames from 0 while gt.txt counts from 1, hence the start of 1.
    processed_frames = list(range(1, len(yolo_preds) * frame_step + 1, frame_step))

    for i, f in enumerate(processed_frames):
        # No GT box for this frame gives compute_iou a None, which scores 0.
        gt_box = gt.get(f)
        yolo_iou.append(compute_iou(yolo_preds[i], gt_box))
        fair_iou.append(compute_iou(fair_preds[i], gt_box))
        tld_iou.append(compute_iou(tld_preds[i], gt_box))

    def _mean_seconds(values):
        # Plain float keeps the result JSON-friendly; an empty list gives nan
        # without NumPy's empty-mean warning.
        return float(np.mean(values)) if values else float("nan")

    return {
        "video": video_name,
        "frames": processed_frames,
        "iou": {"yolo": yolo_iou, "fair": fair_iou, "tld": tld_iou},
        "time": {"yolo": yolo_times, "fair": fair_times, "tld": tld_times},
        "avg_time": {
            "yolo": _mean_seconds(yolo_times),
            "fair": _mean_seconds(fair_times),
            "tld": _mean_seconds(tld_times),
        },
    }


In [None]:
"""
    Frame Skipping
    Get the results for x frames.
    Most Recent Main method.
"""
def _make_line_figure(series, ylabel, title):
    """Build (without displaying) a figure with one line per tracker."""
    positions = range(len(series["yolo"]))
    fig = plt.figure(figsize=(10, 5))
    ax = fig.add_subplot(111)
    for key, legend_name in (("yolo", "YOLOv8"), ("fair", "FairMOT"), ("tld", "TLD")):
        ax.plot(positions, series[key], label=legend_name)
    ax.set_xlabel("Frame")
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.legend()
    plt.close(fig)  # prevent immediate display; the figure object stays usable
    return fig


def main(frame_step=1):
    """Process every ``.mp4`` in ``DATA_MOT`` and collect results and figures.

    Args:
        frame_step: Process one frame out of every ``frame_step``.

    Returns:
        ``(results, plots)``: ``results`` is the list of ``process_video``
        dicts in sorted file-name order. ``plots`` maps ``"iou_per_video"``
        and ``"time_per_video"`` to ``{video_name: figure}`` dicts.
    """
    # os.listdir returns entries in arbitrary order; sorting keeps the result
    # order (and the x-axis of the per-video summary plots) reproducible.
    video_files = sorted(f for f in os.listdir(DATA_MOT) if f.endswith(".mp4"))
    results = []
    plots = {"iou_per_video": {}, "time_per_video": {}}

    for v in video_files:
        video_path = os.path.join(DATA_MOT, v)
        print(f"Processing: {video_path} (step={frame_step})")

        res = process_video(video_path, frame_step)
        results.append(res)

        plots["iou_per_video"][res["video"]] = _make_line_figure(
            res["iou"], "IoU", f"IoU per Frame – {res['video']}"
        )
        plots["time_per_video"][res["video"]] = _make_line_figure(
            res["time"], "Time per Frame (s)", f"Time per Frame – {res['video']}"
        )

    return results, plots

# Section 5: Testing and Parameters

## Base

In [None]:
# Baseline: run all trackers on every frame (main's frame_step defaults to 1).
results_base, plots_base = main()

In [None]:
# Mean IoU of each tracker per video.
plot_avg_iou_all_videos(results_base)

In [None]:
# Save
save_results_to_txt(results_base, "mot_tracking_results_base_frames_3.txt")

# Load
# NOTE(review): the example below names a file without the "_3" suffix used by the save above.
#loaded_results = load_results_from_txt("mot_tracking_results_base_frames.txt")

In [None]:
# Write the stored per-video IoU and timing figures as PNG files.
save_all_stored_plots(plots_base, folder="saved_plots_base_frame_3")

## 5 Frame Skips

In [None]:
# Process one frame out of every 5.
results_5_frames, plots_5_frames =main(5)

In [None]:
# Mean IoU of each tracker per video.
plot_avg_iou_all_videos(results_5_frames)

In [None]:
# Mean per-frame processing time of each tracker per video.
plot_avg_time_all_videos(results_5_frames)

In [None]:
# Save
save_results_to_txt(results_5_frames, "mot_tracking_results_5_frames_3.txt")

# Load
# NOTE(review): the example below names a file without the "_3" suffix used by the save above.
#loaded_results = load_results_from_txt("mot_tracking_results_5_frames.txt")

In [None]:
# Write the stored per-video IoU and timing figures as PNG files.
save_all_stored_plots(plots_5_frames, folder="saved_plots_5_frame_3")

## 10 Frames

In [None]:
# Process one frame out of every 10.
results_10_frames, plots_10_frames =main(10)

In [None]:
# Mean IoU of each tracker per video.
plot_avg_iou_all_videos(results_10_frames)

In [None]:
# Mean per-frame processing time of each tracker per video.
plot_avg_time_all_videos(results_10_frames)

In [None]:
# Save
save_results_to_txt(results_10_frames, "mot_tracking_results_10_frames_3.txt")

# Load
# NOTE(review): the example below names a file without the "_3" suffix used by the save above.
#loaded_results = load_results_from_txt("mot_tracking_results_10_frames.txt")

Saved results to: /content/drive/My Drive/Colab Notebooks/Computer Vision/Data Project/mot_tracking_results_10_frames_3.txt


In [None]:
# Write the stored per-video IoU and timing figures as PNG files.
save_all_stored_plots(plots_10_frames, folder="saved_plots_10_frame_3")

## 20 Frames

In [None]:
# Process one frame out of every 20.
results_20_frames, plots_20_frames =main(20)

In [None]:
# Mean IoU of each tracker per video.
plot_avg_iou_all_videos(results_20_frames)

In [None]:
# Mean per-frame processing time of each tracker per video.
plot_avg_time_all_videos(results_20_frames)

In [None]:
# Save
save_results_to_txt(results_20_frames, "mot_tracking_results_20_frames_3.txt")

# Load
# NOTE(review): the example below names a file without the "_3" suffix used by the save above.
#loaded_results = load_results_from_txt("mot_tracking_results_20_frames.txt")

In [None]:
# Write the stored per-video IoU and timing figures as PNG files.
save_all_stored_plots(plots_20_frames, folder="saved_plots_20_frame_3")

In [None]:
# Writes the same figures a second time, under a different folder name.
save_all_stored_plots(plots_20_frames, folder="saved_plots_20_frame_3_real")

## 30 Frames

In [None]:
# Process one frame out of every 30.
results_30_frames, plots_30_frames =main(30)

In [None]:
# Mean IoU of each tracker per video.
plot_avg_iou_all_videos(results_30_frames)

In [None]:
# Mean per-frame processing time of each tracker per video.
plot_avg_time_all_videos(results_30_frames)

In [None]:
# Save
save_results_to_txt(results_30_frames, "mot_tracking_results_30_frames_3.txt")

# Load
# NOTE(review): the example below names a file without the "_3" suffix used by the save above.
#loaded_results = load_results_from_txt("mot_tracking_results_30_frames.txt")

In [None]:
# Write the stored per-video IoU and timing figures as PNG files.
save_all_stored_plots(plots_30_frames, folder="saved_plots_30_frame_3")

## 40 Frames

In [None]:
# Process one frame out of every 40.
results_40_frames, plots_40_frames =main(40)

In [None]:
# Mean IoU of each tracker per video.
plot_avg_iou_all_videos(results_40_frames)

In [None]:
# Mean per-frame processing time of each tracker per video.
plot_avg_time_all_videos(results_40_frames)

In [None]:
# Save
save_results_to_txt(results_40_frames, "mot_tracking_results_40_frames_3.txt")

# Load
# NOTE(review): the example below names a file without the "_3" suffix used by the save above.
#loaded_results = load_results_from_txt("mot_tracking_results_40_frames.txt")

In [None]:
# Write the stored per-video IoU and timing figures as PNG files.
save_all_stored_plots(plots_40_frames, folder="saved_plots_40_frame_3")

## 40 Frames (repeat run)

In [None]:
# Same call and variable names as the previous 40-frame section; running this
# rebinds results_40_frames / plots_40_frames.
results_40_frames, plots_40_frames =main(40)

In [None]:
# Mean IoU of each tracker per video.
plot_avg_iou_all_videos(results_40_frames)

In [None]:
# Mean per-frame processing time of each tracker per video.
plot_avg_time_all_videos(results_40_frames)

In [None]:
# Save
# Same file name as the previous 40-frame section, so that file is overwritten.
save_results_to_txt(results_40_frames, "mot_tracking_results_40_frames_3.txt")

# Load
# NOTE(review): the example below names a file without the "_3" suffix used by the save above.
#loaded_results = load_results_from_txt("mot_tracking_results_40_frames.txt")

In [None]:
# Same output folder as the previous 40-frame section.
save_all_stored_plots(plots_40_frames, folder="saved_plots_40_frame_3")

## 50 Frames

In [None]:
# Process one frame out of every 50.
results_50_frames, plots_50_frames =main(50)

In [None]:
# Mean IoU of each tracker per video.
plot_avg_iou_all_videos(results_50_frames)

In [None]:
# Mean per-frame processing time of each tracker per video.
plot_avg_time_all_videos(results_50_frames)

In [None]:
# Save
save_results_to_txt(results_50_frames, "mot_tracking_results_50_frames_3.txt")

# Load
# NOTE(review): the example below names a file without the "_3" suffix used by the save above.
#loaded_results = load_results_from_txt("mot_tracking_results_50_frames.txt")

In [None]:
# Write the stored per-video IoU and timing figures as PNG files.
save_all_stored_plots(plots_50_frames, folder="saved_plots_50_frame_3")