[![Labellerr](https://storage.googleapis.com/labellerr-cdn/%200%20Labellerr%20template/notebook.webp)](https://www.labellerr.com)

# ByteTrack

---

[![labellerr](https://img.shields.io/badge/Labellerr-BLOG-black.svg)](https://www.labellerr.com/blog/<BLOG_NAME>)
[![Youtube](https://img.shields.io/badge/Labellerr-YouTube-b31b1b.svg)](https://www.youtube.com/@Labellerr)
[![Github](https://img.shields.io/badge/Labellerr-GitHub-green.svg)](https://github.com/Labellerr/Hands-On-Learning-in-Computer-Vision)
[![Scientific Paper](https://img.shields.io/badge/Official-Paper-blue.svg)](https://arxiv.org/abs/2110.06864)


In [None]:
!pip install ultralytics opencv-python

## Helper function

In [None]:
import base64
from IPython.display import HTML, display

def show_mp4(path, width=640):
    """
    Embeds an MP4 file from disk into the notebook output as an inline
    HTML5 video player.

    The file is read in full and inlined as a base64 data URI, so the
    player does not depend on the file being reachable by URL.

    Args:
        path (str): path to the .mp4 file
        width (int): display width in pixels
    """
    # Load the raw bytes and turn them into base64 text in one step
    with open(path, 'rb') as video_file:
        encoded = base64.b64encode(video_file.read()).decode()

    # Assemble the <video> markup line by line
    markup = (
        "\n"
        f'    <video width="{width}" controls autoplay loop muted playsinline>\n'
        f'      <source src="data:video/mp4;base64,{encoded}" type="video/mp4">\n'
        "      Your browser does not support HTML5 video.\n"
        "    </video>\n"
        "    "
    )
    display(HTML(markup))

In [None]:
from ultralytics import YOLO

# Load the YOLOv8 model from the yolov8x.pt weights; a smaller checkpoint
# such as yolov8n.pt can be substituted here.
model = YOLO("yolov8x.pt")

# Track objects in a video
def bytetrack(path):
    """
    Runs ByteTrack on a video and prints every tracked detection.

    For each frame that contains tracked objects, one line is printed per
    object with its frame index, track ID, class ID, confidence and
    bounding box (x1, y1, x2, y2).

    Args:
        path (str): path to the video to track
    """
    # persist is deliberately left at its default: it is intended for calling
    # track() one frame at a time. With a whole video as the source it would
    # carry tracker state and ID numbering over from any previous call made
    # on the same model.
    results = model.track(
        source=path,
        tracker="bytetrack.yaml",  # Built-in ByteTrack config
        stream=True,  # Yield results frame by frame instead of building a full list
    )

    # Process results frame by frame
    for frame_id, result in enumerate(results):
        boxes = result.boxes
        # Skip frames that carry no boxes or no assigned track IDs
        if boxes is None or boxes.id is None:
            continue

        track_ids = boxes.id.int().cpu().tolist()
        bboxes = boxes.xyxy.cpu().tolist()
        confidences = boxes.conf.cpu().tolist()
        class_ids = boxes.cls.int().cpu().tolist()

        # Print tracking results
        for track_id, bbox, conf, cls_id in zip(track_ids, bboxes, confidences, class_ids):
            print(f"Frame {frame_id}: Track ID {track_id}, Class {cls_id}, "
                  f"Confidence {conf:.2f}, BBox {bbox}")

In [None]:
show_mp4("sample/sample-6.mp4")

In [None]:
bytetrack("sample/sample-6.mp4")  # tracking 2 people in video

In [None]:
import cv2
import random
from ultralytics import YOLO

# Load YOLOv8 model (rebinds the module-level `model` used by bytetrack below)
model = YOLO("yolov8x.pt")

def _draw_track(frame, bbox, track_id, color):
    """Draws one track's bounding box and "ID: <n>" label onto frame, in place."""
    # Convert coordinates to integers
    x1, y1, x2, y2 = map(int, bbox)
    cv2.rectangle(frame, (x1, y1), (x2, y2), color, 3)

    text = f"ID: {track_id}"
    (text_width, text_height), baseline = cv2.getTextSize(
        text, cv2.FONT_HERSHEY_SIMPLEX, 1.0, 2
    )

    # The label normally sits 15 px above the box. For boxes touching the top
    # of the frame, push the text baseline down so the whole label stays visible.
    text_y = max(y1 - 15, text_height)

    # White background behind the label, clipped to the bottom of the frame
    bg_top = text_y - text_height
    bg_bottom = min(frame.shape[0], text_y + baseline)
    cv2.rectangle(frame, (x1, bg_top), (x1 + text_width, bg_bottom), (255, 255, 255), -1)

    cv2.putText(frame, text, (x1, text_y), cv2.FONT_HERSHEY_SIMPLEX, 1.0, color, 2)


def bytetrack(path, output_path="output.mp4", target_classes=None):
    """
    Tracks objects in a video with ByteTrack and writes an annotated copy.

    Every tracked object is drawn with a bounding box and an "ID: <n>" label,
    in a random color that stays the same for that track ID for the whole
    video.

    Args:
        path (str): path to the input video
        output_path (str): where the annotated .mp4 is written; missing parent
            folders are created
        target_classes (list[int] | None): class IDs to track; None applies no
            class filter
    """
    import os  # local import so this cell does not depend on other cells

    # Open the input once just to read its properties
    cap = cv2.VideoCapture(path)
    if not cap.isOpened():
        print("Error opening video file")
        return

    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    cap.release()

    # cv2.VideoWriter does not create missing folders, so make sure the
    # destination folder exists before opening the writer.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    if not out.isOpened():
        # Without this check, frames would be dropped silently and the
        # success message below would still be printed.
        print(f"Error opening output video '{output_path}'")
        return

    # One color per track ID, chosen the first time the ID is seen
    track_id_colors = {}

    # persist is deliberately left at its default: it is intended for calling
    # track() one frame at a time. With a whole video as the source it would
    # carry tracker state and ID numbering over from the previously processed
    # video.
    results = model.track(
        source=path,
        tracker="bytetrack.yaml",
        stream=True,
        classes=target_classes
    )

    try:
        for frame_id, result in enumerate(results):
            frame = result.orig_img.copy()  # Original frame without annotations

            if result.boxes is not None and result.boxes.id is not None:
                track_ids = result.boxes.id.int().cpu().tolist()
                bboxes = result.boxes.xyxy.cpu().tolist()
                class_ids = result.boxes.cls.int().cpu().tolist()

                for track_id, bbox, cls_id in zip(track_ids, bboxes, class_ids):
                    # Only filter when the caller asked for specific classes;
                    # target_classes=None means "keep everything".
                    if target_classes is not None and cls_id not in target_classes:
                        continue

                    if track_id not in track_id_colors:
                        track_id_colors[track_id] = (
                            random.randint(0, 255),
                            random.randint(0, 255),
                            random.randint(0, 255)
                        )

                    _draw_track(frame, bbox, track_id, track_id_colors[track_id])

            # Frames without tracks are written unchanged so the output keeps
            # the same length as the input.
            out.write(frame)
            print(f"Processed frame {frame_id}", end='\r')
    finally:
        # Finalize the file even if tracking fails part-way through
        out.release()

    print(f"\nSaved video with enhanced track IDs: '{output_path}'")

In [None]:
show_mp4('sample/sample-2.mp4')

In [None]:
bytetrack('sample/sample-2.mp4', 'result/result-2.mp4', [0, 32])  # tracking person and football

In [None]:
show_mp4('sample/sample-3.mp4')

In [None]:
bytetrack('sample/sample-3.mp4', 'result/result-3.mp4', [4])  # tracking class ID 4 (presumably airplane in the COCO label set; confirm with model.names)

In [None]:
show_mp4('sample/sample-5.mp4')

In [None]:
bytetrack('sample/sample-5.mp4', 'result/result-5.mp4', [2]) # tracking car