# YOLO + ByteTrack Tracking

Use YOLOv8's ByteTrack Tracker for champion icons trackingï¼š
1. **Video file input**
2. **Real-time Game Input**


In [10]:
import os
import cv2
import numpy as np
from ultralytics import YOLO
import time
from pathlib import Path

# Screen capture library (for real-time game)
try:
    import mss
    MSS_AVAILABLE = True
except ImportError:
    print("Install mss: pip install mss")
    MSS_AVAILABLE = False

print("Library imported")


Library imported


In [11]:
model_path = './yolo/yolo_runs/minimap_detection/weights/best.pt'
conf_threshold = 0.25
iou_threshold = 0.45

minimap_region = None

# check model file
assert os.path.exists(model_path), f"Model file not found: {model_path}"

# load model
model = YOLO(model_path)
print(f"Model loaded: {model_path}")
print(f"Number of classes: {len(model.names)}")
print(f"Minimap region: {minimap_region if minimap_region else 'Not set (will process full screen)'}")


Model loaded: ./yolo/yolo_runs/minimap_detection/weights/best.pt
Number of classes: 172
Minimap region: Not set (will process full screen)


In [12]:
def extract_minimap_region(frame, region=None):
    """
    Extract minimap region from frame
    
    Parameters:
        frame: input frame (numpy array, BGR format)
        region: minimap region (left, top, width, height), if None return original frame
    
    Returns:
        extracted minimap region (numpy array)
    """
    if region is None:
        return frame
    
    left, top, width, height = region
    h, w = frame.shape[:2]
    
    # ensure region in image range
    left = max(0, min(left, w))
    top = max(0, min(top, h))
    right = max(left, min(left + width, w))
    bottom = max(top, min(top + height, h))
    
    minimap = frame[top:bottom, left:right]
    return minimap

def get_minimap_region_interactive(video_path=None, frame_index=0):
    """
    Interactive tool: help user determine minimap region coordinates
    
    Parameters:
        video_path: video file path
        frame_index: video frame index
    
    Returns:
        (left, top, width, height) or None
    """
    if video_path:
        cap = cv2.VideoCapture(video_path)
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
        ret, frame = cap.read()
        cap.release()
        if not ret:
            print("cannot read video frame")
            return None
    else:
        if not MSS_AVAILABLE:
            print("need mss library to capture screen")
            return None
        import mss
        with mss.mss() as sct:
            monitor = sct.monitors[1]
            screenshot = sct.grab(monitor)
            frame = np.array(screenshot)
            frame = cv2.cvtColor(frame, cv2.COLOR_BGRA2BGR)
    
    print("Please drag the mouse to select the minimap region, press 'Enter' to confirm, press 'Esc' to cancel")
    
    # use mouse to select region
    drawing = False
    ix, iy = -1, -1
    fx, fy = -1, -1
    
    def mouse_callback(event, x, y, flags, param):
        nonlocal drawing, ix, iy, fx, fy, frame_copy
        if event == cv2.EVENT_LBUTTONDOWN:
            drawing = True
            ix, iy = x, y
        elif event == cv2.EVENT_MOUSEMOVE:
            if drawing:
                frame_copy = frame.copy()
                cv2.rectangle(frame_copy, (ix, iy), (x, y), (0, 255, 0), 2)
        elif event == cv2.EVENT_LBUTTONUP:
            drawing = False
            fx, fy = x, y
            frame_copy = frame.copy()
            cv2.rectangle(frame_copy, (ix, iy), (fx, fy), (0, 255, 0), 2)
    
    frame_copy = frame.copy()
    cv2.namedWindow('Select Minimap Region')
    cv2.setMouseCallback('Select Minimap Region', mouse_callback)
    
    while True:
        cv2.imshow('Select Minimap Region', frame_copy)
        key = cv2.waitKey(1) & 0xFF
        if key == 13:  # Enter
            if ix != -1 and fx != -1:
                left = min(ix, fx)
                top = min(iy, fy)
                width = abs(fx - ix)
                height = abs(fy - iy)
                cv2.destroyAllWindows()
                print(f"Selected minimap region: ({left}, {top}, {width}, {height})")
                return (left, top, width, height)
        elif key == 27:  # Esc
            cv2.destroyAllWindows()
            return None
    
    cv2.destroyAllWindows()
    return None

print("Minimap region extraction tool defined")


Minimap region extraction tool defined


In [13]:
# check ByteTrack config file
import os
import yaml
from pathlib import Path

# find by ultralytics package path
try:
    import ultralytics
    ultralytics_path = Path(ultralytics.__file__).parent
    tracker_dir = ultralytics_path / "cfg" / "trackers"
    
    print(f"Ultralytics package path: {ultralytics_path}")
    print(f"Tracker config directory: {tracker_dir}")
    
    # find bytetrack.yaml
    bytetrack_file = tracker_dir / "bytetrack.yaml"
    if bytetrack_file.exists():
        print(f"\nFound config file: {bytetrack_file}")
        print("\n=== ByteTrack config content ===")
        with open(bytetrack_file, 'r', encoding='utf-8') as f:
            config_content = f.read()
            print(config_content)
            
        # parse to dictionary format
        print("\n=== Config dictionary format ===")
        with open(bytetrack_file, 'r', encoding='utf-8') as f:
            config_dict = yaml.safe_load(f)
            import json
            print(json.dumps(config_dict, indent=2, ensure_ascii=False))
    else:
        print(f"File not found: {bytetrack_file}")
        # list all available tracker config files
        if tracker_dir.exists():
            print(f"\nAvailable tracker config files:")
            for f in tracker_dir.glob("*.yaml"):
                print(f"  - {f.name}")
except Exception as e:
    print(f"Error: {e}")

Ultralytics package path: c:\Users\82530\anaconda3\envs\env\Lib\site-packages\ultralytics
Tracker config directory: c:\Users\82530\anaconda3\envs\env\Lib\site-packages\ultralytics\cfg\trackers

Found config file: c:\Users\82530\anaconda3\envs\env\Lib\site-packages\ultralytics\cfg\trackers\bytetrack.yaml

=== ByteTrack config content ===
# Ultralytics YOLO ðŸš€, AGPL-3.0 license
# Default YOLO tracker settings for ByteTrack tracker https://github.com/ifzhang/ByteTrack

tracker_type: bytetrack # tracker type, ['botsort', 'bytetrack']
track_high_thresh: 0.5 # threshold for the first association
track_low_thresh: 0.1 # threshold for the second association
new_track_thresh: 0.6 # threshold for init new track if the detection does not match any tracks
track_buffer: 30 # buffer to calculate the time when to remove tracks
match_thresh: 0.8 # threshold for matching tracks
fuse_score: True # Whether to fuse confidence scores with the iou distances before matching
# min_box_area: 10  # threshold 

In [None]:
# ByteTrack tracker custom config
tracker_config = {
    "tracker_type": "bytetrack",
    "track_high_thresh": 0.45,
    "track_low_thresh": 0.15,
    "new_track_thresh": 0.6,
    "track_buffer": 200,
    "match_thresh": 0.65,
    "fuse_score": True,
    "min_box_area": 100
}

# create custom config file
import yaml
custom_tracker_path = "./bytetrack_custom.yaml"
with open(custom_tracker_path, 'w', encoding='utf-8') as f:
    yaml.dump(tracker_config, f, default_flow_style=False, allow_unicode=True)
print(f"Custom tracker config created: {custom_tracker_path}")

Custom tracker config created: ./bytetrack_custom.yaml


## 1. Video Tracking


In [20]:
def track_video(video_path, output_path=None, show_preview=True, save_output=True, minimap_region=None):
    """
    Track video file (automatically extract minimap region)
    
    Parameters:
        video_path: input video path
        output_path: output video path (if None, auto generate)
        show_preview: whether to show real-time preview
        save_output: whether to save output video
        minimap_region: minimap region (left, top, width, height), if None use global config or process full screen
    """
    if not os.path.exists(video_path):
        print(f"Error: video file not found: {video_path}")
        return
    
    # use global config or pass parameter
    if minimap_region is None:
        minimap_region = globals().get('minimap_region', None)
    
    if minimap_region is None:
        print("Warning: Minimap region not specified, will process the whole video (recommended to specify minimap region for better performance)")
    
    # auto generate output path
    if output_path is None:
        video_dir = os.path.dirname(video_path)
        video_name = Path(video_path).stem
        output_path = os.path.join(video_dir, f"{video_name}_tracked.mp4")
    
    print(f"Processing video: {video_path}")
    print(f"Minimap region: {minimap_region if minimap_region else 'Full screen'}")
    print(f"Output path: {output_path}")
    
    # if minimap region is specified, process frame by frame
    if minimap_region is not None:
        cap = cv2.VideoCapture(video_path)
        fps = int(cap.get(cv2.CAP_PROP_FPS))
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        
        # get minimap size
        left, top, w, h = minimap_region
        minimap_w, minimap_h = w, h
        
        # create output video writer
        if save_output:
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out = cv2.VideoWriter(output_path, fourcc, fps, (minimap_w, minimap_h))
        
        print(f"Video info: {width}x{height}, {fps} FPS, {total_frames} frames")
        print(f"Minimap size: {minimap_w}x{minimap_h}")
        
        frame_count = 0
        all_results = []
        
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            
            # extract minimap region
            minimap_frame = extract_minimap_region(frame, minimap_region)
            
            # run tracking
            results = model.track(
                source=minimap_frame,
                conf=conf_threshold,
                iou=iou_threshold,
                tracker="bytetrack.yaml",
                verbose=False,
                augment=True
            )
            
            all_results.append(results[0])
            
            # draw results
            annotated_frame = results[0].plot()
            
            if save_output:
                out.write(annotated_frame)
            
            if show_preview:
                cv2.imshow('Tracking', annotated_frame)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
            
            frame_count += 1
            if frame_count % 30 == 0:
                print(f"Processing: {frame_count}/{total_frames} ({100*frame_count/total_frames:.1f}%)")
        
        cap.release()
        if save_output:
            out.release()
        if show_preview:
            cv2.destroyAllWindows()
        
        print(f"Tracking Output saved to: {output_path}")
        return all_results
    else:
        # if minimap region is not specified, process the whole video directly
        results = model.track(
            source=video_path,
            conf=conf_threshold,
            iou=iou_threshold,
            tracker="bytetrack.yaml",
            save=save_output,
            project='tracking_outputs',
            name='video_tracking',
            exist_ok=True,
            show=show_preview,
            verbose=True,
            imgsz=640,
            augment=True
        )
        
        print(f"Tracking completed! Output saved to: {output_path}")
        return results


## 2. Real-time Tracking


In [21]:
# def track_realtime_screen():
    


## 3. Get Tracking Data (API)


In [33]:
def get_tracking_data(results):
    """
    Extract data from tracking results (for API)
    
    Parameters:
        results: YOLO track() results
    
    Returns:
        list: 
        [
            {
                'track_id': int,
                'class_id': int,
                'class_name': str,
                'bbox': [x1, y1, x2, y2],
                'confidence': float,
                'center': [cx, cy]
            },
            ...
        ]
    """
    tracking_data = []
    
    if results is None or len(results) == 0:
        return tracking_data
    
    res = results[0]
    
    if len(res.boxes) == 0:
        return tracking_data
    
    # get tracking ID
    track_ids = res.boxes.id
    if track_ids is None:
        # if no tracking ID, use detection index as temporary ID
        track_ids = np.arange(len(res.boxes))
    
    for i, (track_id, box, cls_id, conf) in enumerate(zip(
        track_ids.cpu().numpy() if hasattr(track_ids, 'cpu') else track_ids,
        res.boxes.xyxy.cpu().numpy(),
        res.boxes.cls.cpu().numpy(),
        res.boxes.conf.cpu().numpy()
    )):
        x1, y1, x2, y2 = box
        cls_id = int(cls_id)
        class_name = res.names[cls_id] if hasattr(res, 'names') else str(cls_id)
        
        tracking_data.append({
            'track_id': int(track_id),
            'class_id': cls_id,
            'class_name': class_name,
            'bbox': [float(x1), float(y1), float(x2), float(y2)],
            'confidence': float(conf),
            'center': [float((x1 + x2) / 2), float((y1 + y2) / 2)]
        })
    
    return tracking_data

def process_frame_for_api(frame):
    """
    Process single frame and return tracking data (for API)
    
    Parameters:
        frame: numpy array (BGR format)
    
    Returns:
        list: tracking data list
    """
    results = model.track(
        source=frame,
        conf=conf_threshold,
        iou=iou_threshold,
        tracker="bytetrack.yaml",
        verbose=False
    )
    
    return get_tracking_data(results)

def get_tracking_data_json(results, frame_number=None, timestamp=None, fps=None):
    """
    generate JSON data with timestamp and frame number
    
    Parameters:
        results: YOLO track() 
        frame_number: frame number (optional)
        timestamp: timestamp, seconds (optional, if fps and frame_number are provided, it will be automatically calculated)
        fps: video frame rate (optional, for calculating timestamp)
    
    Returns:
        dict: JSON format dictionary, contains:
        {
            'timestamp': float,  # timestamp, seconds
            'frame_number': int,  # frame number
            'detections': [
                {
                    'class_id': int,
                    'class_name': str,
                    'bbox': [x1, y1, x2, y2],
                    'confidence': float,
                    'center': [cx, cy]
                },
                ...
            ]
        }
    """
    if isinstance(results, list) and len(results) > 0 and frame_number is not None:
        frame_index = frame_number - 1
        if 0 <= frame_index < len(results):
            frame_results = [results[frame_index]]
        else:
            print(f"Warning: frame_number {frame_number} out of range (total frames: {len(results)})")
            frame_results = [results[0]]  # use the first frame as default
    elif isinstance(results, list) and len(results) > 0:
        # if the results is a list but frame_number is not provided, use the first frame
        frame_results = [results[0]]
    else:
        frame_results = results
    
    # calculate timestamp
    if timestamp is None:
        if fps is not None and frame_number is not None:
            timestamp = frame_number / fps
        else:
            timestamp = time.time()  # use current timestamp
    
    # get tracking data
    tracking_data = get_tracking_data(frame_results)

    
    # build JSON format
    json_data = {
        'timestamp': float(timestamp),
        'frame_number': int(frame_number) if frame_number is not None else None,
        'detections': []
    }
    
    # add information of each detection target
    for track in tracking_data:
        json_data['detections'].append({
            'class_id': track['class_id'],
            'class_name': track['class_name'],
            'bbox': track['bbox'],
            'confidence': track['confidence'],
            'center': track['center']
        })
    
    return json_data


print("API functions defined")


API functions defined


## TEST

### Step 0: Minimap Region Selection



In [23]:
video_path = "./test_videos/video01_clip_4000frames.mp4"
minimap_region = get_minimap_region_interactive(video_path=video_path, frame_index=0)
if minimap_region:
    print(f"Minimap region: {minimap_region}")
    minimap_region = minimap_region 

Please drag the mouse to select the minimap region, press 'Enter' to confirm, press 'Esc' to cancel
Selected minimap region: (1640, 801, 267, 265)
Minimap region: (1640, 801, 267, 265)



### Step 1: Processing Video


In [None]:
video_path = "./test_videos/video01_clip_4000frames.mp4"  # replace with your video path
# minimap_region = (1200, 600, 400, 400)  # minimap region (left, top, width, height)
results = track_video(
    video_path=video_path,
    output_path=None,  # auto generate output path
    show_preview=True,  # show real-time preview
    save_output=True,  # save output video
    minimap_region=minimap_region  # specify minimap region (recommended)
)

Processing video: ./test_videos/video01_clip_4000frames.mp4
Minimap region: (1640, 801, 267, 265)
Output path: ./test_videos\video01_clip_4000frames_tracked.mp4
Video info: 1920x1080, 60 FPS, 4000 frames
Minimap size: 267x265
Processing: 30/4000 (0.8%)
Processing: 60/4000 (1.5%)
Processing: 90/4000 (2.2%)
Processing: 120/4000 (3.0%)
Processing: 150/4000 (3.8%)
Processing: 180/4000 (4.5%)
Processing: 210/4000 (5.2%)
Processing: 240/4000 (6.0%)
Processing: 270/4000 (6.8%)
Processing: 300/4000 (7.5%)
Processing: 330/4000 (8.2%)
Processing: 360/4000 (9.0%)
Processing: 390/4000 (9.8%)
Processing: 420/4000 (10.5%)
Processing: 450/4000 (11.2%)
Processing: 480/4000 (12.0%)
Processing: 510/4000 (12.8%)
Processing: 540/4000 (13.5%)
Processing: 570/4000 (14.2%)
Processing: 600/4000 (15.0%)
Processing: 630/4000 (15.8%)
Processing: 660/4000 (16.5%)
Processing: 690/4000 (17.2%)
Processing: 720/4000 (18.0%)
Processing: 750/4000 (18.8%)
Processing: 780/4000 (19.5%)
Processing: 810/4000 (20.2%)
Process

### Step2: Get JSON Output

In [34]:
json_data = get_tracking_data_json(results, frame_number=2000, fps=60)
print(json_data)

{'timestamp': 33.333333333333336, 'frame_number': 2000, 'detections': [{'class_id': 53, 'class_name': 'Jhin', 'bbox': [231.13299560546875, 203.41073608398438, 257.2855224609375, 230.72093200683594], 'confidence': 0.9581935405731201, 'center': [244.20925903320312, 217.06582641601562]}, {'class_id': 10, 'class_name': 'Ashe', 'bbox': [198.15374755859375, 181.00765991210938, 225.3982391357422, 208.10093688964844], 'confidence': 0.954704225063324, 'center': [211.7760009765625, 194.55429077148438]}, {'class_id': 111, 'class_name': 'Riven', 'bbox': [50.81669616699219, 45.971412658691406, 78.05683898925781, 72.90955352783203], 'confidence': 0.9384623169898987, 'center': [64.436767578125, 59.44048309326172]}, {'class_id': 0, 'class_name': 'Aatrox', 'bbox': [114.51884460449219, 75.92973327636719, 142.05706787109375, 103.0188217163086], 'confidence': 0.8760802745819092, 'center': [128.2879638671875, 89.47427368164062]}, {'class_id': 7, 'class_name': 'Anivia', 'bbox': [152.40902709960938, 109.2013