In [6]:
import cv2
import easyocr
import numpy as np
import os
import json
from tqdm import tqdm

class AutoLabelGenerator:
    def __init__(self):
        self.ocr = easyocr.Reader(['en'])
        self.hud_roi = (0, 800, 1920, 280)  # Bottom HUD area for 1920x1080
        self.skill_size = (80, 80)  # Expected skill icon size
        self.cooldown_color = ([90, 50, 50], [120, 255, 255])  # HSV blue range
        
    def process_video(self, video_path, output_dir):
        cap = cv2.VideoCapture(video_path)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_count = 0
        
        os.makedirs(output_dir, exist_ok=True)
        annotations = {}
        
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret: break
        
            # 1. Extract HUD region
            hud = frame[self.hud_roi[1]:self.hud_roi[1]+self.hud_roi[3], 
                      self.hud_roi[0]:self.hud_roi[0]+self.hud_roi[2]]
            
            # 2. Find potential skill icons using color segmentation
            skill_boxes = self.find_cooldown_skills(hud)
            
            # 3. OCR validation and annotation
            frame_annotations = []
            for box in skill_boxes:
                x1, y1, x2, y2 = box
                skill_roi = hud[y1:y2, x1:x2]
                
                # Check for cooldown digits
                if self.is_cooldown_active(skill_roi):
                    class_id = 0  # Cooldown class
                else:
                    class_id = 1  # Ready class
                
                # Convert to YOLO format
                img_height, img_width = hud.shape[:2]
                x_center = (x1 + (x2-x1)/2) / img_width
                y_center = (y1 + (y2-y1)/2) / img_height
                width = (x2 - x1) / img_width
                height = (y2 - y1) / img_height
                
                frame_annotations.append(f"{class_id} {x_center} {y_center} {width} {height}")
                
                # Save skill icon image
                cv2.imwrite(f"{output_dir}/frame{frame_count}_skill{len(frame_annotations)}.jpg", skill_roi)
            
            # Save annotations
            if frame_annotations:
                with open(f"{output_dir}/frame{frame_count}.txt", 'w') as f:
                    f.write('\n'.join(frame_annotations))
            
            frame_count += 1
            if frame_count % 100 == 0:
                print(f"Processed {frame_count}/{total_frames} frames")
        
        cap.release()
        return annotations
    
    def find_cooldown_skills(self, hud):
        # Convert to HSV for color detection
        hsv = cv2.cvtColor(hud, cv2.COLOR_BGR2HSV)
        mask = cv2.inRange(hsv, *self.cooldown_color)
        
        # Find contours of blue regions
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        
        skill_boxes = []
        for cnt in contours:
            x, y, w, h = cv2.boundingRect(cnt)
            
            # Filter by size and aspect ratio
            if self.is_skill_icon(w, h):
                # Expand to expected skill size
                adjusted_box = (
                    max(0, x - 10),
                    max(0, y - 10),
                    min(hud.shape[1], x + w + 10),
                    min(hud.shape[0], y + h + 10)
                )
                skill_boxes.append(adjusted_box)
        
        return skill_boxes
    
    def is_skill_icon(self, w, h):
        # Validate icon dimensions
        return (self.skill_size[0]*0.8 < w < self.skill_size[0]*1.2 and 
                self.skill_size[1]*0.8 < h < self.skill_size[1]*1.2)
    
    def is_cooldown_active(self, skill_roi):
        # Preprocess for OCR
        processed = self.preprocess_skill(skill_roi)
        
        # OCR with EasyOCR
        results = self.ocr.readtext(processed, allowlist='0123456789',
                                   min_size=20, text_threshold=0.4)
        
        # Check for valid cooldown number
        return any(self.is_valid_cooldown(r[1]) for r in results)
    
    def preprocess_skill(self, img):
        # Enhance contrast and resize
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8,8))
        enhanced = clahe.apply(gray)
        return cv2.resize(enhanced, (128, 128))
    
    def is_valid_cooldown(self, text):
        # Validate detected numbers
        try:
            num = int(text)
            return 1 <= num <= 300  # Reasonable cooldown range
        except:
            return False

# Usage
labeler = AutoLabelGenerator()
labeler.process_video("videos/1.mp4", "dataset")

# Create dataset.yaml
with open("dataset/dataset.yaml", 'w') as f:
    f.write("""path: dataset
train: images/train
val: images/val

names:
  0: cooldown
  1: ready

nc: 2
""")

Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


error: OpenCV(4.11.0) :-1: error: (-5:Bad argument) in function 'inRange'
> Overload resolution failed:
>  - lowerb is not a numpy array, neither a scalar
>  - Expected Ptr<cv::UMat> for argument 'lowerb'
