# In [None]:
import os
import yaml
from ultralytics import YOLO
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
import shutil
import random
from pathlib import Path

def analyze_image_size(image_path):
    """Return a normalised, image-centred YOLO box for the given image.

    The image is loaded with ``cv2.imread``. The box is assumed to sit in the
    middle of the picture and to span 70% of its width and height.

    Args:
        image_path: Path of the image file to read.

    Returns:
        Tuple ``(x_center, y_center, width, height)`` expressed as fractions
        of the image size. If the image cannot be read, the fixed fallback
        ``(0.5, 0.5, 0.8, 0.8)`` is returned instead.
    """
    image = cv2.imread(image_path)
    if image is None:
        # Unreadable file: hand back the fixed fallback box.
        return (0.5, 0.5, 0.8, 0.8)

    img_h, img_w = image.shape[:2]

    # Box geometry in pixels: centred, 70% of each image dimension.
    box_w_px, box_h_px = 0.7 * img_w, 0.7 * img_h
    center_x_px, center_y_px = img_w / 2, img_h / 2

    # YOLO labels store coordinates relative to the image size.
    return (
        center_x_px / img_w,
        center_y_px / img_h,
        box_w_px / img_w,
        box_h_px / img_h,
    )

def copy_images_with_annotations(src_dir, output_dir, class_idx, split, images, is_single_tool=True):
    """Copy images into the dataset layout and produce a label file for each.

    Images go to ``<output_dir>/images/<split>`` and labels to
    ``<output_dir>/labels/<split>``; both directories must already exist.

    Args:
        src_dir: Directory holding the source images (and, optionally,
            ready-made ``.txt`` annotations next to them).
        output_dir: Root of the YOLO dataset being built.
        class_idx: Class index written into generated labels. Only used when
            ``is_single_tool`` is true.
        split: Dataset split sub-directory name (e.g. ``'train'``/``'val'``).
        images: Iterable of image file names located in ``src_dir``.
        is_single_tool: When true, a single centred box from
            ``analyze_image_size`` is written. Otherwise an annotation with
            the same stem is copied from ``src_dir`` if present, or an empty
            label file is created.

    Errors for an individual image are printed and do not stop the loop.
    """
    for img_name in images:
        try:
            # Image itself.
            source_image = os.path.join(src_dir, img_name)
            shutil.copy2(source_image, os.path.join(output_dir, 'images', split, img_name))

            label_name = os.path.splitext(img_name)[0] + '.txt'

            if is_single_tool:
                # Single tool: synthesise one centred box for the whole image.
                x_c, y_c, box_w, box_h = analyze_image_size(source_image)
                with open(os.path.join(output_dir, 'labels', split, label_name), 'w') as label_file:
                    label_file.write(f"{class_idx} {x_c} {y_c} {box_w} {box_h}\n")
                continue

            # Group / ruler images: reuse an annotation shipped with the image.
            existing_label = os.path.join(src_dir, label_name)
            label_dst = os.path.join(output_dir, 'labels', split, label_name)
            if os.path.exists(existing_label):
                shutil.copy2(existing_label, label_dst)
                continue

            # Nothing to reuse: leave an empty label for manual annotation.
            open(label_dst, 'w').close()
            print(f"  ‚ö† –°–æ–∑–¥–∞–Ω–∞ –ø—É—Å—Ç–∞—è –∞–Ω–Ω–æ—Ç–∞—Ü–∏—è –¥–ª—è {img_name} (—Ç—Ä–µ–±—É–µ—Ç —Ä—É—á–Ω–æ–π —Ä–∞–∑–º–µ—Ç–∫–∏)")

        except Exception as err:
            print(f"‚ùå –û—à–∏–±–∫–∞ –ø—Ä–∏ –æ–±—Ä–∞–±–æ—Ç–∫–µ {img_name}: {err}")

def process_single_tools(dataset_path, output_dir, classes):
    """Process the per-class folders that contain single-tool images.

    Each entry of ``classes`` is a folder name under ``dataset_path``; its
    position in the list is used as the class index. Images are split 80/20
    into train/val (a folder with one image goes entirely to train) and handed
    to ``copy_images_with_annotations`` with ``is_single_tool=True``.

    Args:
        dataset_path: Root directory of the raw dataset.
        output_dir: Root of the YOLO dataset being built.
        classes: Ordered list of class folder names.

    Returns:
        Number of images found in the processed class folders.
    """
    print("\n=== –û–±—Ä–∞–±–æ—Ç–∫–∞ –æ—Ç–¥–µ–ª—å–Ω—ã—Ö –∏–Ω—Å—Ç—Ä—É–º–µ–Ω—Ç–æ–≤ ===")

    image_suffixes = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff')
    processed = 0

    for class_idx, class_name in enumerate(classes):
        class_path = os.path.join(dataset_path, class_name)

        if not os.path.exists(class_path):
            print(f"‚ö† –ü—Ä–µ–¥—É–ø—Ä–µ–∂–¥–µ–Ω–∏–µ: –ø–∞–ø–∫–∞ {class_path} –Ω–µ —Å—É—â–µ—Å—Ç–≤—É–µ—Ç!")
            continue

        images = [entry for entry in os.listdir(class_path)
                  if entry.lower().endswith(image_suffixes)]
        n_images = len(images)

        print(f"{class_name}: {n_images} –∏–∑–æ–±—Ä–∞–∂–µ–Ω–∏–π")

        if n_images == 0:
            print(f"‚ö† –ü—Ä–µ–¥—É–ø—Ä–µ–∂–¥–µ–Ω–∏–µ: –≤ –ø–∞–ø–∫–µ {class_name} –Ω–µ—Ç –∏–∑–æ–±—Ä–∞–∂–µ–Ω–∏–π!")
            continue

        # A lone image cannot be split; it is used for training only.
        if n_images == 1:
            train_imgs, val_imgs = images, []
        else:
            train_imgs, val_imgs = train_test_split(
                images, test_size=0.2, random_state=42, shuffle=True)

        print(f"  Train: {len(train_imgs)}, Val: {len(val_imgs)}")
        processed += n_images

        # Copy the images and generate their labels, train first, then val.
        for split, subset in (('train', train_imgs), ('val', val_imgs)):
            copy_images_with_annotations(
                class_path, output_dir, class_idx, split, subset, is_single_tool=True)

    print(f"‚úì –û–±—Ä–∞–±–æ—Ç–∞–Ω–æ –æ—Ç–¥–µ–ª—å–Ω—ã—Ö –∏–Ω—Å—Ç—Ä—É–º–µ–Ω—Ç–æ–≤: {processed} –∏–∑–æ–±—Ä–∞–∂–µ–Ω–∏–π")
    return processed

def process_tools_with_ruler(dataset_path, output_dir, ruler_folder, classes):
    """Process a folder of tool photos taken next to a ruler.

    Images are split 80/20 into train/val (a single image goes to train).
    The tool class of each image is detected from its file name: the first
    entry of ``classes`` whose lower-cased name occurs in the lower-cased file
    name wins. Images whose name matches no class are skipped with a warning.
    Copying the image and its annotation is delegated to
    ``copy_images_with_annotations`` (``is_single_tool=False``).

    Args:
        dataset_path: Root directory of the raw dataset.
        output_dir: Root of the YOLO dataset being built.
        ruler_folder: Name of the folder (under ``dataset_path``) to process.
        classes: Ordered list of class names; the position is the class index.

    Returns:
        Number of images that are present in the output image directory after
        processing; ``0`` if the folder is missing or has no images.
    """
    print(f"\n=== –û–±—Ä–∞–±–æ—Ç–∫–∞ –∏–Ω—Å—Ç—Ä—É–º–µ–Ω—Ç–æ–≤ —Å –ª–∏–Ω–µ–π–∫–æ–π: {ruler_folder} ===")

    ruler_path = os.path.join(dataset_path, ruler_folder)

    if not os.path.exists(ruler_path):
        print(f"‚ö† –ü—Ä–µ–¥—É–ø—Ä–µ–∂–¥–µ–Ω–∏–µ: –ø–∞–ø–∫–∞ {ruler_path} –Ω–µ —Å—É—â–µ—Å—Ç–≤—É–µ—Ç!")
        return 0

    # Class name -> class index.
    class_mapping = {class_name: class_idx for class_idx, class_name in enumerate(classes)}

    images = [f for f in os.listdir(ruler_path)
              if f.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp', '.tiff'))]

    print(f"–ù–∞–π–¥–µ–Ω–æ –∏–∑–æ–±—Ä–∞–∂–µ–Ω–∏–π —Å –ª–∏–Ω–µ–π–∫–æ–π: {len(images)}")

    if not images:
        print("‚ö† –í –ø–∞–ø–∫–µ —Å –ª–∏–Ω–µ–π–∫–æ–π –Ω–µ—Ç –∏–∑–æ–±—Ä–∞–∂–µ–Ω–∏–π!")
        return 0

    # A lone image cannot be split; it is used for training only.
    if len(images) == 1:
        train_imgs, val_imgs = images, []
    else:
        train_imgs, val_imgs = train_test_split(images, test_size=0.2, random_state=42, shuffle=True)

    total_images = 0
    for split, imgs in (('train', train_imgs), ('val', val_imgs)):
        for img_name in imgs:
            try:
                # Detect the tool class from the file name (first match wins).
                name_lower = img_name.lower()
                tool_class = next(
                    (class_mapping[class_name] for class_name in classes
                     if class_name.lower() in name_lower),
                    None,
                )

                if tool_class is None:
                    print(f"‚ö† –ù–µ —É–¥–∞–ª–æ—Å—å –æ–ø—Ä–µ–¥–µ–ª–∏—Ç—å –∫–ª–∞—Å—Å –¥–ª—è {img_name}")
                    continue

                # The helper copies both the image and its annotation, so the
                # image is not copied separately here (it used to be copied
                # twice).
                copy_images_with_annotations(
                    ruler_path, output_dir, tool_class, split, [img_name], is_single_tool=False)

                # The helper reports its own errors instead of raising, so
                # count the image only if it actually reached the dataset.
                if os.path.isfile(os.path.join(output_dir, 'images', split, img_name)):
                    total_images += 1

            except Exception as e:
                print(f"‚ùå –û—à–∏–±–∫–∞ –ø—Ä–∏ –æ–±—Ä–∞–±–æ—Ç–∫–µ {img_name}: {e}")

    print(f"‚úì –û–±—Ä–∞–±–æ—Ç–∞–Ω–æ –∏–Ω—Å—Ç—Ä—É–º–µ–Ω—Ç–æ–≤ —Å –ª–∏–Ω–µ–π–∫–æ–π: {total_images} –∏–∑–æ–±—Ä–∞–∂–µ–Ω–∏–π")
    return total_images

def process_group_photos(dataset_path, output_dir, group_folders, classes):
    """Process folders that contain photos of several tools at once.

    For every folder the images are split 80/20 into train/val (a single
    image goes to train). Each image is copied to
    ``<output_dir>/images/<split>``; a ``.txt`` annotation with the same stem
    is copied to ``<output_dir>/labels/<split>`` when one exists next to the
    image, otherwise an empty label file is created there.

    Args:
        dataset_path: Root directory of the raw dataset.
        output_dir: Root of the YOLO dataset being built (split directories
            must already exist).
        group_folders: Names of the group-photo folders under ``dataset_path``.
        classes: Class names; not used by this function.

    Returns:
        Number of images processed without an error.
    """
    print("\n=== –û–±—Ä–∞–±–æ—Ç–∫–∞ –≥—Ä—É–ø–ø–æ–≤—ã—Ö —Ñ–æ—Ç–æ ===")

    image_suffixes = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff')
    copied = 0

    for group_folder in group_folders:
        group_path = os.path.join(dataset_path, group_folder)

        if not os.path.exists(group_path):
            print(f"‚ö† –ü—Ä–µ–¥—É–ø—Ä–µ–∂–¥–µ–Ω–∏–µ: –ø–∞–ø–∫–∞ {group_path} –Ω–µ —Å—É—â–µ—Å—Ç–≤—É–µ—Ç!")
            continue

        images = [entry for entry in os.listdir(group_path)
                  if entry.lower().endswith(image_suffixes)]

        print(f"{group_folder}: {len(images)} –∏–∑–æ–±—Ä–∞–∂–µ–Ω–∏–π")

        if not images:
            print(f"‚ö† –ü—Ä–µ–¥—É–ø—Ä–µ–∂–¥–µ–Ω–∏–µ: –≤ –ø–∞–ø–∫–µ {group_folder} –Ω–µ—Ç –∏–∑–æ–±—Ä–∞–∂–µ–Ω–∏–π!")
            continue

        # A lone image cannot be split; it is used for training only.
        if len(images) == 1:
            train_imgs, val_imgs = images, []
        else:
            train_imgs, val_imgs = train_test_split(
                images, test_size=0.2, random_state=42, shuffle=True)

        for split, subset in (('train', train_imgs), ('val', val_imgs)):
            for img_name in subset:
                try:
                    # Image itself.
                    shutil.copy2(
                        os.path.join(group_path, img_name),
                        os.path.join(output_dir, 'images', split, img_name),
                    )

                    # Annotation: reuse the one next to the image if present.
                    label_name = os.path.splitext(img_name)[0] + '.txt'
                    label_src = os.path.join(group_path, label_name)
                    label_dst = os.path.join(output_dir, 'labels', split, label_name)

                    if os.path.exists(label_src):
                        shutil.copy2(label_src, label_dst)
                        print(f"  ‚úì –î–æ–±–∞–≤–ª–µ–Ω–∞ –∞–Ω–Ω–æ—Ç–∞—Ü–∏—è –¥–ª—è {img_name}")
                    else:
                        # Empty label as a placeholder for manual annotation.
                        open(label_dst, 'w').close()
                        print(f"  ‚ö† –°–æ–∑–¥–∞–Ω–∞ –ø—É—Å—Ç–∞—è –∞–Ω–Ω–æ—Ç–∞—Ü–∏—è –¥–ª—è {img_name} (—Ç—Ä–µ–±—É–µ—Ç —Ä—É—á–Ω–æ–π —Ä–∞–∑–º–µ—Ç–∫–∏)")

                    copied += 1

                except Exception as err:
                    print(f"‚ùå –û—à–∏–±–∫–∞ –ø—Ä–∏ –æ–±—Ä–∞–±–æ—Ç–∫–µ –≥—Ä—É–ø–ø–æ–≤–æ–≥–æ —Ñ–æ—Ç–æ {img_name}: {err}")

    print(f"‚úì –û–±—Ä–∞–±–æ—Ç–∞–Ω–æ –≥—Ä—É–ø–ø–æ–≤—ã—Ö —Ñ–æ—Ç–æ: {copied} –∏–∑–æ–±—Ä–∞–∂–µ–Ω–∏–π")
    return copied

def check_dataset_stats(output_dir):
    """Print summary statistics of the prepared YOLO dataset.

    Reports the number of entries in the train/val image and label
    directories, and the total number of annotated objects, i.e. non-blank
    lines across all ``.txt`` label files.

    Args:
        output_dir: Root of the YOLO dataset; must contain
            ``images/{train,val}`` and ``labels/{train,val}``.
    """
    print("\n=== –°—Ç–∞—Ç–∏—Å—Ç–∏–∫–∞ –¥–∞—Ç–∞—Å–µ—Ç–∞ ===")

    train_images = len(os.listdir(os.path.join(output_dir, 'images', 'train')))
    val_images = len(os.listdir(os.path.join(output_dir, 'images', 'val')))
    train_labels = len(os.listdir(os.path.join(output_dir, 'labels', 'train')))
    val_labels = len(os.listdir(os.path.join(output_dir, 'labels', 'val')))

    print(f"‚úì –¢—Ä–µ–Ω–∏—Ä–æ–≤–æ—á–Ω—ã–µ –∏–∑–æ–±—Ä–∞–∂–µ–Ω–∏—è: {train_images}")
    print(f"‚úì –í–∞–ª–∏–¥–∞—Ü–∏–æ–Ω–Ω—ã–µ –∏–∑–æ–±—Ä–∞–∂–µ–Ω–∏—è: {val_images}")
    print(f"‚úì –¢—Ä–µ–Ω–∏—Ä–æ–≤–æ—á–Ω—ã–µ –∞–Ω–Ω–æ—Ç–∞—Ü–∏–∏: {train_labels}")
    print(f"‚úì –í–∞–ª–∏–¥–∞—Ü–∏–æ–Ω–Ω—ã–µ –∞–Ω–Ω–æ—Ç–∞—Ü–∏–∏: {val_labels}")
    print(f"‚úì –û–±—â–µ–µ –∫–æ–ª–∏—á–µ—Å—Ç–≤–æ –∏–∑–æ–±—Ä–∞–∂–µ–Ω–∏–π: {train_images + val_images}")

    # Count annotated objects: one object per non-blank label line. Blank
    # lines (e.g. a trailing empty line left by an editor) are not objects.
    total_objects = 0
    for split in ('train', 'val'):
        labels_dir = os.path.join(output_dir, 'labels', split)
        for label_file in os.listdir(labels_dir):
            if not label_file.endswith('.txt'):
                continue
            with open(os.path.join(labels_dir, label_file), 'r') as f:
                total_objects += sum(1 for line in f if line.strip())

    print(f"‚úì –í—Å–µ–≥–æ –æ–±—ä–µ–∫—Ç–æ–≤ –≤ –∞–Ω–Ω–æ—Ç–∞—Ü–∏—è—Ö: {total_objects}")
def prepare_yolo_dataset(dataset_path, output_dir='yolo_dataset'):
    """Build a YOLO-format dataset from every kind of source folder.

    Sub-folders of ``dataset_path`` are classified by name: "ruler" folders,
    "group" folders, and everything else, which is treated as one tool class
    per folder. The tool folder names (sorted) define the class list, which is
    written to ``<output_dir>/classes.txt``. Single-tool, ruler and group
    folders are then processed in that order and dataset statistics printed.

    Args:
        dataset_path: Root directory of the raw dataset.
        output_dir: Directory in which the YOLO dataset is created.

    Returns:
        The list of tool folder names, i.e. the class names in index order.
    """
    # Every sub-directory of the raw dataset, in sorted order.
    all_folders = sorted(
        entry for entry in os.listdir(dataset_path)
        if os.path.isdir(os.path.join(dataset_path, entry))
    )

    print(f"–ù–∞–π–¥–µ–Ω–æ –ø–∞–ø–æ–∫: {len(all_folders)}")
    print("–í—Å–µ –ø–∞–ø–∫–∏:", all_folders)

    # Output layout: images/<split> and labels/<split>.
    for split in ('train', 'val'):
        os.makedirs(os.path.join(output_dir, 'images', split), exist_ok=True)
        os.makedirs(os.path.join(output_dir, 'labels', split), exist_ok=True)

    # Classify folders by the keywords found in their lower-cased names.
    tools_folders, ruler_folders, group_folders = [], [], []
    for folder in all_folders:
        lowered = folder.lower()
        is_ruler = '–ª–∏–Ω–µ–π–∫' in lowered or 'ruler' in lowered
        is_group = '–≥—Ä—É–ø–ø–æ–≤—ã–µ' in lowered or 'group' in lowered
        if is_ruler:
            ruler_folders.append(folder)
        if is_group:
            group_folders.append(folder)
        if not (is_ruler or is_group):
            tools_folders.append(folder)

    print(f"‚úì –ü–∞–ø–∫–∏ —Å –∏–Ω—Å—Ç—Ä—É–º–µ–Ω—Ç–∞–º–∏: {len(tools_folders)}")
    print("–ò–Ω—Å—Ç—Ä—É–º–µ–Ω—Ç—ã:", tools_folders)
    print(f"‚úì –ü–∞–ø–∫–∏ —Å –ª–∏–Ω–µ–π–∫–æ–π: {len(ruler_folders)}")
    print("–õ–∏–Ω–µ–π–∫–∞:", ruler_folders)
    print(f"‚úì –ì—Ä—É–ø–ø–æ–≤—ã–µ –ø–∞–ø–∫–∏: {len(group_folders)}")
    print("–ì—Ä—É–ø–ø–æ–≤—ã–µ:", group_folders)

    # Persist the class names, one per line, in index order.
    with open(os.path.join(output_dir, 'classes.txt'), 'w') as classes_file:
        classes_file.writelines(f"{class_name}\n" for class_name in tools_folders)

    # Process each kind of data: single tools, then ruler shots, then groups.
    single_count = process_single_tools(dataset_path, output_dir, tools_folders)
    ruler_count = sum(
        process_tools_with_ruler(dataset_path, output_dir, ruler_folder, tools_folders)
        for ruler_folder in ruler_folders
    )
    group_count = process_group_photos(dataset_path, output_dir, group_folders, tools_folders)

    # Report what ended up on disk.
    check_dataset_stats(output_dir)

    print(f"\nüìä –ò–¢–û–ì–û–í–ê–Ø –°–¢–ê–¢–ò–°–¢–ò–ö–ê:")
    print(f"  –û–¥–∏–Ω–æ—á–Ω—ã–µ –∏–Ω—Å—Ç—Ä—É–º–µ–Ω—Ç—ã: {single_count}")
    print(f"  –ò–Ω—Å—Ç—Ä—É–º–µ–Ω—Ç—ã —Å –ª–∏–Ω–µ–π–∫–æ–π: {ruler_count}")
    print(f"  –ì—Ä—É–ø–ø–æ–≤—ã–µ —Ñ–æ—Ç–æ: {group_count}")
    print(f"  –í—Å–µ–≥–æ: {single_count + ruler_count + group_count}")

    return tools_folders

def create_dataset_config(output_dir='yolo_dataset', output_yaml='dataset.yaml'):
    """Create the YOLO dataset configuration file.

    Class names are read from ``<output_dir>/classes.txt`` (one per line) and
    written, together with the dataset paths, to ``output_yaml``.

    Args:
        output_dir: Root of the prepared YOLO dataset.
        output_yaml: Path of the YAML file to write.

    Returns:
        Tuple ``(config, classes)`` with the configuration dict and the list
        of class names, or ``(None, None)`` if ``classes.txt`` is missing.
    """
    # Read the class names written during dataset preparation.
    classes_path = os.path.join(output_dir, 'classes.txt')
    if not os.path.exists(classes_path):
        print(f"–û—à–∏–±–∫–∞: —Ñ–∞–π–ª {classes_path} –Ω–µ –Ω–∞–π–¥–µ–Ω!")
        return None, None

    # classes.txt is written with the platform default encoding, so it is read
    # back the same way.
    with open(classes_path, 'r') as f:
        classes = [line.strip() for line in f]

    # Trailing blank lines would add phantom classes and inflate ``nc``.
    # Only trailing ones are dropped so the existing class indices stay put.
    while classes and not classes[-1]:
        classes.pop()

    config = {
        'path': os.path.abspath(output_dir),
        'train': 'images/train',
        'val': 'images/val',
        'nc': len(classes),
        'names': {i: class_name for i, class_name in enumerate(classes)}
    }

    # allow_unicode=True emits non-ASCII class names verbatim, so the file is
    # written explicitly as UTF-8 instead of the platform default encoding.
    with open(output_yaml, 'w', encoding='utf-8') as f:
        yaml.dump(config, f, default_flow_style=False, allow_unicode=True)

    print(f"‚úì –°–æ–∑–¥–∞–Ω –∫–æ–Ω—Ñ–∏–≥ —Ñ–∞–π–ª: {output_yaml}")
    print(f"‚úì –ü—É—Ç—å –∫ –¥–∞–Ω–Ω—ã–º: {os.path.abspath(output_dir)}")
    print(f"‚úì –ö–æ–ª–∏—á–µ—Å—Ç–≤–æ –∫–ª–∞—Å—Å–æ–≤: {len(classes)}")
    print(f"‚úì –ö–ª–∞—Å—Å—ã: {classes}")
    return config, classes

def train_yolo_model(output_dir='yolo_dataset', model_size='s', epochs=100,
                     batch=16, imgsz=640, device='cpu'):
    """Train a YOLOv8 model on the prepared dataset.

    The dataset configuration file is (re)created first, then the pretrained
    ``yolov8<model_size>.pt`` weights are loaded and trained.

    Args:
        output_dir: Root of the prepared YOLO dataset.
        model_size: YOLOv8 size suffix used in the weights file name.
        epochs: Number of training epochs.
        batch: Batch size passed to the trainer.
        imgsz: Training image size passed to the trainer.
        device: Device passed to the trainer (``'cpu'`` by default; e.g.
            ``'cuda'`` or ``0`` to train on a GPU).

    Returns:
        Tuple ``(model, results)``: the ``YOLO`` object and whatever
        ``model.train`` returned.

    Raises:
        ValueError: If the dataset configuration could not be created.
    """
    # Write the dataset config and train on exactly that file.
    dataset_yaml = 'dataset.yaml'
    config, _ = create_dataset_config(output_dir, output_yaml=dataset_yaml)

    if config is None:
        raise ValueError("–ù–µ —É–¥–∞–ª–æ—Å—å —Å–æ–∑–¥–∞—Ç—å –∫–æ–Ω—Ñ–∏–≥—É—Ä–∞—Ü–∏–æ–Ω–Ω—ã–π —Ñ–∞–π–ª")

    # Load the pretrained weights.
    print(f"üöÄ –ó–∞–≥—Ä—É–∑–∫–∞ –º–æ–¥–µ–ª–∏ YOLOv8{model_size}.pt...")
    model = YOLO(f'yolov8{model_size}.pt')

    train_params = {
        # Data and schedule
        'data': dataset_yaml,
        'epochs': epochs,
        'imgsz': imgsz,
        'batch': batch,
        'name': f'yolov8{model_size}_tools_detection',
        'patience': 20,
        # Optimiser
        'optimizer': 'AdamW',
        'lr0': 0.001,
        'lrf': 0.01,
        'momentum': 0.937,
        'weight_decay': 0.0005,
        # Augmentation
        'augment': True,
        'hsv_h': 0.015,
        'hsv_s': 0.7,
        'hsv_v': 0.4,
        'degrees': 45.0,
        'translate': 0.1,
        'scale': 0.5,
        'shear': 0.0,
        'perspective': 0.0,
        'flipud': 0.0,
        'fliplr': 0.5,
        'mosaic': 1.0,
        'mixup': 0.1,
        'copy_paste': 0.1,
        'erasing': 0.4,
        'dropout': 0.1,
        # Validation, checkpoints and runtime
        'val': True,
        'save': True,
        'save_period': 10,
        'device': device,
        'workers': 8,
        'single_cls': False,
        'verbose': True,
        'exist_ok': True
    }

    print("üéØ –ù–∞—á–∏–Ω–∞–µ–º –æ–±—É—á–µ–Ω–∏–µ –º–æ–¥–µ–ª–∏...")
    print(f"üìä –ü–∞—Ä–∞–º–µ—Ç—Ä—ã –æ–±—É—á–µ–Ω–∏—è: {epochs} —ç–ø–æ—Ö, —Ä–∞–∑–º–µ—Ä –±–∞—Ç—á–∞: {batch}")
    print(f"üìÅ –î–∞–Ω–Ω—ã–µ: {config['path']}")

    results = model.train(**train_params)

    print("‚úÖ –û–±—É—á–µ–Ω–∏–µ –∑–∞–≤–µ—Ä—à–µ–Ω–æ!")
    return model, results

def export_model_to_onnx(model, output_path='yolov8_tools.onnx'):
    """Export the model to ONNX and place the result at ``output_path``.

    The best checkpoint of the training run is exported when it exists on
    disk (``model.trainer.best``, falling back to the default run directory
    of the ``s`` model); otherwise ``model`` itself is exported. The exporter
    chooses its own output location, so the produced file is then copied to
    ``output_path``.

    Args:
        model: Trained ``YOLO`` model.
        output_path: Destination path of the ONNX file.
    """
    print("üì§ –≠–∫—Å–ø–æ—Ä—Ç –º–æ–¥–µ–ª–∏ –≤ ONNX...")

    # Locate the best checkpoint of the training run, if any.
    trainer = getattr(model, 'trainer', None)
    model_path = (getattr(trainer, 'best', None)
                  or 'runs/detect/yolov8s_tools_detection/weights/best.pt')

    export_kwargs = dict(format='onnx', imgsz=640, simplify=True, dynamic=True, opset=12)
    if os.path.exists(model_path):
        # Export the best checkpoint rather than the in-memory model.
        exported = YOLO(model_path).export(**export_kwargs)
    else:
        exported = model.export(**export_kwargs)

    # NOTE(review): relies on export() returning the path of the produced
    # file, as described in the Ultralytics export docs.
    if isinstance(exported, (str, os.PathLike)) and os.path.isfile(exported):
        if os.path.abspath(exported) != os.path.abspath(output_path):
            parent = os.path.dirname(output_path)
            if parent:
                os.makedirs(parent, exist_ok=True)
            shutil.copy2(exported, output_path)
        location = output_path
    else:
        # No usable path came back: report what the exporter returned instead
        # of claiming the file is at output_path.
        location = exported

    print(f"‚úÖ –ú–æ–¥–µ–ª—å —ç–∫—Å–ø–æ—Ä—Ç–∏—Ä–æ–≤–∞–Ω–∞ –≤: {location}")

def evaluate_model(model, data_path='yolo_dataset'):
    """Evaluate the model on the validation split and print the metrics.

    The dataset config is looked up as ``<data_path>/dataset.yaml`` first and
    then as ``dataset.yaml`` in the current working directory, which is where
    ``create_dataset_config`` writes it by default. If neither exists,
    ``model.val`` is called without ``data`` so the model uses its own
    dataset settings.

    Args:
        model: Trained ``YOLO`` model.
        data_path: Root of the YOLO dataset.

    Returns:
        The metrics object returned by ``model.val``, or ``None`` if the
        evaluation raised an exception.
    """
    print("üìä –û—Ü–µ–Ω–∫–∞ –º–æ–¥–µ–ª–∏...")

    try:
        candidates = (os.path.join(data_path, 'dataset.yaml'), 'dataset.yaml')
        data_yaml = next((path for path in candidates if os.path.isfile(path)), None)

        if data_yaml is None:
            # NOTE(review): a trained Ultralytics model is documented to
            # remember its dataset, so ``data`` can be omitted — confirm.
            metrics = model.val(split='val')
        else:
            metrics = model.val(data=data_yaml, split='val')

        print("üìà –†–µ–∑—É–ª—å—Ç–∞—Ç—ã –æ—Ü–µ–Ω–∫–∏:")
        print(f"  mAP50: {metrics.box.map50:.4f}")
        print(f"  mAP50-95: {metrics.box.map:.4f}")
        print(f"  Precision: {metrics.box.mp:.4f}")
        print(f"  Recall: {metrics.box.mr:.4f}")

        return metrics
    except Exception as e:
        print(f"‚ùå –û—à–∏–±–∫–∞ –ø—Ä–∏ –æ—Ü–µ–Ω–∫–µ –º–æ–¥–µ–ª–∏: {e}")
        return None

def predict_on_image(model, image_path, conf_threshold=0.25, iou_threshold=0.45):
    """Run detection on one image, display it and print the detections.

    For each result the annotated image is shown in an OpenCV window (scaled
    down so that its longest side is at most 1200 px) and the call blocks on
    ``cv2.waitKey(0)`` before the window is closed. Every detected box is then
    printed with its class name, confidence and integer ``xyxy`` coordinates.

    Args:
        model: Callable ``YOLO`` model.
        image_path: Path of the image to analyse.
        conf_threshold: Confidence threshold passed to the model.
        iou_threshold: IoU threshold passed to the model.

    Returns:
        The results returned by the model, or ``None`` if ``image_path``
        does not exist.
    """
    if not os.path.exists(image_path):
        print(f"‚ùå –ò–∑–æ–±—Ä–∞–∂–µ–Ω–∏–µ –Ω–µ –Ω–∞–π–¥–µ–Ω–æ: {image_path}")
        return None

    results = model(image_path, conf=conf_threshold, iou=iou_threshold, augment=False)

    display_limit = 1200
    for image_no, result in enumerate(results, start=1):
        # Image with the boxes, labels and confidences drawn on it.
        rendered = result.plot(line_width=2, font_size=1.0, conf=True, labels=True)

        # Shrink oversized images so the window stays within the limit.
        rows, cols = rendered.shape[:2]
        longest_side = max(rows, cols)
        if longest_side > display_limit:
            factor = display_limit / longest_side
            rendered = cv2.resize(rendered, (int(cols * factor), int(rows * factor)))

        cv2.imshow('–ò–Ω—Å—Ç—Ä—É–º–µ–Ω—Ç—ã - –æ–±–Ω–∞—Ä—É–∂–µ–Ω–∏–µ', rendered)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

        # Textual report of what was detected.
        print(f"\nüîç –†–µ–∑—É–ª—å—Ç–∞—Ç—ã –æ–±–Ω–∞—Ä—É–∂–µ–Ω–∏—è –¥–ª—è –∏–∑–æ–±—Ä–∞–∂–µ–Ω–∏—è {image_no}:")
        if len(result.boxes) == 0:
            print("  –û–±—ä–µ–∫—Ç—ã –Ω–µ –æ–±–Ω–∞—Ä—É–∂–µ–Ω—ã")
            continue

        for object_no, box in enumerate(result.boxes, start=1):
            class_id = int(box.cls[0])
            confidence = float(box.conf[0])
            bbox = box.xyxy[0].cpu().numpy()
            class_name = model.names[class_id]
            print(f"  –û–±—ä–µ–∫—Ç {object_no}: {class_name} "
                  f"(—É–≤–µ—Ä–µ–Ω–Ω–æ—Å—Ç—å: {confidence:.3f}) "
                  f"BBox: {bbox.astype(int)}")

    return results

def predict_on_folder(model, folder_path, conf_threshold=0.25):
    """Run ``predict_on_image`` on every image found in a folder.

    Files are selected by extension (jpg, jpeg, png, bmp, tiff, compared
    case-insensitively) and processed in ``os.listdir`` order.

    Args:
        model: ``YOLO`` model forwarded to ``predict_on_image``.
        folder_path: Directory containing the images.
        conf_threshold: Confidence threshold forwarded to ``predict_on_image``.

    Returns:
        ``None``; a message is printed if the folder does not exist.
    """
    if not os.path.exists(folder_path):
        print(f"‚ùå –ü–∞–ø–∫–∞ –Ω–µ –Ω–∞–π–¥–µ–Ω–∞: {folder_path}")
        return

    image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff')
    images = []
    for entry in os.listdir(folder_path):
        if entry.lower().endswith(image_extensions):
            images.append(entry)

    total = len(images)
    print(f"üîç –ù–∞–π–¥–µ–Ω–æ {total} –∏–∑–æ–±—Ä–∞–∂–µ–Ω–∏–π –≤ –ø–∞–ø–∫–µ {folder_path}")

    for position, img_name in enumerate(images, start=1):
        print(f"\nüìÑ –û–±—Ä–∞–±–æ—Ç–∫–∞ {position}/{total}: {img_name}")
        predict_on_image(model, os.path.join(folder_path, img_name), conf_threshold)

def main():
    """Run the whole pipeline: prepare data, train, save, export, evaluate, test.

    Steps, in order: build the YOLO dataset from the raw folders, write the
    dataset config, train the ``s`` model for 100 epochs, save the weights,
    export to ONNX, evaluate on the validation split and run predictions on
    the first three validation images. Any exception is printed with its
    traceback instead of being propagated.
    """
    # Data locations.
    dataset_path = '/data/vscode/HacatonAeroflot/Aeroflot-project/datasets/raw'
    output_dir = '/data/vscode/HacatonAeroflot/Aeroflot-project/yolo_dataset'
    banner = "=" * 60

    try:
        # Nothing to do without the raw dataset.
        if not os.path.exists(dataset_path):
            print(f"‚ùå –û—à–∏–±–∫–∞: –ø—É—Ç—å {dataset_path} –Ω–µ —Å—É—â–µ—Å—Ç–≤—É–µ—Ç!")
            return

        print(banner)
        print("üõ†Ô∏è  –°–ò–°–¢–ï–ú–ê –û–ë–ù–ê–†–£–ñ–ï–ù–ò–Ø –ò–ù–°–¢–†–£–ú–ï–ù–¢–û–í (–£–õ–£–ß–®–ï–ù–ù–ê–Ø)")
        print(banner)

        # Step 1: dataset preparation.
        print("\nüìÅ –®–ê–ì 1: –ü–æ–¥–≥–æ—Ç–æ–≤–∫–∞ –¥–∞—Ç–∞—Å–µ—Ç–∞...")
        prepare_yolo_dataset(dataset_path, output_dir)

        # Step 2: dataset configuration.
        print("\n‚öôÔ∏è  –®–ê–ì 2: –°–æ–∑–¥–∞–Ω–∏–µ –∫–æ–Ω—Ñ–∏–≥—É—Ä–∞—Ü–∏–∏...")
        config, _ = create_dataset_config(output_dir)

        if config is None:
            raise ValueError("–ù–µ —É–¥–∞–ª–æ—Å—å —Å–æ–∑–¥–∞—Ç—å –∫–æ–Ω—Ñ–∏–≥—É—Ä–∞—Ü–∏—é –¥–∞—Ç–∞—Å–µ—Ç–∞")

        # Step 3: training.
        print("\nüéì –®–ê–ì 3: –û–±—É—á–µ–Ω–∏–µ –º–æ–¥–µ–ª–∏...")
        model, _ = train_yolo_model(output_dir, model_size='s', epochs=100)

        # Step 4: save the trained weights.
        print("\nüíæ –®–ê–ì 4: –°–æ—Ö—Ä–∞–Ω–µ–Ω–∏–µ –º–æ–¥–µ–ª–∏...")
        model.save('best_tools_detection.pt')
        print("‚úÖ –ú–æ–¥–µ–ª—å —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞ –∫–∞–∫ 'best_tools_detection.pt'")

        # Step 5: ONNX export.
        print("\nüì§ –®–ê–ì 5: –≠–∫—Å–ø–æ—Ä—Ç –≤ ONNX...")
        export_model_to_onnx(model)

        # Step 6: evaluation.
        print("\nüìä –®–ê–ì 6: –û—Ü–µ–Ω–∫–∞ –º–æ–¥–µ–ª–∏...")
        evaluate_model(model, output_dir)

        # Step 7: predictions on a few images of each listed split.
        print("\nüß™ –®–ê–ì 7: –¢–µ—Å—Ç–∏—Ä–æ–≤–∞–Ω–∏–µ –º–æ–¥–µ–ª–∏ –Ω–∞ —Ä–∞–∑–Ω—ã—Ö —Ç–∏–ø–∞—Ö –∏–∑–æ–±—Ä–∞–∂–µ–Ω–∏–π...")

        test_types = ['val']  # 'train' can be added for more test images

        for split in test_types:
            test_dir = os.path.join(output_dir, 'images', split)
            if not os.path.exists(test_dir):
                continue

            # Only the first three images of the split are shown.
            for test_no, test_img in enumerate(os.listdir(test_dir)[:3], start=1):
                print(f"\nüîç –¢–µ—Å—Ç {test_no}: {test_img}")
                predict_on_image(model, os.path.join(test_dir, test_img))

        print("\n" + banner)
        print("‚úÖ –í–°–ï –≠–¢–ê–ü–´ –ó–ê–í–ï–†–®–ï–ù–´ –£–°–ü–ï–®–ù–û!")
        print(banner)

    except Exception as err:
        print(f"\n‚ùå –ö–†–ò–¢–ò–ß–ï–°–ö–ê–Ø –û–®–ò–ë–ö–ê: {err}")
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    main()