In [None]:
import time
from tqdm import tqdm
from pathlib import Path
import shutil

def create_yolo_dataset_fast(source_base='./CAFD', target_base='./CAFD_YOLO',
                             yaml_path='./cafd_yolo.yaml') -> Path:
    """Convert a class-per-folder image dataset into a YOLO-style layout.

    The source tree is expected to look like ``<source_base>/<split>/<class>/*.jpg``
    for the splits ``train``, ``val`` and ``test``. Every image is copied to
    ``<target_base>/<split>/images`` and gets a one-line label file in
    ``<target_base>/<split>/labels`` whose box spans the whole image.
    A dataset description is written to ``yaml_path``.

    The conversion is resumable: an image whose copy and label file both
    already exist in the target is counted as skipped and left untouched.

    Args:
        source_base: Root of the class-per-folder dataset (str or Path).
        target_base: Root of the YOLO dataset to create (str or Path).
        yaml_path: Where the dataset YAML is written (str or Path).

    Returns:
        ``target_base`` as a ``Path``.
    """
    print("🚀 Starting FAST conversion...")
    start_time = time.time()

    # Paths
    source_base = Path(source_base)
    target_base = Path(target_base)
    yaml_path = Path(yaml_path)
    splits = ('train', 'val', 'test')

    # Create directories
    for split in splits:
        for sub_dir in ('images', 'labels'):
            (target_base / split / sub_dir).mkdir(parents=True, exist_ok=True)

    # Class names; the position in this list is the YOLO class id.
    class_names = [
        "achichuk", "airan-katyk", "asip", "bauyrsak", "beshbarmak-w-kazy",
        "beshbarmak-wo-kazy", "chak-chak", "cheburek", "doner-lavash", "doner-nan",
        "hvorost", "irimshik", "kattama-nan", "kazy-karta", "kurt", "kuyrdak",
        "kymyz-kymyran", "lagman-fried", "lagman-w-soup", "lagman-wo-soup", "manty",
        "naryn", "nauryz-kozhe", "orama", "plov", "samsa", "shashlyk-chicken",
        "shashlyk-chicken-v", "shashlyk-kuskovoi", "shashlyk-kuskovoi-v",
        "shashlyk-minced-meat", "sheep-head", "shelpek", "shorpa", "soup-plain",
        "sushki", "suzbe", "taba-nan", "talkan-zhent", "tushpara-fried",
        "tushpara-w-soup", "tushpara-wo-soup"
    ]

    def process_split_fast(source_path, target_images, target_labels, split_name):
        """Copy one split into the YOLO layout.

        Returns a ``(new, skipped)`` tuple: the number of images converted in
        this call and the number that were already present in the target.
        """
        if not source_path.exists():
            print(f"⚠️  {split_name} not found")
            return 0, 0

        # Glob each class folder exactly once; the resulting list provides
        # both the progress-bar total and the work items.
        jobs = []
        for class_id, class_name in enumerate(class_names):
            class_path = source_path / class_name
            if not class_path.exists():
                continue
            jobs.extend(
                (class_id, class_name, img_file)
                for img_file in class_path.glob('*.jpg')
            )

        if not jobs:
            return 0, 0

        print(f"📊 {split_name}: {len(jobs)} images")

        image_count = 0
        skipped = 0
        # Stems already claimed in this split. All classes share one flat
        # images/labels directory, so two classes holding a file with the
        # same name would otherwise map to the same target and the second
        # image would be silently dropped as "already exists".
        used_stems = set()

        # The context manager closes the bar even if a copy raises.
        with tqdm(total=len(jobs), desc=f"Processing {split_name}", unit="img") as pbar:
            for class_id, class_name, img_file in jobs:
                stem = img_file.stem
                # Disambiguate cross-class name clashes with a class prefix.
                # Classes are visited in a fixed order, so the chosen name is
                # stable across re-runs and resuming still works.
                while stem in used_stems:
                    stem = f"{class_name}__{stem}"
                used_stems.add(stem)

                target_img = target_images / f"{stem}{img_file.suffix}"
                target_txt = target_labels / f"{stem}.txt"

                if target_img.exists() and target_txt.exists():
                    # Already converted by an earlier run.
                    skipped += 1
                else:
                    shutil.copy2(img_file, target_img)
                    # Label line: class id, then a normalized box centred at
                    # (0.5, 0.5) with width and height 1.0 (the whole image).
                    target_txt.write_text(
                        f"{class_id} 0.5 0.5 1.0 1.0\n", encoding='utf-8'
                    )
                    image_count += 1
                pbar.update(1)

        print(f"✅ {split_name}: {image_count} new, {skipped} skipped")
        return image_count, skipped

    # Process splits
    new_counts = {}
    for split in splits:
        new_counts[split], _ = process_split_fast(
            source_base / split,
            target_base / split / 'images',
            target_base / split / 'labels',
            split,
        )

    # Create YAML. Lines are assembled explicitly so the file has no leading
    # blank line and no indentation inherited from this function's body.
    dataset_root = target_base.absolute()
    yaml_lines = [
        f"{split}: {(dataset_root / split).as_posix()}" for split in splits
    ]
    yaml_lines.append(f"nc: {len(class_names)}")
    yaml_lines.append(f"names: {class_names}")
    yaml_path.write_text("\n".join(yaml_lines) + "\n", encoding='utf-8')

    duration = time.time() - start_time
    total_imgs = sum(new_counts.values())

    print(f"\n🎉 DONE in {duration:.1f}s!")
    print(f"📊 {total_imgs} images processed")
    print(f"📁 Created: {target_base}/")
    print(f"📄 YAML: {yaml_path}")

    return target_base

# Run the conversion as soon as this cell executes. The call takes no
# arguments and returns the root directory of the generated YOLO dataset
# (a pathlib.Path), kept here for use by later cells.
yolo_dataset = create_yolo_dataset_fast()

🚀 Starting FAST conversion...
📊 train: 10969 images


Processing train: 100%|██████████| 10969/10969 [14:35<00:00, 12.53img/s]


✅ train: 10969 new, 0 skipped
📊 val: 2735 images


Processing val: 100%|██████████| 2735/2735 [05:03<00:00,  9.02img/s]


✅ val: 2735 new, 0 skipped
📊 test: 2698 images


Processing test: 100%|██████████| 2698/2698 [04:53<00:00,  9.19img/s]

✅ test: 2698 new, 0 skipped

🎉 DONE in 1474.2s!
📊 16402 images processed
📁 Created: CAFD_YOLO/
📄 YAML: cafd_yolo.yaml



