In [17]:
# merge_datasets_specific.py
import shutil
from pathlib import Path

# Поменяй root на корень, где лежит папка data
root = Path("data")   # <-- у тебя data/
datasets = [
    root / "all-books",
    root / "book",
    root / "Book_200img",
    root / "book_detection2",
]

out = root / "combined"
imgs_out = out / "images"
lbls_out = out / "labels"

for split in ["train", "val"]:
    (imgs_out / split).mkdir(parents=True, exist_ok=True)
    (lbls_out / split).mkdir(parents=True, exist_ok=True)

IMG_EXTS = {".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff", ".webp"}

def copy_pair(src_img, src_lbl, dest_img, dest_lbl):
    dest_img.parent.mkdir(parents=True, exist_ok=True)
    dest_lbl.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy2(src_img, dest_img)
    shutil.copy2(src_lbl, dest_lbl)

summary = {}
for i, ds in enumerate(datasets, start=1):
    prefix = f"ds{i}"
    summary[prefix] = {"copied":0, "missing_label":0, "no_split":0}
    if not ds.exists():
        print(f"[WARN] dataset not found: {ds} — skipping")
        continue

    # ожидаем структуру ds/{train,val,test}/images labels
    for split in ["train","val"]:
        imgs_dir = ds / split / "images"
        lbls_dir = ds / split / "labels"
        if not imgs_dir.exists():
            # иногда папки называются images в корне ds/images/train - но ты сказал структура именно как в примере
            print(f"[INFO] {prefix}: no {split}/images — пропускаю {split}")
            summary[prefix]["no_split"] += 1
            continue
        img_files = [p for p in imgs_dir.iterdir() if p.suffix.lower() in IMG_EXTS]
        for img in img_files:
            lbl = lbls_dir / f"{img.stem}.txt"
            if not lbl.exists():
                summary[prefix]["missing_label"] += 1
                # пропускаем такие изображения
                continue
            new_img = imgs_out / split / f"{prefix}_{img.name}"
            new_lbl = lbls_out / split / f"{prefix}_{lbl.name}"
            copy_pair(img, lbl, new_img, new_lbl)
            summary[prefix]["copied"] += 1

    print(f"[DONE] {prefix}: copied={summary[prefix]['copied']}, missing_label={summary[prefix]['missing_label']}")

print("=== SUMMARY ===")
for k,v in summary.items():
    print(k, v)
print("Combined folder ready at:", out)


[DONE] ds1: copied=1863, missing_label=0
[DONE] ds2: copied=1061, missing_label=0
[DONE] ds3: copied=180, missing_label=0
[DONE] ds4: copied=2094, missing_label=0
=== SUMMARY ===
ds1 {'copied': 1863, 'missing_label': 0, 'no_split': 0}
ds2 {'copied': 1061, 'missing_label': 0, 'no_split': 0}
ds3 {'copied': 180, 'missing_label': 0, 'no_split': 0}
ds4 {'copied': 2094, 'missing_label': 0, 'no_split': 0}
Combined folder ready at: data\combined


In [18]:
# make_combined_data_yaml.py
import yaml
from pathlib import Path

root = Path("data")
combined = root / "combined"
data_yaml = combined / "data.yaml"

# Попробуем найти names в исходных data.yaml (первый найденный)
candidates = list(root.rglob("data.yaml"))
names = None
for p in candidates:
    try:
        d = yaml.safe_load(p.read_text())
        if isinstance(d, dict) and 'names' in d:
            names = d['names']
            print(f"[INFO] Found names in {p}")
            break
    except Exception:
        continue

# Если names — dict (как 0: 'class'), преобразуем в список по индексам
if isinstance(names, dict):
    # сортируем по ключу
    items = sorted(names.items(), key=lambda x: int(x[0]))
    names = [v for k,v in items]

if names is None:
    names = []  # оставим пустым, ты должен заполнить вручную ниже

data = {
    'train': str((combined / "images" / "train").resolve()),
    'val': str((combined / "images" / "val").resolve()),
    'names': names
}

data_yaml.parent.mkdir(parents=True, exist_ok=True)
data_yaml.write_text(yaml.safe_dump(data, sort_keys=False, allow_unicode=True))
print("Wrote", data_yaml)
if not names:
    print("WARNING: 'names' is empty — открой data/combined/data.yaml и укажи список классов (names).")


[INFO] Found names in data\all-books\data.yaml
Wrote data\combined\data.yaml


In [11]:
pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.202-py3-none-any.whl.metadata (37 kB)
Collecting numpy>=1.23.0 (from ultralytics)
  Using cached numpy-2.3.3-cp312-cp312-win_amd64.whl.metadata (60 kB)
Collecting matplotlib>=3.3.0 (from ultralytics)
  Using cached matplotlib-3.10.6-cp312-cp312-win_amd64.whl.metadata (11 kB)
Collecting opencv-python>=4.6.0 (from ultralytics)
  Using cached opencv_python-4.12.0.88-cp37-abi3-win_amd64.whl.metadata (19 kB)
Collecting pillow>=7.1.2 (from ultralytics)
  Using cached pillow-11.3.0-cp312-cp312-win_amd64.whl.metadata (9.2 kB)
Collecting pyyaml>=5.3.1 (from ultralytics)
  Using cached PyYAML-6.0.2-cp312-cp312-win_amd64.whl.metadata (2.1 kB)
Collecting requests>=2.23.0 (from ultralytics)
  Using cached requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting scipy>=1.4.1 (from ultralytics)
  Using cached scipy-1.16.2-cp312-cp312-win_amd64.whl.metadata (60 kB)
Collecting torch>=1.8.0 (from ultralytics)
  Using cached torch-2.8.0-cp312-cp31


[notice] A new release of pip is available: 24.2 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import json
import os
import shutil

# Пути
coco_path = "data/coco2017"
combined_path = "data/combined"

splits = {"train2017": "train", "val2017": "val"}

for coco_split, yolo_split in splits.items():
    ann_file = os.path.join(coco_path, "annotations", f"instances_{coco_split}.json")
    with open(ann_file, "r") as f:
        coco = json.load(f)

    book_id = 84  # id "book" в COCO

    # Берём только аннотации книг
    anns_book = [ann for ann in coco["annotations"] if ann["category_id"] == book_id]

    # Изображения с книгами
    img_ids = set([a["image_id"] for a in anns_book])
    imgs_book = [img for img in coco["images"] if img["id"] in img_ids]

    # Папки куда класть
    img_save_dir = os.path.join(combined_path, "images", yolo_split)
    lbl_save_dir = os.path.join(combined_path, "labels", yolo_split)
    os.makedirs(img_save_dir, exist_ok=True)
    os.makedirs(lbl_save_dir, exist_ok=True)

    # Источник картинок
    src_img_dir = os.path.join(coco_path, coco_split)

    for img in imgs_book:
        img_id = img["id"]
        w, h = img["width"], img["height"]

        anns = [a for a in anns_book if a["image_id"] == img_id]

        # Чтобы не было коллизий имён
        base_name = f"coco_{img['file_name']}"
        dst_img = os.path.join(img_save_dir, base_name)
        src_img = os.path.join(src_img_dir, img["file_name"])
        shutil.copyfile(src_img, dst_img)

        # YOLO аннотации
        lines = []
        for a in anns:
            x, y, bw, bh = a["bbox"]
            xc = (x + bw / 2) / w
            yc = (y + bh / 2) / h
            bw /= w
            bh /= h
            # class_id = 0 → "book"
            lines.append(f"0 {xc:.6f} {yc:.6f} {bw:.6f} {bh:.6f}")

        # txt сохраняем в labels
        label_file = os.path.join(lbl_save_dir, base_name.replace(".jpg", ".txt"))
        with open(label_file, "w") as f:
            f.write("\n".join(lines))

    print(f"{yolo_split}: добавлено {len(imgs_book)} изображений с книгами")


train: добавлено 5332 изображений с книгами
val: добавлено 230 изображений с книгами


In [21]:
from ultralytics import YOLO

# Загружаем предобученную модель
model = YOLO("yolo11n.pt")

# Путь к data.yaml объединённого датасета
data_path = r"C:\Users\Егор\VsCode project\avito_test_task\data\combined\data.yaml"

# Заморозим backbone (опционально)
for name, param in model.model.named_parameters():
    if "backbone" in name:
        param.requires_grad = False

# Обучение
results = model.train(data=data_path, epochs=1, imgsz=640, batch=8)

# Валидация
results = model.val()

# Инференс на тестовой картинке
img_path = r"C:\Users\Егор\VsCode project\avito_test_task\data\all-books\test\images\7b28dc3d5e10a9bf_jpg.rf.9e67de51e48f9806fa7b0d0e39cae7da.jpg"
results = model(img_path)
results[0].show()

# Экспорт в ONNX
success = model.export(format="onnx")


Ultralytics 8.3.202  Python-3.12.5 torch-2.8.0+cpu CPU (AMD Ryzen 5 5600H with Radeon Graphics)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=8, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=C:\Users\\VsCode project\avito_test_task\data\combined\data.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=1, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolo11n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train5, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_

KeyboardInterrupt: 