In [1]:
!pip install labelImg




[notice] A new release of pip is available: 23.2.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [7]:
import cv2, os

video_dir  = 'video'
output_dir = 'frames'
os.makedirs(output_dir, exist_ok=True)

# кадров в секунду
frames_per_sec = 1

for vid in os.listdir(video_dir):
    if not vid.lower().endswith('.mov'):
        continue
    path_in  = os.path.join(video_dir, vid)
    cap      = cv2.VideoCapture(path_in)
    fps      = cap.get(cv2.CAP_PROP_FPS) or frames_per_sec
    step     = int(fps / frames_per_sec)
    frame_id = 0
    sec_id   = 0

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        if frame_id % step == 0:
            name = f"{os.path.splitext(vid)[0]}_sec{sec_id:03d}.jpg"
            cv2.imwrite(os.path.join(output_dir, name), frame)
            sec_id += 1
        frame_id += 1

    cap.release()
print("Готово — кадры в папке", output_dir)


Готово — кадры в папке frames


In [1]:
!pip install labelImg




[notice] A new release of pip is available: 23.2.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import os
import glob
import random
from shutil import copy2

# Параметры разбивки
TRAIN_RATIO = 0.7
VAL_RATIO   = 0.15
TEST_RATIO  = 0.15
SEED        = 42

# Пути к исходным данным
IMG_DIR   = 'frames'
LBL_DIR   = 'labels'

# Целевая структура
BASE_OUT  = 'dataset'
OUT_IMG   = os.path.join(BASE_OUT, 'images')
OUT_LBL   = os.path.join(BASE_OUT, 'labels')
SPLITS    = ['train', 'val', 'test']

# Создаём папки
for split in SPLITS:
    os.makedirs(os.path.join(OUT_IMG, split), exist_ok=True)
    os.makedirs(os.path.join(OUT_LBL, split), exist_ok=True)

# Собираем все изображения
all_imgs = glob.glob(os.path.join(IMG_DIR, '*.jpg'))
all_imgs.sort()
random.seed(SEED)
random.shuffle(all_imgs)

n = len(all_imgs)
n_train = int(n * TRAIN_RATIO)
n_val   = int(n * VAL_RATIO)

# Определяем диапазоны
ranges = {
    'train': all_imgs[:n_train],
    'val':   all_imgs[n_train:n_train + n_val],
    'test':  all_imgs[n_train + n_val:]
}

# Копируем файлы
for split, img_list in ranges.items():
    for img_path in img_list:
        fn = os.path.basename(img_path)
        name, _ = os.path.splitext(fn)
        lbl_path = os.path.join(LBL_DIR, f'{name}.txt')
        copy2(img_path, os.path.join(OUT_IMG, split, fn))
        if os.path.exists(lbl_path):
            copy2(lbl_path, os.path.join(OUT_LBL, split, f'{name}.txt'))
        else:
            print(f'Внимание: нет аннотации для {fn}')


In [4]:
!pip install albumentations opencv-python


Collecting albumentations
  Downloading albumentations-2.0.8-py3-none-any.whl.metadata (43 kB)
Collecting scipy>=1.10.0 (from albumentations)
  Downloading scipy-1.15.3-cp312-cp312-win_amd64.whl.metadata (60 kB)
Collecting pydantic>=2.9.2 (from albumentations)
  Downloading pydantic-2.11.7-py3-none-any.whl.metadata (67 kB)
Collecting albucore==0.0.24 (from albumentations)
  Downloading albucore-0.0.24-py3-none-any.whl.metadata (5.3 kB)
Collecting opencv-python-headless>=4.9.0.80 (from albumentations)
  Downloading opencv_python_headless-4.11.0.86-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting stringzilla>=3.10.4 (from albucore==0.0.24->albumentations)
  Downloading stringzilla-3.12.5-cp312-cp312-win_amd64.whl.metadata (81 kB)
Collecting simsimd>=5.9.2 (from albucore==0.0.24->albumentations)
  Downloading simsimd-6.4.9-cp312-cp312-win_amd64.whl.metadata (67 kB)
Collecting annotated-types>=0.6.0 (from pydantic>=2.9.2->albumentations)
  Downloading annotated_types-0.7.0-py3-none-any.

In [5]:
import os
import cv2
import glob
import yaml
import random
from albumentations import (
    Compose, RandomRotate90, HorizontalFlip,
    RandomBrightnessContrast, HueSaturationValue,
    ShiftScaleRotate, GaussianBlur, BboxParams
)

# Параметры
SRC_IMG_DIR   = 'dataset/images/train'
SRC_LBL_DIR   = 'dataset/labels/train'
OUT_IMG_DIR   = 'augmented/images/train'
OUT_LBL_DIR   = 'augmented/labels/train'
N_AUG_PER_IMG = 3     #  аугментаций на каждый кадр
SEED          = 42

random.seed(SEED)
os.makedirs(OUT_IMG_DIR, exist_ok=True)
os.makedirs(OUT_LBL_DIR, exist_ok=True)

transform = Compose([
    RandomRotate90(p=0.5),
    HorizontalFlip(p=0.5),
    ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.1, rotate_limit=15, p=0.5),
    RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
    HueSaturationValue(hue_shift_limit=20, sat_shift_limit=30, val_shift_limit=20, p=0.5),
    GaussianBlur(blur_limit=(3, 7), p=0.3),
],
bbox_params=BboxParams(
    format='yolo',
    label_fields=['class_labels'],
    min_visibility=0.3
))

def read_yolo_annotation(path):
    bboxes, labels = [], []
    with open(path, 'r') as f:
        for line in f:
            parts = line.strip().split()
            cls_id = int(parts[0])
            x_center, y_center, w, h = map(float, parts[1:])
            bboxes.append([x_center, y_center, w, h])
            labels.append(cls_id)
    return bboxes, labels

def save_yolo_annotation(path, bboxes, labels):
    with open(path, 'w') as f:
        for bbox, cls_id in zip(bboxes, labels):
            x_center, y_center, w, h = bbox
            f.write(f"{cls_id} {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}\n")

all_imgs = glob.glob(os.path.join(SRC_IMG_DIR, '*.jpg'))
for img_path in all_imgs:
    name = os.path.splitext(os.path.basename(img_path))[0]
    lbl_path = os.path.join(SRC_LBL_DIR, name + '.txt')
    if not os.path.exists(lbl_path):
        print(f"Пропускаем {name}, нет .txt")
        continue

    image = cv2.imread(img_path)
    h, w = image.shape[:2]
    bboxes, class_labels = read_yolo_annotation(lbl_path)

    # Для каждой аугментации
    for i in range(N_AUG_PER_IMG):
        augmented = transform(
            image=image,
            bboxes=bboxes,
            class_labels=class_labels
        )
        img_aug = augmented['image']
        bboxes_aug = augmented['bboxes']
        labels_aug = augmented['class_labels']

        if len(bboxes_aug) == 0:
            continue

        out_name = f"{name}_aug{i:02d}.jpg"
        out_img_path = os.path.join(OUT_IMG_DIR, out_name)
        out_lbl_path = os.path.join(OUT_LBL_DIR, f"{name}_aug{i:02d}.txt")

        cv2.imwrite(out_img_path, img_aug)
        save_yolo_annotation(out_lbl_path, bboxes_aug, labels_aug)

print("Аугментация завершена.")


  original_init(self, **validated_kwargs)


Аугментация завершена.


In [7]:
import os
import glob
import shutil

orig_img_dir = 'dataset/images/train'
orig_lbl_dir = 'dataset/labels/train'
aug_img_dir  = 'augmented/images/train'
aug_lbl_dir  = 'augmented/labels/train'

merged_base   = 'merged'
merged_img_dir = os.path.join(merged_base, 'images', 'train')
merged_lbl_dir = os.path.join(merged_base, 'labels', 'train')

os.makedirs(merged_img_dir, exist_ok=True)
os.makedirs(merged_lbl_dir, exist_ok=True)

def copy_all(src_pattern, dst_dir):
    for src_path in glob.glob(src_pattern):
        fn = os.path.basename(src_path)
        dst_path = os.path.join(dst_dir, fn)
        if os.path.exists(dst_path):
            continue
        shutil.copy2(src_path, dst_path)

copy_all(os.path.join(orig_img_dir, '*.jpg'), merged_img_dir)
copy_all(os.path.join(orig_lbl_dir, '*.txt'), merged_lbl_dir)

copy_all(os.path.join(aug_img_dir, '*.jpg'), merged_img_dir)
copy_all(os.path.join(aug_lbl_dir, '*.txt'), merged_lbl_dir)

print(f"Готово! В папке `{merged_base}` теперь {len(os.listdir(merged_img_dir))} изображений и "
      f"{len(os.listdir(merged_lbl_dir))} аннотаций.")


Готово! В папке `merged` теперь 280 изображений и 280 аннотаций.


In [8]:
from ultralytics import YOLO

# 1. Загружаем модель (с нуля или из предобученных весов)
#  - ''  → обучение с нуля
#  - 'yolo11s.pt' → дообучение от предобученной small-модели
model = YOLO('', task='detect', cfg='yolov11s.yaml')


Creating new Ultralytics Settings v0.0.6 file  
View Ultralytics Settings with 'yolo settings' or at 'C:\Users\User\AppData\Roaming\Ultralytics\settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


TypeError: YOLO.__init__() got an unexpected keyword argument 'cfg'

In [None]:
# 2. Запускаем обучение
results = model.train(
    data='../data.yaml',   # путь к data.yaml
    epochs=100,            # количество эпох
    imgsz=640,             # размер входа
    batch=16,              # batch size
    device=0,              # GPU0, можно 'cpu'
    name='my_experiment',  # имя экспириента
    lr0=0.01,              # начальный learning rate
    optimizer='SGD',       # оптимизатор
    momentum=0.937,
    weight_decay=0.0005,
    save=True              # сохранять чекпоинты
)

# 3. После тренировки — валидация (опционально)
metrics = model.val(
    data='../data.yaml',
    weights=results.best,  # лучший чекпоинт
    imgsz=640,
    device=0
)

print(metrics)


In [18]:
import torch, torchvision, torchaudio
print("Torch:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
print("CUDA version:", torch.version.cuda)
if torch.cuda.is_available():
    print("Device name:", torch.cuda.get_device_name(0))
print("TorchVision:", torchvision.__version__)
print("Torchaudio:", torchaudio.__version__)


Torch: 2.7.1+cpu
CUDA available: False
CUDA version: None
TorchVision: 0.22.1+cu118
Torchaudio: 2.7.1+cu118


In [11]:
from ultralytics import YOLO

# Загружаем модель
model = YOLO("yolo11s.pt")

# Обучаем
results = model.train(
    data="data.yaml",
    epochs=100,
    imgsz=640,
    batch=16,
    device=1,
    project="runs/train",
    name="food_exp_py"
)

# Сохраняем лучший чекпоинт вручную (если нужно)
print("Лучшие веса:", results.best)


Ultralytics 8.3.158  Python-3.12.1 torch-2.7.1+cpu 


ValueError: Invalid CUDA 'device=1' requested. Use 'device=cpu' or pass valid CUDA device(s) if available, i.e. 'device=0' or 'device=0,1,2,3' for Multi-GPU.

torch.cuda.is_available(): False
torch.cuda.device_count(): 0
os.environ['CUDA_VISIBLE_DEVICES']: 1
See https://pytorch.org/get-started/locally/ for up-to-date torch install instructions if no CUDA devices are seen by torch.
