In [1]:
"""
Воссоздать окружение startup:
conda env create -f startup_environment.yaml

create_train_val_test_dirs: 
Создает директории для выборок train, val и test и копирует изображения в соответствующие папки
Проверяет на валидность изображения

train_yolo_model:
Проводит обучение yolov8, сохраняет последние и наилучшие веса в папке runs/classify/train/weights/
"""

import os
import shutil
from PIL import Image
import torch
from ultralytics import YOLO
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

In [2]:
# Destination folder for every dataset split, keyed by split name.
output_dirs = {split: f"data/{split}" for split in ("train", "val", "test")}

# Names of the per-class sub-folders (one folder per label).
classes = ["Cat", "Dog"]

# Format names accepted without conversion (passed to is_image_valid).
supported_formats = (
    'jpg', 'jpeg', 'png', 'bmp', 'tif',
    'tiff', 'webp', 'pfm', 'dng', 'mpo',
)

# File extensions that must be re-encoded first (passed to convert_images).
unsupported_formats = ('gif', 'psd')


In [3]:
def convert_images(source_dir, unsupported_formats):
    """
    Convert images stored in unsupported formats to JPEG, in place.

    Walks ``source_dir`` recursively. Every file whose extension
    (case-insensitive) is listed in ``unsupported_formats`` is re-encoded as an
    RGB JPEG next to the original (same base name, ``.jpg`` extension) and the
    original file is deleted once the JPEG has been written.

    source_dir: Path to the directory that contains the images
    unsupported_formats: Collection of lowercase extensions without the dot
        (e.g. ``('gif', 'psd')``) that must be converted

    The conversion is best-effort: a file that cannot be converted is reported
    via ``print`` and left untouched instead of aborting the whole walk.
    Returns None.
    """
    for root, _, files in os.walk(source_dir):
        for file in files:
            # splitext (unlike split('.')) yields an empty extension for names
            # without a dot, so a file literally called "gif" is not matched.
            file_base, file_ext = os.path.splitext(file)
            if not file_ext or file_ext[1:].lower() not in unsupported_formats:
                continue

            file_path = os.path.join(root, file)
            new_file_path = os.path.join(root, file_base + '.jpg')

            # Never clobber an unrelated image that already owns the target name.
            if os.path.exists(new_file_path):
                print(f"convert_images: skipped {file_path}, {new_file_path} already exists")
                continue

            try:
                # The context manager closes the file handle before os.remove;
                # an open handle makes the removal fail on Windows.
                with Image.open(file_path) as img:
                    img.convert('RGB').save(new_file_path, "JPEG")
            except Exception as exc:  # best-effort: report and keep going
                print(f"convert_images: could not convert {file_path}: {exc}")
                # Drop a partially written JPEG (the target did not exist before).
                if os.path.exists(new_file_path):
                    try:
                        os.remove(new_file_path)
                    except OSError:
                        pass
                continue

            try:
                os.remove(file_path)
            except OSError as exc:
                print(f"convert_images: converted but could not remove {file_path}: {exc}")

def is_image_valid(image_path, supported_formats):
    """
    Check that a file is a readable image of a supported format and usable size.

    image_path: Path to the image file
    supported_formats: Collection of lowercase format names that are accepted;
        compared against the lower-cased format reported by PIL for the file
        (PIL reports e.g. "JPEG"/"TIFF", so the 'jpeg'/'tiff' entries are the
        ones that match, not 'jpg'/'tif')

    Returns True only if the image opens, its detected format is in
    ``supported_formats``, both sides are at least 10 pixels and
    ``Image.verify()`` raises no error. Any exception (missing file,
    unreadable or corrupt data, ...) yields False.
    """
    try:
        # The context manager guarantees the file handle is released on every
        # path, including the early "return False" exits.
        with Image.open(image_path) as img:
            # img.format may be None; treat that as "unsupported".
            if (img.format or "").lower() not in supported_formats:
                return False
            width, height = img.size
            if width < 10 or height < 10:
                return False
            img.verify()
        return True
    except Exception:
        return False

def create_train_val_test_dirs(data_dir, output_dirs, classes,supported_formats, unsupported_formats, test_size=0.2, random_state=42):
    """
    Build train/val/test folders and copy the valid images of every class into them.

    data_dir: Path to the source folder; must contain one sub-folder per class
    output_dirs: Dict with the destination paths under the keys "train", "val" and "test"
    classes: List of class (sub-folder) names
    supported_formats: Image formats accepted by is_image_valid
    unsupported_formats: File extensions that convert_images re-encodes to JPEG first
    test_size: Share of the data used for validation and, separately, for testing
        (``test_size * 2`` is held out and then halved, so 0.2 gives a 60/20/20 split)
    random_state: Seed for the random splits

    Side effects: convert_images rewrites/deletes files of unsupported formats
    inside ``data_dir``. Existing files in the output folders are not removed,
    so re-running with other parameters can leave stale images behind.
    A failed copy is reported via ``print`` and skipped.
    """
    for dir_name in output_dirs.values():
        os.makedirs(dir_name, exist_ok=True)

    for class_name in classes:
        class_dir = os.path.join(data_dir, class_name)

        # Convert BEFORE listing: conversion deletes the originals and creates
        # new .jpg files, which a listing taken earlier would never include.
        convert_images(class_dir, unsupported_formats)

        # os.listdir returns entries in arbitrary order; sorting makes the
        # split reproducible for a given random_state.
        list_images = sorted(
            name for name in os.listdir(class_dir)
            if os.path.isfile(os.path.join(class_dir, name))
        )

        valid_images = [img for img in list_images if is_image_valid(os.path.join(class_dir, img), supported_formats)]

        # Hold out val+test together, then cut that hold-out set in half.
        train_images, val_test_images = train_test_split(valid_images, test_size=test_size * 2, random_state=random_state)
        val_images, test_images = train_test_split(val_test_images, test_size=0.5, random_state=random_state)

        for subset_name, imgs in zip(["train", "val", "test"], [train_images, val_images, test_images]):
            output_class_dir = os.path.join(output_dirs[subset_name], class_name)
            os.makedirs(output_class_dir, exist_ok=True)
            for img in imgs:
                try:
                    shutil.copy(os.path.join(class_dir, img), os.path.join(output_class_dir, img))
                except OSError as exc:
                    # Best-effort copy: report the failure instead of hiding it.
                    print(f"create_train_val_test_dirs: could not copy {img} to {output_class_dir}: {exc}")
                    


def train_yolo_model(model, data_path, epochs=50, patience = 5, img_size=640, batch_size=16, save_dir="runs/train", verbose=True, dropout=0.5):
    """
    Train a YOLOv8 model with early stopping and return the training results.

    model: Model object exposing a ``train(**kwargs)`` method (a YOLOv8 model)
    data_path: Path to the training data
    epochs: Maximum number of training epochs
    patience: Number of epochs without improvement before early stopping
    img_size: Image size used for training (forwarded as ``imgsz``)
    batch_size: Batch size used for training (forwarded as ``batch``)
    save_dir: Directory for models and logs (forwarded as ``save_dir``)
    verbose: Controls batch-level log output
    dropout: Dropout rate

    Returns whatever ``model.train`` returns.

    NOTE(review): the run recorded in this notebook logged its results to
    runs/classify/train although save_dir was left at "runs/train" - it looks
    like the trainer does not honour this argument; confirm against the
    Ultralytics documentation.
    """
    # Translate this function's parameter names into the keyword names
    # that model.train expects.
    train_kwargs = {
        "data": data_path,
        "epochs": epochs,
        "patience": patience,
        "imgsz": img_size,
        "batch": batch_size,
        "save_dir": save_dir,
        "verbose": verbose,
        "dropout": dropout,
    }
    return model.train(**train_kwargs)

In [4]:
# 1) Split the raw Kaggle cats-vs-dogs images into data/train, data/val and data/test.
create_train_val_test_dirs(
    data_dir='kagglecatsanddogs_5340/PetImages',
    output_dirs=output_dirs,
    classes=classes,
    supported_formats=supported_formats,
    unsupported_formats=unsupported_formats,
)

# 2) Load the yolov8n-cls.pt weights and train on the prepared folder tree.
model = YOLO("yolov8n-cls.pt")
results = train_yolo_model(model=model, data_path="data")

New https://pypi.org/project/ultralytics/8.2.45 available  Update with 'pip install -U ultralytics'
Ultralytics YOLOv8.2.42  Python-3.9.19 torch-2.2.2 CUDA:0 (NVIDIA GeForce RTX 3050, 8192MiB)
[34m[1mengine\trainer: [0mtask=classify, mode=train, model=yolov8n-cls.pt, data=data, epochs=50, time=None, patience=5, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.5, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_fr

[34m[1mtrain: [0mScanning C:\Users\буся\Desktop\startup\data\train... 14967 images, 0 corrupt: 100%|██████████| 14967/14967 [00:0[0m




[34m[1mval: [0mScanning C:\Users\буся\Desktop\startup\data\val... 4989 images, 0 corrupt: 100%|██████████| 4989/4989 [00:00<?, ?i[0m






[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m SGD(lr=0.01, momentum=0.9) with parameter groups 26 weight(decay=0.0), 27 weight(decay=0.0005), 27 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added 
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mruns\classify\train[0m
Starting training for 50 epochs...

      Epoch    GPU_mem       loss  Instances       Size


       1/50      1.46G     0.4511          7        640: 100%|██████████| 936/936 [01:32<00:00, 10.17it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 156/156 [00:18<00:00,  8.60it/s]

                   all      0.936          1






      Epoch    GPU_mem       loss  Instances       Size


       2/50      1.44G     0.2479          7        640: 100%|██████████| 936/936 [01:15<00:00, 12.43it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 156/156 [00:18<00:00,  8.42it/s]

                   all      0.967          1






      Epoch    GPU_mem       loss  Instances       Size


       3/50      1.44G     0.2279          7        640: 100%|██████████| 936/936 [01:12<00:00, 12.87it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 156/156 [00:19<00:00,  8.10it/s]

                   all      0.957          1






      Epoch    GPU_mem       loss  Instances       Size


       4/50      1.44G     0.1985          7        640: 100%|██████████| 936/936 [01:12<00:00, 12.97it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 156/156 [00:19<00:00,  7.97it/s]

                   all      0.964          1






      Epoch    GPU_mem       loss  Instances       Size


       5/50      1.44G     0.1475          7        640: 100%|██████████| 936/936 [01:12<00:00, 12.93it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 156/156 [00:20<00:00,  7.60it/s]

                   all      0.977          1






      Epoch    GPU_mem       loss  Instances       Size


       6/50      1.44G     0.1239          7        640: 100%|██████████| 936/936 [01:13<00:00, 12.70it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 156/156 [00:20<00:00,  7.47it/s]

                   all      0.975          1






      Epoch    GPU_mem       loss  Instances       Size


       7/50      1.44G     0.1135          7        640: 100%|██████████| 936/936 [01:13<00:00, 12.81it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 156/156 [00:20<00:00,  7.73it/s]

                   all      0.976          1






      Epoch    GPU_mem       loss  Instances       Size


       8/50      1.44G     0.1068          7        640: 100%|██████████| 936/936 [01:13<00:00, 12.82it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 156/156 [00:20<00:00,  7.57it/s]

                   all      0.975          1






      Epoch    GPU_mem       loss  Instances       Size


       9/50      1.44G     0.1009          7        640: 100%|██████████| 936/936 [01:12<00:00, 12.84it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 156/156 [00:20<00:00,  7.64it/s]

                   all      0.981          1






      Epoch    GPU_mem       loss  Instances       Size


      10/50      1.44G    0.09486          7        640: 100%|██████████| 936/936 [01:12<00:00, 12.85it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 156/156 [00:20<00:00,  7.73it/s]

                   all      0.981          1






      Epoch    GPU_mem       loss  Instances       Size


      11/50      1.44G    0.09056          7        640: 100%|██████████| 936/936 [01:12<00:00, 12.83it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 156/156 [00:20<00:00,  7.48it/s]

                   all       0.98          1






      Epoch    GPU_mem       loss  Instances       Size


      12/50      1.44G     0.0797          7        640: 100%|██████████| 936/936 [01:12<00:00, 12.84it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 156/156 [00:20<00:00,  7.49it/s]

                   all      0.984          1






      Epoch    GPU_mem       loss  Instances       Size


      13/50      1.44G    0.07983          7        640: 100%|██████████| 936/936 [01:13<00:00, 12.81it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 156/156 [00:20<00:00,  7.61it/s]

                   all      0.984          1






      Epoch    GPU_mem       loss  Instances       Size


      14/50      1.44G     0.0842          7        640: 100%|██████████| 936/936 [01:13<00:00, 12.72it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 156/156 [00:20<00:00,  7.57it/s]

                   all      0.982          1






      Epoch    GPU_mem       loss  Instances       Size


      15/50      1.44G    0.07393          7        640: 100%|██████████| 936/936 [01:12<00:00, 12.82it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 156/156 [00:20<00:00,  7.55it/s]

                   all      0.985          1






      Epoch    GPU_mem       loss  Instances       Size


      16/50      1.44G    0.06629          7        640: 100%|██████████| 936/936 [01:15<00:00, 12.47it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 156/156 [00:21<00:00,  7.37it/s]

                   all      0.982          1






      Epoch    GPU_mem       loss  Instances       Size


      17/50      1.44G    0.06665          7        640: 100%|██████████| 936/936 [01:13<00:00, 12.67it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 156/156 [00:21<00:00,  7.40it/s]

                   all      0.985          1






      Epoch    GPU_mem       loss  Instances       Size


      18/50      1.44G    0.06629          7        640: 100%|██████████| 936/936 [01:13<00:00, 12.68it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 156/156 [00:19<00:00,  8.02it/s]

                   all      0.985          1






      Epoch    GPU_mem       loss  Instances       Size


      19/50      1.44G    0.06229          7        640: 100%|██████████| 936/936 [01:12<00:00, 12.94it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 156/156 [00:20<00:00,  7.56it/s]

                   all      0.985          1






      Epoch    GPU_mem       loss  Instances       Size


      20/50      1.44G    0.06297          7        640: 100%|██████████| 936/936 [01:13<00:00, 12.74it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 156/156 [00:21<00:00,  7.43it/s]

                   all      0.986          1






      Epoch    GPU_mem       loss  Instances       Size


      21/50      1.44G    0.05631          7        640: 100%|██████████| 936/936 [01:13<00:00, 12.71it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 156/156 [00:20<00:00,  7.73it/s]

                   all      0.985          1






      Epoch    GPU_mem       loss  Instances       Size


      22/50      1.44G    0.05391          7        640: 100%|██████████| 936/936 [01:12<00:00, 12.85it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 156/156 [00:20<00:00,  7.48it/s]

                   all      0.988          1






      Epoch    GPU_mem       loss  Instances       Size


      23/50      1.44G    0.05818          7        640: 100%|██████████| 936/936 [01:13<00:00, 12.73it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 156/156 [00:20<00:00,  7.71it/s]

                   all      0.985          1






      Epoch    GPU_mem       loss  Instances       Size


      24/50      1.44G    0.05031          7        640: 100%|██████████| 936/936 [01:12<00:00, 12.85it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 156/156 [00:20<00:00,  7.66it/s]

                   all      0.987          1






      Epoch    GPU_mem       loss  Instances       Size


      25/50      1.44G    0.05344          7        640: 100%|██████████| 936/936 [01:12<00:00, 12.83it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 156/156 [00:20<00:00,  7.57it/s]

                   all      0.989          1






      Epoch    GPU_mem       loss  Instances       Size


      26/50      1.44G    0.04881          7        640: 100%|██████████| 936/936 [01:12<00:00, 12.85it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 156/156 [00:20<00:00,  7.52it/s]

                   all      0.989          1






      Epoch    GPU_mem       loss  Instances       Size


      27/50      1.44G    0.04472          7        640: 100%|██████████| 936/936 [01:12<00:00, 12.84it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 156/156 [00:20<00:00,  7.66it/s]

                   all       0.99          1






      Epoch    GPU_mem       loss  Instances       Size


      28/50      1.44G     0.0501          7        640: 100%|██████████| 936/936 [01:12<00:00, 12.85it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 156/156 [00:20<00:00,  7.64it/s]

                   all      0.988          1






      Epoch    GPU_mem       loss  Instances       Size


      29/50      1.44G    0.04071          7        640: 100%|██████████| 936/936 [01:12<00:00, 12.85it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 156/156 [00:20<00:00,  7.71it/s]

                   all      0.989          1






      Epoch    GPU_mem       loss  Instances       Size


      30/50      1.44G    0.03962          7        640: 100%|██████████| 936/936 [01:13<00:00, 12.80it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 156/156 [00:20<00:00,  7.62it/s]

                   all      0.988          1






      Epoch    GPU_mem       loss  Instances       Size


      31/50      1.44G     0.0419          7        640: 100%|██████████| 936/936 [01:12<00:00, 12.83it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 156/156 [00:20<00:00,  7.63it/s]

                   all      0.991          1






      Epoch    GPU_mem       loss  Instances       Size


      32/50      1.44G    0.03871          7        640: 100%|██████████| 936/936 [01:12<00:00, 12.85it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 156/156 [00:20<00:00,  7.56it/s]

                   all      0.989          1






      Epoch    GPU_mem       loss  Instances       Size


      33/50      1.44G    0.04046          7        640: 100%|██████████| 936/936 [01:12<00:00, 12.85it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 156/156 [00:20<00:00,  7.55it/s]

                   all      0.989          1






      Epoch    GPU_mem       loss  Instances       Size


      34/50      1.44G     0.0353          7        640: 100%|██████████| 936/936 [01:12<00:00, 12.86it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 156/156 [00:21<00:00,  7.41it/s]

                   all      0.989          1






      Epoch    GPU_mem       loss  Instances       Size


      35/50      1.44G    0.03695          7        640: 100%|██████████| 936/936 [01:13<00:00, 12.65it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 156/156 [00:20<00:00,  7.60it/s]

                   all       0.99          1






      Epoch    GPU_mem       loss  Instances       Size


      36/50      1.44G    0.03094          7        640: 100%|██████████| 936/936 [01:12<00:00, 12.83it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 156/156 [00:20<00:00,  7.43it/s]

                   all       0.99          1
[34m[1mEarlyStopping: [0mTraining stopped early as no improvement observed in last 5 epochs. Best results observed at epoch 31, best model saved as best.pt.
To update EarlyStopping(patience=5) pass a new patience value, i.e. `patience=300` or use `patience=0` to disable EarlyStopping.






36 epochs completed in 0.945 hours.
Optimizer stripped from runs\classify\train\weights\last.pt, 3.0MB
Optimizer stripped from runs\classify\train\weights\best.pt, 3.0MB

Validating runs\classify\train\weights\best.pt...
Ultralytics YOLOv8.2.42  Python-3.9.19 torch-2.2.2 CUDA:0 (NVIDIA GeForce RTX 3050, 8192MiB)
YOLOv8n-cls summary (fused): 73 layers, 1437442 parameters, 0 gradients, 3.3 GFLOPs
[34m[1mtrain:[0m C:\Users\\Desktop\startup\data\train... found 14967 images in 2 classes  
[34m[1mval:[0m C:\Users\\Desktop\startup\data\val... found 4989 images in 2 classes  
[34m[1mtest:[0m C:\Users\\Desktop\startup\data\test... found 4989 images in 2 classes  


               classes   top1_acc   top5_acc: 100%|██████████| 156/156 [00:21<00:00,  7.36it/s]


                   all      0.991          1
Speed: 0.8ms preprocess, 1.1ms inference, 0.0ms loss, 0.0ms postprocess per image
Results saved to [1mruns\classify\train[0m
Results saved to [1mruns\classify\train[0m
