# Исследование моделей обнаружения и распознавания объектов с использованием Ultralytics (YOLOv11)

# 📌 Установка необходимых библиотек

In [1]:
!pip install ultralytics
!pip install roboflow


Collecting ultralytics
  Downloading ultralytics-8.3.114-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  Downloading n

# 📌 Подключение к Google Drive и распаковка архива

In [2]:
from google.colab import drive
import zipfile, os, shutil
import os
from shutil import move

drive.mount('/content/drive')

# Пути
zip_path = '/content/drive/MyDrive/Colab Notebooks/AiDatasets/smokedata.zip'
extract_path = '/content/smokedata_raw'
final_path = '/content/smokedata'

# Распаковываем
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

# Создаем финальную структуру директорий
for subset in ['train', 'val', 'test']:
    os.makedirs(os.path.join(final_path, 'images', subset), exist_ok=True)
    os.makedirs(os.path.join(final_path, 'labels', subset), exist_ok=True)

# Функция для перемещения изображений и меток
def move_data(src_folder, dst_img_folder, dst_label_folder):
    for fname in os.listdir(src_folder):
        full_path = os.path.join(src_folder, fname)
        if fname.endswith(('.jpg', '.jpeg', '.png')):
            shutil.move(full_path, os.path.join(dst_img_folder, fname))
        elif fname.endswith('.txt'):
            shutil.move(full_path, os.path.join(dst_label_folder, fname))

# Переносим данные из исходных папок
move_data(os.path.join(extract_path, 'Training', 'Training'),
          os.path.join(final_path, 'images', 'train'),
          os.path.join(final_path, 'labels', 'train'))

move_data(os.path.join(extract_path, 'Validation', 'Validation'),
          os.path.join(final_path, 'images', 'val'),
          os.path.join(final_path, 'labels', 'val'))

move_data(os.path.join(extract_path, 'Testing', 'Testing'),
          os.path.join(final_path, 'images', 'test'),
          os.path.join(final_path, 'labels', 'test'))

Mounted at /content/drive


In [3]:
import zipfile
import os
# Destination directory
extract_dir = 'smokedata_raw'

# Create the destination directory if it doesn't exist
os.makedirs(extract_dir, exist_ok=True)

# Extract all contents of the ZIP file
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_dir)

print(f"Archive '{zip_path}' successfully extracted to '{extract_dir}'")

Archive '/content/drive/MyDrive/Colab Notebooks/AiDatasets/smokedata.zip' successfully extracted to 'smokedata_raw'


# 1. Выбор начальных условий
# a. Выбор набора данных
#   Выбран набор данных курящих и некурящих людей. Это имеет прикладное значение, например, для мониторинга нарушений правил в общественных местах или для медицинских анализов.
# b. Метрики качества
#   Используемые метрики: mAP@0.5, precision, recall. Они лучше всего подходят для оценки моделей детекции объектов.

# 📌 Импорт и подготовка

In [4]:
from ultralytics import YOLO
from sklearn.metrics import classification_report
import shutil

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


# 📌 Подготовка данных в формате YOLO
# Папки должны быть следующего вида:
# smokedata/
# ├── images/
# │   ├── train/
# │   ├── val/
# │   └── test/
# └── labels/
#     ├── train/
#     ├── val/
#     └── test/

# 🧱 Создание структуры для YOLO

# 📄 Создание файла конфигурации данных для обучения

In [5]:
with open("/content/smokedata/data.yaml", "w") as f:
    f.write("""
path: /content/smokedata
train: images/train
val: images/val
test: images/test
nc: 2
names: ['non-smoker', 'smoker']
""")


# 2. Создание бейзлайна и оценка качества
# a. Обучение моделей из семейства YOLOv11 (YOLOv8 в ultralytics)

# Обучение сверточной модели YOLOv8n (наиболее легкая для Colab)

In [6]:
model_cnn = YOLO('yolov8n.pt')
model_cnn.train(data='/content/smokedata/data.yaml', epochs=3, imgsz=640, batch=16, name='baseline_cnn')

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt'...


100%|██████████| 6.25M/6.25M [00:00<00:00, 113MB/s]


Ultralytics 8.3.114 🚀 Python-3.11.12 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=/content/smokedata/data.yaml, epochs=3, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=baseline_cnn, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, show_bo

100%|██████████| 755k/755k [00:00<00:00, 26.6MB/s]

Overriding model.yaml nc=80 with nc=2

                   from  n    params  module                                       arguments                     
  0                  -1  1       464  ultralytics.nn.modules.conv.Conv             [3, 16, 3, 2]                 
  1                  -1  1      4672  ultralytics.nn.modules.conv.Conv             [16, 32, 3, 2]                
  2                  -1  1      7360  ultralytics.nn.modules.block.C2f             [32, 32, 1, True]             
  3                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  4                  -1  2     49664  ultralytics.nn.modules.block.C2f             [64, 64, 2, True]             
  5                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  6                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  7                  -1  1    295424  ultralytics




Model summary: 129 layers, 3,011,238 parameters, 3,011,222 gradients, 8.2 GFLOPs

Transferred 319/355 items from pretrained weights
Freezing layer 'model.22.dfl.conv.weight'
[34m[1mAMP: [0mrunning Automatic Mixed Precision (AMP) checks...
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt to 'yolo11n.pt'...


100%|██████████| 5.35M/5.35M [00:00<00:00, 98.7MB/s]


[34m[1mAMP: [0mchecks passed ✅
[34m[1mtrain: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 1287.4±725.4 MB/s, size: 71.4 KB)


[34m[1mtrain: [0mScanning /content/smokedata/labels/train... 0 images, 716 backgrounds, 0 corrupt: 100%|██████████| 716/716 [00:00<00:00, 2287.65it/s]

[34m[1mtrain: [0mNew cache created: /content/smokedata/labels/train.cache





[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 564.6±480.3 MB/s, size: 75.6 KB)


[34m[1mval: [0mScanning /content/smokedata/labels/val... 0 images, 180 backgrounds, 0 corrupt: 100%|██████████| 180/180 [00:00<00:00, 1450.28it/s]

[34m[1mval: [0mNew cache created: /content/smokedata/labels/val.cache





Plotting labels to runs/detect/baseline_cnn/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001667, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 2 dataloader workers
Logging results to [1mruns/detect/baseline_cnn[0m
Starting training for 3 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/3      1.95G          0      110.1          0          0        640: 100%|██████████| 45/45 [00:13<00:00,  3.25it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 6/6 [00:01<00:00,  4.16it/s]

                   all        180          0          0          0          0          0



  i = smooth(f1_curve.mean(0), 0.1).argmax()  # max F1 index
  ret = um.true_divide(



      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        2/3      2.44G          0      84.59          0          0        640: 100%|██████████| 45/45 [00:11<00:00,  3.77it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 6/6 [00:01<00:00,  4.04it/s]

                   all        180          0          0          0          0          0



  i = smooth(f1_curve.mean(0), 0.1).argmax()  # max F1 index
  ret = um.true_divide(



      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        3/3      2.45G          0      77.15          0          0        640: 100%|██████████| 45/45 [00:11<00:00,  3.90it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 6/6 [00:01<00:00,  3.87it/s]

                   all        180          0          0          0          0          0



  i = smooth(f1_curve.mean(0), 0.1).argmax()  # max F1 index
  ret = um.true_divide(



3 epochs completed in 0.012 hours.
Optimizer stripped from runs/detect/baseline_cnn/weights/last.pt, 6.2MB
Optimizer stripped from runs/detect/baseline_cnn/weights/best.pt, 6.2MB

Validating runs/detect/baseline_cnn/weights/best.pt...
Ultralytics 8.3.114 🚀 Python-3.11.12 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
Model summary (fused): 72 layers, 3,006,038 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 6/6 [00:04<00:00,  1.47it/s]
  ax.plot(px, py.mean(1), linewidth=3, color="blue", label=f"all classes {ap[:, 0].mean():.3f} mAP@0.5")
  ret = ret.dtype.type(ret / rcount)
  y = smooth(py.mean(0), 0.1)
  ret = um.true_divide(
  y = smooth(py.mean(0), 0.1)
  ret = um.true_divide(
  y = smooth(py.mean(0), 0.1)
  ret = um.true_divide(


                   all        180          0          0          0          0          0


  i = smooth(f1_curve.mean(0), 0.1).argmax()  # max F1 index


Speed: 0.3ms preprocess, 3.6ms inference, 0.0ms loss, 12.4ms postprocess per image
Results saved to [1mruns/detect/baseline_cnn[0m


ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([], dtype=int64)
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7be99769be10>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,    0.046046,    0.047047,
        

In [7]:
!cat /content/smokedata/data.yaml


path: /content/smokedata
train: images/train
val: images/val
test: images/test
nc: 2
names: ['non-smoker', 'smoker']


# Обучение трансформерной модели YOLOv8x (условно трансформерная архитектура)

In [8]:
import os
import cv2
import numpy as np
from glob import glob

base_path = '/content/smokedata'
splits = ['train', 'val', 'test']
CLASS_ID = 0  # Предположим, у нас один класс — дым

def create_yolo_label(mask_path, label_path, img_shape):
    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    contours, _ = cv2.findContours((mask > 127).astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    h, w = img_shape[:2]
    lines = []

    for contour in contours:
        x, y, bw, bh = cv2.boundingRect(contour)
        # YOLO format
        x_c = (x + bw / 2) / w
        y_c = (y + bh / 2) / h
        bw /= w
        bh /= h
        lines.append(f"{CLASS_ID} {x_c:.6f} {y_c:.6f} {bw:.6f} {bh:.6f}")

    with open(label_path, 'w') as f:
        f.write("\n".join(lines))

for split in splits:
    image_dir = os.path.join(base_path, 'images', split)
    mask_dir = os.path.join(base_path, 'masks', split)
    label_dir = os.path.join(base_path, 'labels', split)
    os.makedirs(label_dir, exist_ok=True)

    for mask_path in glob(f"{mask_dir}/*.png"):
        file_stem = os.path.basename(mask_path).replace('.png', '')  # пример: 'img001'
        image_path = os.path.join(image_dir, f"{file_stem}.jpg")
        label_path = os.path.join(label_dir, f"{file_stem}.txt")

        if not os.path.exists(image_path):
            print(f"⚠️ Нет изображения для маски: {image_path}")
            continue

        img = cv2.imread(image_path)
        create_yolo_label(mask_path, label_path, img.shape)
        print(f"✅ Аннотация создана: {label_path}")


In [9]:
import glob

# Выведем все доступные изображения в папке валидации
images = glob.glob('/content/smokedata/labels/val/*')
print("Найдено изображений:", len(images))
for path in images[:5]:  # покажем первые 5
    print(path)


Найдено изображений: 0


In [10]:
!find /content/smokedata/labels/val -name "*.txt" -exec grep . {} \;

In [11]:
pip install ultralytics



In [12]:
model = YOLO('yolov8x.pt')

# Папка с изображениями, которые хотим разметить
image_dir = '/content/smokedata/images/train'  # или 'val', 'test' по нужде
label_dir = '/content/smokedata/labels/train'
os.makedirs(label_dir, exist_ok=True)

# Получаем список изображений
image_paths = [os.path.join(image_dir, f) for f in os.listdir(image_dir) if f.endswith(('.jpg', '.png'))]

for img_path in image_paths:
    img = cv2.imread(img_path)
    h, w = img.shape[:2]

    # Получаем предсказания
    results = model(img, verbose=False)[0]

    txt_path = os.path.join(label_dir, os.path.splitext(os.path.basename(img_path))[0] + '.txt')

    with open(txt_path, 'w') as f:
        for box in results.boxes:
            cls = int(box.cls[0])
            x1, y1, x2, y2 = box.xyxy[0]

            # YOLO формат (x_center, y_center, width, height), всё в относительных координатах [0..1]
            x_center = ((x1 + x2) / 2) / w
            y_center = ((y1 + y2) / 2) / h
            bw = (x2 - x1) / w
            bh = (y2 - y1) / h

            f.write(f"{cls} {x_center:.6f} {y_center:.6f} {bw:.6f} {bh:.6f}\n")

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8x.pt to 'yolov8x.pt'...


100%|██████████| 131M/131M [00:01<00:00, 92.7MB/s]


In [13]:
model_transformer = YOLO('yolov8x.pt')
model_transformer.train(data='/content/smokedata/data.yaml', epochs=3, imgsz=640, batch=8, name='baseline_transformer')

Ultralytics 8.3.114 🚀 Python-3.11.12 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8x.pt, data=/content/smokedata/data.yaml, epochs=3, time=None, patience=100, batch=8, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=baseline_transformer, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, 

[34m[1mtrain: [0mScanning /content/smokedata/labels/train... 716 images, 5 backgrounds, 381 corrupt: 100%|██████████| 716/716 [00:00<00:00, 1066.50it/s]

[34m[1mtrain: [0m/content/smokedata/images/train/notsmoking_0001.jpg: ignoring corrupt image/label: Label class 32 exceeds dataset class count 2. Possible class labels are 0-1
[34m[1mtrain: [0m/content/smokedata/images/train/notsmoking_0010.jpg: ignoring corrupt image/label: Label class 27 exceeds dataset class count 2. Possible class labels are 0-1
[34m[1mtrain: [0m/content/smokedata/images/train/notsmoking_0011.jpg: ignoring corrupt image/label: Label class 74 exceeds dataset class count 2. Possible class labels are 0-1
[34m[1mtrain: [0m/content/smokedata/images/train/notsmoking_0019.jpg: ignoring corrupt image/label: Label class 27 exceeds dataset class count 2. Possible class labels are 0-1
[34m[1mtrain: [0m/content/smokedata/images/train/notsmoking_0022.jpg: ignoring corrupt image/label: Label class 66 exceeds dataset class count 2. Possible class labels are 0-1
[34m[1mtrain: [0m/content/smokedata/images/train/notsmoking_0025.jpg: ignoring corrupt image/label: La




[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 502.4±90.6 MB/s, size: 75.6 KB)


[34m[1mval: [0mScanning /content/smokedata/labels/val.cache... 0 images, 180 backgrounds, 0 corrupt: 100%|██████████| 180/180 [00:00<?, ?it/s]






Plotting labels to runs/detect/baseline_transformer/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001667, momentum=0.9) with parameter groups 97 weight(decay=0.0), 104 weight(decay=0.0005), 103 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 2 dataloader workers
Logging results to [1mruns/detect/baseline_transformer[0m
Starting training for 3 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/3      6.75G      1.053       1.65      1.615         26        640: 100%|██████████| 42/42 [00:29<00:00,  1.41it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 12/12 [00:05<00:00,  2.18it/s]

                   all        180          0          0          0          0          0



  i = smooth(f1_curve.mean(0), 0.1).argmax()  # max F1 index
  ret = um.true_divide(



      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        2/3      7.16G      1.139      1.169      1.674         21        640: 100%|██████████| 42/42 [00:27<00:00,  1.55it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 12/12 [00:05<00:00,  2.23it/s]

                   all        180          0          0          0          0          0



  i = smooth(f1_curve.mean(0), 0.1).argmax()  # max F1 index
  ret = um.true_divide(



      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        3/3      7.43G       1.07      1.128      1.632         20        640: 100%|██████████| 42/42 [00:26<00:00,  1.57it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 12/12 [00:05<00:00,  2.21it/s]

                   all        180          0          0          0          0          0



  i = smooth(f1_curve.mean(0), 0.1).argmax()  # max F1 index
  ret = um.true_divide(



3 epochs completed in 0.037 hours.
Optimizer stripped from runs/detect/baseline_transformer/weights/last.pt, 136.7MB
Optimizer stripped from runs/detect/baseline_transformer/weights/best.pt, 136.7MB

Validating runs/detect/baseline_transformer/weights/best.pt...
Ultralytics 8.3.114 🚀 Python-3.11.12 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
Model summary (fused): 112 layers, 68,125,494 parameters, 0 gradients, 257.4 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 12/12 [00:05<00:00,  2.06it/s]
  ax.plot(px, py.mean(1), linewidth=3, color="blue", label=f"all classes {ap[:, 0].mean():.3f} mAP@0.5")
  ret = ret.dtype.type(ret / rcount)
  y = smooth(py.mean(0), 0.1)
  ret = um.true_divide(
  y = smooth(py.mean(0), 0.1)
  ret = um.true_divide(
  y = smooth(py.mean(0), 0.1)
  ret = um.true_divide(


                   all        180          0          0          0          0          0


  i = smooth(f1_curve.mean(0), 0.1).argmax()  # max F1 index


Speed: 0.3ms preprocess, 26.6ms inference, 0.0ms loss, 3.3ms postprocess per image
Results saved to [1mruns/detect/baseline_transformer[0m


ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([], dtype=int64)
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7be98e18fb50>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,    0.046046,    0.047047,
        

# b. Оценка моделей

In [14]:
metrics_cnn = model_cnn.val()
metrics_transformer = model_transformer.val()

Ultralytics 8.3.114 🚀 Python-3.11.12 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
Model summary (fused): 72 layers, 3,006,038 parameters, 0 gradients, 8.1 GFLOPs
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 2386.2±686.8 MB/s, size: 81.9 KB)


[34m[1mval: [0mScanning /content/smokedata/labels/val.cache... 0 images, 180 backgrounds, 0 corrupt: 100%|██████████| 180/180 [00:00<?, ?it/s]




                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 12/12 [00:03<00:00,  3.56it/s]
  ax.plot(px, py.mean(1), linewidth=3, color="blue", label=f"all classes {ap[:, 0].mean():.3f} mAP@0.5")
  ret = ret.dtype.type(ret / rcount)
  y = smooth(py.mean(0), 0.1)
  ret = um.true_divide(
  y = smooth(py.mean(0), 0.1)
  ret = um.true_divide(
  y = smooth(py.mean(0), 0.1)
  ret = um.true_divide(


                   all        180          0          0          0          0          0


  i = smooth(f1_curve.mean(0), 0.1).argmax()  # max F1 index


Speed: 2.1ms preprocess, 6.8ms inference, 0.0ms loss, 4.5ms postprocess per image
Results saved to [1mruns/detect/baseline_cnn2[0m
Ultralytics 8.3.114 🚀 Python-3.11.12 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
Model summary (fused): 112 layers, 68,125,494 parameters, 0 gradients, 257.4 GFLOPs
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 2436.2±676.7 MB/s, size: 62.2 KB)


[34m[1mval: [0mScanning /content/smokedata/labels/val.cache... 0 images, 180 backgrounds, 0 corrupt: 100%|██████████| 180/180 [00:00<?, ?it/s]




                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 23/23 [00:14<00:00,  1.57it/s]
  ax.plot(px, py.mean(1), linewidth=3, color="blue", label=f"all classes {ap[:, 0].mean():.3f} mAP@0.5")
  ret = ret.dtype.type(ret / rcount)
  y = smooth(py.mean(0), 0.1)
  ret = um.true_divide(
  y = smooth(py.mean(0), 0.1)
  ret = um.true_divide(
  y = smooth(py.mean(0), 0.1)
  ret = um.true_divide(


                   all        180          0          0          0          0          0


  i = smooth(f1_curve.mean(0), 0.1).argmax()  # max F1 index


Speed: 1.5ms preprocess, 76.7ms inference, 0.0ms loss, 1.1ms postprocess per image
Results saved to [1mruns/detect/baseline_transformer2[0m



# 3. Улучшение бейзлайна
# a. Формулировка гипотез
# - Применение аугментаций (flip, HSV, scale)
# - Использование другой модели (YOLOv8m)
# - Настройка learning rate

# b. Проверка гипотез

In [15]:
model_aug = YOLO('yolov8m.pt')
model_aug.train(data='/content/smokedata/data.yaml', epochs=3, imgsz=640, batch=8, name='augmented',
                augment=True, lr0=0.005)

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8m.pt to 'yolov8m.pt'...


100%|██████████| 49.7M/49.7M [00:00<00:00, 296MB/s]


Ultralytics 8.3.114 🚀 Python-3.11.12 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8m.pt, data=/content/smokedata/data.yaml, epochs=3, time=None, patience=100, batch=8, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=augmented, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=True, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, show_boxes=T

[34m[1mtrain: [0mScanning /content/smokedata/labels/train.cache... 716 images, 5 backgrounds, 381 corrupt: 100%|██████████| 716/716 [00:00<?, ?it/s]

[34m[1mtrain: [0m/content/smokedata/images/train/notsmoking_0001.jpg: ignoring corrupt image/label: Label class 32 exceeds dataset class count 2. Possible class labels are 0-1
[34m[1mtrain: [0m/content/smokedata/images/train/notsmoking_0010.jpg: ignoring corrupt image/label: Label class 27 exceeds dataset class count 2. Possible class labels are 0-1
[34m[1mtrain: [0m/content/smokedata/images/train/notsmoking_0011.jpg: ignoring corrupt image/label: Label class 74 exceeds dataset class count 2. Possible class labels are 0-1
[34m[1mtrain: [0m/content/smokedata/images/train/notsmoking_0019.jpg: ignoring corrupt image/label: Label class 27 exceeds dataset class count 2. Possible class labels are 0-1
[34m[1mtrain: [0m/content/smokedata/images/train/notsmoking_0022.jpg: ignoring corrupt image/label: Label class 66 exceeds dataset class count 2. Possible class labels are 0-1
[34m[1mtrain: [0m/content/smokedata/images/train/notsmoking_0025.jpg: ignoring corrupt image/label: La




[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 578.6±318.2 MB/s, size: 75.6 KB)


[34m[1mval: [0mScanning /content/smokedata/labels/val.cache... 0 images, 180 backgrounds, 0 corrupt: 100%|██████████| 180/180 [00:00<?, ?it/s]






Plotting labels to runs/detect/augmented/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.005' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001667, momentum=0.9) with parameter groups 77 weight(decay=0.0), 84 weight(decay=0.0005), 83 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 2 dataloader workers
Logging results to [1mruns/detect/augmented[0m
Starting training for 3 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/3      5.12G     0.8556      1.595      1.462         26        640: 100%|██████████| 42/42 [00:12<00:00,  3.26it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 12/12 [00:02<00:00,  5.16it/s]

                   all        180          0          0          0          0          0



  i = smooth(f1_curve.mean(0), 0.1).argmax()  # max F1 index
  ret = um.true_divide(



      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        2/3      5.27G     0.9345      1.003      1.522         21        640: 100%|██████████| 42/42 [00:12<00:00,  3.42it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 12/12 [00:02<00:00,  5.54it/s]

                   all        180          0          0          0          0          0



  i = smooth(f1_curve.mean(0), 0.1).argmax()  # max F1 index
  ret = um.true_divide(



      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        3/3       5.4G     0.8537     0.9897      1.462         20        640: 100%|██████████| 42/42 [00:12<00:00,  3.47it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 12/12 [00:02<00:00,  5.62it/s]

                   all        180          0          0          0          0          0



  i = smooth(f1_curve.mean(0), 0.1).argmax()  # max F1 index
  ret = um.true_divide(



3 epochs completed in 0.018 hours.
Optimizer stripped from runs/detect/augmented/weights/last.pt, 52.0MB
Optimizer stripped from runs/detect/augmented/weights/best.pt, 52.0MB

Validating runs/detect/augmented/weights/best.pt...
Ultralytics 8.3.114 🚀 Python-3.11.12 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
Model summary (fused): 92 layers, 25,840,918 parameters, 0 gradients, 78.7 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 12/12 [00:06<00:00,  1.93it/s]
  ax.plot(px, py.mean(1), linewidth=3, color="blue", label=f"all classes {ap[:, 0].mean():.3f} mAP@0.5")
  ret = ret.dtype.type(ret / rcount)
  y = smooth(py.mean(0), 0.1)
  ret = um.true_divide(
  y = smooth(py.mean(0), 0.1)
  ret = um.true_divide(
  y = smooth(py.mean(0), 0.1)
  ret = um.true_divide(


                   all        180          0          0          0          0          0


  i = smooth(f1_curve.mean(0), 0.1).argmax()  # max F1 index


Speed: 0.3ms preprocess, 30.4ms inference, 0.0ms loss, 1.9ms postprocess per image
Results saved to [1mruns/detect/augmented[0m


ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([], dtype=int64)
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7be998e281d0>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,    0.046046,    0.047047,
        


# e. Оценка качества модели с улучшенным бейзлайном

In [16]:
metrics_aug = model_aug.val()


Ultralytics 8.3.114 🚀 Python-3.11.12 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
Model summary (fused): 92 layers, 25,840,918 parameters, 0 gradients, 78.7 GFLOPs
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 2542.1±770.1 MB/s, size: 81.9 KB)


[34m[1mval: [0mScanning /content/smokedata/labels/val.cache... 0 images, 180 backgrounds, 0 corrupt: 100%|██████████| 180/180 [00:00<?, ?it/s]




                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 23/23 [00:11<00:00,  2.02it/s]
  ax.plot(px, py.mean(1), linewidth=3, color="blue", label=f"all classes {ap[:, 0].mean():.3f} mAP@0.5")
  ret = ret.dtype.type(ret / rcount)
  y = smooth(py.mean(0), 0.1)
  ret = um.true_divide(
  y = smooth(py.mean(0), 0.1)
  ret = um.true_divide(
  y = smooth(py.mean(0), 0.1)
  ret = um.true_divide(


                   all        180          0          0          0          0          0


  i = smooth(f1_curve.mean(0), 0.1).argmax()  # max F1 index


Speed: 2.1ms preprocess, 58.4ms inference, 0.0ms loss, 1.6ms postprocess per image
Results saved to [1mruns/detect/augmented2[0m


# 4. Имплементация алгоритма машинного обучения
# a. Самостоятельная имплементация простой классификатора на CNN в PyTorch

In [29]:
import os
import shutil

base_dir = '/content/smokedata_raw'
splits = ['Training', 'Validation', 'Testing']
classes = ['smoking', 'nosmoking']

for split in splits:
    split_dir = os.path.join(base_dir, split)
    for cls in classes:
        os.makedirs(os.path.join(split_dir, cls), exist_ok=True)

    for filename in os.listdir(split_dir):
        if filename.endswith(('.jpg', '.png')):
            src = os.path.join(split_dir, filename)

            # Класс определяем по имени файла
            label = 'smoking' if 'smoking' in filename.lower() else 'nosmoking'
            dst = os.path.join(split_dir, label, filename)

            shutil.move(src, dst)

print(" Изображения распределены по классам.")


 Изображения распределены по классам.


In [25]:
import os
import shutil

def organize_images(root_path):
    for split in ['Training', 'Validation', 'Testing']:
        full_path = os.path.join(root_path, split, split)
        smoking_path = os.path.join(full_path, 'smoking')
        nosmoking_path = os.path.join(full_path, 'nosmoking')

        os.makedirs(smoking_path, exist_ok=True)
        os.makedirs(nosmoking_path, exist_ok=True)

        for filename in os.listdir(full_path):
            src = os.path.join(full_path, filename)
            if os.path.isfile(src):
                lower = filename.lower()
                if lower.startswith('smoking'):
                    shutil.move(src, os.path.join(smoking_path, filename))
                elif lower.startswith('nosmoking') or lower.startswith('notsmoking'):
                    shutil.move(src, os.path.join(nosmoking_path, filename))

organize_images('/content/smokedata_raw')




In [26]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

train_data = datasets.ImageFolder(f'/content/smokedata_raw/Training/Training', transform=transform)
val_data = datasets.ImageFolder(f'/content/smokedata_raw/Validation/Validation', transform=transform)
test_data = datasets.ImageFolder(f'/content/smokedata_raw/Testing/Testing', transform=transform)

train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
val_loader = DataLoader(val_data, batch_size=32)
test_loader = DataLoader(test_data, batch_size=32)

class SimpleCNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(3, 16, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(16, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(32 * 56 * 56, 100),
            nn.ReLU(),
            nn.Linear(100, 2)
        )
    def forward(self, x):
        x = self.conv(x)
        return self.fc(x)

model_custom = SimpleCNN().to('cuda')
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_custom.parameters(), lr=0.001)

# Обучение модели

In [27]:
for epoch in range(3):
    model_custom.train()
    for images, labels in train_loader:
        images, labels = images.to('cuda'), labels.to('cuda')
        optimizer.zero_grad()
        outputs = model_custom(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

# Оценка модели

In [28]:
model_custom.eval()
y_true, y_pred = [], []
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to('cuda')
        outputs = model_custom(images)
        y_pred.extend(torch.argmax(outputs, dim=1).cpu().numpy())
        y_true.extend(labels.numpy())

print(classification_report(y_true, y_pred, target_names=["non-smoker", "smoker"]))

              precision    recall  f1-score   support

  non-smoker       0.82      0.56      0.67       112
      smoker       0.67      0.88      0.76       112

    accuracy                           0.72       224
   macro avg       0.74      0.72      0.71       224
weighted avg       0.74      0.72      0.71       224

