Подготавливаем датасет

In [1]:
import os
import yaml
from ultralytics import YOLO
import shutil
from sklearn.model_selection import train_test_split

def prepare_dataset():
    base_path = 'hacaton'
    images_dir = os.path.join(base_path, 'train', 'images')
    labels_dir = os.path.join(base_path, 'train', 'labels')
    
    datasets_dir = os.path.join(base_path, 'datasets')
    os.makedirs(os.path.join(datasets_dir, 'images', 'train'), exist_ok=True)
    os.makedirs(os.path.join(datasets_dir, 'images', 'val'), exist_ok=True)
    os.makedirs(os.path.join(datasets_dir, 'labels', 'train'), exist_ok=True)
    os.makedirs(os.path.join(datasets_dir, 'labels', 'val'), exist_ok=True)
    
    image_files = [f for f in os.listdir(images_dir) if f.endswith(('.jpg', '.png', '.jpeg'))]
    
    train_files, val_files = train_test_split(image_files, test_size=0.2, random_state=42)
    
    for file in train_files:
        shutil.copy(os.path.join(images_dir, file), 
                   os.path.join(datasets_dir, 'images', 'train', file))
        label_file = os.path.splitext(file)[0] + '.txt'
        if os.path.exists(os.path.join(labels_dir, label_file)):
            shutil.copy(os.path.join(labels_dir, label_file), 
                       os.path.join(datasets_dir, 'labels', 'train', label_file))
    
    for file in val_files:
        shutil.copy(os.path.join(images_dir, file), 
                   os.path.join(datasets_dir, 'images', 'val', file))
        label_file = os.path.splitext(file)[0] + '.txt'
        if os.path.exists(os.path.join(labels_dir, label_file)):
            shutil.copy(os.path.join(labels_dir, label_file), 
                       os.path.join(datasets_dir, 'labels', 'val', label_file))
    
    print(f"Train: {len(train_files)}, Val: {len(val_files)}")
    return datasets_dir

datasets_dir = prepare_dataset()

Train: 72, Val: 19


Создаём YAML

In [2]:
dataset_yaml = f"""
path: {os.path.abspath(datasets_dir)}
train: images/train
val: images/val

nc: 1
names: ['person']
"""

with open(os.path.join(datasets_dir, 'dataset.yaml'), 'w') as f:
    f.write(dataset_yaml)

print("Dataset YAML created!")

Dataset YAML created!


Дополняем датасет: Загружаем кастомные фотографии пустых аудиторий, фотографии локтей, стульев и СТОЯЧИХ людей (индефицируем их как преподователей)

In [3]:
import os
import shutil
from ultralytics import YOLO

def add_standing_people_to_negatives():
    
    datasets_dir = 'hacaton/datasets'
    ignore_dir = 'ignore'
    
    neg_images_dir = os.path.join(datasets_dir, 'images', 'train')
    neg_labels_dir = os.path.join(datasets_dir, 'labels', 'train')
    
    standing_people_images = []
    if os.path.exists(ignore_dir):
        for file in os.listdir(ignore_dir):
            if file.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tiff')):
                full_path = os.path.join(ignore_dir, file)
                standing_people_images.append(full_path)
    
    for img_path in standing_people_images:
        if os.path.exists(img_path):
            img_name = os.path.basename(img_path)
            name, ext = os.path.splitext(img_name)
            new_img_name = f"{name}_teach{ext}"

            shutil.copy(img_path, os.path.join(neg_images_dir, new_img_name))

            label_name = f"{name}_teach.txt"
            label_path = os.path.join(neg_labels_dir, label_name)
            
            with open(label_path, 'w') as f:
                pass

def add_negative_samples():
    
    datasets_dir = 'hacaton/datasets'

    neg_images_dir = os.path.join(datasets_dir, 'images', 'train')
    neg_labels_dir = os.path.join(datasets_dir, 'labels', 'train')
    
    negative_images = []
    ignore_dir = 'ignore_image'

    for root, dirs, files in os.walk(ignore_dir):
        for file in files:
            if file.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tiff')):
                full_path = os.path.join(root, file)
                negative_images.append(full_path)
    
    for img_path in negative_images:
        if os.path.exists(img_path):
            img_name = os.path.basename(img_path)
            shutil.copy(img_path, os.path.join(neg_images_dir, img_name))
            
            # Создаем ПУСТОЙ .txt файл
            label_name = os.path.splitext(img_name)[0] + '.txt'
            label_path = os.path.join(neg_labels_dir, label_name)
            
            with open(label_path, 'w') as f:
                pass
    print(negative_images)
    print("Негативные примеры добавлены!")

add_standing_people_to_negatives()
add_negative_samples()

['ignore_image\\chairs\\1.png', 'ignore_image\\chairs\\10.png', 'ignore_image\\chairs\\11.png', 'ignore_image\\chairs\\12.png', 'ignore_image\\chairs\\13.png', 'ignore_image\\chairs\\14.png', 'ignore_image\\chairs\\2.png', 'ignore_image\\chairs\\3.png', 'ignore_image\\chairs\\4.png', 'ignore_image\\chairs\\5.png', 'ignore_image\\chairs\\6.png', 'ignore_image\\chairs\\7.png', 'ignore_image\\chairs\\8.png', 'ignore_image\\chairs\\9.png', 'ignore_image\\elbows\\1.png', 'ignore_image\\elbows\\2.png', 'ignore_image\\elbows\\3.png', 'ignore_image\\elbows\\4.png']
Негативные примеры добавлены!


Обучаем модель на базе yolov8n.
Самая легкая модель для сохранения скорости

Проверяем валидацию

In [None]:
def train_improved_model():
    model = YOLO('yolo/yolov8n.pt')
    
    results = model.train(
        data=os.path.join(datasets_dir, 'dataset.yaml'),
        epochs=100,
        imgsz=1024,
        batch=4,
        device='cpu',
        workers=0,
        lr0=0.001,
        lrf=0.01,
        momentum=0.9,
        weight_decay=0.0005,
        warmup_epochs=3.0,
        warmup_momentum=0.8,
        warmup_bias_lr=0.1,
        box=7.5,
        cls=0.5,
        dfl=1.5,
        pose=12.0,
        kobj=1.0,
        label_smoothing=0.0,
        nbs=64,
        hsv_h=0.015,
        hsv_s=0.7,
        hsv_v=0.4,
        degrees=0.0,
        translate=0.1,
        scale=0.5,
        shear=0.0,
        perspective=0.0,
        flipud=0.0,
        fliplr=0.5,
        mosaic=1.0,
        mixup=0.0,
        copy_paste=0.0,
        auto_augment='randaugment',
        erasing=0.4,
        crop_fraction=1.0,
        patience=20,
        project='hacaton_results_improved',
        name='yolov8s_improved_v2',
        exist_ok=True,
        single_cls=True,
        optimizer='AdamW',
        verbose=True,
        seed=42,
        deterministic=True,
        plots=True,
        save=True,
        save_period=-1,
        val=True,
        amp=False
    )
    
    return model, results

model_improved, results_improved = train_improved_model()

In [10]:
def validate_trained_model():
    best_model_path = 'hacaton_results_improved/yolov8s_improved_v2/weights/best.pt'
    
    if os.path.exists(best_model_path):
        model = YOLO(best_model_path)
        
        metrics = model.val(
            data=os.path.join(datasets_dir, 'dataset.yaml'),
            batch=4,
            device='cpu',
            verbose=True
        )
        
        print(f"Validation mAP50: {metrics.box.map50:.4f}")
        print(f"Validation mAP50-95: {metrics.box.map:.4f}")
        return metrics
    else:
        print("Trained model not found yet")
        return None

metrics = validate_trained_model()

Ultralytics 8.3.229  Python-3.13.7 torch-2.8.0+cpu CPU (Intel Core 5 220H)
Model summary (fused): 72 layers, 3,005,843 parameters, 0 gradients, 8.1 GFLOPs
[34m[1mval: [0mFast image access  (ping: 0.00.0 ms, read: 1539.9484.0 MB/s, size: 460.4 KB)
[K[34m[1mval: [0mScanning D:\ml\hacaton\datasets\labels\val.cache... 19 images, 3 backgrounds, 0 corrupt: 100% ━━━━━━━━━━━━ 19/19 46.9Kit/s 0.0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 5/5 1.2it/s 4.1s1.2s
                   all         19        941      0.785      0.631      0.709      0.332
Speed: 1.2ms preprocess, 85.2ms inference, 0.0ms loss, 106.7ms postprocess per image
Results saved to [1mD:\ml\runs\detect\val2[0m
Validation mAP50: 0.7087
Validation mAP50-95: 0.3317


Прогоняем модель по тестовой выборке

In [9]:
import pandas as pd
from tqdm import tqdm
import os
from ultralytics import YOLO
import cv2

def create_ensemble_submission():
    model2 = YOLO('hacaton_results_improved/yolov8s_improved_v2/weights/best.pt')
    test_images_dir = os.path.join('hacaton', 'test_images')
    test_images = [f for f in os.listdir(test_images_dir) if f.endswith(('.jpg', '.png', '.jpeg'))]
    
    output_dir = 'ensemble_results'
    os.makedirs(output_dir, exist_ok=True)
    
    results = []
    
    for image_file in tqdm(test_images, desc="Ensemble processing"):
        image_path = os.path.join(test_images_dir, image_file)
        
        original_image = cv2.imread(image_path)
        original_image_rgb = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

        results2 = model2.predict(
            source=image_path,
            conf=0.25,
            iou=0.3,
            imgsz=1024,
            device='cpu', 
            verbose=False,
            max_det=800,
            augment=True,
            save=False,
            classes=[0],
            agnostic_nms=False,
            half=False,
            nms=True,
            mode='predict',
        )

        face_boxes = []
        
        for result in results2:
            if result.boxes is not None:
                for box in result.boxes:
                    x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
                    conf = box.conf[0].cpu().numpy()
                    face_boxes.append({
                        'bbox': [x1, y1, x2, y2],
                        'confidence': conf
                    })
        
        unique_people_count = len(face_boxes)
        
        result_image = original_image_rgb.copy()
        
        # Визуализация лиц
        for i, face in enumerate(face_boxes):
            x1, y1, x2, y2 = map(int, face['bbox'])
            confidence = face['confidence']
            
            color = (0, 255, 0)  # Зеленый для лиц
            label = f"Face {confidence:.3f}"
            
            cv2.rectangle(result_image, (x1, y1), (x2, y2), color, 2)
            cv2.putText(result_image, label, (x1, y1-10), 
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
        
        # Статистика на изображении
        cv2.putText(result_image, f"Faces: {len(face_boxes)}", (20, 30), 
                   cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
        cv2.putText(result_image, f"Total unique: {unique_people_count}", (20, 60), 
                   cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
        
        # Сохраняем результат
        result_filename = f"{os.path.splitext(image_file)[0]}_ensemble.jpg"
        result_path = os.path.join(output_dir, result_filename)
        cv2.imwrite(result_path, cv2.cvtColor(result_image, cv2.COLOR_RGB2BGR))
        
        # Записываем в CSV
        img_id = os.path.splitext(image_file)[0]
        results.append({'IMG_ID': img_id, 'label': unique_people_count})
        
        print(f"{image_file}: Лиц={len(face_boxes)}, Уникальных={unique_people_count}")
    
    df = pd.DataFrame(results)
    submission_path = 'submission_ensemble.csv'
    df.to_csv(submission_path, index=False)
    
    print(f"\nEnsemble submission saved to {submission_path}")
    print("Statistics:")
    print(f"Min: {df['label'].min()}, Max: {df['label'].max()}, Mean: {df['label'].mean():.2f}")
    print(f"Total unique people detected: {df['label'].sum()}")
    
    return df

ensemble_result = create_ensemble_submission()

Ensemble processing:   4%|▍         | 2/50 [00:00<00:16,  2.90it/s]

0.jpg: Лиц=18, Уникальных=18
1.jpg: Лиц=16, Уникальных=16


Ensemble processing:   6%|▌         | 3/50 [00:01<00:14,  3.20it/s]

10.jpg: Лиц=12, Уникальных=12


Ensemble processing:   8%|▊         | 4/50 [00:01<00:12,  3.56it/s]

11.jpg: Лиц=122, Уникальных=122


Ensemble processing:  10%|█         | 5/50 [00:01<00:11,  3.81it/s]

12.jpg: Лиц=15, Уникальных=15


Ensemble processing:  12%|█▏        | 6/50 [00:01<00:11,  3.90it/s]

13.jpg: Лиц=87, Уникальных=87


Ensemble processing:  14%|█▍        | 7/50 [00:01<00:11,  3.85it/s]

14.jpg: Лиц=31, Уникальных=31


Ensemble processing:  16%|█▌        | 8/50 [00:02<00:11,  3.73it/s]

15.jpg: Лиц=66, Уникальных=66


Ensemble processing:  18%|█▊        | 9/50 [00:02<00:10,  3.84it/s]

16.jpg: Лиц=144, Уникальных=144


Ensemble processing:  20%|██        | 10/50 [00:02<00:10,  3.87it/s]

17.jpg: Лиц=32, Уникальных=32


Ensemble processing:  22%|██▏       | 11/50 [00:03<00:09,  3.94it/s]

18.jpg: Лиц=122, Уникальных=122


Ensemble processing:  24%|██▍       | 12/50 [00:03<00:09,  4.05it/s]

19.jpg: Лиц=81, Уникальных=81


Ensemble processing:  26%|██▌       | 13/50 [00:03<00:08,  4.12it/s]

2.jpg: Лиц=27, Уникальных=27
20.jpg: Лиц=10, Уникальных=10


Ensemble processing:  30%|███       | 15/50 [00:03<00:08,  4.35it/s]

21.jpg: Лиц=110, Уникальных=110
22.jpg: Лиц=43, Уникальных=43


Ensemble processing:  34%|███▍      | 17/50 [00:04<00:07,  4.47it/s]

23.jpg: Лиц=37, Уникальных=37


Ensemble processing:  36%|███▌      | 18/50 [00:04<00:08,  3.75it/s]

24.jpg: Лиц=33, Уникальных=33


Ensemble processing:  38%|███▊      | 19/50 [00:05<00:08,  3.50it/s]

25.jpg: Лиц=18, Уникальных=18


Ensemble processing:  40%|████      | 20/50 [00:05<00:08,  3.46it/s]

26.jpg: Лиц=59, Уникальных=59


Ensemble processing:  42%|████▏     | 21/50 [00:05<00:08,  3.29it/s]

27.jpg: Лиц=46, Уникальных=46


Ensemble processing:  44%|████▍     | 22/50 [00:06<00:08,  3.17it/s]

28.jpg: Лиц=53, Уникальных=53


Ensemble processing:  46%|████▌     | 23/50 [00:06<00:08,  3.19it/s]

29.jpg: Лиц=87, Уникальных=87


Ensemble processing:  48%|████▊     | 24/50 [00:06<00:08,  2.93it/s]

3.jpg: Лиц=22, Уникальных=22


Ensemble processing:  50%|█████     | 25/50 [00:07<00:08,  3.12it/s]

30.jpg: Лиц=6, Уникальных=6


Ensemble processing:  52%|█████▏    | 26/50 [00:07<00:07,  3.13it/s]

31.jpg: Лиц=108, Уникальных=108


Ensemble processing:  54%|█████▍    | 27/50 [00:07<00:07,  3.21it/s]

32.jpg: Лиц=41, Уникальных=41


Ensemble processing:  56%|█████▌    | 28/50 [00:07<00:06,  3.28it/s]

33.jpg: Лиц=62, Уникальных=62


Ensemble processing:  58%|█████▊    | 29/50 [00:08<00:06,  3.36it/s]

34.jpg: Лиц=161, Уникальных=161


Ensemble processing:  60%|██████    | 30/50 [00:08<00:05,  3.52it/s]

35.jpg: Лиц=35, Уникальных=35


Ensemble processing:  62%|██████▏   | 31/50 [00:08<00:05,  3.38it/s]

36.jpg: Лиц=57, Уникальных=57


Ensemble processing:  64%|██████▍   | 32/50 [00:09<00:05,  3.38it/s]

37.jpg: Лиц=37, Уникальных=37


Ensemble processing:  66%|██████▌   | 33/50 [00:09<00:05,  3.34it/s]

38.jpg: Лиц=11, Уникальных=11


Ensemble processing:  68%|██████▊   | 34/50 [00:09<00:05,  2.99it/s]

39.jpg: Лиц=133, Уникальных=133


Ensemble processing:  70%|███████   | 35/50 [00:10<00:04,  3.12it/s]

4.jpg: Лиц=57, Уникальных=57


Ensemble processing:  72%|███████▏  | 36/50 [00:10<00:04,  3.11it/s]

40.jpg: Лиц=87, Уникальных=87


Ensemble processing:  74%|███████▍  | 37/50 [00:10<00:04,  3.16it/s]

41.jpg: Лиц=11, Уникальных=11


Ensemble processing:  76%|███████▌  | 38/50 [00:11<00:03,  3.12it/s]

42.jpg: Лиц=103, Уникальных=103


Ensemble processing:  78%|███████▊  | 39/50 [00:11<00:03,  3.28it/s]

43.jpg: Лиц=38, Уникальных=38


Ensemble processing:  80%|████████  | 40/50 [00:11<00:02,  3.41it/s]

44.jpg: Лиц=17, Уникальных=17


Ensemble processing:  82%|████████▏ | 41/50 [00:11<00:02,  3.30it/s]

45.jpg: Лиц=0, Уникальных=0


Ensemble processing:  84%|████████▍ | 42/50 [00:12<00:02,  3.33it/s]

46.jpg: Лиц=173, Уникальных=173


Ensemble processing:  86%|████████▌ | 43/50 [00:12<00:02,  3.45it/s]

47.jpg: Лиц=30, Уникальных=30


Ensemble processing:  88%|████████▊ | 44/50 [00:12<00:01,  3.41it/s]

48.jpg: Лиц=57, Уникальных=57


Ensemble processing:  90%|█████████ | 45/50 [00:13<00:01,  3.57it/s]

49.jpg: Лиц=57, Уникальных=57


Ensemble processing:  92%|█████████▏| 46/50 [00:13<00:01,  3.07it/s]

5.jpg: Лиц=8, Уникальных=8


Ensemble processing:  94%|█████████▍| 47/50 [00:13<00:00,  3.36it/s]

6.jpg: Лиц=32, Уникальных=32


Ensemble processing:  96%|█████████▌| 48/50 [00:13<00:00,  3.32it/s]

7.jpg: Лиц=13, Уникальных=13


Ensemble processing:  98%|█████████▊| 49/50 [00:14<00:00,  3.37it/s]

8.jpg: Лиц=19, Уникальных=19


Ensemble processing: 100%|██████████| 50/50 [00:14<00:00,  3.43it/s]

9.jpg: Лиц=31, Уникальных=31

Ensemble submission saved to submission_ensemble.csv
Statistics:
Min: 0, Max: 173, Mean: 53.50
Total unique people detected: 2675



