In [8]:
# setup chunk

## for deep learning architecture and evaluation
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchsummary import summary
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

## for plotting
from PIL import Image
import matplotlib.pyplot as plt

## for preprocessing
import os
import numpy as np
import random
import math
import pandas as pd

## set the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

from ultralytics import YOLO
import cv2

cpu


In [29]:
image_folder = '../../02_data/Google_OpenImages/data_yolo'
annotation_file = '../../02_data/Google_OpenImages/filtered_person_annotations.csv'

df = pd.read_csv(annotation_file)

# Convert bounding boxes to YOLO format
def convert_bbox_to_yolo_format(xmin, ymin, xmax, ymax):
    center_x = (xmin + xmax) / 2
    center_y = (ymin + ymax) / 2
    w = xmax - xmin
    h = ymax - ymin
    return center_x, center_y, w, h

# Process images in train and validation folders
for split in ['train', 'validation']:
    split_image_folder = os.path.join(image_folder, split, 'images')
    split_label_folder = os.path.join(image_folder, split, 'labels')

    if not os.path.exists(split_label_folder):
        os.makedirs(split_label_folder)

    # Process each image in the split folder
    for image_file in os.listdir(split_image_folder):
        if not image_file.lower().endswith(('.jpg', '.png')):  # Adjust if needed
            continue

        image_id = os.path.splitext(image_file)[0]
        label_file = os.path.join(split_label_folder, image_id + '.txt')

        # Read the image to get its dimensions
        image_path = os.path.join(split_image_folder, image_file)
        img = cv2.imread(image_path)
        img_height, img_width = img.shape[:2]

        # Filter annotations for the current image
        image_annotations = df[df['ImageID'] == image_id]

        with open(label_file, 'w') as f:
            for _, row in image_annotations.iterrows():
                xmin, ymin, xmax, ymax = row[['XMin', 'YMin', 'XMax', 'YMax']]
                class_id = 0  # Assuming single class (person)
                center_x, center_y, w, h = convert_bbox_to_yolo_format(xmin, ymin, xmax, ymax)
                f.write(f"{class_id} {center_x} {center_y} {w} {h}\n")

print("Annotation files have been created.")

Annotation files have been created.


In [31]:
model = YOLO("yolov8n.pt") # .pt means using pretrained weights, this is the nano yolo model from ultralytics
results = model.train(data="config.yaml", epochs=10, imgsz=640, device="cpu", name="yolov8_person", lr0=0.001)

New https://pypi.org/project/ultralytics/8.2.61 available 😃 Update with 'pip install -U ultralytics'
Ultralytics YOLOv8.2.60 🚀 Python-3.11.5 torch-2.1.0.post100 CPU (Apple M2 Pro)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=config.yaml, epochs=10, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=cpu, workers=8, project=None, name=yolov8_person, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frame

[34m[1mtrain: [0mScanning /Users/maxweiland/Desktop/SEDS/2nd/DeepL/CNN-to-Evaluate-Social-Distancing-Measures/02_data/Google_OpenImages/data_yolo/train/labels.cache... 200 images, 0 backgrounds, 0 corrupt: 100%|██████████| 200/200 [00:00<?, ?it/s]

[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))



[34m[1mval: [0mScanning /Users/maxweiland/Desktop/SEDS/2nd/DeepL/CNN-to-Evaluate-Social-Distancing-Measures/02_data/Google_OpenImages/data_yolo/validation/labels.cache... 100 images, 0 backgrounds, 0 corrupt: 100%|██████████| 100/100 [00:00<?, ?it/s]

Plotting labels to runs/detect/yolov8_person/labels.jpg... 





[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.001' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mruns/detect/yolov8_person[0m
Starting training for 10 epochs...
Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/10         0G      1.414      2.661      1.304         36        640: 100%|██████████| 13/13 [02:09<00:00,  9.99s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:22<00:00,  5.61s/it]

                   all        100        359     0.0102       0.85      0.315       0.18






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/10         0G      1.427       2.03      1.385         36        640: 100%|██████████| 13/13 [02:10<00:00, 10.02s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:22<00:00,  5.63s/it]

                   all        100        359    0.00987      0.825      0.266      0.146






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/10         0G       1.43      2.012      1.374         49        640: 100%|██████████| 13/13 [02:10<00:00, 10.06s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:22<00:00,  5.71s/it]

                   all        100        359     0.0152      0.657      0.175     0.0826






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/10         0G      1.467      1.997      1.353         23        640: 100%|██████████| 13/13 [02:09<00:00,  9.96s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:22<00:00,  5.66s/it]

                   all        100        359      0.607       0.15      0.241      0.118






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/10         0G      1.429      2.051      1.377         38        640: 100%|██████████| 13/13 [02:06<00:00,  9.76s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:22<00:00,  5.58s/it]

                   all        100        359      0.475      0.173      0.196      0.102






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/10         0G      1.464      1.936      1.424         16        640: 100%|██████████| 13/13 [02:09<00:00,  9.94s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:22<00:00,  5.63s/it]

                   all        100        359      0.413      0.259       0.26      0.127






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/10         0G       1.45      1.972      1.392         22        640: 100%|██████████| 13/13 [02:06<00:00,  9.73s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:22<00:00,  5.58s/it]

                   all        100        359       0.44      0.306      0.295      0.135






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/10         0G      1.468      1.917      1.385         28        640: 100%|██████████| 13/13 [02:06<00:00,  9.74s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:22<00:00,  5.58s/it]

                   all        100        359      0.473      0.393       0.38      0.191






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/10         0G      1.379      1.789      1.347         15        640: 100%|██████████| 13/13 [02:08<00:00,  9.88s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:22<00:00,  5.60s/it]

                   all        100        359      0.588      0.413      0.466      0.256






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/10         0G      1.342      1.708       1.34         31        640: 100%|██████████| 13/13 [02:08<00:00,  9.87s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:22<00:00,  5.56s/it]

                   all        100        359      0.545      0.443      0.472      0.261






10 epochs completed in 0.422 hours.
Optimizer stripped from runs/detect/yolov8_person/weights/last.pt, 6.2MB
Optimizer stripped from runs/detect/yolov8_person/weights/best.pt, 6.2MB

Validating runs/detect/yolov8_person/weights/best.pt...
Ultralytics YOLOv8.2.60 🚀 Python-3.11.5 torch-2.1.0.post100 CPU (Apple M2 Pro)
Model summary (fused): 168 layers, 3,005,843 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 4/4 [00:20<00:00,  5.24s/it]


                   all        100        359      0.547      0.443      0.472      0.261
Speed: 1.1ms preprocess, 201.0ms inference, 0.0ms loss, 2.0ms postprocess per image
Results saved to [1mruns/detect/yolov8_person[0m
