# Dataset preparation

In [1]:
import cv2 as cv
import os
import shutil
import numpy as np
import plotly.express as px
import pandas as pd

import plotly.io as pio
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from ultralytics import YOLO
from ultralytics.utils.ops import xyxy2xywhn

# Plotly display configuration for the whole notebook:
# use the 'png' renderer and the 'plotly_dark' template as defaults for every figure.
pio.renderers.default = 'png'
pio.templates.default = 'plotly_dark'

In [2]:
# Root of the source dataset; images live in the 'images' sub-directory.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
dataset_path = '/mnt/s/CV/STR'
images_path = os.path.join(dataset_path, 'images')

# One row per annotation; the first CSV column is used as the row index.
# 'bbox' is stored as a string such as "[x, y, w, h]" (see the preview below).
df = pd.read_csv(os.path.join(dataset_path, 'annot.csv'), index_col=0)
df.head()

Unnamed: 0,id,image_id,bbox,utf8_string,points,area
0,a4ea732cd3d5948a_1,a4ea732cd3d5948a,"[525.83, 3.4, 197.64, 33.94]",Performance,"[525.83, 3.4, 723.47, 7.29, 722.76, 36.99, 525...",6707.9
1,a4ea732cd3d5948a_2,a4ea732cd3d5948a,"[534.67, 64.68, 91.22, 38.19]",Sport,"[535.73, 64.68, 623.41, 67.51, 625.89, 102.87,...",3483.69
2,a4ea732cd3d5948a_3,a4ea732cd3d5948a,"[626.95, 63.62, 96.52, 31.82]",Watch,"[626.95, 63.62, 721.7, 63.62, 723.47, 95.44, 6...",3071.27
3,a4ea732cd3d5948a_4,a4ea732cd3d5948a,"[577.4, 141.87, 147.13, 43.1]",...period.,"[580.02, 143.61, 724.53, 141.87, 723.66, 184.9...",6341.3
4,a4ea732cd3d5948a_5,a4ea732cd3d5948a,"[391.03, 163.9, 60.82, 38.65]",.,"[395.2, 163.9, 451.85, 191.94, 445.59, 202.55,...",2350.69


In [3]:
# Distinct image ids, in order of first appearance. The split below is done on these
# ids rather than on annotation rows, so all boxes of one image land in the same split.
unique_images = pd.Series(df['image_id'].unique())

In [4]:
# 80/20 train/validation split of image ids; random_state is fixed so the split is reproducible.
train_imgs, val_imgs = train_test_split(unique_images, test_size=0.2, random_state=42)

In [5]:
# Annotation rows belonging to each split; .copy() makes the frames independent of `df`.
train_df = df[df['image_id'].isin(train_imgs)].copy()
val_df = df[df['image_id'].isin(val_imgs)].copy()

In [6]:
# Destination of the exported dataset: 'images' and 'labels' directories under one root.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
yolov8_dataset_path = '/mnt/s/CV/STR_yolo'
yolov8_images_path = os.path.join(yolov8_dataset_path, 'images')
yolov8_labels_path = os.path.join(yolov8_dataset_path, 'labels')

In [7]:
def create_yolo_dataset(df, yolov8_images_path, yolov8_labels_path, sample, source_images_path=None):
    """Export one split of the annotations as a YOLO-style detection dataset.

    For every distinct ``image_id`` in ``df`` the source image
    ``<source_images_path>/<image_id>.jpg`` is copied to
    ``<yolov8_images_path>/<sample>/`` (unless a file of that name already exists there)
    and ``<yolov8_labels_path>/<sample>/<image_id>.txt`` is (re)written with one line per
    annotation: ``0 <v0> <v1> <v2> <v3>``, where the four values are the output of
    ``xyxy2xywhn`` for the box, clipped to the image and normalised by its width/height.

    Args:
        df: Annotation frame with an ``image_id`` column and a ``bbox`` column holding
            strings of the form ``"[x, y, w, h]"`` (top-left corner plus size, in pixels).
        yolov8_images_path: Root directory for exported images.
        yolov8_labels_path: Root directory for exported label files.
        sample: Split sub-directory name, e.g. ``'train'`` or ``'val'``.
        source_images_path: Directory holding the source ``.jpg`` files. Defaults to the
            notebook-level ``images_path`` so existing calls keep working.

    Raises:
        RuntimeError: If a source image cannot be read by OpenCV.
    """
    if source_images_path is None:
        # Backward-compatible fallback to the notebook global used by the original version.
        source_images_path = images_path

    sample_images_dir = os.path.join(yolov8_images_path, sample)
    sample_labels_dir = os.path.join(yolov8_labels_path, sample)
    # shutil.copy / open() do not create parent directories.
    os.makedirs(sample_images_dir, exist_ok=True)
    os.makedirs(sample_labels_dir, exist_ok=True)

    # A single groupby pass replaces re-filtering the whole frame once per image;
    # sort=False keeps images in order of first appearance.
    for image_id, annotations in tqdm(df.groupby('image_id', sort=False)):
        source_image = os.path.join(source_images_path, image_id + '.jpg')

        # cv.imread signals failure by returning None instead of raising.
        image = cv.imread(source_image)
        if image is None:
            raise RuntimeError(f'Could not read image: {source_image}')
        height, width = image.shape[:2]

        target_image = os.path.join(sample_images_dir, image_id + '.jpg')
        if not os.path.exists(target_image):
            shutil.copy(source_image, target_image)

        label_file = os.path.join(sample_labels_dir, image_id + '.txt')
        with open(label_file, 'w') as f:
            for bbox_str in annotations['bbox']:
                # "[x, y, w, h]" -> floats; float() tolerates surrounding whitespace.
                left, top, box_w, box_h = (float(v) for v in bbox_str.strip('[]').split(','))
                xyxy = np.array([left, top, left + box_w, top + box_h])
                xywhn = xyxy2xywhn(xyxy, w=width, h=height, clip=True)
                # Single class dataset: every box gets class id 0.
                f.write(f'0 {xywhn[0]} {xywhn[1]} {xywhn[2]} {xywhn[3]}\n')

In [8]:
# One-off export of both splits into the YOLO directory layout.
# NOTE(review): left commented out, presumably because the export had already been run
# (the training log below reads existing label caches) — uncomment to regenerate the dataset.
# create_yolo_dataset(train_df, yolov8_images_path, yolov8_labels_path, 'train')
# create_yolo_dataset(val_df, yolov8_images_path, yolov8_labels_path, 'val')

In [9]:
# Build a detection model from the 'yolov8l.pt' checkpoint.
model = YOLO("yolov8l.pt", task='detect')

In [10]:
# Train for 3 epochs at 640 px with batch size 4.
# The dataset definition comes from 'yolo.yaml', which is not part of this notebook —
# NOTE(review): presumably it points at the exported train/val directories; confirm.
output = model.train(data='yolo.yaml', epochs=3, imgsz=640, batch=4)

New https://pypi.org/project/ultralytics/8.2.35 available 😃 Update with 'pip install -U ultralytics'
Ultralytics YOLOv8.1.14 🚀 Python-3.11.5 torch-2.3.0+cu121 CUDA:0 (NVIDIA GeForce RTX 3080 Ti, 12288MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8l.pt, data=yolo.yaml, epochs=3, time=None, patience=50, batch=4, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=Fals

2024-06-19 22:13:03.689506: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-19 22:13:03.689546: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-19 22:13:03.690015: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Overriding model.yaml nc=80 with nc=1

                   from  n    params  module                                       arguments                     
  0                  -1  1      1856  ultralytics.nn.modules.conv.Conv             [3, 64, 3, 2]                 
  1                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  2                  -1  3    279808  ultralytics.nn.modules.block.C2f             [128, 128, 3, True]           
  3                  -1  1    295424  ultralytics.nn.modules.conv.Conv             [128, 256, 3, 2]              
  4                  -1  6   2101248  ultralytics.nn.modules.block.C2f             [256, 256, 6, True]           
  5                  -1  1   1180672  ultralytics.nn.modules.conv.Conv             [256, 512, 3, 2]              
  6                  -1  6   8396800  ultralytics.nn.modules.block.C2f             [512, 512, 6, True]           
  7                  -1  1   2360320  ultralytics


Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)

[34m[1mtrain: [0mScanning /mnt/s/CV/STR_yolo/labels/train.cache... 17422 images, 0 backgrounds, 0 corrupt: 100%|██████████| 17422/17422 [00:00<?, ?it/s]

[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))



[34m[1mval: [0mScanning /mnt/s/CV/STR_yolo/labels/val.cache... 4356 images, 0 backgrounds, 0 corrupt: 100%|██████████| 4356/4356 [00:00<?, ?it/s]






Plotting labels to runs/detect/train/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 97 weight(decay=0.0), 104 weight(decay=0.0005), 103 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mruns/detect/train[0m
Starting training for 3 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size



Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)

        1/3      10.7G      1.695      1.188      1.142         64        640: 100%|██████████| 4356/4356 [07:21<00:00,  9.88it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 545/545 [00:50<00:00, 10.87it/s]


                   all       4356     201746      0.682      0.473      0.519      0.288

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        2/3       9.3G      1.575      1.062      1.093        106        640: 100%|██████████| 4356/4356 [06:55<00:00, 10.47it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 545/545 [00:40<00:00, 13.39it/s]


                   all       4356     201746      0.711      0.505      0.562      0.325

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        3/3      8.32G      1.462     0.9559      1.053         58        640: 100%|██████████| 4356/4356 [06:47<00:00, 10.69it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 545/545 [02:10<00:00,  4.17it/s]


                   all       4356     201746      0.737      0.544      0.607      0.357

3 epochs completed in 0.415 hours.
Optimizer stripped from runs/detect/train/weights/last.pt, 87.6MB
Optimizer stripped from runs/detect/train/weights/best.pt, 87.6MB

Validating runs/detect/train/weights/best.pt...
Ultralytics YOLOv8.1.14 🚀 Python-3.11.5 torch-2.3.0+cu121 CUDA:0 (NVIDIA GeForce RTX 3080 Ti, 12288MiB)
Model summary (fused): 268 layers, 43607379 parameters, 0 gradients, 164.8 GFLOPs



Plan failed with a cudnnException: CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR: cudnnFinalize Descriptor Failed cudnn_status: CUDNN_STATUS_NOT_SUPPORTED (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:919.)

                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 545/545 [01:08<00:00,  7.99it/s]


                   all       4356     201746      0.736      0.544      0.607      0.356
Speed: 0.1ms preprocess, 4.9ms inference, 0.0ms loss, 0.9ms postprocess per image
Results saved to [1mruns/detect/train[0m


In [18]:
# Run the detector on a single source image; save=True also writes the rendered
# result to disk (see "Results saved to ..." in the output below).
# The path is built with os.path.join, like every other path in this notebook.
prediction = model.predict(os.path.join(images_path, 'fdefa17d92c7a54a.jpg'), save=True)


image 1/1 /mnt/s/CV/STR/images/fdefa17d92c7a54a.jpg: 640x640 56 texts, 198.6ms
Speed: 2.8ms preprocess, 198.6ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 640)
Results saved to [1mruns/detect/train5[0m
