# Install Library

In [1]:
!pip install ultralytics -q

In [2]:
import os,os.path as osp
from glob import glob
from tqdm import tqdm
import shutil
import random

import cv2
import numpy as np

from ultralytics import YOLO
from ultralytics import settings

import yaml
from sklearn.model_selection import train_test_split

# Set the Ultralytics "wandb" setting to False so training runs without the
# Weights & Biases integration (no W&B login is expected in this notebook).
settings.update({"wandb": False})

# Data preprocessing

In [3]:
dataset_dir = '/kaggle/input/vehicle-detection/TrafficPublic'
train_dir = dataset_dir + '/train'
test_dir = dataset_dir + '/test'
save_dir = '/kaggle/working/yolov8'
val_ratio = 0.2
split_seed = 42  # fixed so the train/val split is identical on every Restart & Run All
classes_filename = 'classes.txt'


def copy_split(ann_files, split_name):
    """Copy annotation files and their same-named .jpg images into one dataset split.

    Files are written to ``<save_dir>/images/<split_name>`` and
    ``<save_dir>/labels/<split_name>``; both folders are created if missing.
    An annotation whose ``.jpg`` does not exist next to it is skipped.

    Args:
        ann_files: iterable of paths to ``.txt`` annotation files.
        split_name: name of the split sub-folder, e.g. ``'train'`` or ``'val'``.

    Returns:
        The number of image/label pairs that were copied.
    """
    img_out_dir = osp.join(save_dir, 'images', split_name)
    label_out_dir = osp.join(save_dir, 'labels', split_name)
    os.makedirs(img_out_dir, exist_ok=True)
    os.makedirs(label_out_dir, exist_ok=True)

    copied = 0
    for ann_path in tqdm(ann_files):
        stem = osp.splitext(ann_path)[0]
        img_path = stem + '.jpg'
        if not osp.exists(img_path):
            continue
        filename = osp.basename(stem)
        shutil.copy(img_path, osp.join(img_out_dir, filename + '.jpg'))
        shutil.copy(ann_path, osp.join(label_out_dir, filename + '.txt'))
        copied += 1
    return copied


# List annotation files. classes.txt lives in the same folder and matches '*.txt',
# but it is the class-name list, not an annotation, so keep it out of the split.
# Sorting makes the input order (and therefore the seeded split) deterministic.
ann_paths = sorted(
    path for path in glob(osp.join(train_dir, '*.txt'))
    if osp.basename(path) != classes_filename
)
ann_train, ann_val = train_test_split(
    ann_paths, test_size=val_ratio, random_state=split_seed
)

# Copy train images and labels folder
print('Copy images and labels in train folder')
copy_split(ann_train, 'train')

# Copy val images and labels folder
print('Copy images and labels in val folder')
copy_split(ann_val, 'val')

# Create .yaml yolo format
print('Create config file dataset.yaml')

# One class name per line; the line position is the class id. Stripping the whole
# text first avoids a phantom empty class when the file ends with a newline.
with open(osp.join(train_dir, classes_filename), 'r') as class_file:
    classes_list = [label.strip() for label in class_file.read().strip().splitlines()]

data = {
    "path": save_dir,
    "train": save_dir + '/' + 'images/train',
    "val": save_dir + '/' + 'images/val',
    # id -> name mapping, so the class names do not carry an "N: " prefix.
    "names": dict(enumerate(classes_list)),
}

# Write next to the dataset itself instead of relying on the current working directory.
with open(osp.join(save_dir, 'dataset.yaml'), 'w') as outfile:
    yaml.dump(data, outfile, default_flow_style=False, sort_keys=False)

Copy images and labels in train folder


100%|██████████| 1459/1459 [00:11<00:00, 129.02it/s]


Copy images and labels in val folder


100%|██████████| 365/365 [00:05<00:00, 71.75it/s] 

Create config file dataset.yaml





# Training

In [None]:
# Start from the pretrained YOLOv8-large checkpoint and fine-tune it on the
# dataset described by yolov8/dataset.yaml; run artifacts go under detect/train*.
model = YOLO('yolov8l.pt')
results = model.train(
    data='yolov8/dataset.yaml',
    project='detect',
    name='train',
    epochs=10,
)

Downloading https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8l.pt to 'yolov8l.pt'...


100%|██████████| 83.7M/83.7M [00:01<00:00, 46.3MB/s]


Ultralytics YOLOv8.2.78 🚀 Python-3.10.13 torch-2.1.2 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8l.pt, data=yolov8/dataset.yaml, epochs=10, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=detect, name=train, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, sho

100%|██████████| 755k/755k [00:00<00:00, 4.02MB/s]
2024-08-16 14:55:02,720	INFO util.py:124 -- Outdated packages:
  ipywidgets==7.7.1 found, needs ipywidgets>=8
Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
2024-08-16 14:55:03,828	INFO util.py:124 -- Outdated packages:
  ipywidgets==7.7.1 found, needs ipywidgets>=8
Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


Overriding model.yaml nc=80 with nc=5

                   from  n    params  module                                       arguments                     
  0                  -1  1      1856  ultralytics.nn.modules.conv.Conv             [3, 64, 3, 2]                 
  1                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  2                  -1  3    279808  ultralytics.nn.modules.block.C2f             [128, 128, 3, True]           
  3                  -1  1    295424  ultralytics.nn.modules.conv.Conv             [128, 256, 3, 2]              
  4                  -1  6   2101248  ultralytics.nn.modules.block.C2f             [256, 256, 6, True]           
  5                  -1  1   1180672  ultralytics.nn.modules.conv.Conv             [256, 512, 3, 2]              
  6                  -1  6   8396800  ultralytics.nn.modules.block.C2f             [512, 512, 6, True]           
  7                  -1  1   2360320  ultralytics

100%|██████████| 6.25M/6.25M [00:00<00:00, 20.1MB/s]


[34m[1mAMP: [0mchecks passed ✅


[34m[1mtrain: [0mScanning /kaggle/working/yolov8/labels/train... 1458 images, 1 backgrounds, 0 corrupt: 100%|██████████| 1458/1458 [00:01<00:00, 1239.79it/s]


[34m[1mtrain: [0mNew cache created: /kaggle/working/yolov8/labels/train.cache
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))


  self.pid = os.fork()
[34m[1mval: [0mScanning /kaggle/working/yolov8/labels/val... 365 images, 0 backgrounds, 0 corrupt: 100%|██████████| 365/365 [00:00<00:00, 1156.30it/s]

[34m[1mval: [0mNew cache created: /kaggle/working/yolov8/labels/val.cache





Plotting labels to detect/train/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001111, momentum=0.9) with parameter groups 97 weight(decay=0.0), 104 weight(decay=0.0005), 103 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 2 dataloader workers
Logging results to [1mdetect/train[0m
Starting training for 10 epochs...
Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))


  self.pid = os.fork()
  self.pid = os.fork()



      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/10      11.4G      1.999      2.414      1.721         91        640:  30%|███       | 28/92 [00:23<00:49,  1.30it/s]

# Inference

In [None]:
answer_list = []

# Use the weights of the most recent training run. The run folder is detect/train on a
# fresh kernel and detect/train2, train3, ... on re-runs, so resolve it instead of
# hard-coding one particular run number.
weight_candidates = glob('/kaggle/working/detect/train*/weights/best.pt')
if not weight_candidates:
    raise FileNotFoundError(
        'No best.pt found under /kaggle/working/detect/train*/weights; '
        'run the training cell first.'
    )
model = YOLO(max(weight_candidates, key=osp.getmtime))

# Sorted so the rows of the resulting table come out in a deterministic order.
for file in tqdm(sorted(glob(test_dir + '/*'))):

    bbox_list = []
    cls_list = []
    scores_list = []

    # Predict on an image
    results = model(file, verbose=False)
    # Process results list: convert all boxes of a result at once instead of box by box
    for result in results:
        boxes = result.boxes
        bbox_list.extend(boxes.xyxy.cpu().numpy().tolist())  # [x1, y1, x2, y2] per box
        cls_list.extend(int(cls_id) for cls_id in boxes.cls.cpu().numpy().tolist())
        scores_list.extend(boxes.conf.cpu().numpy().tolist())

    # Built after the loop so every image gets exactly one row, even with no results.
    answer_list.append((osp.basename(file), bbox_list, cls_list, scores_list))

In [None]:
import pandas as pd

# One row per test image, columns in the same order as the tuples collected in
# answer_list; the CSV is written without the DataFrame index.
column_name = ['id', 'boxes', 'labels', 'scores']
xml_df = pd.DataFrame(data=answer_list, columns=column_name)
xml_df.to_csv(
    '/kaggle/working/submission-pretrain_30_percent_finetune.csv',
    index=False,
)