# Install Library

In [None]:
!pip install ultralytics -q

In [None]:
import os,os.path as osp
from glob import glob
from tqdm import tqdm
import shutil
import random

import cv2
import numpy as np

from ultralytics import YOLO
from ultralytics import settings

import yaml
from sklearn.model_selection import train_test_split

# Switch off the "wandb" entry in the Ultralytics settings so that training
# runs do not try to use the Weights & Biases integration.
settings.update({"wandb": False})

# Data preprocessing

In [None]:
dataset_dir = '/kaggle/input/th-road-safety/TrafficHackathon'
train_dir = dataset_dir + '/train'
test_dir = dataset_dir + '/test'
save_dir = '/kaggle/working/yolov8'
val_ratio = 0.2
split_seed = 42  # fixed seed so the train/val split is the same on every run

# classes.txt sits in the same folder as the annotations and also matches
# '*.txt', but it holds the class names, not bounding boxes.
classes_path = osp.join(train_dir, 'classes.txt')


def _copy_split(ann_files, split):
    """Copy annotations and their images into the `split` folders of save_dir.

    For every annotation path the image is expected next to it, with the same
    name and a '.jpg' extension. Pairs whose image is missing are skipped.

    Returns a (copied, skipped) tuple of counts.
    """
    image_out = osp.join(save_dir, 'images', split)
    label_out = osp.join(save_dir, 'labels', split)
    copied = skipped = 0
    for ann_path in tqdm(ann_files):
        stem = osp.splitext(ann_path)[0]
        img_path = stem + '.jpg'
        if not osp.exists(img_path):
            skipped += 1
            continue
        filename = osp.basename(stem)
        shutil.copy(img_path, osp.join(image_out, filename + '.jpg'))
        shutil.copy(ann_path, osp.join(label_out, filename + '.txt'))
        copied += 1
    return copied, skipped


# Create folder dataset for yolov8 (makedirs also creates the parent folders)
for kind in ('images', 'labels'):
    for split in ('train', 'val'):
        os.makedirs(osp.join(save_dir, kind, split), exist_ok=True)

# List annotation files, leaving out classes.txt; sorted so that the seeded
# split below does not depend on the order glob happens to return.
ann_paths = sorted(
    path
    for path in glob(osp.join(train_dir, '*.txt'))
    if osp.basename(path) != 'classes.txt'
)
if not ann_paths:
    raise FileNotFoundError('No annotation (.txt) files found in ' + train_dir)

ann_train, ann_val = train_test_split(
    ann_paths, test_size=val_ratio, random_state=split_seed
)

# Copy train images and labels folder
print('Copy images and labels in train folder')
copied, skipped = _copy_split(ann_train, 'train')
if skipped:
    print('Skipped', skipped, 'train annotations without a matching .jpg')

# Copy val images and labels folder
print('Copy images and labels in val folder')
copied, skipped = _copy_split(ann_val, 'val')
if skipped:
    print('Skipped', skipped, 'val annotations without a matching .jpg')

# Create .yaml yolo format
print('Create config file dataset.yaml')
with open(classes_path, 'r', encoding='utf-8') as classes_file:
    classes_list = [label.strip() for label in classes_file.read().splitlines()]
# A trailing newline must not become an extra, empty class. Only trailing
# blanks are dropped so the line number -> class index mapping is untouched.
while classes_list and not classes_list[-1]:
    classes_list.pop()

data = {
    "path": save_dir,
    "train": save_dir + '/' + 'images/train',
    "val": save_dir + '/' + 'images/val',
    # Mapping of class index -> class name; a list of "0: name" strings would
    # make the literal text "0: name" the class name.
    "names": dict(enumerate(classes_list)),
}

# Relative path: this is the same folder as save_dir only while the working
# directory is /kaggle/working.
with open('yolov8/dataset.yaml', 'w') as outfile:
    yaml.dump(data, outfile, default_flow_style=False, sort_keys=False)

# Training

In [None]:
# Load a pretrained model (recommended for training) and fine-tune it on the
# dataset described by yolov8/dataset.yaml.
model = YOLO('yolov10l.pt')
results = model.train(
    data='yolov8/dataset.yaml',
    project='detect',
    name='train',
    # Reuse detect/train on a re-run instead of letting the run folder be
    # auto-incremented (train2, train3, ...): the inference step loads the
    # weights from the fixed path detect/train/weights/best.pt.
    exist_ok=True,
    epochs=200,
    patience=50,
)

# Inference

In [None]:
answer_list = []
# Use the model: load the best checkpoint of the training run
model = YOLO('/kaggle/working/detect/train/weights/best.pt')

# Sorted so the rows of the submission come out in a stable order.
for file in tqdm(sorted(glob(test_dir + '/*'))):

    bbox_list = []
    cls_list = []
    scores_list = []

    # Predict on an image
    results = model(file, verbose=False)
    # Process results list: convert all boxes of a result in one go rather
    # than one tensor -> numpy -> list round trip per box.
    for result in results:
        boxes = result.boxes
        bbox_list.extend(boxes.xyxy.cpu().numpy().tolist())  # [x1, y1, x2, y2] per box
        cls_list.extend(int(class_id) for class_id in boxes.cls.cpu().numpy().tolist())
        scores_list.extend(boxes.conf.cpu().numpy().tolist())

    # Built after the loop so every image gets its own row, even when the
    # model returns nothing for it (the lists are then simply empty).
    value = (osp.basename(file), bbox_list, cls_list, scores_list)
    answer_list.append(value)

In [None]:
import pandas as pd

# One row per test image, in the order the predictions were collected:
# file name, list of boxes, list of class ids, list of confidence scores.
column_name = ['id', 'boxes', 'labels', 'scores']
xml_df = pd.DataFrame(data=answer_list, columns=column_name)

# Write the table without the DataFrame index column.
xml_df.to_csv(
    '/kaggle/working/submission.csv',
    index=None,
)