# Import Libraries

In [1]:
import os
import shutil
import random
from tqdm.notebook import tqdm

In [2]:
train_path_img = "./yolo_data_billboards/images/train/"
train_path_label = "./yolo_data_billboards/labels/train/"
val_path_img = "./yolo_data_billboards/images/val/"
val_path_label = "./yolo_data_billboards/labels/val/"
test_path = "./yolo_data_billboards/test"

In [5]:
'''
Split the dataset into train and test and creates the train.txt and test.tx with
the respective path of the images in each folder
'''

def train_test_split(path, neg_path=None, split=0.2):
    print("------ PROCESS STARTED -------")

    files = list(set([name[:-4] for name in os.listdir(path)]))  ## removing duplicate names i.e. counting only number of images

    print(f"--- This folder has a total number of {len(files)} images---")
    random.seed(42)
    random.shuffle(files)

    test_size = int(len(files) * split)
    train_size = len(files) - test_size

    ## creating required directories
    os.makedirs(train_path_img, exist_ok=True)
    os.makedirs(train_path_label, exist_ok=True)
    os.makedirs(val_path_img, exist_ok=True)
    os.makedirs(val_path_label, exist_ok=True)

    ### ----------- copying images to train folder
    for filex in (files[:train_size]):
        if filex == 'classes':
            continue
        shutil.copy2(path + filex + '.jpg', f"{train_path_img}/" + filex + '.jpg')
        shutil.copy2(path + filex + '.txt', f"{train_path_label}/" + filex + '.txt')

    print(f"------ Training data created with 80% split {len(files[:train_size])} images -------")

    if neg_path:
        neg_images = list(set([name[:-4] for name in os.listdir(neg_path)]))  ## removing duplicate names i.e. counting only number of images
        for filex in (neg_images):
            shutil.copy2(neg_path + filex + ".jpg", f"{train_path_img}/" + filex + '.jpg')

        print(f"------ Total  {len(neg_images)} negative images added to the training data -------")

        print(f"------ TOTAL Training data created with {len(files[:train_size]) + len(neg_images)} images -------")

    ### copytin images to validation folder
    for filex in (files[train_size:]):
        if filex == 'classes':
            continue
        # print("running")
        shutil.copy2(path + filex + '.jpg', f"{val_path_img}/" + filex + '.jpg')
        shutil.copy2(path + filex + '.txt', f"{val_path_label}/" + filex + '.txt')

    print(f"------ Testing data created with a total of {len(files[train_size:])} images ----------")

    print("------ TASK COMPLETED -------")

## spliting the data into train-test and creating train.txt and test.txt files
# train_test_split('/content/drive/MyDrive/custom_notebooks/yolo_data/')

### for label_tag
train_test_split('billboard/Images_and_Labels/') ### without negative images
# train_test_split('./data/','./negative_images/') ### if you want to feed negative images
# Comment out IProgress lines
   

------ PROCESS STARTED -------
--- This folder has a total number of 3399 images---
------ Training data created with 80% split 2720 images -------
------ Testing data created with a total of 679 images ----------
------ TASK COMPLETED -------


# Import Ultralytics

In [6]:
import ultralytics
from ultralytics import YOLO
ultralytics.checks()

Ultralytics YOLOv8.0.108  Python-3.11.3 torch-2.0.1+cpu CPU
Setup complete  (8 CPUs, 15.8 GB RAM, 560.1/806.4 GB disk)


In [7]:
model = YOLO('yolov8x.pt')


Downloading https:\github.com\ultralytics\assets\releases\download\v0.0.0\yolov8x.pt to yolov8x.pt...
100%|██████████| 131M/131M [00:26<00:00, 5.23MB/s] 


In [8]:
model.train(data = 'yolo_data_billboards/dataset.yaml',epochs = 100, imgsz= 640)

New https://pypi.org/project/ultralytics/8.0.112 available  Update with 'pip install -U ultralytics'
Ultralytics YOLOv8.0.108  Python-3.11.3 torch-2.0.1+cpu CPU
[34m[1myolo\engine\trainer: [0mtask=detect, mode=train, model=yolov8x.pt, data=yolo_data_billboards/dataset.yaml, epochs=100, patience=50, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=None, exist_ok=False, pretrained=False, optimizer=SGD, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=0, resume=False, amp=True, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_stride=1, line_width=None, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, boxes=True, for