# YOLOv8 Fine-Tuning for Self-Driving Cars

## Description
This notebook contains the data pre-processing, training, and experiment-monitoring code used to fine-tune a YOLOv8 object-detection model on a self-driving-car dataset.


In [None]:
# Install the experiment-tracking (comet_ml) and YOLOv8 (ultralytics) packages into the kernel's environment.
# NOTE(review): neither package is version-pinned and ultralytics is always upgraded, so a
# re-run may pick up different versions than the ones recorded in the outputs — consider pinning.
%pip install comet_ml
%pip install --upgrade ultralytics


In [None]:
import os
import random
import shutil
import comet_ml
import ultralytics

from google.colab import drive
from comet_ml import Experiment
from ultralytics import YOLO

# Print the installed Ultralytics / Python / torch versions and a hardware summary (GPU, CPUs, RAM, disk).
ultralytics.checks()

Ultralytics YOLOv8.0.146 🚀 Python-3.10.12 torch-2.0.1+cu118 CUDA:0 (Tesla V100-SXM2-16GB, 16151MiB)
Setup complete ✅ (4 CPUs, 25.5 GB RAM, 24.5/166.8 GB disk)


In [None]:
drive.mount('/content/drive')

!cp 'drive/MyDrive/colab_resources/Self Driving Car.v3-fixed-small.yolov8.zip' /content
!unzip 'Self Driving Car.v3-fixed-small.yolov8.zip'
!mv export dataset


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
 extracting: export/labels/1478900100165707353_jpg.rf.02dae94790fe0a5fd8145e6582060381.txt  
 ... 
  inflating: data.yaml               


In [None]:
# Show the GPU attached to this runtime (model, driver/CUDA version, memory usage).
!nvidia-smi

Tue Aug  1 20:48:10 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P0    23W / 300W |      2MiB / 16384MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
def copy_files(files, dataset_dir, output_dir):
    """
    Copies image files and their label files from the dataset directory to the output directory.

    Images are read from ``<dataset_dir>/images`` and labels from ``<dataset_dir>/labels``;
    they are written to ``<output_dir>/images`` and ``<output_dir>/labels`` (both are
    created if missing, even when ``files`` is empty). The label of an image is the file
    with the same base name and a ``.txt`` extension.

    An image that has no label file is still copied, without a label, instead of
    aborting the whole copy; a summary line is printed if this happened.

    Parameters:
    files (list): List of image file names (names only, not paths).
    dataset_dir (str): Path to the dataset directory.
    output_dir (str): Path to the output directory.
    """
    # The destination folders are the same for every file, so create them once up front
    # rather than on every loop iteration.
    image_dir = os.path.join(output_dir, 'images')
    label_dir = os.path.join(output_dir, 'labels')
    os.makedirs(image_dir, exist_ok=True)
    os.makedirs(label_dir, exist_ok=True)

    missing_labels = 0
    for file in files:
        # splitext only strips the final extension, so dotted names such as
        # 'x_jpg.rf.<hash>.jpg' map to 'x_jpg.rf.<hash>.txt'.
        label_name = f'{os.path.splitext(file)[0]}.txt'

        shutil.copyfile(os.path.join(dataset_dir, 'images', file),
                        os.path.join(image_dir, file))

        label_src = os.path.join(dataset_dir, 'labels', label_name)
        if os.path.isfile(label_src):
            shutil.copyfile(label_src, os.path.join(label_dir, label_name))
        else:
            # No annotation file for this image: keep the image, skip the label.
            missing_labels += 1

    if missing_labels:
        print(f"Warning: {missing_labels} image(s) had no label file and were copied without labels.")

def split_dataset(dataset_dir, output_dir, train_ratio=0.7, valid_ratio=0.2, test_ratio=0.1, seed=None):
    """
    Splits the dataset into training, validation, and test sets according to the specified ratios.
    The image and label files for each set are then copied to ``<output_dir>/train``,
    ``<output_dir>/valid`` and ``<output_dir>/test``.

    The test and validation sets each receive ``int(ratio * number_of_images)`` files; the
    training set receives every remaining file, so rounding never drops an image.
    ``train_ratio`` is therefore informational: a note is printed when the three ratios
    do not sum to 1.

    Parameters:
    dataset_dir (str): Path to the dataset directory (must contain an 'images' folder).
    output_dir (str): Path to the output directory.
    train_ratio (float, optional): Proportion of the dataset to be used for training. Default is 0.7.
    valid_ratio (float, optional): Proportion of the dataset to be used for validation. Default is 0.2.
    test_ratio (float, optional): Proportion of the dataset to be used for testing. Default is 0.1.
    seed (int, optional): Seed for the shuffle. When given, the same dataset always yields the
        same split. Default is None, which shuffles with the global ``random`` state.

    Raises:
    ValueError: If valid_ratio or test_ratio is negative, or if together they exceed 1
        (which would otherwise yield overlapping train/test sets).
    """
    tolerance = 1e-9
    if valid_ratio < 0 or test_ratio < 0 or valid_ratio + test_ratio > 1 + tolerance:
        raise ValueError(
            f"valid_ratio ({valid_ratio}) and test_ratio ({test_ratio}) must be non-negative "
            "and sum to at most 1."
        )
    if abs(train_ratio + valid_ratio + test_ratio - 1) > 1e-6:
        print(f"Note: ratios do not sum to 1; the training set receives the remaining "
              f"{1 - valid_ratio - test_ratio:.2f} of the files (train_ratio={train_ratio} is ignored).")

    print("Starting to split dataset.")

    # Create output directories
    train_dir = os.path.join(output_dir, 'train')
    valid_dir = os.path.join(output_dir, 'valid')
    test_dir = os.path.join(output_dir, 'test')
    for split_dir in (train_dir, valid_dir, test_dir):
        # Files left over from an earlier, differently-shuffled run would stay in place and
        # could put the same image into two splits, so make that visible.
        if os.path.isdir(split_dir) and os.listdir(split_dir):
            print(f"Warning: '{split_dir}' is not empty; files from a previous split may leak between sets.")
        os.makedirs(split_dir, exist_ok=True)

    # Get the list of image files in the dataset directory. os.listdir order is arbitrary,
    # so sort first to make a seeded shuffle reproducible across machines and runs.
    image_files = sorted(
        f for f in os.listdir(os.path.join(dataset_dir, 'images')) if f.lower().endswith('.jpg')
    )
    print(f"Found {len(image_files)} image files in the dataset directory.")

    # Shuffle the image files randomly
    if seed is None:
        random.shuffle(image_files)
    else:
        # A private generator keeps the global random state untouched.
        random.Random(seed).shuffle(image_files)

    # Calculate the number of files for each set
    num_files = len(image_files)
    num_test = int(test_ratio * num_files)
    num_valid = int(valid_ratio * num_files)
    num_train = num_files - num_test - num_valid

    print(f"Number of files for train: {num_train}, validation: {num_valid}, test: {num_test}.")

    # Split the image files into train, valid, and test sets
    train_files = image_files[:num_train]
    valid_files = image_files[num_train:num_train+num_valid]
    test_files = image_files[num_train+num_valid:]

    # Copy image and label files to the respective directories
    print("Starting to copy files to train, validation, and test directories.")
    copy_files(train_files, dataset_dir, train_dir)
    copy_files(valid_files, dataset_dir, valid_dir)
    copy_files(test_files, dataset_dir, test_dir)

    print('Dataset split completed successfully.')

In [None]:
# Split the extracted 'dataset' folder into train/valid/test sets under 'processed_dataset',
# using the function's default ratios.
split_dataset('dataset','processed_dataset')

Starting to split dataset.
Found 29800 image files in the dataset directory.
Number of files for train: 20860, validation: 5960, test: 2980.
Starting to copy files to train, validation, and test directories.
Dataset split completed successfully.


In [None]:
comet_ml.init()

# Create the Comet experiment that tracks this run.
# The API key is deliberately not hard-coded: it is read from the COMET_API_KEY environment
# variable. When that is unset, None is passed and comet_ml falls back to its own
# configuration (e.g. the key set up by comet_ml.init() above).
# NOTE: any API key that was ever committed to this notebook should be revoked and regenerated.
experiment = Experiment(
    api_key=os.environ.get("COMET_API_KEY"),
    project_name="yolov8-finetuning",
    workspace="syphinx",
)

# Retrieve pre-trained yolov8 object detection model
model = YOLO('yolov8n.pt')


[1;38;5;39mCOMET INFO:[0m Couldn't find a Git repository in '/content' nor in any parent directory. Set `COMET_GIT_DIRECTORY` if your Git Repository is elsewhere.
[1;38;5;39mCOMET INFO:[0m Experiment is live on comet.com https://www.comet.com/syphinx/yolov8-finetuning/626748230dcb4cfb96bbb775dc76edc0

Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt to 'yolov8n.pt'...
100%|██████████| 6.23M/6.23M [00:00<00:00, 52.7MB/s]


In [None]:
# Fine-tune YOLOv8n on the dataset described by data.yaml for up to 100 epochs at 512 px input.
# Batch size is not passed here; the trainer log reports the default batch=16.
# patience=30 is the early-stopping patience, save_period=10 the checkpoint interval (in epochs),
# and device=0 selects the first CUDA GPU.
model.train(data='data.yaml', epochs=100, patience=30, imgsz=512, save_period=10, device=0)

Ultralytics YOLOv8.0.146 🚀 Python-3.10.12 torch-2.0.1+cu118 CUDA:0 (Tesla V100-SXM2-16GB, 16151MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=data.yaml, epochs=100, patience=30, batch=16, imgsz=512, save=True, save_period=10, cache=False, device=0, workers=8, project=None, name=None, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_stride=1, line_width=None, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, boxes=True, format=torchscript, keras=False, optimize=False, int8=False, dynam

In [3]:
# Benchmark the weights in best.pt across export formats (size, mAP50-95, inference time).
# NOTE(review): this import sits mid-notebook; consider moving it to the imports cell.
from ultralytics.utils.benchmarks import benchmark


# NOTE(review): the recorded output shows mAP50-95 = 0.0 for every format. Training used
# data='data.yaml' at imgsz=512, but this benchmark evaluates on coco8.yaml at imgsz=640 —
# presumably a class/label mismatch; confirm, and consider benchmarking with the training
# dataset config and image size so the accuracy column is meaningful.
# 'best.pt' is resolved relative to the working directory — TODO confirm it is the checkpoint
# produced by the training run above.
benchmark(model='best.pt', data='coco8.yaml', imgsz=640, half=False, device=0)


Ultralytics YOLOv8.0.147 🚀 Python-3.10.12 torch-2.0.1+cu118 CUDA:0 (Tesla T4, 15102MiB)
Setup complete ✅ (2 CPUs, 12.7 GB RAM, 27.9/166.8 GB disk)

Benchmarks complete for best.pt on coco8.yaml at imgsz=640 (307.41s)
                   Format Status❔  Size (MB)  metrics/mAP50-95(B)  Inference time (ms/im)
0                 PyTorch       ✅        5.9                  0.0                   38.96
1             TorchScript       ✅       11.9                  0.0                    7.25
2                    ONNX       ✅       11.7                  0.0                  148.82
3                OpenVINO       ❌        0.0                  NaN                     NaN
4                TensorRT       ✅       14.7                  0.0                    6.15
5                  CoreML       ❌        0.0                  NaN                     NaN
6   TensorFlow SavedModel       ✅       29.3                  0.0                   74.68
7     TensorFlow GraphDef       ✅       11.7                  0

Unnamed: 0,Format,Status❔,Size (MB),metrics/mAP50-95(B),Inference time (ms/im)
0,PyTorch,✅,5.9,0.0,38.96
1,TorchScript,✅,11.9,0.0,7.25
2,ONNX,✅,11.7,0.0,148.82
3,OpenVINO,❌,0.0,,
4,TensorRT,✅,14.7,0.0,6.15
5,CoreML,❌,0.0,,
6,TensorFlow SavedModel,✅,29.3,0.0,74.68
7,TensorFlow GraphDef,✅,11.7,0.0,132.94
8,TensorFlow Lite,❌,0.0,,
9,TensorFlow Edge TPU,❌,0.0,,
