# Finetuning

The purpose of this notebook is to fine-tune YOLOv5 on our custom drone dataset.

This involves the following steps:
- create folder structure for data
- split train/val/test
- finetune and visualize training
- test

In [1]:
# dependencies

!git clone https://github.com/ultralytics/yolov5  # clone repository
!cd yolov5
!pwd
!pip install -r yolov5/requirements.txt  # install dependencies

fatal: destination path 'yolov5' already exists and is not an empty directory.


[38;5;57m[1m⚡️ Tip[0m	Check organization access: [4mhttps://github.com/settings/connections/applications/c7457225b242a94d60c6[0m

/teamspace/studios/this_studio


In [2]:
# imports

import os
import torch

In [3]:
# download yolov5s model

# checkpoint file handed to the hub loader
model_path = "yolov5s.pt"

# load the checkpoint through the "custom" entry point of the ultralytics/yolov5
# hub repository; force_reload=True asks torch.hub for a fresh copy of the repo
hub_options = {"path": model_path, "force_reload": True}
model = torch.hub.load("ultralytics/yolov5", "custom", **hub_options)

Downloading: "https://github.com/ultralytics/yolov5/zipball/master" to /home/zeus/.cache/torch/hub/master.zip
YOLOv5 🚀 2024-9-10 Python-3.10.10 torch-2.2.1+cu121 CUDA:0 (Tesla T4, 15102MiB)

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 


In [4]:
# split into train/val/test images and corresponding labels from dataset/raw/ and put them into dataset/finetune/images/train, dataset/finetune/images/val, and dataset/finetune/images/test

import os
import shutil
import random
from pathlib import Path

# Define paths
raw_image_dir = Path('dataset/raw/images')
raw_label_dir = Path('dataset/raw/labels')

train_image_dir = Path('dataset/finetune/images/train')
val_image_dir = Path('dataset/finetune/images/val')
test_image_dir = Path('dataset/finetune/images/test')

train_label_dir = Path('dataset/finetune/labels/train')
val_label_dir = Path('dataset/finetune/labels/val')
test_label_dir = Path('dataset/finetune/labels/test')

# Fixed seed so that "Restart & Run All" reproduces the same split. The copy
# step further down never clears the destination folders, so a split that
# changed between runs would leave the same frame in several splits.
SPLIT_SEED = 42


def split_files(files, train_ratio, val_ratio, seed=SPLIT_SEED):
    """Shuffle ``files`` reproducibly and cut them into train/val/test lists.

    The input is sorted before shuffling so the result does not depend on the
    order in which the filesystem happened to list the files.

    Args:
        files: Iterable of sortable items (paths or file names).
        train_ratio: Fraction of the items assigned to the training split.
        val_ratio: Fraction of the items assigned to the validation split.
        seed: Seed of the private random generator used for shuffling.

    Returns:
        Tuple ``(train, val, test)`` of lists; ``test`` receives every item
        left over after the train and val counts have been truncated to ints.
    """
    ordered = sorted(files)
    random.Random(seed).shuffle(ordered)
    train_end = int(train_ratio * len(ordered))
    val_end = train_end + int(val_ratio * len(ordered))
    return ordered[:train_end], ordered[train_end:val_end], ordered[val_end:]


# Create directories if they don't exist
for directory in [train_image_dir, val_image_dir, test_image_dir, train_label_dir, val_label_dir, test_label_dir]:
    directory.mkdir(parents=True, exist_ok=True)

# Get list of all images (the raw frames are .png; adjust the pattern for other formats)
image_files = sorted(raw_image_dir.glob('*.png'))

# Split percentages
train_ratio = 0.8
val_ratio = 0.1
test_ratio = 0.1  # the test split receives whatever is left after train and val

# Catch a typo in the ratios before any file is copied.
if abs(train_ratio + val_ratio + test_ratio - 1.0) > 1e-9:
    raise ValueError("train_ratio + val_ratio + test_ratio must add up to 1")

# Split the shuffled file list
total_images = len(image_files)
train_files, val_files, test_files = split_files(image_files, train_ratio, val_ratio)
train_count = len(train_files)
val_count = len(val_files)

# Function to copy images and their labels into a split folder
def move_files(image_list, image_dest, label_dest, label_src=None):
    """Copy each image and, when present, its label file into a split folder.

    Despite the name (kept so existing calls keep working) nothing is moved:
    files are copied with ``shutil.copy`` and the sources stay in place.

    Args:
        image_list: Iterable of ``pathlib.Path`` objects pointing at images.
        image_dest: Directory (str or Path) that receives the image copies.
        label_dest: Directory (str or Path) that receives the label copies.
        label_src: Directory searched for ``<image stem>.txt`` label files.
            Defaults to the notebook-level ``raw_label_dir``.

    Returns:
        None. Images without a label file are copied without a label.
    """
    label_root = raw_label_dir if label_src is None else Path(label_src)
    image_dest = Path(image_dest)
    label_dest = Path(label_dest)

    # Make the function usable on its own, even if the folders were not created yet.
    image_dest.mkdir(parents=True, exist_ok=True)
    label_dest.mkdir(parents=True, exist_ok=True)

    for image_file in image_list:
        # Corresponding label file (assumes same name but with .txt extension)
        label_file = label_root / f"{image_file.stem}.txt"

        # Copy the image
        shutil.copy(image_file, image_dest / image_file.name)

        # Copy the label if it exists
        if label_file.exists():
            shutil.copy(label_file, label_dest / label_file.name)

# Fill every split folder with its images and the matching labels
split_plan = (
    (train_files, train_image_dir, train_label_dir),
    (val_files, val_image_dir, val_label_dir),
    (test_files, test_image_dir, test_label_dir),
)
for split_images, split_image_dir, split_label_dir in split_plan:
    move_files(split_images, split_image_dir, split_label_dir)

# Report the size of each split
print(f"Dataset split into {len(train_files)} train, {len(val_files)} val, and {len(test_files)} test files.")

Dataset split into 219 train, 27 val, and 28 test files.


In [5]:
# Inference on images

base_path = "dataset/finetune/images/train"

# Collect the path of every .png file found in the training folder
imgs_paths = []
for entry in os.listdir(base_path):
    if entry.endswith(".png"):
        imgs_paths.append(os.path.join(base_path, entry))

# keep the first ten paths as a sample
full_paths = imgs_paths[:10]

print(full_paths)

['dataset/finetune/images/train/frame000681.png', 'dataset/finetune/images/train/frame000395.png', 'dataset/finetune/images/train/frame000341.png', 'dataset/finetune/images/train/frame000987.png', 'dataset/finetune/images/train/frame000991.png', 'dataset/finetune/images/train/frame000149.png', 'dataset/finetune/images/train/frame001038.png', 'dataset/finetune/images/train/frame000676.png', 'dataset/finetune/images/train/frame000414.png', 'dataset/finetune/images/train/frame000404.png']


In [6]:
# inference trial before fine-tuning

for img in full_paths:
    # Run inference
    try:
        results = model(img)
    except Exception as e:
        print(f"Failed to process image {img}")
        print(e)
        # Skip the display step: `results` would be undefined (first image)
        # or still hold the detections of the previous image.
        continue

    # Only report success once inference has actually finished
    print(f"Processed image {img}")

    # Display results
    print(results)

Processed image dataset/finetune/images/train/frame000681.png




image 1/1: 720x1280 3 persons, 1 car, 1 chair
Speed: 40.6ms pre-process, 73.8ms inference, 611.8ms NMS per image at shape (1, 3, 384, 640)
Processed image dataset/finetune/images/train/frame000395.png
image 1/1: 720x1280 5 persons
Speed: 28.3ms pre-process, 5.7ms inference, 1.2ms NMS per image at shape (1, 3, 384, 640)
Processed image dataset/finetune/images/train/frame000341.png
image 1/1: 720x1280 2 persons
Speed: 25.8ms pre-process, 5.9ms inference, 1.5ms NMS per image at shape (1, 3, 384, 640)
Processed image dataset/finetune/images/train/frame000987.png
image 1/1: 720x1280 1 person
Speed: 28.1ms pre-process, 6.2ms inference, 1.4ms NMS per image at shape (1, 3, 384, 640)
Processed image dataset/finetune/images/train/frame000991.png
image 1/1: 720x1280 (no detections)
Speed: 27.3ms pre-process, 8.0ms inference, 0.8ms NMS per image at shape (1, 3, 384, 640)
Processed image dataset/finetune/images/train/frame000149.png
image 1/1: 720x1280 3 persons
Speed: 23.4ms pre-process, 7.5ms inf

In [8]:
# finetuning

!pip install clearml

from clearml import Task

# Set ClearML environment variables
os.environ['CLEARML_WEB_HOST'] = 'https://app.clear.ml/'
os.environ['CLEARML_API_HOST'] = 'https://api.clear.ml'
os.environ['CLEARML_FILES_HOST'] = 'https://files.clear.ml'
os.environ['CLEARML_API_ACCESS_KEY'] = 'L94OV6F8DHTZT05E87M43SCIQBV870'
os.environ['CLEARML_API_SECRET_KEY'] = '7YZ_V87ju3V1RYJ9BQvrfruCAYrvEV65D-sRHR-eC7b-TJ0bnb5F2ckq9Al-iPEUgXI'

# Initialize ClearML task for finetuning YOLO on drones
task = Task.init(project_name='YOLOv5 Drone Detection', task_name='Finetuning YOLOv5 for Drones')

# finetune
!python yolov5/train.py --img 640 --batch 32 --epochs 150 --data yolov5/dataset.yaml --weights yolov5s.pt --cache

[34m[1mtrain: [0mweights=yolov5s.pt, cfg=, data=yolov5/dataset.yaml, hyp=yolov5/data/hyps/hyp.scratch-low.yaml, epochs=150, batch_size=32, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, evolve_population=yolov5/data/hyps, resume_evolve=None, bucket=, cache=ram, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=yolov5/runs/train, name=exp, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, seed=0, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest, ndjson_console=False, ndjson_file=False
[34m[1mgithub: [0mup to date with https://github.com/ultralytics/yolov5 ✅
YOLOv5 🚀 v7.0-365-g12b577c8 Python-3.10.10 torch-2.2.1+cu121 CUDA:0 (Tesla T4, 15102MiB)

[34m[1mhyperparameters: [0mlr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_m

In [9]:
# test: check results on yolov5/runs/detect 

!python yolov5/detect.py --weights yolov5/runs/train/exp/weights/best.pt --img 640 --conf 0.25 --source dataset/finetune/images/test

[34m[1mdetect: [0mweights=['yolov5/runs/train/exp/weights/best.pt'], source=dataset/finetune/images/test, data=yolov5/data/coco128.yaml, imgsz=[640, 640], conf_thres=0.25, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=False, save_format=0, save_csv=False, save_conf=False, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=yolov5/runs/detect, name=exp, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False, vid_stride=1
YOLOv5 🚀 v7.0-365-g12b577c8 Python-3.10.10 torch-2.2.1+cu121 CUDA:0 (Tesla T4, 15102MiB)

Fusing layers... 
Model summary: 157 layers, 7012822 parameters, 0 gradients, 15.8 GFLOPs
image 1/28 /teamspace/studios/this_studio/dataset/finetune/images/test/frame000008.png: 384x640 (no detections), 46.0ms
image 2/28 /teamspace/studios/this_studio/dataset/finetune/images/test/frame000011.png: 384x640 (no detections), 5.6ms
image 3/28 /teamspace/studios/th