Install all required packages and libraries

In [None]:
!pip install ultralytics torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install opencv-python matplotlib seaborn pandas numpy Pillow
!pip install roboflow supervision
!pip install colorama

Get YOLO detector

In [None]:
# Clone the YOLOv5 repository and install the requirements it ships with.
# `%cd` changes the kernel's working directory, so the trailing `%cd ..` is what
# returns to the directory the clone was started from.
# NOTE(review): the training cells visible in this notebook use the `ultralytics`
# package rather than this checkout — confirm whether the clone is still needed.
!git clone https://github.com/ultralytics/yolov5
%cd yolov5
!pip install -r requirements.txt
%cd ..

Load datasets

In [None]:
import os

from pathlib import Path

from google.colab import drive

# Mount Google Drive; the shared dataset folders live underneath the mount point.
DRIVE_MOUNT_POINT = Path("/content/drive")
drive.mount(DRIVE_MOUNT_POINT.as_posix())

# Anchor every dataset path at the absolute mount point so the paths stay valid
# even if the kernel's working directory changes (e.g. through `%cd`).
SHARED_PATH = DRIVE_MOUNT_POINT / "MyDrive/Colab Notebooks/Shared"
HRSC_PATH = SHARED_PATH / "HRSC2016_Final_Splits"
DOTA_PATH = SHARED_PATH / "DOTA_Final_Splits"


def _list_split_subfolders(dataset_root: Path) -> list:
  """Return "<split>/<child>" names for every entry found inside each split folder.

  Plain files sitting directly in ``dataset_root`` are ignored, because calling
  ``iterdir()`` on them would raise ``NotADirectoryError``.
  """
  return [
    f"{subfolder.name}/{path.name}"
    for subfolder in dataset_root.iterdir()
    if subfolder.is_dir()
    for path in subfolder.iterdir()
  ]


print("HRSC important subfolders:", _list_split_subfolders(HRSC_PATH))
print("DOTA important subfolders:", _list_split_subfolders(DOTA_PATH))

# HRSC: images and XML annotations per split
HRSC_TRAIN_IMAGES = HRSC_PATH / "train/images"
HRSC_TRAIN_ANNOTATIONS = HRSC_PATH / "train/annotations"
HRSC_VAL_IMAGES = HRSC_PATH / "val/images"
HRSC_VAL_ANNOTATIONS = HRSC_PATH / "val/annotations"
HRSC_TEST_IMAGES = HRSC_PATH / "test/images"
HRSC_TEST_ANNOTATIONS = HRSC_PATH / "test/annotations"

# DOTA: images and the annotations stored in each split's `hbb` folder
DOTA_TRAIN_IMAGES = DOTA_PATH / "train/images"
DOTA_TRAIN_ANNOTATIONS = DOTA_PATH / "train/hbb"
DOTA_VAL_IMAGES = DOTA_PATH / "val/images"
DOTA_VAL_ANNOTATIONS = DOTA_PATH / "val/hbb"
DOTA_TEST_IMAGES = DOTA_PATH / "test/images"
DOTA_TEST_ANNOTATIONS = DOTA_PATH / "test/hbb"

Explore datasets

In [None]:
from colorama import Fore, Style

def explore_header(dataset: str, subfolder: str):
  """Print a one-line banner naming the dataset (green) and the subfolder (magenta)."""
  green_dataset = "".join([Fore.GREEN, dataset, Style.RESET_ALL])
  magenta_subfolder = "".join([Fore.MAGENTA, subfolder, Style.RESET_ALL])
  print("Exploring", green_dataset, magenta_subfolder, "folder...")

def explore(images_folder: Path, ext: str):
  """Print how many ``*.<ext>`` files the folder holds, plus up to three of their names.

  Despite the parameter name, the folder may hold any kind of file; only the
  extension decides what is counted.
  """
  label = ext.upper()
  matches = [*images_folder.glob(f"*.{ext}")]
  print(f"Number of {label} files:", len(matches))
  sample_names = [match.name for match in matches[:3]]
  print(f"{label} sample files:", sample_names)

# One row per report: (dataset, split, images folder, image extension,
#                      annotations folder, annotation extension)
EXPLORATION_PLAN = [
  ("HRSC", "train", HRSC_TRAIN_IMAGES, "bmp", HRSC_TRAIN_ANNOTATIONS, "xml"),
  ("HRSC", "val", HRSC_VAL_IMAGES, "bmp", HRSC_VAL_ANNOTATIONS, "xml"),
  ("HRSC", "test", HRSC_TEST_IMAGES, "bmp", HRSC_TEST_ANNOTATIONS, "xml"),
  ("DOTA", "train", DOTA_TRAIN_IMAGES, "png", DOTA_TRAIN_ANNOTATIONS, "txt"),
  ("DOTA", "val", DOTA_VAL_IMAGES, "png", DOTA_VAL_ANNOTATIONS, "txt"),
  ("DOTA", "test", DOTA_TEST_IMAGES, "png", DOTA_TEST_ANNOTATIONS, "txt"),
]

for position, (dataset_name, split_name, images_dir, image_ext, annotations_dir, annotation_ext) in enumerate(EXPLORATION_PLAN):
  if position > 0:
    print()  # blank separator line between consecutive reports
  explore_header(dataset_name, split_name)
  explore(images_dir, image_ext)
  explore(annotations_dir, annotation_ext)


Match images with annotations

In [None]:
class Dataset:
  """Images and annotations of one split, restricted to files that have a partner.

  An image and an annotation belong together when their file stems (the file
  name without its extension) are equal. Files without a partner are dropped.

  Args:
    images_folder: Folder that is searched for images.
    image_ext: Glob pattern selecting the images, e.g. ``"*.png"``.
    annotations_folder: Folder that is searched for annotations.
    annotation_ext: Glob pattern selecting the annotations, e.g. ``"*.xml"``.

  Attributes:
    images_folder: The folder passed as ``images_folder``.
    annotations_folder: The folder passed as ``annotations_folder``.
    ids: Set of stems present in both folders.
    images: Matched image paths, sorted by stem.
    annotations: Matched annotation paths, sorted by stem.
  """

  def __init__(self, images_folder: Path, image_ext: str, annotations_folder: Path, annotation_ext: str):
    self.images_folder = images_folder
    self.annotations_folder = annotations_folder

    image_paths = list(images_folder.glob(image_ext))
    annotation_paths = list(annotations_folder.glob(annotation_ext))

    # Only stems found on both sides form a usable image/annotation pair.
    self.ids = {path.stem for path in image_paths} & {path.stem for path in annotation_paths}

    # glob() yields files in arbitrary filesystem order. Sorting both lists by
    # stem makes them reproducible and keeps images[i] next to annotations[i].
    def by_stem(path: Path):
      return (path.stem, path.name)

    self.images = sorted((path for path in image_paths if path.stem in self.ids), key=by_stem)
    self.annotations = sorted((path for path in annotation_paths if path.stem in self.ids), key=by_stem)

def _preview_ids(title: str, dataset: Dataset, limit: int = 5):
  """Print up to ``limit`` matched IDs of ``dataset`` under the given title."""
  print(f"{title} dataset sample:", list(dataset.ids)[:limit])


# HRSC: BMP images paired with XML annotations
HRSC_TRAIN_DATASET = Dataset(
  images_folder=HRSC_TRAIN_IMAGES, image_ext="*.bmp",
  annotations_folder=HRSC_TRAIN_ANNOTATIONS, annotation_ext="*.xml",
)
_preview_ids("HRSC train", HRSC_TRAIN_DATASET)
HRSC_VAL_DATASET = Dataset(
  images_folder=HRSC_VAL_IMAGES, image_ext="*.bmp",
  annotations_folder=HRSC_VAL_ANNOTATIONS, annotation_ext="*.xml",
)
_preview_ids("HRSC val", HRSC_VAL_DATASET)
HRSC_TEST_DATASET = Dataset(
  images_folder=HRSC_TEST_IMAGES, image_ext="*.bmp",
  annotations_folder=HRSC_TEST_ANNOTATIONS, annotation_ext="*.xml",
)
_preview_ids("HRSC test", HRSC_TEST_DATASET)

# DOTA: PNG images paired with TXT annotations
DOTA_TRAIN_DATASET = Dataset(
  images_folder=DOTA_TRAIN_IMAGES, image_ext="*.png",
  annotations_folder=DOTA_TRAIN_ANNOTATIONS, annotation_ext="*.txt",
)
_preview_ids("DOTA train", DOTA_TRAIN_DATASET)
DOTA_VAL_DATASET = Dataset(
  images_folder=DOTA_VAL_IMAGES, image_ext="*.png",
  annotations_folder=DOTA_VAL_ANNOTATIONS, annotation_ext="*.txt",
)
_preview_ids("DOTA val", DOTA_VAL_DATASET)
DOTA_TEST_DATASET = Dataset(
  images_folder=DOTA_TEST_IMAGES, image_ext="*.png",
  annotations_folder=DOTA_TEST_ANNOTATIONS, annotation_ext="*.txt",
)
_preview_ids("DOTA test", DOTA_TEST_DATASET)


Populate YOLO datasets

In [None]:
# Roots of the two YOLO-formatted dataset copies (relative to the working directory).
YOLO_HRSC_PATH = Path("yolo", "HRSC")
YOLO_DOTA_PATH = Path("yolo", "DOTA")

# Target layout for each dataset root:
#
#   <root>/images/{train,val,test}
#   <root>/labels/{train,val,test}

# HRSC split folders
YOLO_HRSC_TRAIN_IMAGES = YOLO_HRSC_PATH / "images" / "train"
YOLO_HRSC_TRAIN_LABELS = YOLO_HRSC_PATH / "labels" / "train"
YOLO_HRSC_VAL_IMAGES = YOLO_HRSC_PATH / "images" / "val"
YOLO_HRSC_VAL_LABELS = YOLO_HRSC_PATH / "labels" / "val"
YOLO_HRSC_TEST_IMAGES = YOLO_HRSC_PATH / "images" / "test"
YOLO_HRSC_TEST_LABELS = YOLO_HRSC_PATH / "labels" / "test"

# DOTA split folders
YOLO_DOTA_TRAIN_IMAGES = YOLO_DOTA_PATH / "images" / "train"
YOLO_DOTA_TRAIN_LABELS = YOLO_DOTA_PATH / "labels" / "train"
YOLO_DOTA_VAL_IMAGES = YOLO_DOTA_PATH / "images" / "val"
YOLO_DOTA_VAL_LABELS = YOLO_DOTA_PATH / "labels" / "val"
YOLO_DOTA_TEST_IMAGES = YOLO_DOTA_PATH / "images" / "test"
YOLO_DOTA_TEST_LABELS = YOLO_DOTA_PATH / "labels" / "test"

# Create every split folder up front; existing folders are left untouched.
for yolo_dir in (
  YOLO_HRSC_TRAIN_IMAGES, YOLO_HRSC_TRAIN_LABELS,
  YOLO_HRSC_VAL_IMAGES, YOLO_HRSC_VAL_LABELS,
  YOLO_HRSC_TEST_IMAGES, YOLO_HRSC_TEST_LABELS,
  YOLO_DOTA_TRAIN_IMAGES, YOLO_DOTA_TRAIN_LABELS,
  YOLO_DOTA_VAL_IMAGES, YOLO_DOTA_VAL_LABELS,
  YOLO_DOTA_TEST_IMAGES, YOLO_DOTA_TEST_LABELS,
):
  yolo_dir.mkdir(parents=True, exist_ok=True)

import itertools

def rsync_dataset(dataset: Dataset, images_folder: Path, labels_folder: Path):
  """Mirror a dataset's source folders into YOLO folders, keeping only matched files.

  The image and annotation source folders are copied with ``rsync`` into
  ``images_folder`` and ``labels_folder``. Afterwards every entry in the two
  destination folders whose stem is not in ``dataset.ids`` is deleted.

  Args:
    dataset: Source dataset; provides ``images_folder``, ``annotations_folder`` and ``ids``.
    images_folder: Destination folder for the images.
    labels_folder: Destination folder for the annotation files.

  Raises:
    subprocess.CalledProcessError: If ``rsync`` exits with a non-zero status.
    FileNotFoundError: If the ``rsync`` executable is not available.
  """
  import subprocess  # local import: only this helper shells out

  transfers = (
    (dataset.images_folder, images_folder),
    (dataset.annotations_folder, labels_folder),
  )
  for source, destination in transfers:
    # An argument list (no shell) keeps paths with spaces or quotes intact.
    # check=True surfaces a failed copy instead of silently continuing with a
    # partial dataset. The trailing "/" copies folder contents, not the folder.
    subprocess.run(
      ["rsync", "-av", "--progress", f"{source}/", f"{destination}/"],
      check=True,
    )

  # Drop everything that has no image/annotation partner.
  for f in itertools.chain(images_folder.iterdir(), labels_folder.iterdir()):
    if f.stem not in dataset.ids:
      f.unlink()

# One row per copy job: (description, source dataset, destination images, destination labels)
COPY_JOBS = [
  ("HRSC training", HRSC_TRAIN_DATASET, YOLO_HRSC_TRAIN_IMAGES, YOLO_HRSC_TRAIN_LABELS),
  ("HRSC validation", HRSC_VAL_DATASET, YOLO_HRSC_VAL_IMAGES, YOLO_HRSC_VAL_LABELS),
  ("HRSC testing", HRSC_TEST_DATASET, YOLO_HRSC_TEST_IMAGES, YOLO_HRSC_TEST_LABELS),
  ("DOTA training", DOTA_TRAIN_DATASET, YOLO_DOTA_TRAIN_IMAGES, YOLO_DOTA_TRAIN_LABELS),
  ("DOTA validation", DOTA_VAL_DATASET, YOLO_DOTA_VAL_IMAGES, YOLO_DOTA_VAL_LABELS),
  ("DOTA testing", DOTA_TEST_DATASET, YOLO_DOTA_TEST_IMAGES, YOLO_DOTA_TEST_LABELS),
]

for description, source_dataset, target_images, target_labels in COPY_JOBS:
  print(f"Copying {description} dataset...")
  rsync_dataset(source_dataset, target_images, target_labels)
  print("...Copied!")

Convert HRSC annotations to YOLO format

In [None]:
import xml.etree.ElementTree as ET

# Maps each raw HRSC ``Class_ID`` (int) to a contiguous YOLO class index,
# assigned in the order the IDs are first encountered during conversion.
HRSC_CLASSES = {}

def convert_hrsc_annotation_to_yolo_format(label_file: Path) -> int:
  """Convert one HRSC XML annotation file into a YOLO ``.txt`` label file.

  Every ``HRSC_Object`` becomes one line ``<class> <x_center> <y_center> <width>
  <height>``, normalized by the image size stored in the XML. Boxes are clipped
  to the image so all values stay within [0, 1]; boxes with no area left after
  clipping are skipped with a warning. The result is written next to the input
  with a ``.txt`` suffix and the XML file is removed.

  Unseen class IDs are registered in the module-level ``HRSC_CLASSES`` mapping.

  Args:
    label_file: Path of the ``.xml`` annotation. Files with any other suffix are
      ignored, which makes re-running over converted folders harmless.

  Returns:
    Number of objects written; 0 if the file was skipped or could not be processed.
  """
  if label_file.suffix.lower() != '.xml':
    return 0

  try:
    root = ET.parse(label_file.as_posix()).getroot()

    # Get image dimensions
    width = int(root.find('.//Img_SizeWidth').text)
    height = int(root.find('.//Img_SizeHeight').text)

    yolo_lines = []

    for obj in root.findall('.//HRSC_Object'):
      try:
        # Get bounding box coordinates and the raw class ID
        xmin = float(obj.find('box_xmin').text)
        ymin = float(obj.find('box_ymin').text)
        xmax = float(obj.find('box_xmax').text)
        ymax = float(obj.find('box_ymax').text)
        class_id = int(obj.find('Class_ID').text)

        # Clip to the image so that normalized values cannot leave [0, 1]
        xmin, xmax = (min(max(value, 0.0), width) for value in (xmin, xmax))
        ymin, ymax = (min(max(value, 0.0), height) for value in (ymin, ymax))
        if xmax <= xmin or ymax <= ymin:
          print(f"Warning: Skipping box without area inside the image in {label_file.as_posix()}")
          continue

        # Convert to YOLO format (normalized)
        x_center = (xmin + xmax) / 2 / width
        y_center = (ymin + ymax) / 2 / height
        bbox_width = (xmax - xmin) / width
        bbox_height = (ymax - ymin) / height

        # Register the class only once the box is known to be usable
        if class_id not in HRSC_CLASSES:
          HRSC_CLASSES[class_id] = len(HRSC_CLASSES)
        class_id_index = HRSC_CLASSES[class_id]

        yolo_lines.append(f"{class_id_index} {x_center:.6f} {y_center:.6f} {bbox_width:.6f} {bbox_height:.6f}")

      except Exception as e:
        print(f"Warning: Could not parse object in {label_file.as_posix()}: {e}")
        continue

    # Write the label straight to its final name, then drop the XML. The XML is
    # never overwritten with YOLO text, so a failure cannot leave a mislabelled file.
    renamed_path = label_file.with_suffix('.txt')
    renamed_path.write_text('\n'.join(yolo_lines))
    label_file.unlink()

    return len(yolo_lines)

  except Exception as e:
    print(f"Error processing {label_file.as_posix()}: {e}")
    return 0


# Convert every HRSC split. Only ``*.xml`` files are visited, so labels that
# were already converted to ``.txt`` are not fed to the XML parser again.
# Files are processed in sorted order because class indices are handed out on
# first encounter; a fixed order keeps the class mapping reproducible.
for split_name, labels_dir in (
  ("training", YOLO_HRSC_TRAIN_LABELS),
  ("validation", YOLO_HRSC_VAL_LABELS),
  ("testing", YOLO_HRSC_TEST_LABELS),
):
  print(f"Converting {split_name} annotations...")
  total_objects = 0
  for label_file in sorted(labels_dir.glob("*.xml")):
    total_objects += convert_hrsc_annotation_to_yolo_format(label_file)
  print(f"...Conversion complete: {total_objects} total objects")

Convert DOTA annotations to YOLO format

In [None]:
import itertools

from PIL import Image

# Maps each DOTA category name to a contiguous YOLO class index, assigned in
# the order the categories are first encountered during conversion.
DOTA_CLASSES = {}

def convert_dota_annotation_to_yolo_format(label_file: Path) -> int:
  """Rewrite one DOTA annotation file in place as a YOLO label file.

  Each object line holds eight corner coordinates followed by a category name
  (any further fields are ignored). The corners are reduced to their
  axis-aligned bounding box, clipped to the image, and written as
  ``<class> <x_center> <y_center> <width> <height>`` normalized by the size of
  the matching PNG in the sibling ``images/<split>/`` folder.

  The first two lines are treated as header lines when they do not parse as an
  object, so files with and without a header are both handled. Unseen
  categories are registered in the module-level ``DOTA_CLASSES`` mapping.

  Note: the file is overwritten, so running the conversion twice on the same
  file does not reproduce the labels; restore the original annotations first.

  Args:
    label_file: Path ``<root>/labels/<split>/<name>.txt`` of the annotation.

  Returns:
    Number of objects written; 0 if the file could not be processed.
  """
  try:
    # Get image dimensions from <root>/images/<split>/<name>.png
    img_path = label_file.parents[2] / "images" / label_file.relative_to(label_file.parents[1]).with_suffix('.png')
    with Image.open(img_path) as img:
      width, height = img.size

    yolo_lines = []

    for line_index, line in enumerate(label_file.read_text().splitlines()):
      fields = line.split()
      if not fields:
        continue  # blank line

      try:
        if len(fields) < 9:
          raise ValueError(f"expected 8 coordinates and a category, got {len(fields)} field(s)")
        coords = [float(value) for value in fields[:8]]
        category = fields[8]
      except Exception as e:
        # Lines 0 and 1 are where the header sits; stay quiet about those.
        if line_index >= 2:
          print(f"Warning: Could not parse object in {label_file.as_posix()}: {e}")
        continue

      try:
        xs, ys = coords[0::2], coords[1::2]

        # Axis-aligned box around the four corners, clipped to the image so
        # that normalized values cannot leave [0, 1]
        xmin = min(max(min(xs), 0.0), width)
        xmax = min(max(max(xs), 0.0), width)
        ymin = min(max(min(ys), 0.0), height)
        ymax = min(max(max(ys), 0.0), height)
        if xmax <= xmin or ymax <= ymin:
          print(f"Warning: Skipping box without area inside the image in {label_file.as_posix()}")
          continue

        # Convert to YOLO format (normalized)
        x_center = (xmin + xmax) / 2 / width
        y_center = (ymin + ymax) / 2 / height
        bbox_width = (xmax - xmin) / width
        bbox_height = (ymax - ymin) / height

        # Register the class only once the box is known to be usable
        if category not in DOTA_CLASSES:
          DOTA_CLASSES[category] = len(DOTA_CLASSES)
        class_id_index = DOTA_CLASSES[category]

        yolo_lines.append(f"{class_id_index} {x_center:.6f} {y_center:.6f} {bbox_width:.6f} {bbox_height:.6f}")

      except Exception as e:
        print(f"Warning: Could not parse object in {label_file.as_posix()}: {e}")
        continue

    # Write YOLO format file
    label_file.write_text('\n'.join(yolo_lines))

    return len(yolo_lines)

  except Exception as e:
    print(f"Error processing {label_file.as_posix()}: {e}")
    return 0


# Convert every DOTA split. Only ``*.txt`` label files are visited, in sorted
# order: class indices are handed out on first encounter, so a fixed order
# keeps the class mapping reproducible.
for split_name, labels_dir in (
  ("training", YOLO_DOTA_TRAIN_LABELS),
  ("validation", YOLO_DOTA_VAL_LABELS),
  ("testing", YOLO_DOTA_TEST_LABELS),
):
  print(f"Converting {split_name} annotations...")
  total_objects = 0
  for label_file in sorted(labels_dir.glob("*.txt")):
    total_objects += convert_dota_annotation_to_yolo_format(label_file)
  print(f"...Conversion complete: {total_objects} total objects")

Write the YOLO configuration file for HRSC dataset

In [None]:
import os

# Output directory
# Output directory (the folder the HRSC images/labels were populated into)
YOLO_HRSC_OUTPUT_DIR = Path('yolo/HRSC')
YOLO_HRSC_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Raw HRSC class IDs ordered by the YOLO index they were assigned during conversion
HRSC_CLASS_NAMES = [class_id for class_id, _ in sorted(HRSC_CLASSES.items(), key=lambda item: item[1])]

# The dataset root is written as an absolute path so the config does not depend
# on the working directory, nor on how the trainer resolves a relative root.
# The split entries stay relative to that root.
yaml_content = f"""
path: {YOLO_HRSC_OUTPUT_DIR.resolve().as_posix()}
train: {YOLO_HRSC_TRAIN_IMAGES.relative_to(YOLO_HRSC_OUTPUT_DIR)}
val: {YOLO_HRSC_VAL_IMAGES.relative_to(YOLO_HRSC_OUTPUT_DIR)}
test: {YOLO_HRSC_TEST_IMAGES.relative_to(YOLO_HRSC_OUTPUT_DIR)}
nc: {len(HRSC_CLASS_NAMES)}
names: {HRSC_CLASS_NAMES}
"""

# Write the YAML file
YOLO_HRSC_YAML_PATH = YOLO_HRSC_OUTPUT_DIR / 'HRSC.yaml'
YOLO_HRSC_YAML_PATH.write_text(yaml_content)

print("YAML configuration created:", YOLO_HRSC_YAML_PATH.as_posix())
print("YAML file content:\n")
print(YOLO_HRSC_YAML_PATH.read_text())

Write the YOLO configuration file for DOTA dataset

In [None]:
import os

# Output directory
# Output directory (the folder the DOTA images/labels were populated into)
YOLO_DOTA_OUTPUT_DIR = Path('yolo/DOTA')
YOLO_DOTA_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# DOTA category names ordered by the YOLO index they were assigned during conversion
DOTA_CLASS_NAMES = [category for category, _ in sorted(DOTA_CLASSES.items(), key=lambda item: item[1])]

# The dataset root is written as an absolute path so the config does not depend
# on the working directory, nor on how the trainer resolves a relative root.
# The split entries stay relative to that root.
yaml_content = f"""
path: {YOLO_DOTA_OUTPUT_DIR.resolve().as_posix()}
train: {YOLO_DOTA_TRAIN_IMAGES.relative_to(YOLO_DOTA_OUTPUT_DIR)}
val: {YOLO_DOTA_VAL_IMAGES.relative_to(YOLO_DOTA_OUTPUT_DIR)}
test: {YOLO_DOTA_TEST_IMAGES.relative_to(YOLO_DOTA_OUTPUT_DIR)}
nc: {len(DOTA_CLASS_NAMES)}
names: {DOTA_CLASS_NAMES}
"""

# Write the YAML file
YOLO_DOTA_YAML_PATH = YOLO_DOTA_OUTPUT_DIR / 'DOTA.yaml'
YOLO_DOTA_YAML_PATH.write_text(yaml_content)

print("YAML configuration created:", YOLO_DOTA_YAML_PATH.as_posix())
print("YAML file content:\n")
print(YOLO_DOTA_YAML_PATH.read_text())

Start baseline YOLOv5 training (HRSC)

In [None]:
from ultralytics import YOLO

# Load pretrained model
model = YOLO('yolov5s.pt')
print("✅ YOLOv5 model loaded")

# Run training on the HRSC config; outputs go to baseline_results/yolov5s_hrsc2016_baseline
results = model.train(
  data=YOLO_HRSC_YAML_PATH.as_posix(),
  epochs=50,
  imgsz=640,
  batch=16,
  patience=10,
  save=True,
  verbose=True,
  project='baseline_results',
  name='yolov5s_hrsc2016_baseline'
)

# This line is only reached after train() has returned, so report completion rather than start.
print("✅ Baseline training completed successfully!")

Start baseline YOLOv5 training (DOTA)

In [None]:
from ultralytics import YOLO

# Load pretrained model
model = YOLO('yolov5s.pt')
print("✅ YOLOv5 model loaded")

# Run training on the DOTA config; outputs go to baseline_results/yolov5s_dota_baseline
results = model.train(
  data=YOLO_DOTA_YAML_PATH.as_posix(),
  epochs=50,
  imgsz=640,
  batch=16,
  patience=10,
  save=True,
  verbose=True,
  project='baseline_results',
  name='yolov5s_dota_baseline'
)

# This line is only reached after train() has returned, so report completion rather than start.
print("✅ Baseline training completed successfully!")