In [None]:
import torch
# Sanity check of the GPU setup: first whether PyTorch can use CUDA at all,
# then how many CUDA devices it can see.
cuda_ready = torch.cuda.is_available()
gpu_count = torch.cuda.device_count()
print(cuda_ready)
print(gpu_count)

### Load dataset

Dataset from: https://captain-whu.github.io/DOTA/dataset.html
Full list of Albumentations functions in Ultralytics: https://docs.ultralytics.com/reference/data/augment/?h=albumentation#ultralytics.data.augment.BaseTransform

`convert_dota_to_yolo_obb` <br>
Converts DOTA dataset annotations to YOLO OBB (Oriented Bounding Box) format. The function processes images in the 'train' and 'val' folders of the DOTA dataset. For each image, it reads the associated label from the original labels directory and writes new labels in YOLO OBB format to a new directory.

`split_trainval` <br>
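Restructures the dataset directory and splits each image into overlapping crops at multiple scales to enrich the dataset. The original dataset directory structure (before label conversion) is: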
```
data_root/
├── images/
│   ├── train/
│   └── val/
└── labels/
    ├── train_original/
    └── val_original/
```

And the output directory structure is:

```
save_dir/
├── images/
│   ├── train/
│   └── val/
└── labels/
    ├── train/
    └── val/
```

In [None]:
from PIL import Image
from ultralytics.data.converter import convert_dota_to_yolo_obb
from ultralytics.data.split_dota import split_test, split_trainval

# Remove Pillow's upper bound on image size before any image is opened.
# NOTE(review): presumably needed because some DOTA scenes exceed Pillow's
# default pixel limit — confirm against the dataset.
Image.MAX_IMAGE_PIXELS = None

# One-time step (kept commented out): convert the original DOTA labels to
# YOLO OBB format so they are stored alongside the images.
# convert_dota_to_yolo_obb("datasets/DOTAv2.0")

# Arguments for splitting the labelled train/val sets.
trainval_split_args = {
    "data_root": "datasets/DOTAv2.0/",
    "save_dir": "datasets/DOTAv2.0/processed/",
    "rates": [0.5, 1.0, 1.5],  # produce a version/crop set of the images per rate
    "gap": 500,                # overlap (gap) between neighbouring patches
}
split_trainval(**trainval_split_args)

# Unlabelled test split, left disabled.
# NOTE(review): the paths below still point at DOTAv1.0 — update before enabling.
# split_test(
#     data_root="DOTAv1.0/",
#     save_dir="DOTAv1.0-split/",
#     rates=[0.5, 1.0, 1.5],  # multiscale
#     gap=500,
# )

### Inspect dataset

In [None]:
from pathlib import Path

root = Path("datasets/DOTAv2.0")
root2 = Path("datasets/DOTAv2.0/processed")
image_exts = {".jpg", ".jpeg", ".png"}
label_exts = {".txt"}


def count_files_by_subdir(parent, exts):
    """Count matching files in each immediate subdirectory of ``parent``.

    Args:
        parent: Directory whose immediate subdirectories are inspected.
        exts: Set of lower-case file suffixes (including the dot) to count.

    Returns:
        Dict mapping subdirectory name -> number of regular files found
        anywhere below it whose suffix (case-insensitive) is in ``exts``.
        Keys are in sorted order so the report is deterministic. An empty
        dict is returned when ``parent`` is not an existing directory.
    """
    parent = Path(parent)
    if not parent.is_dir():
        return {}
    counts = {}
    # iterdir() yields entries in arbitrary order; sort for stable output.
    for subdir in sorted(parent.iterdir()):
        if subdir.is_dir():
            counts[subdir.name] = sum(
                1
                for file in subdir.rglob("*")
                # is_file() keeps directories named like "x.txt" out of the count.
                if file.is_file() and file.suffix.lower() in exts
            )
    return counts


def _print_counts(parent, exts):
    """Print one indented ``name: count`` line per subdirectory of ``parent``."""
    if not parent.is_dir():
        # Report the problem instead of aborting the whole cell with a traceback.
        print(f"  (directory not found: {parent})")
        return
    for name, count in count_files_by_subdir(parent, exts).items():
        print(f"  {name}: {count}")
        if name == "val_original":
            print()  # Extra line break after 'val_original'


def print_dataset_summary(title, dataset_root):
    """Print image and label counts for the dataset rooted at ``dataset_root``."""
    print(title)
    print("Image counts:")
    _print_counts(dataset_root / "images", image_exts)
    print("\nLabel counts:")
    _print_counts(dataset_root / "labels", label_exts)


# Original dataset first, then the processed (split) copy.
print_dataset_summary("DOTAv2.0", root)
print_dataset_summary("\nDOTAv2_processed", root2)

### Load model

In [None]:
from ultralytics import YOLO

# Pretrained OBB checkpoint to start from; the letter before "-obb" selects
# the model size (n = nano ... x = extra-large).
weights_path = "models/yolo11x-obb.pt"
model = YOLO(weights_path)

### Train model
Includes the list of augmentations applied to each batch during training. These do not create additional images; they augment the existing ones (and adjust the labels accordingly) to enrich the dataset.

In [None]:
# Training configuration, grouped by concern and merged into a single
# model.train() call at the bottom of the cell.

# --- Run setup: data, schedule, hardware, bookkeeping -----------------------
run_cfg = dict(
    data="datasets/DOTAv2.0/dotav2.yml",  # custom dataset YAML
    epochs=100,            # total training epochs
    imgsz=640,             # training input image size
    patience=15,           # early stopping after this many epochs without improvement
    batch=12,              # batch size (-1 or a fraction = auto GPU memory utilisation)
    save=True,             # keep checkpoints and final weights
    save_period=-1,        # checkpoint every N epochs; -1 disables periodic saves
    cache=False,           # image caching (True = RAM, 'disk' = disk)
    device="0, 1",         # compute device(s), here two GPUs
    workers=8,             # data-loading worker threads
    project="test_yolo",   # output project directory
    name="experiment_1",   # run name inside the project
    exist_ok=False,        # True would allow overwriting an existing project/name
    # NOTE(review): `model` was created from a .pt checkpoint, so confirm what
    # pretrained=False actually changes for this run.
    pretrained=False,      # start from a pretrained model, or a path to one
    seed=18,               # random seed for reproducibility
    single_cls=False,      # collapse all classes into one
    classes=None,          # class IDs to train on; None = all
    rect=False,            # rectangular training to minimise padding
    multi_scale=True,      # vary imgsz during training
    close_mosaic=10,       # turn mosaic off for the last N epochs
    resume=False,          # continue from the last checkpoint
    amp=True,              # automatic mixed precision
    # fraction=1.0,        # fraction of the dataset used for training
    freeze=None,           # first N layers, or explicit list of layers, to freeze
    val=True,              # validate during training
    plots=True,            # save training/validation plots
)

# --- Optimisation -----------------------------------------------------------
# NOTE(review): with optimizer="auto" Ultralytics may choose its own lr0 and
# momentum and ignore the values given here — check the training log.
optim_cfg = dict(
    optimizer="auto",      # 'SGD', 'Adam', ... or 'auto'
    cos_lr=False,          # cosine learning-rate schedule
    lr0=0.01,              # initial learning rate
    lrf=0.001,             # final LR as a fraction of lr0 (lr0 * lrf)
    momentum=0.937,        # optimizer momentum
    weight_decay=0.0005,   # L2 regularisation coefficient
    warmup_epochs=3.0,     # LR warmup length in epochs
    warmup_momentum=0.8,   # momentum at the start of warmup
    warmup_bias_lr=0.1,    # bias-parameter LR during warmup
    nbs=64,                # nominal batch size for loss normalisation
    dropout=0.0,           # dropout rate
)

# --- Loss weights and mask options ------------------------------------------
loss_cfg = dict(
    box=7.5,               # box-regression loss weight
    cls=0.5,               # classification loss weight
    dfl=1.5,               # distribution focal loss weight
    pose=12.0,             # pose loss weight (pose/keypoint tasks)
    kobj=2.0,              # keypoint objectness loss weight (pose models)
    overlap_mask=True,     # merge overlapping masks into one
    mask_ratio=4,          # segmentation mask downsample ratio
)

# --- Augmentation -----------------------------------------------------------
augment_cfg = dict(
    hsv_h=0.015,           # hue shift, as a fraction of the colour wheel
    hsv_s=0.7,             # saturation variation
    hsv_v=0.4,             # brightness variation
    degrees=0.0,           # random rotation range; 0 = none (0..180)
    translate=0.1,         # shift by up to 10% of image size
    scale=0.5,             # rescale to mimic different object distances
    shear=0.0,             # shear angle; 0 = none (-180..180)
    perspective=0.0,       # perspective warp; 0 = none (values up to 0.001)
    flipud=0.0,            # probability of a vertical flip
    fliplr=0.5,            # probability of a horizontal flip
    bgr=0.0,               # probability of RGB -> BGR channel swap
    mosaic=1.0,            # mosaic: merge four images into one
    mixup=0.0,             # mixup (blend two images); disabled
    cutmix=0.0,            # cutmix (combine parts of two images); disabled
    copy_paste=0.0,        # copy-paste for segmentation; disabled
    copy_paste_mode="flip",      # strategy used if copy-paste is enabled
    auto_augment="randaugment",  # RandAugment policy (classification)
    erasing=0.4,           # random-erasing probability
)

results = model.train(**run_cfg, **optim_cfg, **loss_cfg, **augment_cfg)

### Validate model

In [7]:
from ultralytics import YOLO

# Load the weights stored in DOTA.pt and evaluate them on the dataset
# described by the YAML passed to val().
# NOTE(review): these paths are relative to the parent directory ("../"),
# whereas the earlier cells use "datasets/DOTAv2.0/..." and "models/..." —
# confirm which working directory and dataset folder name are intended.
model = YOLO("../models/DOTA.pt")
metrics = model.val(data="../datasets/DOTAv2/DOTAv2.yml")  # dataset YAML given explicitly

# A bare expression is only displayed when it is the last line of a cell, so
# the summary scores are printed explicitly; otherwise they are never shown.
print("mAP50-95:", metrics.box.map)
print("mAP50:   ", metrics.box.map50)
print("mAP75:   ", metrics.box.map75)

metrics.box.maps  # per-category mAP50-95, displayed as the cell output



Ultralytics 8.3.127 🚀 Python-3.10.17 torch-2.1.2+cu118 CUDA:0 (NVIDIA RTX 6000 Ada Generation, 48639MiB)
YOLO11x-obb summary (fused): 199 layers, 58,756,393 parameters, 0 gradients, 202.8 GFLOPs
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 532.2±115.9 MB/s, size: 351.9 KB)


[34m[1mval: [0mScanning /home/jupyter-dai7591/yolo/datasets/DOTAv2/processed/labels/val.cache... 7122 images, 2795 backgrounds, 0 corrupt: 100%|██████████| 7122/7122 [00:00<?, ?it/s]
  from .autonotebook import tqdm as notebook_tqdm
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 446/446 [01:10<00:00,  6.31it/s]


                   all       7122     148987      0.747      0.634      0.671      0.509
                 plane        758       6063      0.954      0.862      0.912      0.777
                  ship       1073      28315       0.94      0.781      0.859      0.669
          storage tank        345       5426      0.918      0.658      0.811      0.646
      baseball diamond        265        516      0.739      0.754      0.774      0.578
          tennis court        261       1662      0.943       0.92      0.951       0.91
      basketball court        138        358      0.806      0.648      0.718      0.636
    ground track field        353        417      0.671        0.7      0.678       0.53
                harbor        808       5719      0.888      0.699      0.794      0.521
                bridge        529       1045      0.705      0.481      0.553      0.335
         large vehicle        836      11317      0.837      0.739      0.802      0.629
         small vehicl

array([    0.77656,     0.66861,     0.64594,     0.57847,     0.90987,     0.63585,     0.52961,     0.52069,     0.33538,     0.62893,     0.43006,     0.44453,     0.52263,     0.43511,     0.48449,   0.0012233,     0.25676,     0.36031])