In [None]:

# STEP 1: Mount Google Drive for Persistence
import os

from google.colab import drive

drive.mount('/content/drive')

# Lay out the project folders on the mounted drive; makedirs(exist_ok=True)
# makes this cell safe to re-run.
project_root = '/content/drive/MyDrive/satellite_detection'
dirs = ['checkpoints', 'results', 'datasets', 'predictions', 'backups']
for subdir_path in (os.path.join(project_root, name) for name in dirs):
    os.makedirs(subdir_path, exist_ok=True)

print("Google Drive mounted and directories created")


# STEP 2: Install Dependencies

!pip install -q ultralytics opencv-python-headless matplotlib tensorboard

# Verify installation
import ultralytics
ultralytics.checks()


# STEP 3: Check GPU Availability

import torch

# Query CUDA once and branch on it: the device-name and device-properties
# calls are only valid when a CUDA device is present, so they must not run on
# a CPU-only runtime.
cuda_available = torch.cuda.is_available()

print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {cuda_available}")
if cuda_available:
    print(f"Device: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
else:
    print("Device: CPU")
    print("GPU Memory: N/A (no CUDA device available)")


In [None]:

# Dataset Setup for DOTA

from ultralytics import YOLO

# For quick testing: DOTA8 (8-image subset)
# For full training: DOTAv1, DOTAv2, or xView

# DOTA classes (15 categories), one name per line
DOTA_CLASSES = [
    'plane',
    'ship',
    'storage-tank',
    'baseball-diamond',
    'tennis-court',
    'basketball-court',
    'ground-track-field',
    'harbor',
    'bridge',
    'large-vehicle',
    'small-vehicle',
    'helicopter',
    'roundabout',
    'soccer-ball-field',
    'swimming-pool',
]

print(f"Dataset classes ({len(DOTA_CLASSES)}): {DOTA_CLASSES}")

# The dataset is expected to auto-download on the first training run.
# A custom dataset should follow the directory layout shown below:
"""
datasets/DOTA/
  ├── images/
  │   ├── train/
  │   ├── val/
  │   └── test/
  └── labels/
      ├── train/
      ├── val/
      └── test/
"""


In [None]:

# Advanced Training Pipeline with Auto-Resume

from datetime import datetime
import yaml
import shutil

class SatelliteObjectDetector:
    """
    Training pipeline for small object detection in satellite imagery.

    Wraps an Ultralytics YOLO OBB model with checkpoint discovery, timestamped
    checkpoint backups, training, validation, inference and export. Outputs
    are written below ``project_root`` in the ``checkpoints``, ``results``,
    ``backups`` and ``predictions`` sub-directories.

    The class uses the names ``os``, ``torch`` and ``YOLO`` from the notebook
    namespace, so the cells that import them must have been run first.
    """

    def __init__(self,
                 model_size='n',  # n, s, m, l, x
                 data_yaml='DOTAv1.yaml',
                 project_root='/content/drive/MyDrive/satellite_detection'):
        """
        Initialize detector and make sure its output directories exist.

        Args:
            model_size: YOLOv11 model size (n=nano, s=small, m=medium, l=large, x=xlarge)
            data_yaml: Dataset configuration (dota8.yaml, DOTAv1.yaml, or custom)
            project_root: Root directory for all outputs
        """
        self.model_size = model_size
        self.data_yaml = data_yaml
        self.project_root = project_root
        self.checkpoint_dir = os.path.join(project_root, 'checkpoints')
        self.results_dir = os.path.join(project_root, 'results')
        self.backup_dir = os.path.join(project_root, 'backups')

        # Ensure directories exist
        for d in [self.checkpoint_dir, self.results_dir, self.backup_dir]:
            os.makedirs(d, exist_ok=True)

        # Populated by train(); None until a model has been loaded.
        self.model = None
        self.training_history = []

    def find_latest_checkpoint(self):
        """
        Find the checkpoint to resume from or evaluate.

        Lookup order:
            1. ``<results_dir>/train/weights/last.pt`` (where train() writes
               its run, via project=results_dir and name='train').
            2. The most recently modified ``.pt`` file in the checkpoint
               directory.

        Returns:
            Path of the checkpoint found, or None when neither location
            contains one.
        """
        # Check Ultralytics default location
        last_pt = os.path.join(self.results_dir, 'train/weights/last.pt')
        if os.path.exists(last_pt):
            print(f"Found checkpoint: {last_pt}")
            return last_pt

        # Check custom checkpoint directory
        candidates = [
            os.path.join(self.checkpoint_dir, f)
            for f in os.listdir(self.checkpoint_dir)
            if f.endswith('.pt')
        ]
        if candidates:
            latest = max(candidates, key=os.path.getmtime)
            print(f"Found checkpoint: {latest}")
            return latest

        print("No checkpoint found, starting fresh")
        return None

    def backup_checkpoint(self, checkpoint_path):
        """
        Create a timestamped copy of a checkpoint in the backup directory.

        Does nothing when ``checkpoint_path`` does not exist.

        Args:
            checkpoint_path: Path of the checkpoint file to copy
        """
        if not os.path.exists(checkpoint_path):
            return

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        backup_name = f"backup_{self.model_size}_{timestamp}.pt"
        backup_path = os.path.join(self.backup_dir, backup_name)
        # copy2 also copies file metadata (e.g. modification time).
        shutil.copy2(checkpoint_path, backup_path)
        print(f"Backup created: {backup_name}")

    def train(self,
              epochs=100,
              imgsz=1024,  # DOTA standard size
              batch=8,     # Adjust for T4 GPU (16GB)
              resume=True,
              patience=50,
              save_period=10,
              lr0=0.01,
              optimizer='Adam',
              augment=True,
              device=0,
              **kwargs):
        """
        Train YOLOv11-OBB model with automatic checkpointing

        Args:
            epochs: Total training epochs
            imgsz: Input image size (1024 for DOTA, reduce to 640 if OOM)
            batch: Batch size (reduce to 4 if out of memory)
            resume: Auto-resume from last checkpoint
            patience: Early stopping patience
            save_period: Checkpoint frequency (epochs)
            lr0: Initial learning rate
            optimizer: Optimizer (Adam, SGD, AdamW)
            augment: Enable data augmentation
            device: GPU device (0) or 'cpu'
            **kwargs: Extra training arguments; they are merged last and so
                override any of the settings above that share a key.

        Returns:
            The value returned by ``model.train(...)``, or None when training
            was interrupted with KeyboardInterrupt.

        Raises:
            RuntimeError: Re-raised from training; for out-of-memory errors a
                list of remedies is printed and the CUDA cache is emptied
                first.
        """

        # Check for existing checkpoint (and back it up before touching it)
        checkpoint_path = None
        if resume:
            checkpoint_path = self.find_latest_checkpoint()
            if checkpoint_path:
                self.backup_checkpoint(checkpoint_path)

        # Initialize or resume model.
        # checkpoint_path can only be set when resume is True, so testing it
        # alone is sufficient.
        if checkpoint_path:
            print(f"\n{'='*60}")
            print(f"RESUMING TRAINING FROM CHECKPOINT")
            print(f"{'='*60}\n")
            self.model = YOLO(checkpoint_path)
        else:
            print(f"\n{'='*60}")
            print(f"STARTING NEW TRAINING SESSION")
            print(f"Model: YOLOv11{self.model_size}-OBB")
            print(f"{'='*60}\n")
            self.model = YOLO(f'yolo11{self.model_size}-obb.pt')

        # Training configuration
        train_config = {
            'data': self.data_yaml,
            'epochs': epochs,
            'imgsz': imgsz,
            'batch': batch,
            'device': device,
            # project/name/exist_ok pin the run directory to
            # <results_dir>/train, which is where find_latest_checkpoint()
            # looks for weights/last.pt.
            'project': self.results_dir,
            'name': 'train',
            'exist_ok': True,
            'patience': patience,
            'save': True,
            'save_period': save_period,
            'cache': False,  # Don't cache to save disk space
            'verbose': True,
            'plots': True,
            'val': True,
            # Only request a resume when there is a checkpoint to resume from.
            'resume': resume and (checkpoint_path is not None),
            'lr0': lr0,
            'optimizer': optimizer,
            'augment': augment,
            # Small object optimization
            'mosaic': 1.0,      # Mosaic augmentation
            'mixup': 0.1,       # Mixup augmentation
            'copy_paste': 0.1,  # Copy-paste augmentation
        }

        # Merge additional arguments
        train_config.update(kwargs)

        # Print configuration
        print("Training Configuration:")
        print("-" * 60)
        for key, value in train_config.items():
            print(f"  {key:20s}: {value}")
        print("-" * 60 + "\n")

        try:
            # Start/resume training
            results = self.model.train(**train_config)

            # Save final model under a timestamped name in the checkpoint dir
            final_name = f"final_{self.model_size}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pt"
            final_path = os.path.join(self.checkpoint_dir, final_name)
            self.model.save(final_path)
            print(f"\nFinal model saved: {final_name}")

            return results

        except KeyboardInterrupt:
            # Swallowed on purpose so the notebook keeps running; the caller
            # can tell the run did not finish from the None return value.
            print("\nTraining interrupted! Progress saved.")
            print("Resume by re-running with resume=True")
            return None

        except RuntimeError as e:
            if "out of memory" in str(e):
                print("\n GPU Out of Memory!")
                print("Solutions:")
                print("  1. Reduce batch size: batch=4 or batch=2")
                print("  2. Reduce image size: imgsz=640")
                print("  3. Use smaller model: model_size='n'")
                torch.cuda.empty_cache()
            raise

    def validate(self, checkpoint=None, split='val'):
        """
        Validate a checkpoint on a dataset split and print summary metrics.

        Args:
            checkpoint: Checkpoint path; defaults to find_latest_checkpoint()
            split: Dataset split passed to ``model.val`` (e.g. 'val', 'test')

        Returns:
            The metrics object returned by ``model.val``, or None when no
            checkpoint is available.
        """
        if checkpoint is None:
            checkpoint = self.find_latest_checkpoint()

        if checkpoint is None:
            print(" No checkpoint found for validation")
            return None

        print(f"\n{'='*60}")
        print(f"VALIDATION")
        print(f"{'='*60}")
        print(f"Checkpoint: {os.path.basename(checkpoint)}")
        print(f"Split: {split}\n")

        model = YOLO(checkpoint)
        metrics = model.val(data=self.data_yaml, split=split)

        # Print metrics
        print(f"\n{'='*60}")
        print("VALIDATION RESULTS")
        print(f"{'='*60}")
        print(f"mAP50:    {metrics.box.map50:.4f}")
        print(f"mAP50-95: {metrics.box.map:.4f}")
        print(f"Precision: {metrics.box.mp:.4f}")
        print(f"Recall:    {metrics.box.mr:.4f}")
        print(f"{'='*60}\n")

        return metrics

    def predict(self, source, checkpoint=None, conf=0.25, iou=0.7, save=True):
        """
        Run inference on images/video.

        Args:
            source: Input passed through to ``model.predict``
            checkpoint: Checkpoint path; defaults to find_latest_checkpoint()
            conf: Confidence threshold passed to ``model.predict``
            iou: IoU threshold passed to ``model.predict``
            save: Whether ``model.predict`` should save its outputs

        Returns:
            The value returned by ``model.predict``, or None when no
            checkpoint is available. Outputs go to a timestamped
            ``predict_<timestamp>`` run under ``<project_root>/predictions``.
        """
        if checkpoint is None:
            checkpoint = self.find_latest_checkpoint()

        if checkpoint is None:
            print(" No checkpoint found for prediction")
            return None

        print(f"\nRunning inference with {os.path.basename(checkpoint)}")
        model = YOLO(checkpoint)

        results = model.predict(
            source=source,
            conf=conf,
            iou=iou,
            save=save,
            project=os.path.join(self.project_root, 'predictions'),
            name=f'predict_{datetime.now().strftime("%Y%m%d_%H%M%S")}'
        )

        return results

    def export_model(self, format='onnx', checkpoint=None):
        """
        Export a checkpoint to another model format.

        Args:
            format: Target format passed to ``model.export`` (e.g. 'onnx')
            checkpoint: Checkpoint path; defaults to find_latest_checkpoint()

        Returns:
            The value returned by ``model.export``, or None when no
            checkpoint is available.
        """
        if checkpoint is None:
            checkpoint = self.find_latest_checkpoint()

        # Same guard as validate()/predict(): without it YOLO(None) would be
        # attempted when nothing has been trained yet.
        if checkpoint is None:
            print(" No checkpoint found for export")
            return None

        model = YOLO(checkpoint)
        export_path = model.export(format=format)
        print(f" Model exported to {format}: {export_path}")
        return export_path


In [None]:

# TRAINING EXECUTION


# Initialize detector
detector = SatelliteObjectDetector(
    model_size='n',
    data_yaml='DOTAv1.yaml',
    project_root='/content/drive/MyDrive/soda_detection'
)

# Start a fresh training run. resume=False skips the checkpoint lookup, so
# set resume=True instead to continue from an existing checkpoint.
results = detector.train(
    epochs=50,
    imgsz=1024,
    batch=8,
    resume=False,
    lr0=0.001,         # Learning rate
    optimizer='Adam',  # Adam, SGD, or AdamW
    device=0,          # Use GPU
    augment=True       # Forwarded to the training config as 'augment'
)

# train() returns None when the run is interrupted, so only report success
# when it actually handed back results.
if results is not None:
    print("\n✓ Training complete!")
else:
    print("\nTraining did not finish. Re-run with resume=True to continue.")


In [None]:

# MONITOR TRAINING WITH TENSORBOARD

%load_ext tensorboard
%tensorboard --logdir /content/drive/MyDrive/satellite_detection/results/train


# VISUALIZE TRAINING RESULTS

import matplotlib.pyplot as plt
from PIL import Image

# Read the plots from the run directory the detector trains into
# (<results_dir>/train) rather than a hard-coded path, so this cell follows
# whatever project_root the detector was created with.
results_path = os.path.join(detector.results_dir, 'train')

# Display training plots in a 2x2 grid
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
plots = ['results.png', 'confusion_matrix.png', 'F1_curve.png', 'PR_curve.png']

for ax, plot_name in zip(axes.flat, plots):
    # Hide ticks/frame for every panel, including those whose plot is missing.
    ax.axis('off')
    ax.set_title(plot_name.replace('.png', '').replace('_', ' ').title())

    # NOTE(review): some Ultralytics releases appear to write curve plots with
    # a 'Box' prefix (e.g. BoxPR_curve.png) - confirm against the installed
    # version. Trying both names is harmless either way.
    candidates = [
        os.path.join(results_path, plot_name),
        os.path.join(results_path, 'Box' + plot_name),
    ]
    plot_path = next((p for p in candidates if os.path.exists(p)), None)

    if plot_path is None:
        # Make a missing plot visible instead of leaving a blank panel.
        ax.text(0.5, 0.5, f"{plot_name} not found",
                ha='center', va='center', transform=ax.transAxes)
        continue

    img = Image.open(plot_path)
    ax.imshow(img)

plt.tight_layout()
plt.show()


In [None]:

# VALIDATE TRAINED MODEL


# Validate on validation set
val_metrics = detector.validate(split='val')

# Validate on test set
# NOTE(review): this needs labels for the 'test' split of the dataset -
# confirm they are available for the configured data_yaml.
test_metrics = detector.validate(split='test')

# Per-class performance
print("\nPer-Class Performance:")
print("-" * 60)
if val_metrics is None:
    # validate() returns None when it cannot find a checkpoint.
    print("  No validation metrics available (no checkpoint found)")
else:
    # Ultralytics' Metric.maps holds the per-class mAP averaged over IoU
    # 0.50-0.95, so it is labelled mAP50-95 here (not mAP50).
    per_class_map = val_metrics.box.maps
    for i, class_name in enumerate(DOTA_CLASSES):
        class_map = per_class_map[i] if i < len(per_class_map) else 0
        print(f"  {class_name:25s}: mAP50-95 = {class_map:.4f}")


In [None]:

# EXPORT TRAINED MODEL


# Export targets, processed in this order:
#   onnx   - ONNX for deployment
#   engine - TensorRT (GPU inference)
#   tflite - TFLite (mobile deployment)
for export_format in ('onnx', 'engine', 'tflite'):
    detector.export_model(format=export_format)

print("\n Models exported successfully!")
