# Training pipeline
1. **first stage**: train yolo11n on the original dataset with 5-fold cross-validation
2. **second stage**: train yolo11n/s/m on the subsampled dataset with 5-fold cross-validation
3. **third stage**: train yolo11l/x on the subsampled dataset, first fold only
4. **fourth stage**: train rtdetr-l/x on the subsampled dataset, first fold only

In [None]:
# Install required packages
!pip install -U ultralytics torch

In [None]:
# Import required libraries
import torch
import os
import yaml
import json
import datetime
import psutil
from ultralytics import YOLO, RTDETR
from pathlib import Path

# Report the library versions and hardware visible to this kernel.
print(f"Torch: {torch.__version__} | CUDA: {torch.version.cuda} | Device count: {torch.cuda.device_count()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
print(f"RAM: {psutil.virtual_memory().total / 1e9:.1f} GB")

# Turn on cuDNN benchmarking (intended to speed up training)
torch.backends.cudnn.benchmark = True

In [None]:
# Configuration and Helper Functions

class TrainingConfig:
    """Central constants for the training pipeline.

    Holds the dataset locations, the shared training hyper-parameters and the
    mapping from model names to the weight files that are loaded for them.
    The class is used as a plain namespace; it is never instantiated in this
    file.
    """
    
    # Dataset paths. These are relative paths, so they are resolved against
    # the working directory of the notebook kernel.
    # NOTE: the "AUGMENTED" constant points at the *subsampled* dataset directory.
    K_FOLD_CV_PATH = "datasets/k_fold_cv"
    K_FOLD_CV_AUGMENTED_PATH = "datasets/k_fold_cv_subsampled"
    
    # Training parameters
    EPOCHS = 100
    PATIENCE = 20
    IMGSZ = 320
    # NOTE(review): BATCH_SIZE is not read by get_training_params in this
    # file, which sets the batch size per model with literal values.
    BATCH_SIZE = 32
    WORKERS = 8
    
    # Model configurations: model name -> weights file handed to YOLO(...)
    YOLO_MODELS = {
        'yolo11n': 'yolo11n.pt',
        'yolo11s': 'yolo11s.pt', 
        'yolo11m': 'yolo11m.pt',
        'yolo11l': 'yolo11l.pt',
        'yolo11x': 'yolo11x.pt'
    }
    
    # Model name -> weights file handed to RTDETR(...)
    RTDETR_MODELS = {
        'rtdetr-l': 'rtdetr-l.pt',
        'rtdetr-x': 'rtdetr-x.pt'
    }

def get_training_params(model_size):
    """Build the keyword arguments used to train one model.

    Args:
        model_size: Model name such as ``'yolo11n'`` or ``'rtdetr-x'``.

    Returns:
        A new dict of training arguments. Epochs, image size, patience and
        worker count come from ``TrainingConfig``; the rest are fixed here.
        A ``'batch'`` entry (32, 16 or 8) is added only when ``model_size``
        is one of the known names; for any other value the key is absent.
    """
    # Batch size tiers: larger models get smaller batches.
    batch_tiers = (
        (32, ('yolo11n', 'yolo11s')),
        (16, ('yolo11m', 'rtdetr-l')),
        (8, ('yolo11l', 'yolo11x', 'rtdetr-x')),
    )

    params = dict(
        epochs=TrainingConfig.EPOCHS,
        imgsz=TrainingConfig.IMGSZ,
        patience=TrainingConfig.PATIENCE,
        amp=True,
        optimizer='AdamW',
        save_period=10,
        workers=TrainingConfig.WORKERS,
        cache=True,
        half=True,
        pretrained=True,
        lr0=1e-4,
        lrf=0.01,
        momentum=0.937,
        weight_decay=1e-4,
        augment=False,
    )

    for batch, names in batch_tiers:
        if model_size in names:
            params['batch'] = batch
            break

    return params

def save_results(results, filename):
    """Write *results* as indented JSON to a timestamped file.

    The file is created in the current working directory and is named
    ``training_results_<filename>_<YYYYmmdd_HHMMSS>.json``. Values that the
    JSON encoder cannot handle natively are written as ``str(value)``.

    Args:
        results: The object to serialise.
        filename: Label embedded in the output file name.

    Returns:
        The path of the file that was written.
    """
    timestamp = f"{datetime.datetime.now():%Y%m%d_%H%M%S}"
    filepath = f"training_results_{filename}_{timestamp}.json"

    with open(filepath, 'w') as out_file:
        json.dump(results, out_file, indent=2, default=str)

    print(f"Results saved to: {filepath}")
    return filepath

def _summarize_training_run(model, results):
    """Collect best epoch, best fitness and checkpoint path from a finished run.

    The object returned by ``model.train()`` is checked first; if it does not
    carry the value, the trainer attached to the model is consulted.
    NOTE(review): the trainer attribute names used here (``trainer.stopper``,
    ``trainer.best_fitness``, ``trainer.best`` / ``trainer.last``) follow the
    Ultralytics trainer implementation - confirm against the installed version.
    Every lookup is defensive, so a missing attribute yields ``None``.

    Returns:
        Tuple ``(best_epoch, best_fitness, model_path)``; each item may be None.
    """
    trainer = getattr(model, 'trainer', None)
    stopper = getattr(trainer, 'stopper', None)

    best_epoch = getattr(results, 'best_epoch', None)
    if best_epoch is None:
        best_epoch = getattr(stopper, 'best_epoch', None)

    best_fitness = None
    for source, attr in ((results, 'best_fitness'), (trainer, 'best_fitness'), (results, 'fitness')):
        value = getattr(source, attr, None)
        if value is None:
            continue
        try:
            # Plain float keeps the value JSON-serialisable and formattable.
            best_fitness = float(value)
        except (TypeError, ValueError):
            continue
        break

    # Prefer the weights written by this run over the checkpoint the model
    # was initialised from.
    model_path = None
    for attr in ('best', 'last'):
        candidate = getattr(trainer, attr, None)
        if candidate is not None and os.path.exists(candidate):
            model_path = str(candidate)
            break
    if model_path is None:
        model_path = getattr(model, 'ckpt_path', None)

    return best_epoch, best_fitness, model_path


def train_model(model_type, model_name, data_yaml, fold_idx, experiment_name, project_dir="runs"):
    """Train a single model on a specific fold.

    Args:
        model_type: ``'yolo'`` or ``'rtdetr'``; selects the model class and
            the weights table in ``TrainingConfig``.
        model_name: Key into the corresponding weights table.
        data_yaml: Path of the fold's ``data.yaml``.
        fold_idx: Fold number, used for logging and the run name.
        experiment_name: Sub-directory of ``project_dir`` for this run.
        project_dir: Root output directory.

    Returns:
        A dict describing the run. ``status`` is ``'completed'`` (with
        ``best_epoch``, ``best_fitness``, ``model_path`` and the training
        parameters) or ``'failed'`` (with ``error``). Exceptions raised while
        loading or training are caught and reported in the dict so that a
        single failure does not abort the surrounding pipeline.
    """
    
    print(f"\n{'='*80}")
    print(f"Training {model_name} on Fold {fold_idx}")
    print(f"Data: {data_yaml}")
    print(f"{'='*80}")
    
    try:
        # Load model
        if model_type == 'yolo':
            model_path = TrainingConfig.YOLO_MODELS[model_name]
            model = YOLO(model_path)
        elif model_type == 'rtdetr':
            model_path = TrainingConfig.RTDETR_MODELS[model_name]
            model = RTDETR(model_path)
        else:
            raise ValueError(f"Unknown model type: {model_type}")
        
        # Get training parameters
        train_params = get_training_params(model_name)
        
        # Set training-specific parameters
        train_params.update({
            'data': data_yaml,
            'name': f"{model_name}_fold_{fold_idx}",
            'project': f"{project_dir}/{experiment_name}"
        })
        
        print(f"Training parameters: {train_params}")
        
        # Train the model
        results = model.train(**train_params)
        
        # Extract metrics
        best_epoch, best_fitness, trained_path = _summarize_training_run(model, results)
        fold_result = {
            'model_type': model_type,
            'model_name': model_name,
            'fold': fold_idx,
            'status': 'completed',
            'best_epoch': best_epoch,
            'best_fitness': best_fitness,
            'model_path': trained_path,
            'experiment_name': experiment_name,
            'data_yaml': data_yaml,
            'training_params': train_params
        }
        
        print(f"\n✅ {model_name} Fold {fold_idx} completed successfully!")
        print(f"Best epoch: {fold_result['best_epoch']}")
        # Compare against None so that a genuine fitness of 0.0 is still shown.
        if fold_result['best_fitness'] is not None:
            print(f"Best fitness: {fold_result['best_fitness']:.4f}")
        else:
            print("Best fitness: N/A")
        print(f"Model saved: {fold_result['model_path']}")
        
        return fold_result
        
    except Exception as e:
        # Pipeline boundary: report the failure and let the caller continue
        # with the next fold / model.
        print(f"❌ Error training {model_name} on fold {fold_idx}: {str(e)}")
        return {
            'model_type': model_type,
            'model_name': model_name,
            'fold': fold_idx,
            'status': 'failed',
            'error': str(e),
            'experiment_name': experiment_name,
            'data_yaml': data_yaml
        }

# Confirmation message printed once this cell has finished executing.
print("Configuration and helper functions loaded!")

In [None]:
# Verify Dataset Structure

# Section banner for the dataset verification output.
print(f"\n{'=' * 100}")
print("DATASET VERIFICATION")
print("=" * 100)

def _count_images(images_dir, extensions=('.jpg', '.jpeg', '.png')):
    """Return the number of files in *images_dir* with an image extension.

    The extension match is case-insensitive. A missing path, or a path that
    is not a directory, counts as zero images.
    """
    if not os.path.isdir(images_dir):
        return 0
    return sum(1 for name in os.listdir(images_dir) if name.lower().endswith(extensions))


def verify_dataset_structure(dataset_path, dataset_name, num_folds=5):
    """Verify the structure of a k-fold dataset.

    Each fold is expected at ``<dataset_path>/fold_<i>`` and must contain a
    ``data.yaml`` plus at least one image in both ``train/images`` and
    ``val/images``. A fold with an empty split is reported as invalid, since
    it cannot be used for both training and validation.

    Args:
        dataset_path: Root directory of the k-fold dataset.
        dataset_name: Label used in the printed report.
        num_folds: Number of folds to check (``fold_0`` .. ``fold_<n-1>``).

    Returns:
        True if the dataset root exists and every fold passed, else False.
    """
    print(f"\n📁 Verifying {dataset_name}: {dataset_path}")
    
    if not os.path.exists(dataset_path):
        print(f"❌ Dataset path not found: {dataset_path}")
        return False
    
    all_folds_valid = True
    
    for fold_idx in range(num_folds):
        fold_dir = os.path.join(dataset_path, f"fold_{fold_idx}")
        data_yaml = os.path.join(fold_dir, "data.yaml")
        
        if not os.path.exists(data_yaml):
            print(f"  ❌ Fold {fold_idx}: data.yaml not found")
            all_folds_valid = False
            continue
        
        # Count images in train and val
        train_count = _count_images(os.path.join(fold_dir, "train", "images"))
        val_count = _count_images(os.path.join(fold_dir, "val", "images"))
        total = train_count + val_count
        
        if train_count > 0 and val_count > 0:
            print(f"  ✅ Fold {fold_idx}: Train={train_count}, Val={val_count}, Total={total}")
        elif total == 0:
            print(f"  ❌ Fold {fold_idx}: No images found")
            all_folds_valid = False
        else:
            # Exactly one of the two splits is empty.
            print(f"  ❌ Fold {fold_idx}: Empty split (Train={train_count}, Val={val_count})")
            all_folds_valid = False
    
    return all_folds_valid

# Verify both datasets. The report labels are derived from the configured
# directory names so they always match the dataset that was actually checked
# (the "augmented" constant points at the subsampled dataset).
k_fold_cv_label = os.path.basename(os.path.normpath(TrainingConfig.K_FOLD_CV_PATH))
k_fold_cv_augmented_label = os.path.basename(os.path.normpath(TrainingConfig.K_FOLD_CV_AUGMENTED_PATH))

k_fold_cv_valid = verify_dataset_structure(TrainingConfig.K_FOLD_CV_PATH, k_fold_cv_label)
k_fold_cv_augmented_valid = verify_dataset_structure(TrainingConfig.K_FOLD_CV_AUGMENTED_PATH, k_fold_cv_augmented_label)

print("\n📊 VERIFICATION SUMMARY:")
print(f"{k_fold_cv_label}: {'✅ Valid' if k_fold_cv_valid else '❌ Issues found'}")
print(f"{k_fold_cv_augmented_label}: {'✅ Valid' if k_fold_cv_augmented_valid else '❌ Issues found'}")

if k_fold_cv_valid and k_fold_cv_augmented_valid:
    print("\n🚀 All datasets verified! Ready to start training pipeline.")
else:
    print("\n⚠️ Please fix dataset issues before proceeding with training.")

In [None]:
# Stage 1: Train YOLO11n on all folds of k_fold_cv

# Stage 1 banner
print(f"\n{'=' * 100}")
print("STAGE 1: Training YOLO11n on all folds of k_fold_cv")
print("=" * 100)

stage1_results = []
k_fold_cv_path = TrainingConfig.K_FOLD_CV_PATH

if os.path.exists(k_fold_cv_path):
    print(f"📁 Dataset path: {k_fold_cv_path}")

    # One yolo11n run per fold directory, fold_0 through fold_4.
    for fold_idx in range(5):
        fold_dir = os.path.join(k_fold_cv_path, f"fold_{fold_idx}")
        data_yaml = os.path.join(fold_dir, "data.yaml")

        if not os.path.exists(data_yaml):
            # Record the missing fold as a failure and move on to the next one.
            print(f"❌ Data YAML not found: {data_yaml}")
            stage1_results.append({
                'model_name': 'yolo11n',
                'fold': fold_idx,
                'status': 'failed',
                'error': f'Data YAML not found: {data_yaml}'
            })
            continue

        result = train_model(
            model_type='yolo',
            model_name='yolo11n',
            data_yaml=data_yaml,
            fold_idx=fold_idx,
            experiment_name='stage1_yolo11n_k_fold_cv'
        )
        stage1_results.append(result)
else:
    print(f"❌ Dataset path not found: {k_fold_cv_path}")

# Persist the per-fold outcomes of this stage.
stage1_file = save_results(stage1_results, "stage1")

# Report how many folds finished training.
successful_stage1 = [entry for entry in stage1_results if entry['status'] == 'completed']
print(f"\n📊 Stage 1 Summary: {len(successful_stage1)}/5 folds completed successfully")

In [None]:
# Stage 2: Train YOLO11n, s, and m on all folds of subsampled

print("\n" + "="*100)
print("STAGE 2: Training YOLO11n, s, and m on all folds of k_fold_cv_subsampled")
print("="*100)

stage2_results = []
k_fold_cv_augmented_path = TrainingConfig.K_FOLD_CV_AUGMENTED_PATH
# All three variants announced in the banner above. The list previously held
# only 'yolo11m', which contradicted the banner and the expected total of 15
# runs. Narrow it temporarily if you only need to re-run part of the stage.
stage2_models = ['yolo11n', 'yolo11s', 'yolo11m']

# Check if dataset exists
if not os.path.exists(k_fold_cv_augmented_path):
    print(f"❌ Dataset path not found: {k_fold_cv_augmented_path}")
else:
    print(f"📁 Dataset path: {k_fold_cv_augmented_path}")
    
    # Train each model on all 5 folds (0-4)
    for model_name in stage2_models:
        print(f"\n🚀 Starting {model_name} training on all folds...")
        
        for fold_idx in range(5):
            fold_dir = os.path.join(k_fold_cv_augmented_path, f"fold_{fold_idx}")
            data_yaml = os.path.join(fold_dir, "data.yaml")
            
            if os.path.exists(data_yaml):
                result = train_model(
                    model_type='yolo',
                    model_name=model_name,
                    data_yaml=data_yaml,
                    fold_idx=fold_idx,
                    experiment_name=f'stage2_{model_name}_k_fold_cv_subsampled'
                )
                stage2_results.append(result)
            else:
                # Record the missing fold as a failure so it shows up in the summaries.
                print(f"❌ Data YAML not found: {data_yaml}")
                stage2_results.append({
                    'model_name': model_name,
                    'fold': fold_idx,
                    'status': 'failed',
                    'error': f'Data YAML not found: {data_yaml}'
                })

# Save Stage 2 results
stage2_file = save_results(stage2_results, "stage2")

# Summary
successful_stage2 = [r for r in stage2_results if r['status'] == 'completed']
total_expected = len(stage2_models) * 5  # number of models × 5 folds
print(f"\n📊 Stage 2 Summary: {len(successful_stage2)}/{total_expected} trainings completed successfully")

# Per-model summary
for model_name in stage2_models:
    model_results = [r for r in stage2_results if r['model_name'] == model_name and r['status'] == 'completed']
    print(f"  {model_name}: {len(model_results)}/5 folds completed")

In [None]:
# Stage 3: Train YOLO11l and x on first fold of k_fold_cv_subsampled

print("\n" + "="*100)
print("STAGE 3: Training YOLO11l and x on first fold of k_fold_cv_subsampled")
print("="*100)

stage3_results = []
stage3_models = ['yolo11l', 'yolo11x']
fold_idx = 0  # First fold

# Resolve the dataset path in this cell so it can run on its own instead of
# relying on a variable created by the Stage 2 cell.
k_fold_cv_augmented_path = TrainingConfig.K_FOLD_CV_AUGMENTED_PATH

# Check if dataset exists
if not os.path.exists(k_fold_cv_augmented_path):
    print(f"❌ Dataset path not found: {k_fold_cv_augmented_path}")
else:
    fold_dir = os.path.join(k_fold_cv_augmented_path, f"fold_{fold_idx}")
    data_yaml = os.path.join(fold_dir, "data.yaml")
    
    if os.path.exists(data_yaml):
        print(f"📁 Using fold {fold_idx}: {data_yaml}")
        
        # Train each large model on fold 0
        for model_name in stage3_models:
            print(f"\n🚀 Starting {model_name} training on fold {fold_idx}...")
            
            result = train_model(
                model_type='yolo',
                model_name=model_name,
                data_yaml=data_yaml,
                fold_idx=fold_idx,
                experiment_name=f'stage3_{model_name}_fold0_subsampled'
            )
            stage3_results.append(result)
    else:
        # Without the fold's data.yaml, every model of this stage is recorded as failed.
        print(f"❌ Data YAML not found: {data_yaml}")
        for model_name in stage3_models:
            stage3_results.append({
                'model_name': model_name,
                'fold': fold_idx,
                'status': 'failed',
                'error': f'Data YAML not found: {data_yaml}'
            })

# Save Stage 3 results
stage3_file = save_results(stage3_results, "stage3")

# Summary
successful_stage3 = [r for r in stage3_results if r['status'] == 'completed']
print(f"\n📊 Stage 3 Summary: {len(successful_stage3)}/{len(stage3_models)} large YOLO models completed successfully")

In [None]:
# Stage 4: Train RT-DETR-l and x on first fold of k_fold_cv_subsampled

print("\n" + "="*100)
print("STAGE 4: Training RT-DETR-l and x on first fold of k_fold_cv_subsampled")
print("="*100)

stage4_results = []
stage4_models = ['rtdetr-l', 'rtdetr-x']
fold_idx = 0  # First fold

# Resolve the dataset path in this cell so it can run on its own instead of
# relying on a variable created by the Stage 2 cell.
k_fold_cv_augmented_path = TrainingConfig.K_FOLD_CV_AUGMENTED_PATH

# Check if dataset exists
if not os.path.exists(k_fold_cv_augmented_path):
    print(f"❌ Dataset path not found: {k_fold_cv_augmented_path}")
else:
    fold_dir = os.path.join(k_fold_cv_augmented_path, f"fold_{fold_idx}")
    data_yaml = os.path.join(fold_dir, "data.yaml")
    
    if os.path.exists(data_yaml):
        print(f"📁 Using fold {fold_idx}: {data_yaml}")
        
        # Train each RT-DETR model on fold 0
        for model_name in stage4_models:
            print(f"\n🚀 Starting {model_name} training on fold {fold_idx}...")
            
            result = train_model(
                model_type='rtdetr',
                model_name=model_name,
                data_yaml=data_yaml,
                fold_idx=fold_idx,
                experiment_name=f'stage4_{model_name}_fold0_subsampled'
            )
            stage4_results.append(result)
    else:
        # Without the fold's data.yaml, every model of this stage is recorded as failed.
        print(f"❌ Data YAML not found: {data_yaml}")
        for model_name in stage4_models:
            stage4_results.append({
                'model_name': model_name,
                'fold': fold_idx,
                'status': 'failed',
                'error': f'Data YAML not found: {data_yaml}'
            })

# Save Stage 4 results
stage4_file = save_results(stage4_results, "stage4")

# Summary
successful_stage4 = [r for r in stage4_results if r['status'] == 'completed']
print(f"\n📊 Stage 4 Summary: {len(successful_stage4)}/{len(stage4_models)} RT-DETR models completed successfully")

In [None]:
# Final Summary and Analysis

print("\n" + "="*100)
print("COMPREHENSIVE TRAINING PIPELINE - FINAL SUMMARY")
print("="*100)

# Combine all results
all_results = stage1_results + stage2_results + stage3_results + stage4_results

# Overall statistics
total_trainings = len(all_results)
successful_trainings = len([r for r in all_results if r['status'] == 'completed'])
failed_trainings = total_trainings - successful_trainings
# No entries are recorded when the dataset directories are missing; guard the
# division so the summary still prints instead of raising ZeroDivisionError.
success_rate = (successful_trainings / total_trainings) * 100 if total_trainings else 0.0

print("\n📊 OVERALL STATISTICS:")
print(f"Total trainings attempted: {total_trainings}")
print(f"Successful trainings: {successful_trainings}")
print(f"Failed trainings: {failed_trainings}")
print(f"Success rate: {success_rate:.1f}%")

# Stage-by-stage breakdown. Expected counts are derived from the model lists
# actually used by the stage cells rather than hard-coded.
print("\n📋 STAGE BREAKDOWN:")
stages = [
    ("Stage 1 (YOLO11n on k_fold_cv)", stage1_results, 5),
    (f"Stage 2 ({', '.join(stage2_models)} on k_fold_cv_subsampled)", stage2_results, len(stage2_models) * 5),
    ("Stage 3 (YOLO11l,x on fold 0)", stage3_results, len(stage3_models)),
    ("Stage 4 (RT-DETR-l,x on fold 0)", stage4_results, len(stage4_models))
]

for stage_name, stage_results, expected_count in stages:
    successful = len([r for r in stage_results if r['status'] == 'completed'])
    print(f"  {stage_name}: {successful}/{expected_count} completed")

# Model performance summary (for successful trainings that recorded a fitness).
# Compare against None so that a genuine fitness of 0.0 is not dropped.
print("\n🏆 MODEL PERFORMANCE SUMMARY:")
successful_results = [
    r for r in all_results
    if r['status'] == 'completed' and r.get('best_fitness') is not None
]

if successful_results:
    # Group fitness values by model
    model_performance = {}
    for result in successful_results:
        model_performance.setdefault(result['model_name'], []).append(result['best_fitness'])
    
    # Average / best / worst fitness per model
    for model_name, fitness_values in model_performance.items():
        avg_fitness = sum(fitness_values) / len(fitness_values)
        max_fitness = max(fitness_values)
        min_fitness = min(fitness_values)
        count = len(fitness_values)
        
        print(f"  {model_name}: Avg={avg_fitness:.4f}, Max={max_fitness:.4f}, Min={min_fitness:.4f} ({count} runs)")

    # Best performing run across all models and folds
    best_model = max(successful_results, key=lambda x: x['best_fitness'])
    print("\n🥇 BEST PERFORMING MODEL:")
    print(f"  Model: {best_model['model_name']}")
    print(f"  Fold: {best_model['fold']}")
    print(f"  Fitness: {best_model['best_fitness']:.4f}")
    print(f"  Path: {best_model.get('model_path', 'N/A')}")
else:
    print("  (no fitness values recorded)")

# Failed trainings summary
failed_results = [r for r in all_results if r['status'] == 'failed']
if failed_results:
    print("\n❌ FAILED TRAININGS:")
    for result in failed_results:
        print(f"  {result['model_name']} (Fold {result['fold']}): {result.get('error', 'Unknown error')}")

# Save comprehensive results
comprehensive_results = {
    'timestamp': datetime.datetime.now().isoformat(),
    'summary': {
        'total_trainings': total_trainings,
        'successful_trainings': successful_trainings,
        'failed_trainings': failed_trainings,
        'success_rate': success_rate
    },
    'stages': {
        'stage1': stage1_results,
        'stage2': stage2_results,
        'stage3': stage3_results,
        'stage4': stage4_results
    },
    'all_results': all_results
}

comprehensive_file = save_results(comprehensive_results, "comprehensive_pipeline")

print("\n✅ Comprehensive training pipeline completed!")
print(f"📁 All results saved to: {comprehensive_file}")
print("\n🔍 Check the 'runs/' directory for individual training outputs and models.")