In [None]:
import torch
from ultralytics import YOLO
from PIL import Image
import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2
import os
from pathlib import Path
import shutil
from sklearn.model_selection import train_test_split

class DetectionTrainer:
    """
    Fine-tune a pre-trained YOLOv8 nano model on a YOLO-format dataset.

    The source data is read from ``train/images`` and ``train/labels``
    relative to the current working directory. ``train()`` splits it into
    temporary ``temp_train/`` and ``temp_val/`` directories, trains, and
    removes the temporary files again.
    """

    # Lower-case suffixes of the files treated as training images.
    IMAGE_SUFFIXES = ('.jpg', '.jpeg', '.png', '.bmp')

    def __init__(self, data_yaml_path):
        """
        Initialize the trainer with path to data.yaml file
        data_yaml_path: Path to YAML file containing dataset configuration
        """
        self.model = YOLO('yolov8n.pt')  # Start with a pre-trained YOLOv8 nano model
        self.data_yaml_path = str(Path(data_yaml_path).absolute())

        # Define augmentation pipeline.
        # NOTE: train() below never reads self.train_transforms; it only
        # passes arguments to self.model.train().
        self.train_transforms = A.Compose([
            A.RandomBrightnessContrast(p=0.5),
            A.RandomGamma(p=0.5),
            A.GaussNoise(p=0.3),
            A.OneOf([
                A.MotionBlur(p=0.5),
                A.MedianBlur(blur_limit=3, p=0.5),
                A.GaussianBlur(blur_limit=3, p=0.5),
            ], p=0.3),
            A.OneOf([
                A.OpticalDistortion(p=0.3),
                A.GridDistortion(p=0.3),
            ], p=0.2),
            A.RandomRotate90(p=0.5),
            # NOTE(review): the library emits a validation warning for this
            # transform; it appears to be deprecated in recent albumentations
            # releases (HorizontalFlip / VerticalFlip are the likely
            # replacements) - confirm against the installed version.
            A.Flip(p=0.5),
            A.Resize(640, 640),
            A.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225]
            ),
            ToTensorV2(),
        ], bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels']))

    @staticmethod
    def _copy_split(image_paths, label_dir, dest_dir):
        """
        Copy images (and their label files, when present) into
        dest_dir/images and dest_dir/labels
        """
        img_dest = dest_dir / 'images'
        label_dest = dest_dir / 'labels'
        img_dest.mkdir(parents=True, exist_ok=True)
        label_dest.mkdir(parents=True, exist_ok=True)

        for img_path in image_paths:
            shutil.copy2(img_path, img_dest / img_path.name)

            # The label file shares the image's stem; images without a label
            # file are copied on their own.
            label_path = label_dir / img_path.with_suffix('.txt').name
            if label_path.exists():
                shutil.copy2(label_path, label_dest / label_path.name)

    def train(self, epochs=100, imgsz=640, batch_size=16, val_split=0.2):
        """
        Train the model with automatic validation split

        epochs: Number of training epochs
        imgsz: Training image size
        batch_size: Batch size passed to the model as ``batch``
        val_split: Fraction of the images held out for validation

        Raises ValueError when train/images holds fewer than two images,
        because no train/validation split can be made from them.
        """
        source_label_dir = Path('train/labels')

        # Sorted so that the fixed random_state below yields the same split
        # regardless of the order in which the filesystem lists the files.
        train_images = sorted(
            p for p in Path('train/images').glob('*')
            if p.is_file() and p.suffix.lower() in self.IMAGE_SUFFIXES
        )

        # Fail early with an actionable message instead of the opaque
        # "n_samples=0 ... resulting train set will be empty" from sklearn.
        if len(train_images) < 2:
            raise ValueError(
                f"Found {len(train_images)} image(s) in 'train/images' "
                f"(accepted extensions: {', '.join(self.IMAGE_SUFFIXES)}). "
                "At least 2 images are required to create a "
                "train/validation split; add your images and labels to "
                "'train/images' and 'train/labels' before training."
            )

        # Split into train and validation
        train_imgs, val_imgs = train_test_split(
            train_images,
            test_size=val_split,
            random_state=42
        )

        val_dir = Path('temp_val')
        temp_train_dir = Path('temp_train')
        temp_yaml_path = 'temp_data.yaml'

        # Get absolute paths for training
        project_dir = Path().absolute()

        try:
            # Leftovers from an interrupted run would otherwise be mixed into
            # the new split.
            for temp_dir in [val_dir, temp_train_dir]:
                if temp_dir.exists():
                    shutil.rmtree(temp_dir)

            # Stage the two subsets in their temporary directory structures
            self._copy_split(train_imgs, source_label_dir, temp_train_dir)
            self._copy_split(val_imgs, source_label_dir, val_dir)

            # Write a copy of data.yaml that points at the temporary
            # training directory
            with open(self.data_yaml_path, 'r') as f:
                yaml_content = f.read()
            yaml_content = yaml_content.replace('train/images', 'temp_train/images')
            with open(temp_yaml_path, 'w') as f:
                f.write(yaml_content)

            # Training arguments
            args = dict(
                data=temp_yaml_path,
                epochs=epochs,
                imgsz=imgsz,
                batch=batch_size,
                patience=20,  # Early stopping patience
                save_period=10,  # Save checkpoint every 10 epochs
                verbose=True,
                device='cuda' if torch.cuda.is_available() else 'cpu',
                project=str(project_dir / 'runs'),  # Save results to runs/
                augment=True  # Use built-in YOLOv8 augmentations
            )

            # Train the model
            self.model.train(**args)
        finally:
            # Clean up temporary directories, also when staging or training
            # failed part-way through
            for temp_dir in [val_dir, temp_train_dir]:
                if temp_dir.exists():
                    shutil.rmtree(temp_dir)
            # Remove temporary yaml file
            if os.path.exists(temp_yaml_path):
                os.remove(temp_yaml_path)

    def predict(self, image_path):
        """
        Run inference on a single image

        image_path: Image source handed to the model's predict()
        Returns whatever the model's predict() returns.
        """
        results = self.model.predict(
            source=image_path,
            conf=0.25,  # Confidence threshold
            iou=0.45    # NMS IoU threshold
        )
        return results

def prepare_dataset_structure():
    """
    Make sure the train/ and test/ folders, each with an images/ and a
    labels/ subfolder, exist below the current working directory
    """
    for split_name in ('train', 'test'):
        for content_kind in ('images', 'labels'):
            target = Path(split_name, content_kind)
            # Existing folders are left as they are
            target.mkdir(parents=True, exist_ok=True)

def create_data_yaml(dataset_path):
    """
    Write data.yaml (the YOLOv8 dataset description) into the current
    working directory

    dataset_path: Dataset root; it is made absolute and used as the prefix
                  of the train, val and test image directories
    """
    root = str(Path(dataset_path).absolute())

    # The blank first entry and the whitespace-only last entry keep the file
    # content framed exactly as a triple-quoted block would produce it.
    lines = [
        "",
        f"path: {root}  # dataset root directory",
        f"train: {root}/train/images  # train images",
        f"val: {root}/temp_val/images  # temporary validation images directory",
        f"test: {root}/test/images    # test images",
        "",
        "# Classes",
        "names:",
        "  0: shopping cart",
        "  1: filled BOB",
        "  2: empty BOB",
        "    ",
    ]

    Path('data.yaml').write_text("\n".join(lines))

def main():
    """
    Script entry point: create the dataset folders and data.yaml in the
    working directory, train a detector and run one example prediction
    """
    # Folder skeleton first, then the config that refers to it by absolute path
    prepare_dataset_structure()
    create_data_yaml(str(Path().absolute()))

    detector = DetectionTrainer('data.yaml')

    # Hold out 20% of the training images for validation
    detector.train(val_split=0.2)

    # Example inference
    detector.predict('test/images/example.jpg')


if __name__ == "__main__":
    main()

0


  validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self)
  A.Flip(p=0.5),


ValueError: With n_samples=0, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.