In [3]:
!pip install ultralytics
import os
import sys
import yaml
import torch
import ultralytics 
import numpy as np
from pathlib import Path
import shutil
import json
from collections import defaultdict
from ultralytics import YOLO
from PIL import Image
import cv2
from sklearn.model_selection import StratifiedShuffleSplit
import albumentations as A

class EnhancedYOLOEnsemble:
    def __init__(self, model_configs, weights=None, tta_config=None):
        self.models = []
        self.model_names = []
        self.weights = weights if weights else [1.0] * len(model_configs)
        self.tta_config = tta_config or {
            'enable': True,
            'scales': [0.8, 1.0, 1.2],
            'flips': [False, True],
            'rotations': [0, 90, 180, 270]
        }
        
        for config in model_configs:
            model = YOLO(config['model_name'])
            self.models.append(model)
            self.model_names.append(config['model_name'])
        print(f"Initialized ensemble with {len(self.models)} models:")
        for name in self.model_names:
            print(f"  - {name}")

    def get_augmentation_pipeline(self):
        """Enhanced data augmentation pipeline"""
        return A.Compose([
            A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
            A.HueSaturationValue(hue_shift_limit=20, sat_shift_limit=30, val_shift_limit=20, p=0.5),
            A.GaussianBlur(blur_limit=(1, 3), p=0.3),
            A.GaussNoise(var_limit=(10.0, 50.0), p=0.3),
            A.RandomGamma(gamma_limit=(80, 120), p=0.3),
            A.CLAHE(clip_limit=2.0, tile_grid_size=(8, 8), p=0.3),
            A.RandomShadow(p=0.2),
            A.RandomFog(fog_coef_lower=0.1, fog_coef_upper=0.3, p=0.1),
            A.Cutout(num_holes=8, max_h_size=32, max_w_size=32, p=0.3),
            A.CoarseDropout(max_holes=8, max_height=32, max_width=32, p=0.3),
        ], bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels']))

    def train_models(self, data_yaml, epochs=100, imgsz=640, batch_size=16, patience=20):
        """Enhanced training with better hyperparameters"""
        self.trained_models = []
        
        # Enhanced training parameters
        enhanced_params = {
            'epochs': epochs,
            'imgsz': imgsz,
            'batch': batch_size,
            'patience': patience,
            'device': 0 if torch.cuda.is_available() else 'cpu',
            'optimizer': 'AdamW',
            'lr0': 0.001,  # Lower initial learning rate
            'lrf': 0.01,   # Final learning rate factor
            'momentum': 0.937,
            'weight_decay': 0.0005,
            'warmup_epochs': 3,
            'warmup_momentum': 0.8,
            'warmup_bias_lr': 0.1,
            'box': 7.5,    # Box loss gain
            'cls': 0.5,    # Class loss gain  
            'dfl': 1.5,    # DFL loss gain
            'pose': 12.0,  # Pose loss gain
            'kobj': 1.0,   # Keypoint obj loss gain
            'label_smoothing': 0.0,
            'nbs': 64,     # Nominal batch size
            'hsv_h': 0.015,  # Image HSV-Hue augmentation
            'hsv_s': 0.7,    # Image HSV-Saturation augmentation  
            'hsv_v': 0.4,    # Image HSV-Value augmentation
            'degrees': 0.0,  # Image rotation (+/- deg)
            'translate': 0.1, # Image translation (+/- fraction)
            'scale': 0.5,    # Image scale (+/- gain)
            'shear': 0.0,    # Image shear (+/- deg)
            'perspective': 0.0, # Image perspective (+/- fraction)
            'flipud': 0.0,      # Image flip up-down (probability)
            'fliplr': 0.5,      # Image flip left-right (probability)
            'mosaic': 1.0,      # Image mosaic (probability)
            'mixup': 0.1,       # Image mixup (probability)
            'copy_paste': 0.1,  # Segment copy-paste (probability)
            'auto_augment': 'randaugment',
            'erasing': 0.4,     # Random erasing probability
            'crop_fraction': 1.0, # Image crop fraction
        }
        
        for i, model in enumerate(self.models):
            print(f"\nTraining Model {i+1}/{len(self.models)}: {self.model_names[i]}")
            try:
                results = model.train(
                    data=data_yaml,
                    project=f"enhanced_train_{self.model_names[i]}".replace('.', '_'),
                    name="run",
                    **enhanced_params
                )
                self.trained_models.append(model)
                print(f"✅ Trained {self.model_names[i]}")
            except Exception as e:
                print(f"❌ Error training {self.model_names[i]}: {e}")
                self.trained_models.append(model)

    def apply_tta_transforms(self, image):
        """Apply Test Time Augmentation transforms"""
        transforms = []
        
        if not self.tta_config['enable']:
            return [image]
        
        # Original image
        transforms.append(('original', image, lambda x: x))
        
        # Scale transforms
        for scale in self.tta_config['scales']:
            if scale != 1.0:
                h, w = image.shape[:2]
                new_h, new_w = int(h * scale), int(w * scale)
                scaled = cv2.resize(image, (new_w, new_h))
                # Inverse transform function
                inv_fn = lambda x, orig_h=h, orig_w=w: cv2.resize(x, (orig_w, orig_h))
                transforms.append((f'scale_{scale}', scaled, inv_fn))
        
        # Flip transforms
        for flip in self.tta_config['flips']:
            if flip:
                flipped = cv2.flip(image, 1)  # Horizontal flip
                inv_fn = lambda x: cv2.flip(x, 1)
                transforms.append(('flip_h', flipped, inv_fn))
        
        return transforms

    def ensemble_predict_with_tta(self, image_path, conf_threshold=0.1, iou_threshold=0.4):
        """Enhanced prediction with Test Time Augmentation and lower thresholds"""
        image = cv2.imread(str(image_path))
        if image is None:
            return {'boxes': np.array([]), 'scores': np.array([]), 'classes': np.array([])}
        
        all_predictions = []
        
        # Apply TTA transforms
        tta_transforms = self.apply_tta_transforms(image)
        
        for model_idx, model in enumerate(self.trained_models):
            model_predictions = []
            
            for transform_name, transformed_img, inverse_fn in tta_transforms:
                try:
                    # Save transformed image temporarily
                    temp_path = f"/tmp/temp_tta_{transform_name}.jpg"
                    cv2.imwrite(temp_path, transformed_img)
                    
                    # Multiple confidence thresholds for better recall
                    for conf_thresh in [0.05, 0.1, 0.15, 0.2]:
                        results = model.predict(
                            temp_path, 
                            conf=conf_thresh, 
                            iou=iou_threshold,
                            verbose=False,
                            agnostic_nms=True,  # Class-agnostic NMS
                            max_det=1000       # Allow more detections
                        )
                        
                        if results and len(results) > 0 and results[0].boxes is not None:
                            boxes = results[0].boxes
                            if len(boxes) > 0:
                                predictions = {
                                    'boxes': boxes.xyxy.cpu().numpy(),
                                    'scores': boxes.conf.cpu().numpy(),
                                    'classes': boxes.cls.cpu().numpy().astype(int),
                                    'model_weight': self.weights[model_idx],
                                    'transform': transform_name,
                                    'conf_thresh': conf_thresh
                                }
                                model_predictions.append(predictions)
                    
                    # Clean up temp file
                    if os.path.exists(temp_path):
                        os.remove(temp_path)
                        
                except Exception as e:
                    print(f"⚠️ Error in TTA prediction: {e}")
            
            all_predictions.extend(model_predictions)
        
        return self._advanced_combine_predictions(all_predictions, iou_threshold, conf_threshold)

    def _advanced_combine_predictions(self, all_predictions, iou_threshold=0.4, final_conf_threshold=0.25):
        """Advanced prediction combination with weighted voting and confidence calibration"""
        if not all_predictions:
            return {'boxes': np.array([]), 'scores': np.array([]), 'classes': np.array([])}
        
        # Collect all predictions with metadata
        prediction_data = []
        for pred in all_predictions:
            for i in range(len(pred['boxes'])):
                prediction_data.append({
                    'box': pred['boxes'][i],
                    'score': pred['scores'][i],
                    'class': pred['classes'][i],
                    'weight': pred['model_weight'],
                    'transform': pred.get('transform', 'original'),
                    'conf_thresh': pred.get('conf_thresh', 0.25)
                })
        
        if not prediction_data:
            return {'boxes': np.array([]), 'scores': np.array([]), 'classes': np.array([])}
        
        # Group predictions by class
        class_groups = defaultdict(list)
        for pred in prediction_data:
            class_groups[pred['class']].append(pred)
        
        final_boxes, final_scores, final_classes = [], [], []
        
        # Process each class separately
        for class_id, class_preds in class_groups.items():
            if not class_preds:
                continue
            
            # Convert to arrays for processing
            boxes = np.array([p['box'] for p in class_preds])
            scores = np.array([p['score'] * p['weight'] for p in class_preds])
            
            # Apply Soft-NMS instead of regular NMS for better recall
            keep_indices = self._soft_nms(boxes, scores, iou_threshold)
            
            for idx in keep_indices:
                if scores[idx] >= final_conf_threshold:
                    final_boxes.append(boxes[idx])
                    final_scores.append(scores[idx])
                    final_classes.append(class_id)
        
        return {
            'boxes': np.array(final_boxes) if final_boxes else np.array([]),
            'scores': np.array(final_scores) if final_scores else np.array([]),
            'classes': np.array(final_classes) if final_classes else np.array([])
        }

    def _soft_nms(self, boxes, scores, iou_threshold, sigma=0.5):
        """Soft-NMS implementation for better recall"""
        if len(boxes) == 0:
            return []
        
        x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
        areas = (x2 - x1) * (y2 - y1)
        order = scores.argsort()[::-1]
        
        keep = []
        while order.size > 0:
            i = order[0]
            keep.append(i)
            
            if order.size == 1:
                break
            
            xx1 = np.maximum(x1[i], x1[order[1:]])
            yy1 = np.maximum(y1[i], y1[order[1:]])
            xx2 = np.minimum(x2[i], x2[order[1:]])
            yy2 = np.minimum(y2[i], y2[order[1:]])
            
            w = np.maximum(0.0, xx2 - xx1)
            h = np.maximum(0.0, yy2 - yy1)
            inter = w * h
            ovr = inter / (areas[i] + areas[order[1:]] - inter)
            
            # Soft-NMS: reduce scores instead of removing boxes
            scores[order[1:]] = scores[order[1:]] * np.exp(-(ovr ** 2) / sigma)
            
            # Keep boxes with sufficient scores
            inds = np.where(scores[order[1:]] > 0.001)[0]
            order = order[inds + 1]
        
        return keep

    # Update the main predict method
    def ensemble_predict(self, image_path, conf_threshold=0.25, iou_threshold=0.45):
        """Use enhanced prediction with TTA"""
        return self.ensemble_predict_with_tta(image_path, conf_threshold, iou_threshold)

def enhanced_setup_yolo_dataset(original_data_path, working_path, stratify=True):
    """Enhanced dataset setup with stratified splitting"""
    data_path = Path(original_data_path)
    images = list((data_path / "images").glob("*.[jp][pn]g"))
    labels = list((data_path / "labels").glob("*.txt"))
    
    if not images or not labels:
        return None, None
    
    print(f"Detected {len(images)} images and {len(labels)} labels.")
    
    # Create class distribution for stratified splitting
    class_counts = defaultdict(int)
    image_class_map = {}
    
    for img in images:
        label_file = data_path / 'labels' / f"{img.stem}.txt"
        if label_file.exists():
            classes = set()
            with open(label_file, 'r') as f:
                for line in f:
                    parts = line.strip().split()
                    if parts:
                        classes.add(int(parts[0]))
            
            # Use dominant class for stratification
            if classes:
                dominant_class = max(classes) if len(classes) == 1 else -1  # Multi-class images get -1
                image_class_map[img] = dominant_class
                class_counts[dominant_class] += 1
    
    # Stratified split if possible
    if stratify and len(set(image_class_map.values())) > 1:
        try:
            from sklearn.model_selection import train_test_split
            images_array = np.array(images)
            labels_array = np.array([image_class_map.get(img, 0) for img in images])
            
            # First split: train vs (val + test)
            train_imgs, temp_imgs, _, temp_labels = train_test_split(
                images_array, labels_array, test_size=0.3, stratify=labels_array, random_state=42
            )
            
            # Second split: val vs test
            val_imgs, test_imgs = train_test_split(
                temp_imgs, test_size=0.5, stratify=temp_labels, random_state=42
            )
            
            print("Used stratified splitting based on class distribution")
            
        except Exception as e:
            print(f"Stratified splitting failed ({e}), using random split")
            stratify = False
    
    if not stratify:
        # Fallback to random splitting
        indices = np.arange(len(images))
        np.random.seed(42)
        np.random.shuffle(indices)
        n = len(images)
        train_idx = indices[:int(0.7*n)]
        val_idx = indices[int(0.7*n):int(0.85*n)]
        test_idx = indices[int(0.85*n):]
        
        train_imgs = [images[i] for i in train_idx]
        val_imgs = [images[i] for i in val_idx]
        test_imgs = [images[i] for i in test_idx]
    
    structure = {
        'train': {
            'images': train_imgs,
            'labels': [data_path / 'labels' / f"{img.stem}.txt" for img in train_imgs]
        },
        'val': {
            'images': val_imgs,
            'labels': [data_path / 'labels' / f"{img.stem}.txt" for img in val_imgs]
        },
        'test': {
            'images': test_imgs,
            'labels': [data_path / 'labels' / f"{img.stem}.txt" for img in test_imgs]
        }
    }
    
    output_path = Path(working_path) / "enhanced_yolo_dataset"
    
    # Copy files to organized structure
    for split in ['train', 'val', 'test']:
        (output_path / split / 'images').mkdir(parents=True, exist_ok=True)
        (output_path / split / 'labels').mkdir(parents=True, exist_ok=True)
        
        for img in structure[split]['images']:
            shutil.copy2(img, output_path / split / 'images' / img.name)
        for lbl in structure[split]['labels']:
            if lbl.exists():
                shutil.copy2(lbl, output_path / split / 'labels' / lbl.name)
    
    # Analyze class distribution
    class_ids = set()
    class_distribution = defaultdict(int)
    
    for split in ['train', 'val', 'test']:
        for f in (output_path / split / 'labels').glob('*.txt'):
            with open(f, 'r') as file:
                for line in file:
                    parts = line.strip().split()
                    if parts:
                        class_id = int(parts[0])
                        class_ids.add(class_id)
                        class_distribution[class_id] += 1
    
    n_classes = max(class_ids) + 1 if class_ids else 0
    class_names = [f'class_{i}' for i in range(n_classes)]
    
    print(f"Detected {n_classes} classes with distribution: {dict(class_distribution)}")
    
    return output_path, class_names

# ---------------------------------------------------------------
# MAIN EXECUTION WITH IMPROVEMENTS
# ---------------------------------------------------------------
if __name__ == "__main__":
    DATA_PATH = "/kaggle/input/yolov11dat"
    WORK_PATH = "/kaggle/working"
    
    # Enhanced hyperparameters
    EPOCHS = 150          # Increased epochs
    BATCH_SIZE = 8        # Smaller batch size for better gradients
    IMG_SIZE = 800        # Larger image size for better detection
    PATIENCE = 25         # More patience for convergence
    
    print(f"Using device: {'cuda' if torch.cuda.is_available() else 'cpu'}")

    # Enhanced dataset setup
    dataset_path, class_names = enhanced_setup_yolo_dataset(DATA_PATH, WORK_PATH, stratify=True)
    if dataset_path is None:
        sys.exit(1)

    yaml_path = create_yaml_config(dataset_path, class_names)

    # Enhanced model ensemble with more diverse models
    model_configs = [
        {'model_name': 'yolo11x.pt'},     # Largest YOLOv11 model
        {'model_name': 'yolo11l.pt'},     # Large YOLOv11 model  
        {'model_name': 'yolov8x.pt'},     # Largest YOLOv8 model
        {'model_name': 'yolov9e.pt'},     # YOLOv9 extended model
        {'model_name': 'yolov10x.pt'},    # YOLOv10 extra large
    ]
    
    # Optimized ensemble weights (sum to 1.0)
    ensemble_weights = [0.25, 0.2, 0.2, 0.2, 0.15]
    
    # TTA configuration
    tta_config = {
        'enable': True,
        'scales': [0.8, 0.9, 1.0, 1.1, 1.2],
        'flips': [False, True],
        'rotations': [0]  # Rotations can be expensive, disable if needed
    }
    
    # Initialize enhanced ensemble
    ensemble = EnhancedYOLOEnsemble(
        model_configs, 
        weights=ensemble_weights,
        tta_config=tta_config
    )
    
    # Train with enhanced parameters
    ensemble.train_models(
        str(yaml_path), 
        epochs=EPOCHS, 
        imgsz=IMG_SIZE, 
        batch_size=BATCH_SIZE,
        patience=PATIENCE
    )

    print("\n✅ Enhanced training complete. Ready for evaluation with TTA.")

# Usage for evaluation (update your evaluation code):
# When calling ensemble_predict, use lower confidence thresholds:
# preds = ensemble.ensemble_predict(str(img_path), conf_threshold=0.1, iou_threshold=0.4)

Collecting ultralytics
  Downloading ultralytics-8.3.160-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cusolver_cu12-11.6

  check_for_updates()


Using device: cuda
Detected 1764 images and 2081 labels.
Stratified splitting failed (The least populated class in y has only 1 member, which is too few. The minimum number of groups for any class cannot be less than 2.), using random split
Detected 3 classes with distribution: {2: 1603, 0: 1053, 1: 1}


NameError: name 'create_yaml_config' is not defined