In [None]:
!pip install -U ultralytics

In [None]:
"""
SAM 2 Auto-Annotation for Black Sigatoka Disease Detection
Strategy: Color-based detection + SAM 2 refinement
Works specifically for banana leaf disease images
"""

import os
import cv2
import numpy as np
from pathlib import Path
import json
import shutil
from ultralytics import SAM

In [None]:
# """
# SAM 2 Auto-Annotation for Black Sigatoka Disease Detection
# Strategy: Color-based detection + SAM 2 refinement
# Works specifically for banana leaf disease images
# """

# import os
# import cv2
# import numpy as np
# from pathlib import Path
# import json
# import shutil
# from ultralytics import SAM

class BlackSigatokaAnnotator:
    def __init__(self, sam_model="sam2_b.pt"):
        """Load the SAM 2 checkpoint and register the annotation classes.

        Args:
            sam_model: Checkpoint name or path passed to ``ultralytics.SAM``.
        """
        print(f"Loading SAM 2 model: {sam_model}")
        self.sam = SAM(sam_model)
        print("‚úì Model loaded successfully!")

        # Class id -> human-readable name (used for the overlay labels).
        self.classes = dict(enumerate(("whole_leaf", "lesion_discoloration")))
    
    def detect_leaf_and_lesions(self, image):
        """
        Detect leaf and lesion regions using color analysis
        Returns bounding boxes for SAM prompts
        """
        # Make sure image is valid
        if image is None:
            print("  ‚úó Invalid image")
            return []
            
        h, w = image.shape[:2]
        
        try:
            # Convert to HSV color space
            hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
            
            # === STEP 1: Detect whole leaf (green regions) ===
            # Fix: Ensure lowerb and upperb are proper numpy arrays
            lower_green = np.array([25, 20, 20], dtype=np.uint8)
            upper_green = np.array([95, 255, 255], dtype=np.uint8)
            leaf_mask = cv2.inRange(hsv, lower_green, upper_green)
            
            # Clean up leaf mask
            kernel_large = np.ones((15, 15), np.uint8)
            leaf_mask = cv2.morphologyEx(leaf_mask, cv2.MORPH_CLOSE, kernel_large)
            leaf_mask = cv2.morphologyEx(leaf_mask, cv2.MORPH_OPEN, kernel_large)
            
            # Find main leaf contour
            contours, _ = cv2.findContours(leaf_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            
            detections = []
            
            if contours:
                # Get largest contour (main leaf)
                largest_contour = max(contours, key=cv2.contourArea)
                area = cv2.contourArea(largest_contour)
                
                # Only process if leaf is significant portion of image
                if area > (h * w * 0.1):  # At least 10% of image
                    x, y, w_box, h_box = cv2.boundingRect(largest_contour)
                    
                    # Expand bbox slightly for better SAM results
                    padding = 20
                    x1 = max(0, x - padding)
                    y1 = max(0, y - padding)
                    x2 = min(w, x + w_box + padding)
                    y2 = min(h, y + h_box + padding)
                    
                    detections.append({
                        'bbox': [x1, y1, x2, y2],
                        'class': 0,  # whole_leaf
                        'type': 'leaf'
                    })
                    
                    # === STEP 2: Detect lesions within leaf area ===
                    # Create mask for leaf area only
                    leaf_region_mask = np.zeros_like(leaf_mask)
                    cv2.drawContours(leaf_region_mask, [largest_contour], -1, 255, -1)
                    
                    # Detect lesions: dark brown/reddish-brown spots (disease)
                    # Method 1: Brown/reddish-brown in HSV (actual disease color)
                    # More specific ranges for disease lesions
                    lower_brown1 = np.array([0, 30, 0], dtype=np.uint8)      # Dark brown/black lesions
                    upper_brown1 = np.array([25, 255, 80], dtype=np.uint8)  # Brown lesions (lower value = darker)
                    
                    # Reddish-brown lesions
                    lower_brown2 = np.array([160, 30, 0], dtype=np.uint8)
                    upper_brown2 = np.array([180, 255, 80], dtype=np.uint8)
                    
                    brown_mask1 = cv2.inRange(hsv, lower_brown1, upper_brown1)
                    brown_mask2 = cv2.inRange(hsv, lower_brown2, upper_brown2)
                    brown_mask = cv2.bitwise_or(brown_mask1, brown_mask2)
                    brown_mask = cv2.bitwise_and(brown_mask, brown_mask, mask=leaf_region_mask)
                    
                    # Method 2: Dark areas in LAB (significantly darker than healthy leaf)
                    lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
                    l_channel = lab[:, :, 0]
                    masked_l = cv2.bitwise_and(l_channel, l_channel, mask=leaf_region_mask)
                    
                    # Get mean and std brightness of healthy leaf
                    mean_brightness = cv2.mean(l_channel, mask=leaf_region_mask)[0]
                    std_brightness = np.std(masked_l[masked_l > 0])
                    
                    # Only detect areas SIGNIFICANTLY darker (disease lesions are much darker)
                    dark_threshold = mean_brightness - max(20, std_brightness * 1.5)
                    dark_mask = np.where((masked_l < dark_threshold) & (masked_l > 0), 255, 0).astype(np.uint8)
                    dark_mask = cv2.bitwise_and(dark_mask, dark_mask, mask=leaf_region_mask)
                    
                    # Method 3: Yellow/light areas (for early stage lesions only)
                    lower_yellow = np.array([15, 40, 40], dtype=np.uint8)  # More saturated yellow
                    upper_yellow = np.array([35, 255, 255], dtype=np.uint8)
                    yellow_mask = cv2.inRange(hsv, lower_yellow, upper_yellow)
                    yellow_mask = cv2.bitwise_and(yellow_mask, yellow_mask, mask=leaf_region_mask)
                    
                    # Combine all methods
                    lesion_mask = cv2.bitwise_or(brown_mask, dark_mask)
                    lesion_mask = cv2.bitwise_or(lesion_mask, yellow_mask)
                    
                    # Clean up lesion mask (remove noise)
                    kernel_small = np.ones((7, 7), np.uint8)  # Larger kernel to remove small noise
                    lesion_mask = cv2.morphologyEx(lesion_mask, cv2.MORPH_OPEN, kernel_small)
                    lesion_mask = cv2.morphologyEx(lesion_mask, cv2.MORPH_CLOSE, kernel_small)
                    
                    # Find lesion contours
                    # Find lesion contours
                    lesion_contours, _ = cv2.findContours(lesion_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                    
                    for contour in lesion_contours:
                        area = cv2.contourArea(contour)
                        
                        # Filter 1: Minimum size (small lesions allowed)
                        min_area_pixels = 200
                        min_area_ratio = 0.0002
                        min_area = max(min_area_pixels, h * w * min_area_ratio)
                        
                        if area < min_area:
                            continue
                        
                        # Get bounding box for shape analysis
                        x, y, w_box, h_box = cv2.boundingRect(contour)
                        
                        # Filter 2: Filter only extremely elongated shapes (veins)
                        aspect_ratio = max(w_box, h_box) / max(min(w_box, h_box), 1)
                        if aspect_ratio > 12:
                            continue
                        
                        # Filter 3: Basic circularity (very lenient)
                        perimeter = cv2.arcLength(contour, True)
                        if perimeter > 0:
                            circularity = 4 * np.pi * area / (perimeter * perimeter)
                            if circularity < 0.12:  # Very lenient
                                continue
                        
                        # Filter 4: Color verification (optional - can be removed if too strict)
                        try:
                            mask_roi = np.zeros((h, w), dtype=np.uint8)
                            cv2.drawContours(mask_roi, [contour], -1, 255, -1)
                            
                            mean_color = cv2.mean(image, mask=mask_roi)[:3]
                            mean_hsv = cv2.cvtColor(np.uint8([[mean_color]]), cv2.COLOR_BGR2HSV)[0][0]
                            
                            hue = mean_hsv[0]
                            saturation = mean_hsv[1]
                            value = mean_hsv[2]
                            
                            # More lenient color check
                            is_brown = (hue < 30 or hue > 160) and saturation > 20 and value < 120
                            is_dark = value < (mean_brightness * 0.7)
                            is_yellowish = (hue >= 15 and hue <= 40) and value > 50
                            is_green = (hue >= 35 and hue <= 85) and saturation > 40
                            
                            # Skip only if clearly green (not brown/dark/yellow)
                            if is_green and not (is_brown or is_dark or is_yellowish):
                                continue
                        except:
                            # If color check fails, continue anyway (don't filter out)
                            pass
                        
                        # Passed all filters - this is likely a lesion
                        padding = 10
                        x1 = max(0, x - padding)
                        y1 = max(0, y - padding)
                        x2 = min(w, x + w_box + padding)
                        y2 = min(h, y + h_box + padding)
                        
                        detections.append({
                            'bbox': [x1, y1, x2, y2],
                            'class': 1,  # lesion
                            'type': 'lesion'
                        })
            
            return detections
            
        except Exception as e:
            print(f"  ‚úó Error in color detection: {e}")
            return []
    
    def segment_with_sam(self, image_path, detections):
        """Refine each detection's bbox into a polygon + mask using SAM.

        SAM is prompted once per detection with its bbox. If no usable mask
        comes back, the bbox rectangle itself is used as polygon and mask.
        A detection whose processing raises is reported and skipped.

        Args:
            image_path: Path of the image on disk.
            detections: Dicts with ``'bbox'``, ``'class'`` and ``'type'`` keys.

        Returns:
            List of dicts with keys ``'class'``, ``'polygon'`` (N x 2 array),
            ``'mask'`` and ``'type'``; empty if the image cannot be read.
        """
        annotations = []

        source = str(image_path)
        frame = cv2.imread(source)
        if frame is None:
            print(f"  ‚úó Cannot read image for SAM: {image_path}")
            return annotations

        img_h, img_w = frame.shape[:2]

        for det in detections:
            box = det['bbox']
            label_id = det['class']

            try:
                outline, mask = self._sam_outline_for_box(source, box)

                if outline is None:
                    # Fallback: use bbox as polygon
                    print(f"  ‚ö† SAM failed for {det['type']}, using bbox")
                    x1, y1, x2, y2 = box
                    outline = np.array([[x1, y1], [x2, y1], [x2, y2], [x1, y2]])
                    mask = np.zeros((img_h, img_w), dtype=np.float32)
                    cv2.rectangle(mask, (x1, y1), (x2, y2), 1.0, -1)

                annotations.append({
                    'class': label_id,
                    'polygon': outline,
                    'mask': mask,
                    'type': det['type']
                })
            except Exception as err:
                print(f"  ‚úó Error on {det['type']}: {err}")

        return annotations

    def _sam_outline_for_box(self, source, box):
        """Run SAM on one bbox prompt; returns ``(polygon, mask)`` or ``(None, None)`` if unusable."""
        results = self.sam(source, bboxes=[box], verbose=False)
        if not results or len(results) == 0 or results[0].masks is None:
            return None, None

        masks_data = results[0].masks.data
        if len(masks_data) == 0:
            return None, None

        mask = masks_data[0].cpu().numpy()

        # Trace the mask and keep only its largest outer contour.
        contours, _ = cv2.findContours(
            (mask * 255).astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )
        if not contours:
            return None, None
        largest = max(contours, key=cv2.contourArea)

        # Simplify with a tolerance of 0.2% of the contour perimeter.
        epsilon = 0.002 * cv2.arcLength(largest, True)
        polygon = cv2.approxPolyDP(largest, epsilon, True).reshape(-1, 2)
        if len(polygon) < 3:
            return None, None

        return polygon, mask
    
    def polygon_to_yolo(self, polygon, img_width, img_height):
        """Convert polygon to YOLO format"""
        normalized = []
        for point in polygon:
            x_norm = max(0, min(1, point[0] / img_width))
            y_norm = max(0, min(1, point[1] / img_height))
            normalized.extend([x_norm, y_norm])
        return ' '.join(f"{x:.6f}" for x in normalized)
    
    def annotate_image(self, image_path, output_dir):
        """Annotate one image: detect, refine with SAM, write labels and a preview.

        Writes ``<output_dir>/labels/<stem>.txt`` (one
        ``<class> <normalised polygon>`` line per annotation) and
        ``<output_dir>/visualizations/<stem>_annotated.jpg`` (mask overlay with
        class names).

        Args:
            image_path: Path of the image to annotate.
            output_dir: Root directory for the ``labels`` and
                ``visualizations`` sub-directories (created if missing).

        Returns:
            The list of annotation dicts from ``segment_with_sam``, or None
            when the image cannot be read, nothing is detected, or no
            annotation survives (nothing is written in those cases).
        """
        img = cv2.imread(str(image_path))
        if img is None:
            print(f"‚úó Cannot read image: {image_path}")
            return None

        h, w = img.shape[:2]

        # Step 1: Color-based detection
        detections = self.detect_leaf_and_lesions(img)
        if not detections:
            print("  ‚ö† No leaf detected")
            return None

        # Step 2: SAM refinement
        annotations = self.segment_with_sam(image_path, detections)
        if not annotations:
            print("  ‚ö† No valid annotations")
            return None

        stem = Path(image_path).stem

        # Step 3: Save YOLO format
        labels_dir = Path(output_dir) / 'labels'
        labels_dir.mkdir(parents=True, exist_ok=True)

        with open(labels_dir / f"{stem}.txt", 'w') as f:
            for ann in annotations:
                yolo_str = self.polygon_to_yolo(ann['polygon'], w, h)
                f.write(f"{ann['class']} {yolo_str}\n")

        # Step 4: Visualization
        vis_dir = Path(output_dir) / 'visualizations'
        vis_dir.mkdir(parents=True, exist_ok=True)

        vis_img = img.copy()
        colors = {0: (0, 255, 0), 1: (0, 255, 255)}  # per-class overlay colour

        for ann in annotations:
            color = colors[ann['class']]
            mask_uint8 = (ann['mask'] * 255).astype(np.uint8)
            contours, _ = cv2.findContours(mask_uint8, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            if not contours:
                # Nothing to draw for an empty mask.
                continue

            # Translucent fill (30% colour) plus a solid outline.
            overlay = vis_img.copy()
            cv2.drawContours(overlay, contours, -1, color, -1)
            vis_img = cv2.addWeighted(vis_img, 0.7, overlay, 0.3, 0)
            cv2.drawContours(vis_img, contours, -1, color, 2)

            # Anchor the label on the LARGEST component: contours[0] is an
            # arbitrary one and may be a stray speck, while the exported
            # polygon is built from the largest contour.
            largest = max(contours, key=cv2.contourArea)
            M = cv2.moments(largest)
            if M["m00"] != 0:
                cx = int(M["m10"] / M["m00"])
                cy = int(M["m01"] / M["m00"])
                label = self.classes[ann['class']]
                # Clamp so the text does not start left of the image border.
                cv2.putText(vis_img, label, (max(cx - 40, 0), cy),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

        vis_file = vis_dir / f"{stem}_annotated.jpg"
        # cv2.imwrite reports failure through its return value, not an exception.
        if not cv2.imwrite(str(vis_file), vis_img):
            print(f"  [warn] Could not write visualization: {vis_file}")

        return annotations
    
    def process_dataset(self, image_dir, output_dir):
        """Process entire dataset"""
        image_dir = Path(image_dir)
        output_dir = Path(output_dir)
        
        # Create output structure
        images_dir = output_dir / 'images'
        images_dir.mkdir(parents=True, exist_ok=True)
        
        # Get all images
        image_files = list(image_dir.glob('*.jpg')) + \
                     list(image_dir.glob('*.jpeg')) + \
                     list(image_dir.glob('*.png')) + \
                     list(image_dir.glob('*.JPG')) + \
                     list(image_dir.glob('*.JPEG')) + \
                     list(image_dir.glob('*.PNG'))
        
        if not image_files:
            print(f"‚úó No images found in {image_dir}")
            return []
        
        print(f"\n{'='*60}")
        print(f"Processing {len(image_files)} images")
        print(f"{'='*60}\n")
        
        results = []
        
        for i, img_path in enumerate(image_files, 1):
            print(f"[{i}/{len(image_files)}] {img_path.name}...")
            
            # Copy image
            try:
                shutil.copy(img_path, images_dir / img_path.name)
            except Exception as e:
                print(f"  ‚úó Error copying image: {e}")
                continue
            
            # Annotate
            try:
                annotations = self.annotate_image(img_path, output_dir)
                
                if annotations:
                    leaf_count = sum(1 for a in annotations if a['class'] == 0)
                    lesion_count = sum(1 for a in annotations if a['class'] == 1)
                    print(f"  ‚úì Leaf: {leaf_count}, Lesions: {lesion_count}")
                    
                    results.append({
                        'image': img_path.name,
                        'status': 'success',
                        'leaf_count': leaf_count,
                        'lesion_count': lesion_count
                    })
                else:
                    print(f"  ‚ö† No annotations generated")
                    results.append({
                        'image': img_path.name,
                        'status': 'no_detection'
                    })
            except Exception as e:
                print(f"  ‚úó Error: {e}")
                results.append({
                    'image': img_path.name,
                    'status': 'error',
                    'error': str(e)
                })
        
        # Summary
        print(f"\n{'='*60}")
        print("SUMMARY")
        print(f"{'='*60}")
        success = sum(1 for r in results if r['status'] == 'success')
        print(f"‚úì Successfully annotated: {success}/{len(image_files)}")
        print(f"‚úó Failed: {len(image_files) - success}")
        
        total_lesions = sum(r.get('lesion_count', 0) for r in results)
        print(f"\nüìä Total lesions detected: {total_lesions}")
        
        # Save summary
        with open(output_dir / 'summary.json', 'w') as f:
            json.dump(results, f, indent=2)
        
        # Create dataset YAML
        yaml_content = f"""# Black Sigatoka Dataset
path: {output_dir}
train: images
val: images

names:
  0: whole_leaf
  1: lesion_discoloration

nc: 2
"""
        with open(output_dir / 'dataset.yaml', 'w') as f:
            f.write(yaml_content)
        
        print(f"\n‚úì Dataset config: {output_dir / 'dataset.yaml'}")
        
        return results


# ============================================================================
# MAIN EXECUTION
# ============================================================================

if __name__ == "__main__":
    # Configuration: where the raw images live and where the dataset is written.
    IMAGE_DIR = "/kaggle/input/early-stage/Sigatoka pics/Stage1"  # CHANGE THIS!
    OUTPUT_DIR = "/kaggle/working/black_sigatoka_dataset"

    # Opening banner.
    print("\n".join((
        "=" * 60,
        "BLACK SIGATOKA AUTO-ANNOTATION",
        "=" * 60,
        "Strategy: Color Detection + SAM 2 Refinement",
        "=" * 60,
    )))

    # Load the model, then annotate every image under IMAGE_DIR.
    annotator = BlackSigatokaAnnotator(sam_model="sam2_b.pt")
    results = annotator.process_dataset(IMAGE_DIR, OUTPUT_DIR)

    # Closing banner listing the output locations.
    print("\n".join((
        "\n" + "=" * 60,
        "‚úì COMPLETE!",
        "=" * 60,
        f"üìÅ Images:         {OUTPUT_DIR}/images/",
        f"üè∑Ô∏è  Labels:         {OUTPUT_DIR}/labels/",
        f"üé® Visualizations: {OUTPUT_DIR}/visualizations/",
        f"üìù Config:         {OUTPUT_DIR}/dataset.yaml",
        "=" * 60,
    )))

In [None]:
# Display annotated images (No matplotlib - uses IPython only)
# Every '*_annotated.jpg' under the visualizations folder is embedded as a
# base64 data URI in a single HTML flex grid, followed by a plain-text listing.
import base64
import html
from pathlib import Path

from IPython.display import display, Image, HTML

output_dir = Path("/kaggle/working/black_sigatoka_dataset")
vis_dir = output_dir / 'visualizations'

if vis_dir.exists():
    vis_files = sorted(vis_dir.glob('*_annotated.jpg'))

    if vis_files:
        print(f"\n{'='*60}")
        print(f"üñºÔ∏è  ANNOTATED IMAGES ({len(vis_files)}):")
        print(f"{'='*60}\n")

        # Display images in grid using HTML
        images_html = []
        for vis_file in vis_files:
            # Read and encode image
            img_data = base64.b64encode(vis_file.read_bytes()).decode()

            # Strip only the trailing '_annotated' (the glob guarantees it is
            # there) and escape the name, since it is placed into HTML.
            title = html.escape(vis_file.stem[:-len('_annotated')])

            images_html.append(f"""
            <div style='display: inline-block; margin: 10px; text-align: center; vertical-align: top;'>
                <p style='font-size: 11px; margin: 5px 0; font-weight: bold;'>{title}</p>
                <img src='data:image/jpeg;base64,{img_data}' 
                     style='max-width: 400px; max-height: 400px; border: 2px solid #4CAF50; border-radius: 5px; box-shadow: 0 2px 5px rgba(0,0,0,0.2);'/>
            </div>
            """)

        html_content = f"""
        <div style='display: flex; flex-wrap: wrap; justify-content: center; gap: 15px; padding: 20px;'>
            {''.join(images_html)}
        </div>
        """

        display(HTML(html_content))

        # Print list
        print(f"\nüìã Image List ({len(vis_files)}):")
        for i, vf in enumerate(vis_files, 1):
            print(f"  {i:2d}. {vf.name}")
    else:
        print("‚ö† No visualization files found")
else:
    print("‚ö† Visualizations directory not found")