In [1]:
!pip install --upgrade pip
!pip install opencv-python pillow "numpy>=1.24.0,<2.0.0" scipy>=1.10.0 matplotlib albumentations
!pip install git+https://github.com/facebookresearch/segment-anything.git
!pip install torch torchvision

Collecting pip
  Downloading pip-25.3-py3-none-any.whl.metadata (4.7 kB)
Downloading pip-25.3-py3-none-any.whl (1.8 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m1.8/1.8 MB[0m [31m16.7 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.1.2
    Uninstalling pip-24.1.2:
      Successfully uninstalled pip-24.1.2
Successfully installed pip-25.3
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
dopamine-rl 4.1.2 requires gymnasium>=1.0.0, but you have gymnasium 0.29.0 which is incompatible.[0m[31m
[0mCollecting git+https://github.com/facebookresearch/segment-anything.git
  Cloning https://github.com/facebookresearch/segment-anything.git to /tmp/pip-req-buil

In [7]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import shutil
from PIL import Image
import albumentations as A
import torch

# SAM imports
try:
    from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor
    SAM_AVAILABLE = True
    print("‚úÖ SAM library loaded successfully!")
except ImportError:
    SAM_AVAILABLE = False
    print("‚ö†Ô∏è  SAM not available, will use improved color-based detection")

print("‚úÖ All libraries imported successfully!")

# Download SAM model weights (vit_h is the largest, most accurate model)
# Alternative: vit_l (large) or vit_b (base) - faster but less accurate
SAM_MODEL_TYPE = "vit_h"  # Options: "vit_h", "vit_l", "vit_b"
SAM_CHECKPOINT_PATH = "/kaggle/working/sam_vit_h_4b8939.pth"

# Download model if not exists
if not os.path.exists(SAM_CHECKPOINT_PATH):
    print("üì• Downloading SAM model weights...")
    import urllib.request
    urllib.request.urlretrieve(
        "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth",
        SAM_CHECKPOINT_PATH
    )
    print("‚úÖ Model downloaded!")

# Initialize SAM model
sam_predictor = None
if SAM_AVAILABLE:
    try:
        print("üîÑ Loading SAM model...")
        device = "cuda" if torch.cuda.is_available() else "cpu"
        sam = sam_model_registry[SAM_MODEL_TYPE](checkpoint=SAM_CHECKPOINT_PATH)
        sam.to(device=device)
        sam_predictor = SamPredictor(sam)
        print(f"‚úÖ SAM model loaded on {device}!")
    except Exception as e:
        print(f"‚ö†Ô∏è  Error loading SAM: {e}")
        print("   Will use improved color-based detection instead")
        SAM_AVAILABLE = False

‚úÖ SAM library loaded successfully!
‚úÖ All libraries imported successfully!
üì• Downloading SAM model weights...
‚úÖ Model downloaded!
üîÑ Loading SAM model...
‚úÖ SAM model loaded on cuda!


In [9]:
# ============================================================================
# CELL 3: Setup Directories (KAGGLE VERSION)
# ============================================================================
# I-setup ang folder structure para sa Kaggle
# ============================================================================

# Kaggle Paths Configuration
KAGGLE_INPUT_DIR = "/kaggle/input"
KAGGLE_WORKING_DIR = "/kaggle/working"
DATASET_NAME = "early-stage"
SIGATOKA_FOLDER = "Sigatoka pics"

# Input: Read-only from Kaggle input
RAW_DATA_DIR = f"{KAGGLE_INPUT_DIR}/{DATASET_NAME}/{SIGATOKA_FOLDER}"

# Output: Writeable sa Kaggle working directory
BASE_DIR = f"{KAGGLE_WORKING_DIR}/banana_sigatoka_dataset"
ANNOTATED_DIR = f"{BASE_DIR}/annotated"
AUGMENTED_DIR = f"{BASE_DIR}/augmented"
VISUALIZATIONS_DIR = f"{BASE_DIR}/visualizations"

# Create output directories
directories = [
    f"{ANNOTATED_DIR}/images",
    f"{ANNOTATED_DIR}/labels",
    f"{AUGMENTED_DIR}/images",
    f"{AUGMENTED_DIR}/labels",
    VISUALIZATIONS_DIR
]

for directory in directories:
    os.makedirs(directory, exist_ok=True)

print("‚úÖ Directories created successfully!")
print(f"\nüìÅ Input: {RAW_DATA_DIR}")
print(f"üìÅ Output: {BASE_DIR}")

# Check if input directory exists and count images
if os.path.exists(RAW_DATA_DIR):
    print(f"\n‚úÖ Dataset found!")
    
    # Count images in stage folders
    stage_folders = ['Stage1', 'Stage2', 'Stage3']
    stage_counts = {}
    total_images = 0
    
    for stage in stage_folders:
        stage_path = os.path.join(RAW_DATA_DIR, stage)
        if os.path.exists(stage_path):
            images = []
            for ext in ['*.jpg', '*.jpeg', '*.png', '*.JPG', '*.JPEG', '*.PNG']:
                images.extend(list(Path(stage_path).glob(ext)))
            if len(images) > 0:
                stage_counts[stage] = len(images)
                total_images += len(images)
    
    # Display summary
    print(f"\nüìä Images found:")
    for stage, count in stage_counts.items():
        print(f"   {stage}: {count} images")
    print(f"\nüìà Total: {total_images} images")
    
else:
    print(f"\n‚ö†Ô∏è  Dataset not found: {RAW_DATA_DIR}")
    print(f"üí° Make sure you've added the dataset '{DATASET_NAME}' to your Kaggle notebook")

‚úÖ Directories created successfully!

üìÅ Input: /kaggle/input/early-stage/Sigatoka pics
üìÅ Output: /kaggle/working/banana_sigatoka_dataset

‚úÖ Dataset found!

üìä Images found:
   Stage1: 13 images
   Stage2: 13 images
   Stage3: 13 images

üìà Total: 39 images


In [10]:
# ============================================================================
# CELL 4: Auto-Annotation Function (with SAM for accurate detection)
# ============================================================================
# Automatic detection ng spots at whole leaf using SAM
# ============================================================================

def detect_spots_with_sam(image, sam_predictor, min_spot_area, w, h):
    """
    Use SAM to detect spots more accurately
    """
    if sam_predictor is None:
        return []
    
    # Convert BGR to RGB for SAM
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    
    # Set image for SAM
    sam_predictor.set_image(image_rgb)
    
    # First, use color-based detection to get candidate points
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
    l_channel, _, _ = cv2.split(lab)
    
    # Detect potential spot areas
    _, dark_spots = cv2.threshold(l_channel, 80, 255, cv2.THRESH_BINARY_INV)
    lower_brown = np.array([5, 50, 50])
    upper_brown = np.array([30, 255, 220])
    brown_spots = cv2.inRange(hsv, lower_brown, upper_brown)
    spot_mask = cv2.bitwise_or(dark_spots, brown_spots)
    
    # Clean up
    kernel = np.ones((5, 5), np.uint8)
    spot_mask = cv2.morphologyEx(spot_mask, cv2.MORPH_OPEN, kernel)
    spot_mask = cv2.morphologyEx(spot_mask, cv2.MORPH_CLOSE, kernel)
    
    # Find contours to get candidate points
    contours, _ = cv2.findContours(spot_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    spots = []
    for contour in contours:
        area = cv2.contourArea(contour)
        if min_spot_area < area < (w * h * 0.15):
            # Get center point of contour
            M = cv2.moments(contour)
            if M["m00"] != 0:
                cx = int(M["m10"] / M["m00"])
                cy = int(M["m01"] / M["m00"])
                
                # Use SAM to segment at this point
                try:
                    masks, scores, logits = sam_predictor.predict(
                        point_coords=np.array([[cx, cy]]),
                        point_labels=np.array([1]),  # 1 = foreground
                        multimask_output=False,
                    )
                    
                    # Get the best mask
                    if len(masks) > 0 and scores[0] > 0.5:  # Confidence threshold
                        mask = masks[0]
                        
                        # Convert mask to bounding box
                        y_indices, x_indices = np.where(mask)
                        if len(x_indices) > 0 and len(y_indices) > 0:
                            x_min, x_max = int(x_indices.min()), int(x_indices.max())
                            y_min, y_max = int(y_indices.min()), int(y_indices.max())
                            box_w = x_max - x_min
                            box_h = y_max - y_min
                            
                            # Filter by size
                            if min_spot_area < (box_w * box_h) < (w * h * 0.15):
                                spots.append((x_min, y_min, box_w, box_h))
                except:
                    continue
    
    return spots


def detect_spots_improved(image, hsv, lab, min_spot_area, w, h):
    """
    Improved color-based spot detection (fallback if SAM not available)
    """
    l_channel, a_channel, b_channel = cv2.split(lab)
    
    # Method 1: Detect dark spots
    _, dark_spots = cv2.threshold(l_channel, 80, 255, cv2.THRESH_BINARY_INV)
    
    # Method 2: Detect brown/yellow discoloration
    lower_brown = np.array([5, 50, 50])
    upper_brown = np.array([30, 255, 220])
    brown_spots = cv2.inRange(hsv, lower_brown, upper_brown)
    
    # Method 3: Detect red/brown spots
    lower_red = np.array([0, 50, 50])
    upper_red = np.array([10, 255, 255])
    red_spots = cv2.inRange(hsv, lower_red, upper_red)
    
    # Combine all methods
    spot_mask = cv2.bitwise_or(dark_spots, brown_spots)
    spot_mask = cv2.bitwise_or(spot_mask, red_spots)
    
    # Better noise removal
    kernel_small = np.ones((5, 5), np.uint8)
    spot_mask = cv2.morphologyEx(spot_mask, cv2.MORPH_OPEN, kernel_small)
    spot_mask = cv2.morphologyEx(spot_mask, cv2.MORPH_CLOSE, kernel_small)
    spot_mask = cv2.morphologyEx(spot_mask, cv2.MORPH_OPEN, np.ones((7, 7), np.uint8))
    
    # Find spot contours
    spot_contours, _ = cv2.findContours(spot_mask, cv2.RETR_EXTERNAL, 
                                         cv2.CHAIN_APPROX_SIMPLE)
    
    spots = []
    for contour in spot_contours:
        area = cv2.contourArea(contour)
        if min_spot_area < area < (w * h * 0.15):
            x, y, box_w, box_h = cv2.boundingRect(contour)
            aspect_ratio = box_w / box_h if box_h > 0 else 0
            if 0.3 < aspect_ratio < 3.0:
                spots.append((x, y, box_w, box_h))
    
    return spots


def auto_annotate_banana_leaf(image_path, output_image_dir, output_label_dir, 
                               visualize=True, min_spot_area=50, output_filename=None,
                               stage_class_id=1, use_sam=True):
    """
    Auto-detect banana leaf spots and create YOLO annotations using SAM
    
    Parameters:
    - image_path: path ng image
    - output_image_dir: saan isasave yung images
    - output_label_dir: saan isasave yung annotations
    - visualize: True kung gusto mo makita yung results
    - min_spot_area: minimum size ng spot
    - output_filename: custom filename
    - stage_class_id: class ID para sa stage (1=stage1, 2=stage2, 3=stage3)
    - use_sam: True kung gusto mo gamitin ang SAM
    
    Classes:
    0 = whole_leaf (buong dahon)
    1 = stage1
    2 = stage2
    3 = stage3
    """
    
    # Read image
    image = cv2.imread(image_path)
    if image is None:
        print(f"‚ùå Cannot read image: {image_path}")
        return None
    
    original = image.copy()
    h, w = image.shape[:2]
    
    # Convert to different color spaces
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
    
    annotations = []
    
    # ========================================
    # STEP 1: Detect WHOLE LEAF (buong dahon)
    # ========================================
    
    # Improved green detection
    lower_green = np.array([30, 30, 30])
    upper_green = np.array([90, 255, 255])
    leaf_mask = cv2.inRange(hsv, lower_green, upper_green)
    
    # Better morphological operations
    kernel_large = np.ones((20, 20), np.uint8)
    leaf_mask = cv2.morphologyEx(leaf_mask, cv2.MORPH_CLOSE, kernel_large)
    leaf_mask = cv2.morphologyEx(leaf_mask, cv2.MORPH_OPEN, kernel_large)
    leaf_mask = cv2.morphologyEx(leaf_mask, cv2.MORPH_CLOSE, np.ones((30, 30), np.uint8))
    
    # Find contours for whole leaf
    contours, _ = cv2.findContours(leaf_mask, cv2.RETR_EXTERNAL, 
                                     cv2.CHAIN_APPROX_SIMPLE)
    
    if contours:
        largest_contour = max(contours, key=cv2.contourArea)
        x, y, box_w, box_h = cv2.boundingRect(largest_contour)
        
        # Convert to YOLO format
        x_center = (x + box_w / 2) / w
        y_center = (y + box_h / 2) / h
        norm_width = box_w / w
        norm_height = box_h / h
        
        annotations.append(f"0 {x_center:.6f} {y_center:.6f} {norm_width:.6f} {norm_height:.6f}")
        
        if visualize:
            cv2.rectangle(original, (x, y), (x + box_w, y + box_h), (0, 255, 0), 5)
            cv2.putText(original, "WHOLE LEAF", (x, y - 10), 
                       cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 3)
    
    # ========================================
    # STEP 2: Detect SPOTS using SAM or improved method
    # ========================================
    
    if use_sam and SAM_AVAILABLE and sam_predictor is not None:
        spots = detect_spots_with_sam(image, sam_predictor, min_spot_area, w, h)
        method = "SAM"
    else:
        spots = detect_spots_improved(image, hsv, lab, min_spot_area, w, h)
        method = "Color-based"
    
    # Filter and annotate spots
    spot_count = 0
    
    for spot in spots:
        x, y, box_w, box_h = spot
        
        # Convert to YOLO format
        x_center = (x + box_w / 2) / w
        y_center = (y + box_h / 2) / h
        norm_width = box_w / w
        norm_height = box_h / h
        
        # Add spot annotation with correct stage class ID
        annotations.append(f"{stage_class_id} {x_center:.6f} {y_center:.6f} {norm_width:.6f} {norm_height:.6f}")
        
        if visualize:
            stage_label = f"STAGE{stage_class_id}"
            cv2.rectangle(original, (x, y), (x + box_w, y + box_h), (0, 0, 255), 4)
            cv2.putText(original, stage_label, (x, y - 5), 
                       cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
        
        spot_count += 1
    
    # Save files
    if output_filename:
        image_name = output_filename
    else:
        image_name = Path(image_path).stem
    
    # Save annotation file (.txt)
    label_path = os.path.join(output_label_dir, f"{image_name}.txt")
    with open(label_path, 'w') as f:
        f.write('\n'.join(annotations))
    
    # Copy original image to output
    output_img_path = os.path.join(output_image_dir, f"{image_name}.jpg")
    shutil.copy(image_path, output_img_path)
    
    # Save visualization
    if visualize:
        vis_path = os.path.join(VISUALIZATIONS_DIR, f"{image_name}_annotated.jpg")
        cv2.imwrite(vis_path, original)
    
    stage_name = f"stage{stage_class_id}"
    print(f"‚úÖ {image_name}: {len(annotations)} total boxes (1 leaf + {spot_count} {stage_name} spots) [{method}]")
    
    return annotations, original

In [11]:
# ============================================================================
# CELL 5: Process All Images (Batch Auto-Annotation) - IMPROVED SAM
# ============================================================================

def annotate_all_images(input_folder, output_image_dir, output_label_dir, 
                        min_spot_area=100, preserve_stage_info=True, use_sam=True):
    """
    Process all images using improved SAM for accurate annotation
    """
    
    # Safely check if SAM is available
    try:
        sam_available = SAM_AVAILABLE if 'SAM_AVAILABLE' in globals() else False
        sam_gen = sam_mask_generator if 'sam_mask_generator' in globals() and sam_mask_generator is not None else None
    except:
        sam_available = False
        sam_gen = None
    
    supported_formats = ['.jpg', '.jpeg', '.png', '.JPG', '.JPEG', '.PNG']
    image_files = []
    
    for ext in supported_formats:
        image_files.extend(Path(input_folder).rglob(f"*{ext}"))
    
    if not image_files:
        print(f"‚ùå No images found in {input_folder}")
        return
    
    print(f"üì∏ Found {len(image_files)} images")
    
    # Determine detection method
    if use_sam and sam_available and sam_gen is not None:
        detection_method = "SAM (Improved)"
    else:
        detection_method = "Color-based"
        if use_sam:
            print("‚ö†Ô∏è  SAM requested but not available. Using color-based detection.")
    
    print(f"üîß Using {detection_method} detection")
    print("üöÄ Starting auto-annotation...\n")
    
    success_count = 0
    stage_counts = {'Stage1': 0, 'Stage2': 0, 'Stage3': 0, 'other': 0}
    
    for idx, image_path in enumerate(image_files, 1):
        # Extract stage info
        path_parts = image_path.parts
        stage_name = None
        stage_class_id = 1
        
        for part in path_parts:
            if 'stage1' in part.lower() or part == 'Stage1':
                stage_name = 'Stage1'
                stage_class_id = 1
                break
            elif 'stage2' in part.lower() or part == 'Stage2':
                stage_name = 'Stage2'
                stage_class_id = 2
                break
            elif 'stage3' in part.lower() or part == 'Stage3':
                stage_name = 'Stage3'
                stage_class_id = 3
                break
        
        original_name = image_path.stem
        if preserve_stage_info and stage_name:
            new_filename = f"{stage_name.lower()}_{original_name}"
            stage_counts[stage_name] = stage_counts.get(stage_name, 0) + 1
        else:
            new_filename = original_name
            stage_counts['other'] += 1
        
        print(f"[{idx}/{len(image_files)}] Processing: {image_path.name} ‚Üí {stage_name or 'unknown'}")
        
        try:
            result = auto_annotate_banana_leaf(
                str(image_path), 
                output_image_dir, 
                output_label_dir,
                visualize=True,
                min_spot_area=min_spot_area,
                output_filename=new_filename,
                stage_class_id=stage_class_id,
                use_sam=use_sam
            )
            if result:
                success_count += 1
        except Exception as e:
            print(f"‚ùå Error processing {image_path.name}: {e}")
    
    print(f"\n{'='*70}")
    print(f"‚úÖ Annotation complete!")
    print(f"{'='*70}")
    print(f"üìä Successfully annotated: {success_count}/{len(image_files)} images")
    print(f"\nüìà Images by stage:")
    for stage, count in stage_counts.items():
        if count > 0:
            print(f"   {stage}: {count} images")
    print(f"\nüìÅ Images saved to: {output_image_dir}")
    print(f"üìÅ Labels saved to: {output_label_dir}")
    print(f"üìÅ Visualizations: {VISUALIZATIONS_DIR}")


# RUN ANNOTATION - Uncomment to execute
annotate_all_images(
    input_folder=RAW_DATA_DIR,
    output_image_dir=f"{ANNOTATED_DIR}/images",
    output_label_dir=f"{ANNOTATED_DIR}/labels",
    min_spot_area=100,
    preserve_stage_info=True,
    use_sam=True  # Will auto-fallback to color-based if SAM not available
)

üì∏ Found 39 images
‚ö†Ô∏è  SAM requested but not available. Using color-based detection.
üîß Using Color-based detection
üöÄ Starting auto-annotation...

[1/39] Processing: IMG_20251122_094534.jpg ‚Üí Stage1
‚úÖ stage1_IMG_20251122_094534: 1 total boxes (1 leaf + 0 stage1 spots) [SAM]
[2/39] Processing: IMG_20251122_093458.jpg ‚Üí Stage1
‚úÖ stage1_IMG_20251122_093458: 1 total boxes (1 leaf + 0 stage1 spots) [SAM]
[3/39] Processing: IMG_20251122_090007.jpg ‚Üí Stage1
‚úÖ stage1_IMG_20251122_090007: 2 total boxes (1 leaf + 1 stage1 spots) [SAM]
[4/39] Processing: IMG_20251122_093454.jpg ‚Üí Stage1
‚úÖ stage1_IMG_20251122_093454: 1 total boxes (1 leaf + 0 stage1 spots) [SAM]
[5/39] Processing: IMG_20251122_095349.jpg ‚Üí Stage1
‚úÖ stage1_IMG_20251122_095349: 1 total boxes (1 leaf + 0 stage1 spots) [SAM]
[6/39] Processing: IMG_20251122_094843.jpg ‚Üí Stage1
‚úÖ stage1_IMG_20251122_094843: 1 total boxes (1 leaf + 0 stage1 spots) [SAM]
[7/39] Processing: IMG_20251122_094954.jpg ‚Üí Sta

In [6]:
# ============================================================================
# CELL 5: Process All Images (Batch Auto-Annotation) - KAGGLE VERSION with SAM
# ============================================================================

def annotate_all_images(input_folder, output_image_dir, output_label_dir, 
                        min_spot_area=50, preserve_stage_info=True, use_sam=True):
    """
    Process all images using SAM for accurate annotation
    """
    
    supported_formats = ['.jpg', '.jpeg', '.png', '.JPG', '.JPEG', '.PNG']
    image_files = []
    
    for ext in supported_formats:
        image_files.extend(Path(input_folder).rglob(f"*{ext}"))
    
    if not image_files:
        print(f"‚ùå No images found in {input_folder}")
        return
    
    print(f"üì∏ Found {len(image_files)} images")
    print(f"üîß Using {'SAM' if (use_sam and SAM_AVAILABLE and sam_predictor is not None) else 'Color-based'} detection")
    print("üöÄ Starting auto-annotation...\n")
    
    success_count = 0
    stage_counts = {'Stage1': 0, 'Stage2': 0, 'Stage3': 0, 'other': 0}
    
    for idx, image_path in enumerate(image_files, 1):
        # Extract stage info from path
        path_parts = image_path.parts
        stage_name = None
        stage_class_id = 1
        
        for part in path_parts:
            if 'stage1' in part.lower() or part == 'Stage1':
                stage_name = 'Stage1'
                stage_class_id = 1
                break
            elif 'stage2' in part.lower() or part == 'Stage2':
                stage_name = 'Stage2'
                stage_class_id = 2
                break
            elif 'stage3' in part.lower() or part == 'Stage3':
                stage_name = 'Stage3'
                stage_class_id = 3
                break
        
        # Create filename with stage info
        original_name = image_path.stem
        if preserve_stage_info and stage_name:
            new_filename = f"{stage_name.lower()}_{original_name}"
            stage_counts[stage_name] = stage_counts.get(stage_name, 0) + 1
        else:
            new_filename = original_name
            stage_counts['other'] += 1
        
        print(f"[{idx}/{len(image_files)}] Processing: {image_path.name} ‚Üí {stage_name or 'unknown'}")
        
        try:
            result = auto_annotate_banana_leaf(
                str(image_path), 
                output_image_dir, 
                output_label_dir,
                visualize=True,
                min_spot_area=min_spot_area,
                output_filename=new_filename,
                stage_class_id=stage_class_id,
                use_sam=use_sam
            )
            if result:
                success_count += 1
        except Exception as e:
            print(f"‚ùå Error processing {image_path.name}: {e}")
    
    print(f"\n{'='*70}")
    print(f"‚úÖ Annotation complete!")
    print(f"{'='*70}")
    print(f"üìä Successfully annotated: {success_count}/{len(image_files)} images")
    print(f"\nüìà Images by stage:")
    for stage, count in stage_counts.items():
        if count > 0:
            print(f"   {stage}: {count} images")
    print(f"\nüìÅ Images saved to: {output_image_dir}")
    print(f"üìÅ Labels saved to: {output_label_dir}")
    print(f"üìÅ Visualizations: {VISUALIZATIONS_DIR}")


# RUN ANNOTATION - Uncomment to execute
annotate_all_images(
    input_folder=RAW_DATA_DIR,
    output_image_dir=f"{ANNOTATED_DIR}/images",
    output_label_dir=f"{ANNOTATED_DIR}/labels",
    min_spot_area=50,
    preserve_stage_info=True,
    use_sam=True  # Set to False kung gusto mo color-based lang
)

‚ùå No images found in /kaggle/working/banana_sigatoka_dataset/annotated/images
üí° Run annotation first (Cell 5)


  A.GaussNoise(                         # Add realistic noise


In [None]:
# ============================================================================
# CELL 7: Visualize Annotation Results
# ============================================================================
# Tignan kung tama yung annotations
# ============================================================================

def visualize_annotation(image_path, label_path, figsize=(15, 10)):
    """
    Display image with bounding boxes
    """
    
    image = cv2.imread(image_path)
    if image is None:
        print(f"‚ùå Cannot read image: {image_path}")
        return
    
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    h, w = image.shape[:2]
    
    # Read annotations
    if not os.path.exists(label_path):
        print(f"‚ö†Ô∏è  No label file: {label_path}")
        plt.figure(figsize=figsize)
        plt.imshow(image)
        plt.title("Image (No annotations)")
        plt.axis('off')
        plt.show()
        return
    
    with open(label_path, 'r') as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) < 5:
                continue
                
            class_id = int(parts[0])
            x_center, y_center, width, height = [float(x) for x in parts[1:5]]
            
            # Convert to pixel coordinates
            x1 = int((x_center - width / 2) * w)
            y1 = int((y_center - height / 2) * h)
            x2 = int((x_center + width / 2) * w)
            y2 = int((y_center + height / 2) * h)
            
            # Draw
            color = (0, 255, 0) if class_id == 0 else (255, 0, 0)
            label = "WHOLE_LEAF" if class_id == 0 else "SPOT"
            thickness = 3 if class_id == 0 else 2
            
            cv2.rectangle(image, (x1, y1), (x2, y2), color, thickness)
            cv2.putText(image, label, (x1, max(y1 - 10, 20)), 
                       cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)
    
    # Display
    plt.figure(figsize=figsize)
    plt.imshow(image)
    plt.title(f'Annotated: {Path(image_path).name}')
    plt.axis('off')
    plt.tight_layout()
    plt.show()


def show_random_samples(image_dir, label_dir, num_samples=3):
    """
    Show random annotated images
    """
    
    image_files = list(Path(image_dir).glob("*.jpg")) + list(Path(image_dir).glob("*.png"))
    
    if not image_files:
        print(f"‚ùå No images in {image_dir}")
        return
    
    # Random sample
    import random
    samples = random.sample(image_files, min(num_samples, len(image_files)))
    
    print(f"üì∏ Showing {len(samples)} random samples:\n")
    
    for img_path in samples:
        label_path = Path(label_dir) / f"{img_path.stem}.txt"
        visualize_annotation(str(img_path), str(label_path))


VISUALIZE - Uncomment to see results
show_random_samples(
    image_dir=f"{ANNOTATED_DIR}/images",
    label_dir=f"{ANNOTATED_DIR}/labels",
    num_samples=3
)


In [None]:
# ============================================================================
# CELL 8: Check Dataset Statistics
# ============================================================================

def check_dataset_stats(image_dir, label_dir):
    """
    Display dataset statistics
    """
    
    image_files = list(Path(image_dir).glob("*.jpg")) + list(Path(image_dir).glob("*.png"))
    
    if not image_files:
        print(f"‚ùå No images in {image_dir}")
        return
    
    total_images = len(image_files)
    total_whole_leaf = 0
    total_spots = 0
    images_with_spots = 0
    
    for img_path in image_files:
        label_path = Path(label_dir) / f"{img_path.stem}.txt"
        
        if label_path.exists():
            with open(label_path, 'r') as f:
                lines = f.readlines()
                has_spot = False
                for line in lines:
                    parts = line.strip().split()
                    if len(parts) >= 5:
                        class_id = int(parts[0])
                        if class_id == 0:
                            total_whole_leaf += 1
                        else:
                            total_spots += 1
                            has_spot = True
                
                if has_spot:
                    images_with_spots += 1
    
    print("="*70)
    print("üìä DATASET STATISTICS")
    print("="*70)
    print(f"üì∏ Total images: {total_images}")
    print(f"üçÉ Whole leaf boxes: {total_whole_leaf}")
    print(f"üî¥ Disease spots: {total_spots}")
    print(f"üìà Images with spots: {images_with_spots}/{total_images} ({images_with_spots/total_images*100:.1f}%)")
    print(f"üìä Average spots per image: {total_spots/total_images:.2f}")
    print("="*70)


CHECK STATS - Uncomment to see
print("\nüìä ANNOTATED DATASET:")
check_dataset_stats(f"{ANNOTATED_DIR}/images", f"{ANNOTATED_DIR}/labels")

print("\nüìä AUGMENTED DATASET:")
check_dataset_stats(f"{AUGMENTED_DIR}/images", f"{AUGMENTED_DIR}/labels")


In [None]:
def run_annotation_pipeline(raw_folder, augmentations_per_image=5, min_spot_area=50):
    """
    Run complete annotation + augmentation pipeline
    
    Steps:
    1. Auto-annotate all raw images
    2. Augment annotated dataset
    3. Show statistics
    """
    
    print("="*70)
    print("üçå BANANA BLACK SIGATOKA - ANNOTATION & AUGMENTATION")
    print("="*70)
    
    # Step 1: Annotation
    print("\nüìç STEP 1: Auto-Annotation")
    print("-"*70)
    annotate_all_images(
        input_folder=raw_folder,
        output_image_dir=f"{ANNOTATED_DIR}/images",
        output_label_dir=f"{ANNOTATED_DIR}/labels",
        min_spot_area=min_spot_area
    )
    
    # Step 2: Augmentation
    print("\nüìç STEP 2: Data Augmentation")
    print("-"*70)
    augment_dataset(
        image_dir=f"{ANNOTATED_DIR}/images",
        label_dir=f"{ANNOTATED_DIR}/labels",
        output_image_dir=f"{AUGMENTED_DIR}/images",
        output_label_dir=f"{AUGMENTED_DIR}/labels",
        augmentations_per_image=augmentations_per_image
    )
    
    # Step 3: Statistics
    print("\nüìç STEP 3: Dataset Statistics")
    print("-"*70)
    print("\nüìä ANNOTATED DATASET:")
    check_dataset_stats(f"{ANNOTATED_DIR}/images", f"{ANNOTATED_DIR}/labels")
    
    print("\nüìä AUGMENTED DATASET:")
    check_dataset_stats(f"{AUGMENTED_DIR}/images", f"{AUGMENTED_DIR}/labels")
    
    print("\n" + "="*70)
    print("‚úÖ PIPELINE COMPLETE!")
    print("="*70)
    print(f"\nüìÅ Output folders:")
    print(f"   ‚Ä¢ Annotated: {ANNOTATED_DIR}")
    print(f"   ‚Ä¢ Augmented: {AUGMENTED_DIR}")
    print(f"   ‚Ä¢ Visualizations: {VISUALIZATIONS_DIR}")
    print(f"\nüí° Next: Review visualizations, then ready for training!")
