**ULTIMATE YOLOv8 FISH DETECTION 1.0**

In [None]:

# Imported here because the notebook's main import cell comes later; without
# this line the cell raises NameError on a fresh kernel (Restart & Run All).
from pathlib import Path

# Locations of the raw fish images and their segmentation masks on the
# mounted Google Drive. The Drive must already be mounted for these to exist.
IMAGES_DIR = Path("/content/drive/MyDrive/kaggle_fish_dataset/fish_image")
MASKS_DIR = Path("/content/drive/MyDrive/kaggle_fish_dataset/mask_image")

print("Images directory:", IMAGES_DIR)
print("Masks directory :", MASKS_DIR)

print("\nChecking directories exist:")
print("Images folder exists:", IMAGES_DIR.exists())
print("Masks folder exists :", MASKS_DIR.exists())

# Fail fast: every later cell reads from these two folders.
if not IMAGES_DIR.exists() or not MASKS_DIR.exists():
    raise ValueError("‚ùå One or both dataset folders do NOT exist.")


Images directory: /content/drive/MyDrive/kaggle_fish_dataset/fish_image
Masks directory : /content/drive/MyDrive/kaggle_fish_dataset/mask_image

Checking directories exist:
Images folder exists: True
Masks folder exists : True


In [None]:
def add_extensions(folder):
    """Give every extension-less file directly inside ``folder`` a ``.png`` suffix.

    Only regular files whose name contains no ``.`` are renamed; sub-directories
    and files that already have an extension are left untouched. A file is
    skipped (and reported) when the ``.png`` target already exists, so an
    existing file is never overwritten.

    Args:
        folder: Directory to scan (``str`` or ``pathlib.Path``). Not recursive.

    Returns:
        None. Progress and a summary are printed.
    """
    # Local import: this cell runs before the notebook's main import cell, so
    # it must not depend on module-level imports made later.
    from pathlib import Path

    folder = Path(folder)
    print(f"\nüîß Fixing filenames inside: {folder}")
    fixed = 0

    # Snapshot (and sort) the listing up front so renaming cannot disturb the
    # iteration and the printed order is deterministic.
    for f in sorted(folder.iterdir()):
        if not f.is_file() or "." in f.name:
            continue

        new_name = f.with_suffix(".png")
        if new_name.exists():
            # A plain rename would silently replace the existing file.
            print("SKIPPED:", f.name, "- target already exists:", new_name.name)
            continue

        f.rename(new_name)
        fixed += 1
        print("RENAMED:", f.name, "‚Üí", new_name.name)

    if fixed == 0:
        print("‚úîÔ∏è No files needed renaming.")
    else:
        print(f"‚úîÔ∏è Fixed {fixed} filename(s).")

# Normalise file names in the image folder first, then in the mask folder.
for dataset_dir in (IMAGES_DIR, MASKS_DIR):
    add_extensions(dataset_dir)


üîß Fixing filenames inside: /content/drive/MyDrive/kaggle_fish_dataset/fish_image
‚úîÔ∏è No files needed renaming.

üîß Fixing filenames inside: /content/drive/MyDrive/kaggle_fish_dataset/mask_image
‚úîÔ∏è No files needed renaming.


In [None]:
from ultralytics import YOLO
import cv2
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import shutil
import yaml
from tqdm import tqdm
import os
from PIL import Image
import torch
import random

# Confirm the imports worked and report the library versions / CUDA status.
print("‚úÖ Libraries imported!")
for label, value in (
    ("OpenCV version", cv2.__version__),
    ("PyTorch version", torch.__version__),
    ("CUDA available", torch.cuda.is_available()),
):
    print(f"{label}: {value}")


Creating new Ultralytics Settings v0.0.6 file ‚úÖ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
‚úÖ Libraries imported!
OpenCV version: 4.12.0
PyTorch version: 2.9.0+cu126
CUDA available: False


**Add the dataset to your Google Drive, then run the cell below.**

In [None]:
"""
ULTIMATE YOLOv8 FISH DETECTION - SINGLE CELL SOLUTION
Fixed for nested folders with subfolder-based mapping (fish_01 -> mask_01)
Handles 27,404 images with proper conversion logic
"""

# ============================================================================
# SETUP & IMPORTS
# ============================================================================
print("=" * 80)
print("üêü YOLO FISH DETECTION - ULTIMATE VERSION")
print("=" * 80)

# Mount Google Drive
print("\n[1/9] Mounting Google Drive...")
try:
    from google.colab import drive
    drive.mount('/content/drive', force_remount=True)
    print("‚úÖ Mounted!")
except:
    print("‚ö†Ô∏è  Not in Colab")

# Install packages
print("\n[2/9] Installing packages...")
import sys
!{sys.executable} -m pip install -q ultralytics opencv-python-headless
print("‚úÖ Done!")

# Import libraries
print("\n[3/9] Importing libraries...")
from ultralytics import YOLO
import cv2
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import shutil
import yaml
from tqdm import tqdm
import torch
import random

print("‚úÖ Ready!")

# ============================================================================
# GPU CHECK
# ============================================================================
print("\n[4/9] GPU Status...")
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
if DEVICE == 'cuda':
    print(f"  ‚úÖ GPU: {torch.cuda.get_device_name(0)}")
else:
    print("  ‚ö†Ô∏è  CPU only - Training will be SLOW!")
    print("  Enable GPU: Runtime ‚Üí Change runtime type ‚Üí T4 GPU")

# ============================================================================
# CONFIGURATION - EDIT HERE
# ============================================================================
print("\n[5/9] Configuration...")

YOUR_DATASET = "/content/drive/MyDrive/kaggle_fish_dataset"
FISH_FOLDER = "fish_image"
MASK_FOLDER = "mask_image"
OUTPUT = "/content/yolo_dataset"
TRAINING_OUTPUT = "/content/drive/MyDrive/yolo_training"

MODEL = 'n'  # 'n'=fast, 's'=small, 'm'=medium, 'l'=large
EPOCHS = 50  # Start with 50 for testing
IMG_SIZE = 416  # Smaller size for faster training
BATCH = 16 if DEVICE == 'cuda' else 4

print(f"  Dataset: {YOUR_DATASET}")
print(f"  Model: YOLOv8{MODEL} | Device: {DEVICE.upper()}")
print(f"  Epochs: {EPOCHS} | Batch: {BATCH}")

# ============================================================================
# SMART MASK-TO-YOLO CONVERTER
# ============================================================================
print("\n[6/9] Converting dataset...")

class SmartConverter:
    """Build a YOLO detection dataset from fish images and their binary masks.

    ``fish_dir`` and ``mask_dir`` are expected to contain one level of
    subfolders. A fish subfolder is matched with the mask subfolder whose name
    is the fish folder's name with ``fish`` replaced by ``mask``
    (``fish_01`` -> ``mask_01``). Each usable mask is converted to class-0
    bounding boxes and written, together with a copy of its image, to
    ``output_dir/{images,labels}/{train,val}``.
    """

    # Extensions treated as images while scanning. Anything else (thumbnail
    # caches, notes, ...) is ignored so it cannot take part in pairing.
    IMAGE_EXTS = {'.png', '.jpg', '.jpeg', '.bmp', '.tif', '.tiff'}

    def __init__(self, fish_dir, mask_dir, output_dir):
        """Store the paths and (re)create an empty output directory tree.

        Args:
            fish_dir: Directory holding the fish-image subfolders.
            mask_dir: Directory holding the mask subfolders.
            output_dir: Destination of the YOLO dataset. Any existing content
                is deleted.

        Raises:
            ValueError: If ``output_dir`` is, or contains, one of the source
                directories (wiping it would delete the source data).
        """
        self.fish_dir = Path(fish_dir)
        self.mask_dir = Path(mask_dir)
        self.output_dir = Path(output_dir)

        # The output directory is wiped below, so refuse locations that would
        # take the source data with them.
        out = self.output_dir.resolve()
        for src in (self.fish_dir.resolve(), self.mask_dir.resolve()):
            if out == src or out in src.parents:
                raise ValueError(
                    f"Refusing to wipe output directory {self.output_dir}: "
                    f"it contains source data ({src})."
                )

        # Clean output directory
        if self.output_dir.exists():
            shutil.rmtree(self.output_dir)

        # Create structure
        for split in ['train', 'val']:
            (self.output_dir / "images" / split).mkdir(parents=True)
            (self.output_dir / "labels" / split).mkdir(parents=True)

    def mask_to_bbox(self, mask_path, min_area=100):
        """Convert one mask image into YOLO-format bounding-box lines.

        Args:
            mask_path: Path of the mask image (read as grayscale).
            min_area: Contours with an area below this many pixels are
                treated as noise and skipped.

        Returns:
            A list of ``"0 x_center y_center width height"`` strings
            (coordinates normalised to ``[0, 1]``), or ``None`` when the mask
            cannot be read or yields no box.
        """
        mask = cv2.imread(str(mask_path), cv2.IMREAD_GRAYSCALE)
        if mask is None:
            return None

        # Ensure binary: pixel values above 1 become 255, everything else 0.
        _, mask = cv2.threshold(mask, 1, 255, cv2.THRESH_BINARY)

        # Only outer contours: one box per connected foreground region.
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if not contours:
            return None

        bboxes = []
        h, w = mask.shape

        for contour in contours:
            if cv2.contourArea(contour) < min_area:  # Skip tiny noise
                continue

            x, y, bw, bh = cv2.boundingRect(contour)

            # Convert to YOLO format (normalized centre / size)
            x_center = np.clip((x + bw/2) / w, 0, 1)
            y_center = np.clip((y + bh/2) / h, 0, 1)
            width = np.clip(bw / w, 0, 1)
            height = np.clip(bh / h, 0, 1)

            bboxes.append(f"0 {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}")

        return bboxes if bboxes else None

    def _list_images(self, root):
        """Return the sorted image files found one subfolder below ``root``."""
        return sorted(
            p for p in root.glob("*/*.*")
            if p.suffix.lower() in self.IMAGE_EXTS
        )

    @staticmethod
    def _pair_folder(fish_list, mask_list):
        """Pair the images of one subfolder with that subfolder's masks.

        Files are matched by name: a mask matches when its stem equals the
        image stem with ``fish`` replaced by ``mask``, or the image stem
        itself. Only if no name matches at all does it fall back to pairing by
        sorted position.

        Returns:
            ``(pairs, matched_by_name)`` where ``pairs`` is a list of
            ``(fish_path, mask_path)`` tuples.
        """
        masks_by_stem = {m.stem: m for m in mask_list}
        pairs = []
        for fish_img in fish_list:
            for key in (fish_img.stem.replace('fish', 'mask'), fish_img.stem):
                mask_img = masks_by_stem.get(key)
                if mask_img is not None:
                    pairs.append((fish_img, mask_img))
                    break

        if pairs:
            return pairs, True

        # File names do not correspond at all: pair by sorted position.
        return list(zip(fish_list, mask_list)), False

    def convert(self, train_split=0.8, seed=None):
        """Pair images with masks, split them and write the YOLO dataset.

        Args:
            train_split: Fraction of the pairs assigned to the training split.
            seed: Optional seed for the shuffle. ``None`` uses the global
                ``random`` state.

        Returns:
            ``True`` when at least one sample was written to both the train
            and the val split, otherwise ``False``.

        Raises:
            ValueError: If no files are found or no image/mask pair can be
                formed.
        """
        print("  Building file map...")

        # Get all images and masks from subfolders
        fish_files = self._list_images(self.fish_dir)
        mask_files = self._list_images(self.mask_dir)

        print(f"  Found {len(fish_files)} fish images")
        print(f"  Found {len(mask_files)} mask images")

        if not fish_files or not mask_files:
            raise ValueError("No files found in subfolders!")

        # Group by subfolder name
        fish_groups = {}
        for f in fish_files:
            fish_groups.setdefault(f.parent.name, []).append(f)

        mask_groups = {}
        for f in mask_files:
            mask_groups.setdefault(f.parent.name, []).append(f)

        # Create paired list (fish_01 -> mask_01 mapping)
        pairs = []
        unmatched = 0        # images for which no mask was found
        risky_folders = []   # index-paired folders with differing file counts
        for fish_folder in sorted(fish_groups):
            mask_folder = fish_folder.replace('fish', 'mask')
            if mask_folder not in mask_groups:
                continue

            fish_list = sorted(fish_groups[fish_folder])
            mask_list = sorted(mask_groups[mask_folder])

            folder_pairs, by_name = self._pair_folder(fish_list, mask_list)
            if not by_name and len(fish_list) != len(mask_list):
                risky_folders.append(fish_folder)
            unmatched += len(fish_list) - len(folder_pairs)
            pairs.extend(folder_pairs)

        print(f"  Mapped {len(pairs)} image-mask pairs")
        if unmatched:
            print(f"  Skipped {unmatched} image(s) without a matching mask")
        if risky_folders:
            print(f"  WARNING: paired by position with unequal counts in: {', '.join(risky_folders)}")

        if not pairs:
            raise ValueError("No image-mask pairs found! Check folder naming.")

        # Shuffle and split
        if seed is None:
            random.shuffle(pairs)
        else:
            random.Random(seed).shuffle(pairs)
        split_idx = int(len(pairs) * train_split)
        train_pairs = pairs[:split_idx]
        val_pairs = pairs[split_idx:]

        print(f"  Train: {len(train_pairs)} | Val: {len(val_pairs)}")

        # Convert
        converted = {'train': 0, 'val': 0}

        for split_name, split_pairs in [('train', train_pairs), ('val', val_pairs)]:
            img_dir = self.output_dir / "images" / split_name
            lbl_dir = self.output_dir / "labels" / split_name

            for fish_path, mask_path in tqdm(split_pairs, desc=f"  {split_name}"):
                bboxes = self.mask_to_bbox(mask_path)
                if not bboxes:
                    continue

                # Files from different subfolders can share a name. Prefix the
                # subfolder on collision instead of overwriting a sample.
                stem = fish_path.stem
                if (img_dir / fish_path.name).exists() or (lbl_dir / f"{stem}.txt").exists():
                    stem = f"{fish_path.parent.name}_{stem}"

                # Save image and its label under the same stem
                shutil.copy(fish_path, img_dir / f"{stem}{fish_path.suffix}")
                (lbl_dir / f"{stem}.txt").write_text('\n'.join(bboxes))

                converted[split_name] += 1

        print(f"\n  ‚úÖ Converted: Train={converted['train']}, Val={converted['val']}")
        return converted['train'] > 0 and converted['val'] > 0

# Run conversion
# Seed Python's RNG first: the converter shuffles the image/mask pairs with
# the `random` module before splitting, so without a fixed seed every run
# would produce a different train/val split.
SPLIT_SEED = 42
random.seed(SPLIT_SEED)

converter = SmartConverter(
    f"{YOUR_DATASET}/{FISH_FOLDER}",
    f"{YOUR_DATASET}/{MASK_FOLDER}",
    OUTPUT
)

# convert() returns False when either split ended up empty.
success = converter.convert(train_split=0.8)
if not success:
    raise ValueError("Conversion failed - check your dataset structure")

# ============================================================================
# CREATE CONFIG
# ============================================================================
print("\n[7/9] Creating config...")

# Dataset description consumed by the trainer: dataset root, the image
# folders of each split (relative to the root) and the single class.
config = {
    'path': OUTPUT,
    'train': 'images/train',
    'val': 'images/val',
    'nc': 1,
    'names': ['fish']
}

config_file = Path(OUTPUT) / "data.yaml"
# sort_keys=False keeps the keys in the order written above.
config_file.write_text(yaml.dump(config, sort_keys=False))
print(f"  ‚úÖ Config: {config_file}")

# ============================================================================
# VISUALIZE SAMPLES
# ============================================================================
print("\n[8/9] Visualizing samples...")

def visualize(output_dir, n=6, save_path='/content/preview.png'):
    """Plot up to ``n`` random training images with their YOLO boxes drawn.

    Images are read from ``output_dir/images/train`` and labels from
    ``output_dir/labels/train``. The figure is shown and saved to
    ``save_path``.

    Args:
        output_dir: Root of the converted YOLO dataset.
        n: Maximum number of images to show.
        save_path: Where the preview figure is written.

    Returns:
        None. Prints a notice and returns early when there is nothing to show.
    """
    img_dir = Path(output_dir) / "images" / "train"
    lbl_dir = Path(output_dir) / "labels" / "train"

    images = list(img_dir.glob("*"))
    if not images or n <= 0:
        print("  ‚ö†Ô∏è  No images to show")
        return

    random.shuffle(images)
    images = images[:n]

    # Size the grid from the number of images so n > 6 cannot index past the
    # axes. With the default n=6 this is the 2x3 / (15, 10) layout.
    cols = 3
    rows = -(-len(images) // cols)  # ceiling division
    fig, axes = plt.subplots(rows, cols, figsize=(15, 5 * rows), squeeze=False)
    axes = axes.flatten()

    # Turn every panel off up front so unused ones stay blank.
    for ax in axes:
        ax.axis('off')

    for ax, img_path in zip(axes, images):
        img = cv2.imread(str(img_path))
        if img is None:
            # Unreadable / non-image file: label the panel instead of crashing.
            ax.set_title(f"unreadable: {img_path.name[:20]}", fontsize=8)
            continue

        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        h, w = img.shape[:2]

        lbl_path = lbl_dir / f"{img_path.stem}.txt"
        if lbl_path.exists():
            for line in lbl_path.read_text().splitlines():
                parts = line.split()
                if len(parts) != 5:
                    continue  # blank or malformed label line
                try:
                    _, xc, yc, bw, bh = map(float, parts)
                except ValueError:
                    continue  # non-numeric field: skip this box only

                # Normalised centre/size -> pixel corner coordinates.
                x1 = int((xc - bw/2) * w)
                y1 = int((yc - bh/2) * h)
                x2 = int((xc + bw/2) * w)
                y2 = int((yc + bh/2) * h)
                cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)

        ax.imshow(img)
        ax.set_title(img_path.name[:20], fontsize=8)

    plt.tight_layout()
    plt.savefig(save_path, dpi=100)
    plt.show()
    plt.close(fig)  # release the figure's memory once it has been rendered
    print(f"  ‚úÖ Saved {Path(save_path).name}")

# The preview is optional: a plotting problem must not stop the pipeline.
try:
    visualize(OUTPUT)
except Exception as e:
    print(f"  ‚ö†Ô∏è  Viz failed: {e}")

# ============================================================================
# TRAIN MODEL
# ============================================================================
print("\n[9/9] Training model...")
print(f"  Device: {DEVICE} | Model: YOLOv8{MODEL}")
print(f"  Epochs: {EPOCHS} | Batch: {BATCH} | Size: {IMG_SIZE}")

# Ask for confirmation before starting a CPU-only run.
if DEVICE == 'cpu':
    print("\n  ‚ö†Ô∏è  Training on CPU is VERY SLOW!")
    response = input("  Continue? (y/n): ")
    if response.lower() != 'y':
        raise KeyboardInterrupt("Stopped by user")

print("\nüöÄ Starting training...\n")

model = YOLO(f'yolov8{MODEL}.pt')

try:
    results = model.train(
        data=str(config_file),
        epochs=EPOCHS,
        imgsz=IMG_SIZE,
        batch=BATCH,
        name='fish_detect',
        project=TRAINING_OUTPUT,
        patience=15,
        save=True,
        plots=True,
        device=DEVICE,

        # Augmentation
        hsv_h=0.015,
        hsv_s=0.7,
        hsv_v=0.4,
        degrees=15.0,
        translate=0.1,
        scale=0.5,
        flipud=0.5,
        fliplr=0.5,
        mosaic=1.0,

        # Optimization
        optimizer='AdamW',
        lr0=0.001,
        lrf=0.01,
        momentum=0.937,
        weight_decay=0.0005,

        # Performance
        workers=4,
        cache='ram' if DEVICE == 'cuda' else False,
        amp=True,
    )

    print("\n" + "=" * 80)
    print("‚úÖ TRAINING COMPLETE!")
    print("=" * 80)

    # Evaluate
    # Take the run folder from the trainer instead of assuming "fish_detect":
    # when that folder already exists from an earlier run, Ultralytics writes
    # to an incremented name (fish_detect2, ...), and the hard-coded path
    # would evaluate the stale weights of the old run.
    trainer = getattr(model, 'trainer', None)
    run_dir = Path(getattr(trainer, 'save_dir', None) or f"{TRAINING_OUTPUT}/fish_detect")
    best_model = str(run_dir / "weights" / "best.pt")

    if Path(best_model).exists():
        print("\nüìä Evaluating best model...")
        trained = YOLO(best_model)
        metrics = trained.val(data=str(config_file))

        print(f"\nüéØ Results:")
        print(f"  mAP@50:    {metrics.box.map50:.3f}")
        print(f"  mAP@50-95: {metrics.box.map:.3f}")
        print(f"  Precision: {metrics.box.mp:.3f}")
        print(f"  Recall:    {metrics.box.mr:.3f}")

        # Test predictions
        print("\nüîç Testing on validation images...")
        val_imgs = list(Path(OUTPUT).glob("images/val/*"))[:6]

        if val_imgs:
            fig, axes = plt.subplots(2, 3, figsize=(15, 10))
            axes = axes.flatten()

            # Blank every panel first so unused ones (fewer than six
            # validation images) do not show empty axes.
            for ax in axes:
                ax.axis('off')

            for idx, img_path in enumerate(val_imgs):
                result = trained.predict(str(img_path), conf=0.3, verbose=False)[0]
                # plot() returns a BGR array; convert it for matplotlib.
                im = result.plot()
                im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)

                axes[idx].imshow(im)
                n_fish = len(result.boxes) if result.boxes else 0
                axes[idx].set_title(f"{img_path.name[:15]} | {n_fish} fish", fontsize=9)

            plt.tight_layout()
            plt.savefig('/content/predictions.png', dpi=100)
            plt.show()
            print("  ‚úÖ Saved predictions.png")

        print("\n" + "=" * 80)
        print("üéâ ALL DONE!")
        print("=" * 80)
        print(f"\nüìÅ Outputs:")
        print(f"  Model: {best_model}")
        print(f"  Dataset: {OUTPUT}")
        print(f"  Logs: {run_dir}/")
        print("\nüí° To use your model:")
        print(f"  from ultralytics import YOLO")
        print(f"  model = YOLO('{best_model}')")
        print(f"  results = model('your_image.jpg')")
    else:
        # Make the skipped evaluation visible instead of ending silently.
        print(f"\n  No best.pt found at {best_model} - skipping evaluation")

except KeyboardInterrupt:
    print("\n‚ö†Ô∏è  Training interrupted")
except Exception as e:
    print(f"\n‚ùå Error: {e}")
    raise

print("\n‚úÖ Pipeline complete!")

üêü YOLO FISH DETECTION - ULTIMATE VERSION

[1/9] Mounting Google Drive...
Mounted at /content/drive
‚úÖ Mounted!

[2/9] Installing packages...
‚úÖ Done!

[3/9] Importing libraries...
‚úÖ Ready!

[4/9] GPU Status...
  ‚ö†Ô∏è  CPU only - Training will be SLOW!
  Enable GPU: Runtime ‚Üí Change runtime type ‚Üí T4 GPU

[5/9] Configuration...
  Dataset: /content/drive/MyDrive/kaggle_fish_dataset
  Model: YOLOv8n | Device: CPU
  Epochs: 50 | Batch: 4

[6/9] Converting dataset...
  Building file map...
  Found 27404 fish images
  Found 27417 mask images
  Mapped 27370 image-mask pairs
  Train: 21896 | Val: 5474


  train:  28%|‚ñà‚ñà‚ñä       | 6145/21896 [1:01:52<2:42:01,  1.62it/s]