In [1]:
"""
Generate images that distort the existing crab images.
These images are saved to an output directory and will later be used to train the model.
Distort by: color, haze, Gaussian noise, motion blur, brightness, rotation, scale

Code by Anika and Claude, 2025, for MATE
"""

'\nGenerate images that distort the existing crab images.\nThese images are saved to an output directory and will later be used to train the model.\nDistort by: color, haze, Gaussian noise, motion blur, brightness, rotation, scale\n\nCode by Anika and Claude, 2025, for MATE\n'

In [2]:
import cv2
import numpy as np
import os
from pathlib import Path

In [3]:
class UnderwaterAugmentation:
    """
    Generate realistic underwater image augmentations for green crab detection.
    Based on underwater distortion characteristics from scientific literature.

    Every image handled by this class is treated as a 3-channel BGR uint8
    array (the layout produced by ``cv2.imread``): channel 0 is blue,
    channel 1 is green and channel 2 is red.
    """

    def __init__(self, output_dir):
        """
        Args:
            output_dir: Directory that receives the augmented images. It is
                created (together with any missing parent directories) if it
                does not exist yet.
        """
        self.output_dir = Path(output_dir)
        # parents=True so nested output paths such as "data/aug/underwater" work.
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def add_underwater_color_distortion(self, img, depth_factor=0.5):
        """
        Simulate wavelength-dependent light absorption.
        Red light is absorbed quickly, blue/green penetrates deeper.

        Args:
            img: BGR image.
            depth_factor: 0-1, higher values simulate deeper water

        Returns:
            A new uint8 image; the input is not modified.
        """
        img_float = img.astype(np.float32)

        # Reduce red channel (absorbed first)
        img_float[:, :, 2] *= (1.0 - depth_factor * 0.6)

        # Slightly reduce green
        img_float[:, :, 1] *= (1.0 - depth_factor * 0.3)

        # Boost blue slightly (penetrates deepest)
        img_float[:, :, 0] *= (1.0 + depth_factor * 0.2)

        # Add blue-green tint (BGR offsets, scaled by depth)
        blue_green_tint = np.full_like(img_float, [20, 10, 0]) * depth_factor
        img_float = cv2.add(img_float, blue_green_tint)

        return np.clip(img_float, 0, 255).astype(np.uint8)

    def add_underwater_haze(self, img, haze_strength=0.3):
        """
        Simulate light scattering and haze from suspended particles.

        Args:
            img: BGR image.
            haze_strength: Blend weight of the haze layer; 0 leaves the image
                unchanged, 1 replaces it with the flat haze colour.

        Returns:
            A new uint8 image; the input is not modified.
        """
        # Create atmospheric light (bluish-white, in BGR order)
        atmospheric_light = np.array([180, 170, 150])

        # Blend image with atmospheric light
        haze_layer = np.full_like(img, atmospheric_light, dtype=np.float32)
        img_float = img.astype(np.float32)

        hazy = cv2.addWeighted(img_float, 1 - haze_strength,
                               haze_layer, haze_strength, 0)

        # Clip before the cast: a haze_strength outside [0, 1] would otherwise
        # wrap around when converted to uint8.
        return np.clip(hazy, 0, 255).astype(np.uint8)

    def add_gaussian_noise(self, img, mean=0, sigma=15):
        """
        Add Gaussian noise to simulate sensor noise in low light.

        Args:
            img: Image array.
            mean: Mean of the noise distribution.
            sigma: Standard deviation of the noise distribution.

        Returns:
            A new uint8 image of the same shape, clipped to [0, 255].
        """
        noise = np.random.normal(mean, sigma, img.shape).astype(np.float32)
        noisy = img.astype(np.float32) + noise
        return np.clip(noisy, 0, 255).astype(np.uint8)

    def add_motion_blur(self, img, kernel_size=5):
        """
        Simulate blur from water movement or camera motion.

        A one-pixel-wide line kernel is rotated to a random angle in
        [0, 180) degrees and convolved with the image.

        Args:
            img: Image array.
            kernel_size: Side length of the square blur kernel in pixels.

        Returns:
            The blurred image, same depth as the input.
        """
        # Plain Python int: callers may pass a NumPy integer (e.g. from
        # np.random.choice), which OpenCV size arguments do not always accept.
        kernel_size = max(1, int(kernel_size))

        kernel = np.zeros((kernel_size, kernel_size))
        kernel[(kernel_size - 1) // 2, :] = 1.0

        # Random blur direction
        angle = int(np.random.randint(0, 180))

        # Rotate about the centre *pixel*. Pixel centres sit on integer
        # coordinates, so the centre of a k x k kernel is (k-1)/2, not k/2;
        # using k/2 pushes part of the line out of the kernel window.
        center = ((kernel_size - 1) / 2.0, (kernel_size - 1) / 2.0)
        M = cv2.getRotationMatrix2D(center, angle, 1)
        kernel = cv2.warpAffine(kernel, M, (kernel_size, kernel_size))

        # Interpolation and cropping during the rotation change the kernel
        # sum; renormalise so the blur does not alter overall brightness.
        total = kernel.sum()
        if total > 0:
            kernel = kernel / total

        return cv2.filter2D(img, -1, kernel)

    def adjust_brightness(self, img, factor=1.0):
        """
        Adjust brightness to simulate different depths/lighting.

        Args:
            img: BGR image.
            factor: Multiplier applied to the HSV value channel
                (<1 darkens, >1 brightens).

        Returns:
            A new BGR uint8 image.
        """
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV).astype(np.float32)
        hsv[:, :, 2] *= factor
        hsv[:, :, 2] = np.clip(hsv[:, :, 2], 0, 255)
        return cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2BGR)

    def _augment_image(self, img):
        """Apply one randomly parameterised underwater pipeline to a BGR image."""
        augmented = img.copy()

        # Underwater color distortion (varying depths)
        depth = np.random.uniform(0.3, 0.7)
        augmented = self.add_underwater_color_distortion(augmented, depth)

        # Haze/turbidity
        haze = np.random.uniform(0.2, 0.4)
        augmented = self.add_underwater_haze(augmented, haze)

        # Brightness variation
        brightness = np.random.uniform(0.7, 1.2)
        augmented = self.adjust_brightness(augmented, brightness)

        # Gaussian noise (50% chance)
        if np.random.random() > 0.5:
            sigma = np.random.randint(8, 15)
            augmented = self.add_gaussian_noise(augmented, sigma=sigma)

        # Motion blur (40% chance)
        if np.random.random() > 0.6:
            kernel_size = np.random.choice([3, 5])
            augmented = self.add_motion_blur(augmented, kernel_size)

        return augmented

    def generate_augmented_dataset(self, input_dir, seed=None):
        """
        Generate underwater augmentations for all images in a directory.
        Each image is augmented once with random underwater effects and
        written to ``self.output_dir`` as ``<original stem>_underwater.jpg``.

        Args:
            input_dir: Path to directory containing images to augment
            seed: Optional integer. When given, NumPy's global random state is
                seeded with it first so that a run can be reproduced.

        Returns:
            None. Progress, warnings and a summary are printed.
        """
        input_path = Path(input_dir)
        if not input_path.is_dir():
            print(f"Error: Directory {input_dir} does not exist!")
            return

        def numeric_then_name(name):
            # Numbered files sort numerically; the name breaks ties so the
            # order does not depend on directory listing order.
            prefix = name.split('.')[0]
            return (int(prefix) if prefix.isdigit() else 0, name)

        # Get all jpg images (extension matched case-insensitively)
        image_files = sorted(
            (p.name for p in input_path.iterdir() if p.name.lower().endswith('.jpg')),
            key=numeric_then_name,
        )

        if not image_files:
            print(f"Error: No .jpg images found in {input_dir}!")
            return

        print(f"Found {len(image_files)} images to augment")

        if seed is not None:
            np.random.seed(seed)

        for img_file in image_files:
            img_path = input_path / img_file
            img = cv2.imread(str(img_path))

            if img is None:
                print(f"Warning: Could not load {img_path}")
                continue

            base_name = Path(img_file).stem
            print(f"Processing {img_file}...")

            augmented = self._augment_image(img)

            # Save augmented image; imwrite reports failure via its return value
            output_path = self.output_dir / f"{base_name}_underwater.jpg"
            if not cv2.imwrite(str(output_path), augmented):
                print(f"Warning: Could not save {output_path}")

        print(f"\nAugmentation complete! Images saved to: {self.output_dir}")
        total_images = len(list(self.output_dir.glob("*.jpg")))
        print(f"Total underwater images: {total_images}")

In [4]:
# Source images (crabs composited on above-ground backgrounds) and the
# directory that receives their underwater-styled counterparts.
INPUT_DIR = 'crabs_on_background_aboveGround'
OUTPUT_DIR = 'crabs_on_background_underwater'

# Build the pipeline, then augment every image found in INPUT_DIR once.
augmenter = UnderwaterAugmentation(output_dir=OUTPUT_DIR)
augmenter.generate_augmented_dataset(INPUT_DIR)


Found 20 images to augment
Processing 0.jpg...
Processing 1.jpg...
Processing 2.jpg...
Processing 3.jpg...
Processing 4.jpg...
Processing 5.jpg...
Processing 6.jpg...
Processing 7.jpg...
Processing 8.jpg...
Processing 9.jpg...
Processing 10.jpg...
Processing 11.jpg...
Processing 12.jpg...
Processing 13.jpg...
Processing 14.jpg...
Processing 15.jpg...
Processing 16.jpg...
Processing 17.jpg...
Processing 18.jpg...
Processing 19.jpg...

Augmentation complete! Images saved to: crabs_on_background_underwater
Total underwater images: 20
