In [1]:
import os
import itertools
import cv2
import numpy as np
import pandas as pd
from sklearn.metrics import precision_score, recall_score, f1_score, jaccard_score

class WatershedParameterExplorer:
    def __init__(self, images_dir, masks_dir):
        """
        Initialize explorer with image and mask directories
        
        Parameters:
        images_dir (str): Directory containing input images
        masks_dir (str): Directory containing ground truth masks
        """
        self.images_dir = images_dir
        self.masks_dir = masks_dir
        
        # Load image and mask pairs
        self.image_files = sorted([f for f in os.listdir(images_dir) if f.endswith(('.jpg', '.png', '.jpeg'))])
        self.mask_files = sorted([f for f in os.listdir(masks_dir) if f.endswith(('.jpg', '.png', '.jpeg'))])
        
        # Validate image-mask pairs
        assert len(self.image_files) == len(self.mask_files), \
            "Number of images and masks must be the same"
        
        # Limit to first 100 images if more exist
        self.image_files = self.image_files[:100]
        self.mask_files = self.mask_files[:100]
    
    def watershed_segmentation(self, image, blur_kernel, custom_thresh, morph_kernel, dilate_iter):
        """
        Perform watershed segmentation with given parameters
        
        Returns:
        numpy.ndarray: Segmented image
        """
        # Convert to odd kernel size
        blur_kernel_size = (blur_kernel * 2 + 1, blur_kernel * 2 + 1)
        
        # Grayscale conversion
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        
        # Gaussian blur
        blurred = cv2.GaussianBlur(gray, blur_kernel_size, 0)
        
        # Binary thresholding
        _, thresh = cv2.threshold(blurred, custom_thresh, 255, cv2.THRESH_BINARY)
        
        # Morphological kernel
        kernel = np.ones((morph_kernel, morph_kernel), np.uint8)
        
        # Morphological opening
        opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
        
        # Dilation
        dilated = cv2.dilate(opening, kernel, iterations=dilate_iter)
        
        # Distance transform
        dist_transform = cv2.distanceTransform(dilated, cv2.DIST_L2, 5)
        
        # Foreground threshold
        _, sure_fg = cv2.threshold(dist_transform, 0.5*dist_transform.max(), 255, 0)
        sure_fg = sure_fg.astype(np.uint8)
        
        # Connected components
        _, markers = cv2.connectedComponents(sure_fg)
        
        # Watershed preparation
        markers = markers + 1
        markers[sure_fg == 0] = 0
        
        # Create result image
        result = image.copy()
        cv2.watershed(result, markers)
        
        # Create binary segmentation
        segmented = np.zeros_like(gray)
        segmented[markers == -1] = 255
        
        return segmented
    
    def calculate_metrics(self, ground_truth, prediction):
        """
        Calculate segmentation metrics
        
        Returns:
        dict: Metrics including precision, recall, F1, and Jaccard
        """
        # Ensure binary images
        gt_binary = (ground_truth > 0).astype(int).ravel()
        pred_binary = (prediction > 0).astype(int).ravel()
        
        return {
            'precision': precision_score(gt_binary, pred_binary, zero_division=0),
            'recall': recall_score(gt_binary, pred_binary, zero_division=0),
            'f1_score': f1_score(gt_binary, pred_binary, zero_division=0),
            'jaccard': jaccard_score(gt_binary, pred_binary, zero_division=0)
        }
    
    def generate_parameter_combinations(self):
        """
        Automatically generate parameter combinations
        
        Returns:
        list: List of parameter dictionaries
        """
        # Define parameter ranges
        blur_kernels = [1, 3, 5, 7, 9]
        custom_thresholds = [50, 100, 127, 150, 200]
        morph_kernels = [3, 5, 7]
        dilate_iterations = [1, 2, 3]
        
        # Generate all combinations
        parameter_combinations = list(itertools.product(
            blur_kernels,
            custom_thresholds,
            morph_kernels,
            dilate_iterations
        ))
        
        # Convert to list of dictionaries
        parameter_sets = [
            {
                'blur_kernel': combo[0],
                'custom_thresh': combo[1],
                'morph_kernel': combo[2],
                'dilate_iter': combo[3]
            }
            for combo in parameter_combinations
        ]
        
        return parameter_sets
    
    def explore_parameters(self, parameter_sets):
        """
        Explore multiple parameter sets across all images
        
        Parameters:
        parameter_sets (list): List of parameter dictionaries to test
        
        Returns:
        pandas.DataFrame: Average metrics for each parameter set
        """
        # Results storage
        results = []
        
        # Total combinations for progress tracking
        total_combinations = len(parameter_sets)
        
        # Iterate through parameter sets
        for idx, params in enumerate(parameter_sets, 1):
            # Metrics for this parameter set
            set_metrics = []
            
            # Process all images
            for image_file, mask_file in zip(self.image_files, self.mask_files):
                # Read image and mask
                image = cv2.imread(os.path.join(self.images_dir, image_file))
                mask = cv2.imread(os.path.join(self.masks_dir, mask_file), cv2.IMREAD_GRAYSCALE)
                
                # Perform segmentation
                segmented = self.watershed_segmentation(
                    image, 
                    params['blur_kernel'], 
                    params['custom_thresh'], 
                    params['morph_kernel'], 
                    params['dilate_iter']
                )
                
                # Calculate metrics
                metrics = self.calculate_metrics(mask, segmented)
                set_metrics.append(metrics)
            
            # Calculate average metrics for this parameter set
            avg_metrics = {
                metric: np.mean([m[metric] for m in set_metrics])
                for metric in ['precision', 'recall', 'f1_score', 'jaccard']
            }
            
            # Store results with parameters
            result = {**params, **avg_metrics}
            results.append(result)
            
            # Progress tracking
            print(f"\nProcessing Combination {idx}/{total_combinations}")
            print(f"Parameters: {params}")
            for metric, value in avg_metrics.items():
                print(f"  {metric.replace('_', ' ').title()}: {value:.4f}")
        
        # Convert to DataFrame
        results_df = pd.DataFrame(results)
        
        # Sort by F1 score in descending order
        results_df = results_df.sort_values('f1_score', ascending=False)
        
        return results_df

def main():
    # Directories for images and ground truth masks
    images_dir = 'images/'
    masks_dir = 'masks/'
    
    # Create explorer
    explorer = WatershedParameterExplorer(images_dir, masks_dir)
    
    # Generate parameter combinations
    parameter_sets = explorer.generate_parameter_combinations()
    
    print(f"Total parameter combinations to explore: {len(parameter_sets)}")
    
    # Explore parameters
    results_df = explorer.explore_parameters(parameter_sets)
    
    # Save results to CSV
    results_df.to_csv('watershed_parameter_exploration.csv', index=False)
    
    # Print top 10 best parameter configurations
    print("\n--- Top 10 Best Parameter Configurations ---")
    print(results_df.head(10).to_string(index=False))
    
    # Print best parameter configuration
    best_params = results_df.iloc[0]
    print("\n--- Best Overall Parameter Configuration ---")
    print("Blur Kernel:", best_params['blur_kernel'])
    print("Custom Threshold:", best_params['custom_thresh'])
    print("Morph Kernel:", best_params['morph_kernel'])
    print("Dilate Iterations:", best_params['dilate_iter'])
    print("\nMetrics:")
    for metric in ['precision', 'recall', 'f1_score', 'jaccard']:
        print(f"  {metric.replace('_', ' ').title()}: {best_params[metric]:.4f}")

if __name__ == '__main__':
    main()

Total parameter combinations to explore: 225

Processing Combination 1/225
Parameters: {'blur_kernel': 1, 'custom_thresh': 50, 'morph_kernel': 3, 'dilate_iter': 1}
  Precision: 0.0128
  Recall: 0.0035
  F1 Score: 0.0053
  Jaccard: 0.0026

Processing Combination 2/225
Parameters: {'blur_kernel': 1, 'custom_thresh': 50, 'morph_kernel': 3, 'dilate_iter': 2}
  Precision: 0.0128
  Recall: 0.0035
  F1 Score: 0.0053
  Jaccard: 0.0026

Processing Combination 3/225
Parameters: {'blur_kernel': 1, 'custom_thresh': 50, 'morph_kernel': 3, 'dilate_iter': 3}
  Precision: 0.0128
  Recall: 0.0035
  F1 Score: 0.0053
  Jaccard: 0.0026

Processing Combination 4/225
Parameters: {'blur_kernel': 1, 'custom_thresh': 50, 'morph_kernel': 5, 'dilate_iter': 1}
  Precision: 0.0128
  Recall: 0.0035
  F1 Score: 0.0053
  Jaccard: 0.0026

Processing Combination 5/225
Parameters: {'blur_kernel': 1, 'custom_thresh': 50, 'morph_kernel': 5, 'dilate_iter': 2}
  Precision: 0.0128
  Recall: 0.0035
  F1 Score: 0.0053
  Jaccar