In [2]:
import cv2
import numpy as np
import os
import pandas as pd
from glob import glob

import warnings
warnings.filterwarnings('ignore')

In [3]:
MASK_FILES = [
    r"NIH-NLM-ThinBloodSmearsPf\Polygon Set\209C68P29N_ThinF\Masks\IMG_20150819_133236.png",
    r"NIH-NLM-ThinBloodSmearsPf\Polygon Set\231C89P50ThinF\Masks\IMG_20150820_162153.png",
    r"NIH-NLM-ThinBloodSmearsPf\Polygon Set\306C213ThinF\Masks\IMG_20151106_112252.png",
    r"NIH-NLM-ThinBloodSmearsPf\Polygon Set\362C177P138NThinF\Masks\IMG_20151201_142846.png",
    r"NIH-NLM-ThinBloodSmearsPf\Polygon Set\156C55P16thinF\Masks\IMG_20150728_123237.png"
]

In [12]:
def create_dataset_from_list(mask_paths_list):
    dataset = []
    
    for mask_path in mask_paths_list:
        mask_path = os.path.normpath(mask_path)
        img_path = mask_path.replace("Masks", "Img").replace(".png", ".jpg")
        
        basename = os.path.basename(mask_path)
        img_id = os.path.splitext(basename)[0]
        
        if not os.path.exists(img_path):
            print(f"Warning: Image not found at predicted path: {img_path}")
        
        dataset.append({
            "id": img_id,
            "img_path": img_path,
            "mask_path": mask_path
        })
            
    return dataset

In [5]:
def get_channel(img_rgb, mode='green'):
    if mode == 'green':
        clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8,8))
        g_c = clahe.apply(img_rgb[:, :, 1])
        green_inverted = 255 - g_c

        gray = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2GRAY)
        _, roi_mask = cv2.threshold(gray, 15, 255, cv2.THRESH_BINARY)

        img_green = cv2.bitwise_and(green_inverted, green_inverted, mask=roi_mask)
        return img_green

    elif mode == 'hue':
        hsv = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2HSV)
        h, s, v = cv2.split(hsv)

        _, v_mask = cv2.threshold(v, 30, 255, cv2.THRESH_BINARY)

        img_hue = cv2.bitwise_and(h, h, mask=v_mask)
        return img_hue

    else:
        return cv2.cvtColor(img_rgb, cv2.COLOR_RGB2GRAY)

def refine_mask(mask):
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    filled_mask = np.zeros_like(mask)
    cv2.drawContours(filled_mask, contours, -1, 255, -1)
    
    # kernel = np.ones((3,3), np.uint8)
    # filled_mask = cv2.morphologyEx(filled_mask, cv2.MORPH_OPEN, kernel, iterations=1)
    
    return filled_mask

In [20]:
def run_otsu(img_gray, img_rgb, params):
    blur_ksize = params.get('blur_ksize', 0)
    morph_kernel_size = params.get('morph_kernel_size', 0)
    min_area = params.get('min_area', 50)
    
    if blur_ksize > 0:
        if blur_ksize % 2 == 0: blur_ksize += 1
        img_proc = cv2.GaussianBlur(img_gray, (blur_ksize, blur_ksize), 0)
    else:
        img_proc = img_gray

    _, binary = cv2.threshold(img_proc, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    
    if morph_kernel_size > 0:
        kernel = np.ones((morph_kernel_size, morph_kernel_size), np.uint8)
        binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel, iterations=1)
        binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel, iterations=1)
    
    num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(binary)
    
    clean_mask = np.zeros_like(binary)
    
    for i in range(1, num_labels):
        area = stats[i, cv2.CC_STAT_AREA]
        if area > min_area:
            clean_mask[labels == i] = 255
            
    return clean_mask


def run_watershed(img_gray, img_rgb, params):
    blur_ksize = params.get('blur_ksize', 0)
    morph_kernel_size = params.get('morph_kernel_size', 3)
    dist_mask_size = params.get('dist_mask_size', 5)
    dist_thresh_ratio = params.get('dist_thresh_ratio', 0.5)
    min_area = params.get('min_area', 50)
    
    if blur_ksize > 0:
        if blur_ksize % 2 == 0: blur_ksize += 1
        img_proc = cv2.GaussianBlur(img_gray, (blur_ksize, blur_ksize), 0)
    else:
        img_proc = img_gray
        
    _, binary = cv2.threshold(img_proc, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    
    if morph_kernel_size > 0:
        kernel = np.ones((morph_kernel_size, morph_kernel_size), np.uint8)
        closing = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel, iterations=2)
        opening = cv2.morphologyEx(closing, cv2.MORPH_OPEN, kernel, iterations=2)
    else:
        opening = binary
        kernel = np.ones((3, 3), np.uint8)

    sure_bg = cv2.dilate(opening, kernel, iterations=3)
    dist_transform = cv2.distanceTransform(opening, cv2.DIST_L2, dist_mask_size)
    
    if dist_transform.max() == 0: 
        return np.zeros_like(img_gray)
    
    _, sure_fg = cv2.threshold(dist_transform, dist_thresh_ratio * dist_transform.max(), 255, 0)
    sure_fg = np.uint8(sure_fg)
    
    unknown = cv2.subtract(sure_bg, sure_fg)
    _, markers = cv2.connectedComponents(sure_fg)
    markers = markers + 1
    markers[unknown == 255] = 0

    markers = cv2.watershed(img_rgb, markers)
    
    unique, counts = np.unique(markers, return_counts=True)
    
    valid_labels = []
    for label, count in zip(unique, counts):
        if label > 1 and count > min_area:
            valid_labels.append(label)
            
    object_mask = np.isin(markers, valid_labels).astype(np.uint8) * 255
    
    return object_mask


def run_hough(img_gray, img_rgb, params):
    blur_ksize = params.get('blur_ksize', 5)
    dp = params.get('dp', 1.2)
    minDist = params.get('minDist', 25)
    param1 = params.get('param1', 50)
    param2 = params.get('param2', 30)
    min_radius = params.get('min_radius', 15)
    
    if blur_ksize > 0:
        if blur_ksize % 2 == 0: blur_ksize += 1
        blur = cv2.medianBlur(img_gray, blur_ksize)
    else:
        blur = img_gray
    
    circles = cv2.HoughCircles(
        blur, cv2.HOUGH_GRADIENT, dp=dp, minDist=minDist,
        param1=param1, param2=param2, minRadius=min_radius, maxRadius=55
    )
    
    final_mask = np.zeros_like(img_gray)
    
    if circles is not None:
        circles = np.uint16(np.around(circles))
        for i in circles[0, :]:
            cv2.circle(final_mask, (i[0], i[1]), i[2], 255, -1)
            
    return final_mask

In [9]:
def calculate_metrics(pred_mask, gt_mask):
    _, pred_bin = cv2.threshold(pred_mask, 127, 255, cv2.THRESH_BINARY)
    _, gt_bin = cv2.threshold(gt_mask, 127, 255, cv2.THRESH_BINARY)
    
    #IoU
    intersection = cv2.bitwise_and(pred_bin, gt_bin)
    union = cv2.bitwise_or(pred_bin, gt_bin)
    
    area_inter = cv2.countNonZero(intersection)
    area_union = cv2.countNonZero(union)
    
    iou = area_inter / area_union if area_union > 0 else 0.0
    
    #Precision/Recall based on Centroids
    num_pred, _, _, centroids_pred = cv2.connectedComponentsWithStats(pred_bin)
    
    num_gt, _, _, _ = cv2.connectedComponentsWithStats(gt_bin)
    count_pred = num_pred - 1
    count_gt = num_gt - 1
    
    tp = 0
    fp = 0
    
    for i in range(1, num_pred):
        cx, cy = int(centroids_pred[i][0]), int(centroids_pred[i][1])
        
        if gt_bin[cy, cx] == 255:
            tp += 1
        else:
            fp += 1
            
    fn = count_gt - tp
    if fn < 0: fn = 0 
    
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    
    return {
        "iou": round(iou, 3),
        "precision": round(precision, 3),
        "recall": round(recall, 3),
        "f1": round(f1, 3),
        "count_pred": count_pred,
        "count_gt": count_gt,
        "count_diff": abs(count_pred - count_gt)
    }

In [21]:
CONFIG = {
    "Otsu_Green": {
        "func": run_otsu,
        "channel": "green",
        "params": {"blur_ksize": 0, "morph_kernel_size": 3, "min_area": 50}
    },
    "Otsu_Hue": {
        "func": run_otsu,
        "channel": "hue",
        "params": {"blur_ksize": 0, "morph_kernel_size": 3, "min_area": 50}
    },
    "Watershed_Green": {
        "func": run_watershed,
        "channel": "green",
        "params": {"blur_ksize": 0, "dist_mask_size": 3, "dist_thresh_ratio": 0.2, "min_area": 75}
    },
    "Watershed_Hue": {
        "func": run_watershed,
        "channel": "hue",
        "params": {"blur_ksize": 0, "dist_mask_size": 3, "dist_thresh_ratio": 0.2, "min_area": 75}
    },
    "Hough_Green": {
        "func": run_hough,
        "channel": "green",
        "params": {"blur_ksize": 5, "param1": 50, "param2": 20, "min_radius": 50}
    }
}

In [22]:
dataset = create_dataset_from_list(MASK_FILES)
print(f"Loaded {len(dataset)} pairs for testing.")

results = []

if len(dataset) > 0:
    for method_name, config in CONFIG.items():
        print(f"Running {method_name}...")
        
        for data in dataset:
            if not os.path.exists(data['img_path']):
                continue
                
            img_rgb = cv2.imread(data['img_path'])
            img_rgb = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2RGB)
            gt_mask = cv2.imread(data['mask_path'], cv2.IMREAD_GRAYSCALE)
            
            img_gray = get_channel(img_rgb, config['channel'])
            
            raw_mask = config['func'](img_gray, img_rgb, config['params'])
            
            metrics_raw = calculate_metrics(raw_mask, gt_mask)
            res_raw = metrics_raw.copy()
            res_raw['method'] = f"{method_name} (Raw)"
            res_raw['image_id'] = data['id']
            results.append(res_raw)
            
            refined_mask = refine_mask(raw_mask)
            
            metrics_ref = calculate_metrics(refined_mask, gt_mask)
            res_ref = metrics_ref.copy()
            res_ref['method'] = f"{method_name} (+ Refine)"
            res_ref['image_id'] = data['id']
            results.append(res_ref)

    df_results = pd.DataFrame(results)
    summary = df_results.groupby('method')[['iou', 'f1', 'precision', 'recall', 'count_diff']].mean()
    print(summary.sort_values(by='f1', ascending=False))


Loaded 5 pairs for testing.
Running Otsu_Green...
Running Otsu_Hue...
Running Watershed_Green...
Running Watershed_Hue...
Running Hough_Green...
                               iou      f1  precision  recall  count_diff
method                                                                   
Hough_Green (+ Refine)      0.5958  0.9650     0.9522  0.9782        41.2
Hough_Green (Raw)           0.5958  0.9650     0.9522  0.9782        41.2
Watershed_Hue (Raw)         0.6106  0.6864     0.7102  0.7376        70.0
Watershed_Hue (+ Refine)    0.6454  0.6836     0.7074  0.7350        70.0
Otsu_Hue (+ Refine)         0.7170  0.6624     0.6596  0.7212        50.0
Otsu_Hue (Raw)              0.7028  0.6610     0.6540  0.7212        48.8
Watershed_Green (+ Refine)  0.7044  0.5922     0.8090  0.5862        48.2
Watershed_Green (Raw)       0.7114  0.5850     0.8002  0.5798        48.2
Otsu_Green (Raw)            0.7274  0.4012     0.3566  0.4910        89.2
Otsu_Green (+ Refine)       0.7100  0.400

In [23]:
summary = df_results.groupby('method')[['iou', 'f1', 'precision', 'recall', 'count_diff']].mean()
print(summary.sort_values(by='iou', ascending=False))

                               iou      f1  precision  recall  count_diff
method                                                                   
Otsu_Green (Raw)            0.7274  0.4012     0.3566  0.4910        89.2
Otsu_Hue (+ Refine)         0.7170  0.6624     0.6596  0.7212        50.0
Watershed_Green (Raw)       0.7114  0.5850     0.8002  0.5798        48.2
Otsu_Green (+ Refine)       0.7100  0.4000     0.4546  0.4830        75.2
Watershed_Green (+ Refine)  0.7044  0.5922     0.8090  0.5862        48.2
Otsu_Hue (Raw)              0.7028  0.6610     0.6540  0.7212        48.8
Watershed_Hue (+ Refine)    0.6454  0.6836     0.7074  0.7350        70.0
Watershed_Hue (Raw)         0.6106  0.6864     0.7102  0.7376        70.0
Hough_Green (Raw)           0.5958  0.9650     0.9522  0.9782        41.2
Hough_Green (+ Refine)      0.5958  0.9650     0.9522  0.9782        41.2
