In [2]:
import cv2
import numpy as np
import os
from sklearn.cluster import DBSCAN

# ---------------------------------------
# Utility: dedupe list entries
# ---------------------------------------
def dedupe_positions(arr, gap=5):
    """Collapse runs of nearby positions, keeping the first value of each run.

    Values are visited in the order given (the input is not sorted here;
    callers in this file pass ascending pixel indices). A value is kept only
    when it exceeds the most recently kept value by more than ``gap``.

    The first value is always kept. Comparing against the last kept value
    directly, instead of a fixed ``-999`` sentinel, avoids dropping the first
    element when ``gap`` is large or positions are strongly negative.

    Args:
        arr: Iterable of numeric positions.
        gap: Minimum distance (exclusive) between two kept values.

    Returns:
        A new list holding the kept values in their original order.
    """
    result = []
    for x in arr:
        # `result[-1]` is the last kept position; an empty list means this is
        # the first element, which always starts a new run.
        if not result or x - result[-1] > gap:
            result.append(x)
    return result

# ---------------------------------------
# PROCESS A SINGLE IMAGE
# ---------------------------------------
def _merge_line_positions(positions, eps=12):
    """Cluster 1-D line positions with DBSCAN and return sorted cluster means.

    Args:
        positions: List of integer pixel positions (may be empty).
        eps: DBSCAN neighbourhood radius, in pixels.

    Returns:
        Sorted list with one integer (truncated mean) per cluster; an empty
        list when ``positions`` is empty.
    """
    if not positions:
        return []
    pts = np.array(positions).reshape(-1, 1)
    # min_samples=1 makes every point a core point, so no position is
    # discarded as noise; neighbours within `eps` chain into one cluster.
    labels = DBSCAN(eps=eps, min_samples=1).fit(pts).labels_
    return sorted(int(pts[labels == label].mean()) for label in set(labels))


def process_image(img_path, output_root):
    """Detect grid lines in one image and save every grid cell as a PNG.

    Long vertical and horizontal strokes are isolated with morphological
    opening on an adaptively thresholded copy of the image; their positions
    are deduplicated and merged, and each rectangle between consecutive
    horizontal and vertical lines is cropped from the original image and
    written to ``<output_root>/<image stem>/cell_r<row>_c<col>.png``
    (row and column numbers start at 1).

    Args:
        img_path: Path of the image to process.
        output_root: Directory under which the per-image folder is created.

    Returns:
        None. Progress is reported with ``print``. An image that cannot be
        read is reported and skipped instead of raising.
    """
    print(f"\n=== Processing: {os.path.basename(img_path)} ===")

    # Load. cv2.imread signals failure by returning None rather than raising,
    # so check before handing the result to cvtColor.
    orig = cv2.imread(img_path)
    if orig is None:
        print(f"Could not read image, skipping: {img_path}")
        return
    gray = cv2.cvtColor(orig, cv2.COLOR_BGR2GRAY)

    # Smooth
    blur = cv2.GaussianBlur(gray, (5, 5), 0)

    # Adaptive threshold (inverted: dark pixels become foreground)
    th = cv2.adaptiveThreshold(
        blur, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
        cv2.THRESH_BINARY_INV, 15, 8
    )

    # Morphological extraction: opening with a 1x50 (w x h) kernel keeps only
    # tall vertical strokes, a 50x4 kernel keeps only wide horizontal ones.
    kernel_v = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 50))
    vertical = cv2.morphologyEx(th, cv2.MORPH_OPEN, kernel_v)

    kernel_h = cv2.getStructuringElement(cv2.MORPH_RECT, (50, 4))
    horizontal = cv2.morphologyEx(th, cv2.MORPH_OPEN, kernel_h)

    # Columns / rows that contain any surviving foreground pixel
    v_lines = np.where(vertical.sum(axis=0) > 0)[0].tolist()
    h_lines = np.where(horizontal.sum(axis=1) > 0)[0].tolist()

    # Deduplicate adjacent indices, then merge remaining close positions
    merged_v = _merge_line_positions(dedupe_positions(v_lines, gap=10))
    merged_h = _merge_line_positions(dedupe_positions(h_lines, gap=10))

    print("Vertical lines:", len(merged_v))
    print("Horizontal lines:", len(merged_h))

    # Output folder for this module
    module_name = os.path.splitext(os.path.basename(img_path))[0]
    save_dir = os.path.join(output_root, module_name)
    os.makedirs(save_dir, exist_ok=True)

    # ---------------------------------------
    # EXTRACT **ALL** CELLS (NO FILTER)
    # ---------------------------------------
    cell_count = 0

    # Consecutive line pairs bound each cell.
    for r, (y1, y2) in enumerate(zip(merged_h, merged_h[1:]), start=1):
        for c, (x1, x2) in enumerate(zip(merged_v, merged_v[1:]), start=1):
            cell = orig[y1:y2, x1:x2]
            filename = os.path.join(save_dir, f"cell_r{r}_c{c}.png")
            # cv2.imwrite returns False on failure; count only real writes.
            if cv2.imwrite(filename, cell):
                cell_count += 1
            else:
                print(f"Failed to write: {filename}")

    print(f"Extracted {cell_count} cells → {save_dir}")

# ---------------------------------------
# RUN ON ALL IMAGES IN A DIRECTORY
# ---------------------------------------

# Source directory of module images and destination root for cropped cells.
input_folder = r"D:\PROGRAMMING\Internships_assignments\ResearchInternIITMandi\Full_modules_datasets\Full modules datasets\18.11.2024\ok"
output_root = "all_cells_extracted"

# File extensions (lower-case, with leading dot) treated as images.
valid_ext = {".jpg", ".jpeg", ".png", ".tif"}

# Create the shared output root up front; per-image folders go beneath it.
os.makedirs(output_root, exist_ok=True)

for fname in os.listdir(input_folder):
    ext = os.path.splitext(fname)[1].lower()
    if ext not in valid_ext:
        # Not an image type we handle; leave it alone.
        continue
    process_image(os.path.join(input_folder, fname), output_root)

print("\n=== Batch Extraction Complete ===")



=== Processing: WS11249040878571.jpg ===
Vertical lines: 27
Horizontal lines: 9
Extracted 208 cells → all_cells_extracted\WS11249040878571

=== Processing: WS11249040884052.jpg ===
Vertical lines: 27
Horizontal lines: 8
Extracted 182 cells → all_cells_extracted\WS11249040884052

=== Processing: WS11249040884166.jpg ===
Vertical lines: 27
Horizontal lines: 9
Extracted 208 cells → all_cells_extracted\WS11249040884166

=== Processing: WS11249040885422.jpg ===
Vertical lines: 23
Horizontal lines: 8
Extracted 154 cells → all_cells_extracted\WS11249040885422

=== Processing: WS11249040885456.jpg ===
Vertical lines: 24
Horizontal lines: 8
Extracted 161 cells → all_cells_extracted\WS11249040885456

=== Processing: WS11249040899466.jpg ===
Vertical lines: 23
Horizontal lines: 7
Extracted 132 cells → all_cells_extracted\WS11249040899466

=== Processing: WS11249040899511.jpg ===
Vertical lines: 23
Horizontal lines: 8
Extracted 154 cells → all_cells_extracted\WS11249040899511

=== Processing: WS1