In [25]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os

In [26]:
def show(img, title="", cmap="gray", size=6):
    """Render ``img`` in a square matplotlib figure with the axes hidden.

    Args:
        img: Image data handed directly to ``imshow``.
        title: Caption drawn above the image.
        cmap: Colormap forwarded to ``imshow``.
        size: Used for both the width and the height of the figure.
    """
    _, ax = plt.subplots(figsize=(size, size))
    ax.imshow(img, cmap=cmap)
    ax.set_title(title)
    ax.axis("off")
    plt.show()

In [27]:
def order_points(pts):
    """Pick four extreme points from ``pts`` and return them in a fixed order.

    The rows of the result are, in order: top-left, top-right, bottom-right,
    bottom-left. Selection uses two per-point scores:

    * ``x + y`` -- smallest is taken as top-left, largest as bottom-right;
    * ``y - x`` -- smallest is taken as top-right, largest as bottom-left.

    Args:
        pts: Array of ``(x, y)`` rows. It may contain more than four points.

    Returns:
        A ``(4, 2)`` float32 array holding the selected points.
    """
    coord_sum = pts.sum(axis=1)
    coord_diff = np.diff(pts, axis=1)  # second column minus first column

    # Row indices into ``pts``, listed in output order: TL, TR, BR, BL.
    picks = (
        np.argmin(coord_sum),
        np.argmin(coord_diff),
        np.argmax(coord_sum),
        np.argmax(coord_diff),
    )

    ordered = np.zeros((4, 2), dtype="float32")
    for slot, idx in enumerate(picks):
        ordered[slot] = pts[idx]
    return ordered

In [31]:
def extract_middle_cell(img, out_size=(256, 256)):
    """Find the long ruled lines in ``img`` and return a rectified crop of the
    region spanned by their intersections.

    Steps:
      1. Convert to grayscale, median-blur, apply CLAHE, then inverse adaptive
         thresholding so dark strokes become white.
      2. Morphological opening with a tall and a wide kernel keeps only the
         vertical / horizontal strokes at least half the image in length.
      3. The AND of the two masks marks where such lines cross; the convex
         hull of those pixels is reduced to four corners by ``order_points``.
      4. The quadrilateral is warped onto an ``out_size`` rectangle.

    Args:
        img: Image array. A 2-D array is used as grayscale directly; anything
            else is converted with ``cv2.COLOR_BGR2GRAY``.
        out_size: ``(width, height)`` of the returned image. Defaults to
            ``(256, 256)``.

    Returns:
        The warped image, or ``None`` when no line intersections are found,
        the hull has fewer than four points, or the four selected corners are
        not all distinct (no valid perspective transform exists then).
    """
    gray = img if img.ndim == 2 else cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    median = cv2.medianBlur(gray, 3)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced = clahe.apply(median)
    th = cv2.adaptiveThreshold(enhanced, 255,
                               cv2.ADAPTIVE_THRESH_MEAN_C,
                               cv2.THRESH_BINARY_INV,
                               15, 5)

    # Opening with a 3-px-wide, half-image-tall kernel removes everything
    # except long vertical strokes; the transposed idea isolates horizontal ones.
    kernel_vert = cv2.getStructuringElement(cv2.MORPH_RECT, (3, img.shape[0] // 2))
    vert_lines = cv2.morphologyEx(th, cv2.MORPH_OPEN, kernel_vert)
    kernel_horz = cv2.getStructuringElement(cv2.MORPH_RECT, (img.shape[1] // 2, 1))
    horz_lines = cv2.morphologyEx(th, cv2.MORPH_OPEN, kernel_horz)

    # Pixels present in both masks are line crossings. The mask is already
    # strictly 0/255, so dilation is the only post-processing needed.
    intersections = cv2.bitwise_and(vert_lines, horz_lines)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    intersections_dilated = cv2.dilate(intersections, kernel, iterations=1)

    coords = cv2.findNonZero(intersections_dilated)
    if coords is None:
        return None   # no intersections found

    hull = cv2.convexHull(coords[:, 0, :])
    if len(hull) < 4:
        return None

    # Rows are ordered top-left, top-right, bottom-right, bottom-left.
    src_pts = order_points(hull[:, 0, :])

    # order_points may select the same hull vertex for two corners (e.g. a
    # diamond-shaped hull). Such a quad has no valid homography, so report
    # "not found" instead of returning a meaningless warp.
    if len(np.unique(src_pts, axis=0)) < 4:
        return None

    width, height = out_size
    dst_pts = np.array([[0, 0],
                        [width - 1, 0],
                        [width - 1, height - 1],
                        [0, height - 1]], dtype=np.float32)

    M = cv2.getPerspectiveTransform(src_pts, dst_pts)
    return cv2.warpPerspective(img, M, (width, height))

In [34]:
from tqdm.notebook import tqdm
def process_dataset(input_dir, output_dir):
    """Run ``extract_middle_cell`` on every image in ``input_dir`` and save the
    results to ``output_dir``.

    Files are selected by extension (case-insensitive) and processed in sorted
    name order. Each result is written as ``<name>_cell<ext>``. Images that
    cannot be read, yield no cell, or cannot be written are reported with a
    printed message and skipped; processing continues with the next file.

    Args:
        input_dir: Directory containing the source images.
        output_dir: Directory for the extracted cells; created if missing.
    """
    os.makedirs(output_dir, exist_ok=True)

    supported = {".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff"}

    # Sorted so the processing order (and the log) is the same on every run.
    files = sorted(
        f for f in os.listdir(input_dir)
        if os.path.splitext(f)[1].lower() in supported
    )

    for filename in tqdm(files, desc="Processing images"):
        in_path = os.path.join(input_dir, filename)
        base, ext = os.path.splitext(filename)
        out_path = os.path.join(output_dir, f"{base}_cell{ext}")

        img = cv2.imread(in_path)
        if img is None:
            print(f"Skipping unreadable file: {filename}")
            continue

        # Extract the cell (once per image).
        cell = extract_middle_cell(img)
        if cell is None:
            print(f"Skipping {filename}: no cell found")
            continue

        # cv2.imwrite signals failure through its return value, not an exception.
        if not cv2.imwrite(out_path, cell):
            print(f"Failed to write {out_path}")


In [None]:
# Root folder of the dataset. Set the CELL_DATA_ROOT environment variable to
# run the notebook on another machine; the fallback is the original location.
DATA_ROOT = os.environ.get(
    "CELL_DATA_ROOT",
    r"D:\PROGRAMMING\Internships_assignments\ResearchInternIITMandi\dataset",
)
input_dir = os.path.join(DATA_ROOT, "dataset")            # source images
output_dir = os.path.join(DATA_ROOT, "extracted_cells")   # rectified crops

process_dataset(input_dir, output_dir)


Processing images:   0%|          | 0/2212 [00:00<?, ?it/s]

Skipping ARTS_00005_r3_c5.png: no cell found
Skipping ARTS_00005_r4_c5.png: no cell found
Skipping ARTS_00006_r10_c3.png: no cell found
Skipping ARTS_00007_r12_c1.png: no cell found
Skipping ARTS_00007_r4_c5.png: no cell found
Skipping ARTS_00020_r10_c1.png: no cell found
Skipping ARTS_00020_r5_c1.png: no cell found
