In [66]:
import json
import numpy as np
import matplotlib.pyplot as plt
from tiatoolbox.tools.patchextraction import get_patch_extractor
import os
import skimage
from tiatoolbox.wsicore.wsireader import VirtualWSIReader
import cv2
from tqdm.auto import tqdm

In [None]:
# Load the split definition for the cell-detection data from JSON.
# NOTE(review): this is an absolute, machine-specific path, and `splits` is not
# referenced by any code visible in this section — confirm it is still needed.
splits_file_path = "/media/u1910100/Extreme SSD/data/tiger/cell_detection/splits.json"
with open(splits_file_path, "r") as file:
    splits = json.load(file)

In [None]:
# Root directories used by the fold-processing cell below:
#   save_dir - extracted patches are written to <save_dir>/fold_<n>/
#   tile_dir - input tiles (.npy) are read from <tile_dir>/fold_<n>/
# NOTE(review): absolute, machine-specific paths.
save_dir = "/media/u1910100/Extreme SSD/data/tiger/cell_detection/dilation/patches_new"
tile_dir = "/media/u1910100/Extreme SSD/data/tiger/cell_detection/dilation/tiles"

In [None]:
def img_to_shape(a, shape):
    """Zero-pad a 3-D (rows, cols, channels) array to ``shape`` rows and columns.

    The padding is split evenly between the two sides of each spatial axis;
    when the amount is odd, the extra row/column goes to the bottom/right.
    The channel axis is never padded.

    Args:
        a: 3-D array to pad.
        shape: ``(rows, cols)`` of the padded result.

    Returns:
        A new array with ``shape`` rows/cols and the channel count of ``a``.

    Raises:
        ValueError: raised by ``np.pad`` when ``a`` is larger than ``shape``
            along an axis (the pad width would be negative).
    """
    target_rows, target_cols = shape
    extra_rows = target_rows - a.shape[0]
    extra_cols = target_cols - a.shape[1]

    top = extra_rows // 2
    left = extra_cols // 2
    pad_widths = [
        (top, extra_rows - top),
        (left, extra_cols - left),
        (0, 0),  # leave the channel axis untouched
    ]
    return np.pad(a, pad_widths, mode="constant")


def mask_to_shape(a, shape):
    """Zero-pad a 2-D array to ``shape``, centring the original content.

    The padding is split evenly between the two sides of each axis; when the
    amount is odd, the extra row/column goes to the bottom/right.

    Args:
        a: 2-D array to pad.
        shape: ``(rows, cols)`` of the padded result.

    Returns:
        A new array of shape ``shape``.

    Raises:
        ValueError: raised by ``np.pad`` when ``a`` is larger than ``shape``
            along an axis (the pad width would be negative).
    """
    target_rows, target_cols = shape
    extra_rows = target_rows - a.shape[0]
    extra_cols = target_cols - a.shape[1]

    top = extra_rows // 2
    left = extra_cols // 2
    pad_widths = [(top, extra_rows - top), (left, extra_cols - left)]
    return np.pad(a, pad_widths, mode="constant")


def get_cell_centers(cell_mask):
    """Return the centroid coordinates of every labelled region in a mask.

    The mask is labelled with ``skimage.measure.label`` and the centroid of
    each resulting region is taken from ``skimage.measure.regionprops``.

    Args:
        cell_mask: Mask array to label.

    Returns:
        ``(xs, ys)``: two lists of equal length, where ``xs`` holds the second
        centroid component (column) and ``ys`` the first (row) of each region.
    """
    labelled = skimage.measure.label(cell_mask)
    centroids = [
        region["centroid"] for region in skimage.measure.regionprops(labelled)
    ]
    xs = [centroid[1] for centroid in centroids]
    ys = [centroid[0] for centroid in centroids]
    return xs, ys


def extract_cell_img_mask(tile_img, tile_mask, img_name, patch_size=128):
    """Cut a tile and its mask into square patches stored as 4-channel arrays.

    Both inputs are wrapped in ``VirtualWSIReader`` objects. A sliding-window
    patch extractor built on the image reader supplies the patch bounds, and
    for each bound the image and mask regions are read with
    ``resolution=0, units="level"``.

    Args:
        tile_img: Tile image handed to ``VirtualWSIReader.open``. Each patch
            read from it is written into three output channels (presumably an
            (H, W, 3) uint8 RGB array — confirm against the caller).
        tile_mask: Mask handed to ``VirtualWSIReader.open`` with
            ``mode="bool"``; only channel 0 of each mask patch is kept.
        img_name: File name of the source tile; its extension is stripped and
            the remainder prefixes every patch name.
        patch_size: Side length in pixels of the square patches. Defaults to
            128, the value that used to be hard-coded.

    Returns:
        dict mapping a save name to a ``uint8`` array of shape
        ``(patch_size, patch_size, 4)``: channels 0-2 hold the image patch and
        channel 3 the mask patch. Names follow ``"<stem>_<idx>_p.npy"`` when
        the mask patch contains at least one non-zero pixel and
        ``"<stem>_<idx>_n.npy"`` otherwise, where ``idx`` is the position of
        the patch in the extractor's coordinate list.
    """
    img_reader = VirtualWSIReader.open(tile_img)
    mask_reader = VirtualWSIReader.open(tile_mask, mode="bool")

    extractor = get_patch_extractor(
        input_img=img_reader,
        method_name="slidingwindow",
        patch_size=(patch_size, patch_size),
        resolution=0,
        units="level",
        # min_mask_ratio=0.3
    )

    # The name stem is the same for every patch, so compute it once.
    img_name_without_ext = os.path.splitext(img_name)[0]

    results = {}
    for idx, coords in enumerate(extractor.coordinate_list):
        img_patch = img_reader.read_bounds(coords, resolution=0, units="level")
        mask_patch = mask_reader.read_bounds(coords, resolution=0, units="level")
        mask_channel = mask_patch[:, :, 0]

        # Stack RGB and mask into a single array so they are saved together.
        img_mask_file = np.zeros(shape=(patch_size, patch_size, 4), dtype=np.uint8)
        img_mask_file[:, :, 0:3] = img_patch
        img_mask_file[:, :, 3] = mask_channel

        # "p" marks patches with any mask pixel set, "n" marks empty ones.
        suffix = "p" if np.count_nonzero(mask_channel) > 0 else "n"
        results[f"{img_name_without_ext}_{idx}_{suffix}.npy"] = img_mask_file

    return results


def erode_cell_mask(mask):
    """Erode ``mask`` once with a 3x3 elliptical structuring element.

    NOTE(review): the original note said this reduces the mask of a single
    cell from 49 to 29 px; that figure depends on the shape of the input
    cells and is not established by this code — confirm.

    Args:
        mask: Mask array passed to ``cv2.erode``.

    Returns:
        The eroded mask returned by ``cv2.erode``.
    """
    structuring_element = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    return cv2.erode(mask, structuring_element, iterations=1)

In [71]:
# Extract patches from every tile of one fold and save each patch as .npy.
fold = 5
# Smallest tile side accepted without padding; must match the patch size
# used by extract_cell_img_mask.
min_tile_side = 128

fold_save_dir = os.path.join(save_dir, f"fold_{fold}")
os.makedirs(fold_save_dir, exist_ok=True)

fold_tile_dir = os.path.join(tile_dir, f"fold_{fold}")
# Only .npy tiles can be loaded with np.load; sorting makes the processing
# order independent of the file system.
fold_files = sorted(
    name for name in os.listdir(fold_tile_dir) if name.endswith(".npy")
)


for tile_file in tqdm(fold_files):
    print(tile_file)
    tile_path = os.path.join(fold_tile_dir, tile_file)
    tile = np.load(tile_path)
    print(tile.shape, tile.dtype)

    # Channels 0-2 are the image and channel 3 is the mask; any further
    # channels in the tile are not used here.
    tile_img = tile[:, :, 0:3]
    mask = tile[:, :, 3]
    # Optional step, currently disabled:
    # mask = erode_cell_mask(mask)

    # Pad when EITHER side is shorter than one patch. The previous `and` test
    # skipped tiles with a single short side. The target keeps the longer
    # side as it is, so the pad width is never negative.
    tile_h, tile_w = tile_img.shape[0], tile_img.shape[1]
    if tile_h < min_tile_side or tile_w < min_tile_side:
        target_shape = (max(tile_h, min_tile_side), max(tile_w, min_tile_side))
        tile_img = img_to_shape(tile_img, target_shape)
        mask = mask_to_shape(mask, target_shape)

    patch_result = extract_cell_img_mask(tile_img, mask, tile_file)

    for fn, patch in patch_result.items():
        save_path = os.path.join(fold_save_dir, fn)
        np.save(save_path, patch)

  0%|          | 0/49 [00:00<?, ?it/s]



136B_[55424, 17753, 56685, 18932].npy
(1179, 1261, 5) uint8
136B_[64379, 16227, 65677, 17403].npy
(1176, 1298, 5) uint8




136B_[73791, 8482, 75066, 9666].npy
(1184, 1275, 5) uint8
198B_[19017, 3481, 20279, 4695].npy
(1214, 1262, 5) uint8




198B_[6510, 2478, 7759, 3691].npy
(1213, 1249, 5) uint8
198B_[6851, 17444, 8175, 18614].npy
(1170, 1324, 5) uint8




231S_[16115, 21334, 17314, 22502].npy
(1168, 1199, 5) uint8
231S_[28124, 36253, 29320, 37434].npy
(1181, 1196, 5) uint8




231S_[51893, 17063, 53106, 18221].npy
(1158, 1213, 5) uint8
TCGA-A2-A0T0-01Z-00-DX1.npy
(155, 144, 5) uint8
TCGA-A2-A0T2-01Z-00-DX1.npy
(147, 159, 5) uint8
TCGA-A7-A0CE-01Z-00-DX1.npy
(142, 151, 5) uint8
TCGA-A7-A4SD-01Z-00-DX1.npy
(144, 143, 5) uint8




TCGA-A7-A4SE-01Z-00-DX1.npy
(141, 129, 5) uint8
TCGA-AC-A2BK-01Z-00-DX1.npy
(144, 139, 5) uint8
TCGA-AN-A0AR-01Z-00-DX1.npy
(142, 134, 5) uint8
TCGA-AN-A0G0-01Z-00-DX1.npy
(160, 144, 5) uint8
TCGA-AO-A0J2-01Z-00-DX1.npy
(293, 287, 5) uint8
TCGA-AO-A12F-01Z-00-DX1.npy
(309, 307, 5) uint8
TCGA-AQ-A54N-01Z-00-DX1.npy
(152, 155, 5) uint8
TCGA-AR-A0TU-01Z-00-DX1.npy
(149, 152, 5) uint8
TCGA-AR-A0U4-01Z-00-DX1.npy
(146, 142, 5) uint8
TCGA-BH-A0AV-01Z-00-DX1.npy
(138, 145, 5) uint8
TCGA-BH-A0B3-01Z-00-DX1.npy
(143, 140, 5) uint8
TCGA-BH-A0B9-01Z-00-DX1.npy
(132, 149, 5) uint8
TCGA-BH-A0E6-01Z-00-DX1.npy
(134, 142, 5) uint8




TCGA-D8-A13Z-01Z-00-DX1.npy
(138, 144, 5) uint8
TCGA-D8-A147-01Z-00-DX1.npy
(143, 140, 5) uint8
TCGA-D8-A1JF-01Z-00-DX1.npy
(159, 137, 5) uint8
TCGA-EW-A1OV-01Z-00-DX1.npy
(154, 150, 5) uint8
TCGA-EW-A1P1-01Z-00-DX1.npy
(145, 142, 5) uint8
TCGA-GM-A2DH-01Z-00-DX1.npy
(146, 150, 5) uint8
TCGA-LL-A5YO-01Z-00-DX1.npy
(135, 141, 5) uint8
TCGA-OL-A5RW-01Z-00-DX1.npy
(280, 296, 5) uint8
TC_S01_P000015_C0001_B104_[50898, 85713, 52270, 86889].npy
(1176, 1372, 5) uint8




TC_S01_P000015_C0001_B104_[63892, 85360, 65079, 86444].npy
(1084, 1187, 5) uint8
TC_S01_P000015_C0001_B104_[64395, 87751, 65649, 88824].npy
(1073, 1254, 5) uint8




TC_S01_P000054_C0001_B101_[20557, 96591, 21758, 97719].npy
(1128, 1201, 5) uint8
TC_S01_P000054_C0001_B101_[27671, 88267, 28946, 89493].npy
(1226, 1275, 5) uint8




TC_S01_P000054_C0001_B101_[28802, 78876, 29888, 79946].npy
(1070, 1086, 5) uint8
TC_S01_P000057_C0001_B105_[107951, 75246, 109161, 76317].npy
(1071, 1210, 5) uint8




TC_S01_P000057_C0001_B105_[112700, 64564, 113976, 65709].npy
(1145, 1276, 5) uint8
TC_S01_P000057_C0001_B105_[92616, 75953, 93709, 77000].npy
(1047, 1093, 5) uint8




TC_S01_P000147_C0001_B104_[23485, 67614, 24676, 68722].npy
(1108, 1191, 5) uint8
TC_S01_P000147_C0001_B104_[31102, 58696, 32133, 59655].npy
(959, 1031, 5) uint8




TC_S01_P000147_C0001_B104_[44555, 80802, 45637, 81863].npy
(1061, 1082, 5) uint8
TC_S01_P000158_C0001_B101_[102793, 79018, 104054, 80142].npy
(1124, 1261, 5) uint8




TC_S01_P000158_C0001_B101_[103602, 82756, 104838, 83891].npy
(1135, 1236, 5) uint8




TC_S01_P000158_C0001_B101_[114472, 87563, 115698, 88757].npy
(1194, 1226, 5) uint8
