In [6]:
import numpy as np
import os
from skimage.io import imread, imsave
from skimage import draw
from scipy.io import loadmat
from read_roi import read_roi_zip
import xml.etree.ElementTree as ET
from scipy.ndimage import label

In [17]:
# Root folder holding the datasets as downloaded; every dataset cell below
# reads its inputs from a sub-folder of this path.
# NOTE(review): hard-coded UNC share -- only resolvable from machines that can reach this host.
path_raw = r"\\10.99.68.53\Digital pathology image lib\_Image libraries for training\2023-05-09 Published HE Nuclei Datasets\_Raw Downloads"
# Root folder the converted tiles are written to: one sub-folder per dataset,
# each containing 'images' and 'masks'.
# NOTE(review): the folder name suggests tiles kept at native 40x magnification -- confirm.
path_40x = r"\\10.99.68.53\Digital pathology image lib\_Image libraries for training\2023-05-09 Published HE Nuclei Datasets\40x Native"

In [18]:
# CoNSeP
# Gather every tile from the 'Train' and 'Test' split folders into memory:
# the image comes from Images/<tile>.png and the instance map from the
# 'inst_map' variable of Labels/<tile>.mat.
path_raw_dataset = os.path.join(path_raw, 'CoNSeP')
dataset = {'Tile Names': [], 'Images': [], 'Masks': []}
for filename in os.listdir(path_raw_dataset):
    if filename not in ('Train', 'Test'):
        continue  # anything that is not one of the two split folders is ignored
    path_test_train = os.path.join(path_raw_dataset, filename)
    path_imgs = os.path.join(path_test_train, 'Images')
    path_msks = os.path.join(path_test_train, 'Labels')
    for tile_name in os.listdir(path_imgs):
        if not tile_name.endswith('.png'):
            continue
        stem = tile_name[:-4]  # tile name without the '.png' suffix
        path_img = os.path.join(path_imgs, tile_name)
        path_msk = os.path.join(path_msks, stem + '.mat')
        # The source PNGs are RGBA with A=255, so only the first three
        # channels are kept.
        pixels = imread(path_img)
        img = pixels[:, :, 0:3].astype(np.uint8)
        mat_contents = loadmat(path_msk)
        msk = mat_contents['inst_map'].astype(np.uint16)
        dataset['Tile Names'].append(stem)
        dataset['Images'].append(img)
        dataset['Masks'].append(msk)

# Write the collected CoNSeP tiles as TIFFs: <name>.tif in 'images' and a
# same-named label mask in 'masks'.
# NOTE(review): 'CoNSep' differs in letter case from the raw folder 'CoNSeP';
# harmless on a case-insensitive filesystem -- confirm it is intended.
path_40x_dataset = os.path.join(path_40x, 'CoNSep')
path_imgs = os.path.join(path_40x_dataset, 'images')
path_msks = os.path.join(path_40x_dataset, 'masks')
# Make sure the output folders exist so saving also works on a fresh output
# root; existing folders are left untouched.
os.makedirs(path_imgs, exist_ok=True)
os.makedirs(path_msks, exist_ok=True)
for name, img, msk in zip(dataset['Tile Names'], dataset['Images'], dataset['Masks']):
    imsave(os.path.join(path_imgs, name + '.tif'), img)
    # Label masks are expected to look low-contrast, so skip skimage's check.
    imsave(os.path.join(path_msks, name + '.tif'), msk, check_contrast=False)

In [23]:
# CryoNuSeg
# Gather every CryoNuSeg tile into memory: the image comes from
# 'tissue images/<tile>.tif' and the instance mask is rasterised from the
# ImageJ ROI archive 'Imagj_zips/<tile>.zip'.
path_raw_dataset = os.path.join(path_raw, 'CryoNuSeg')
path_imgs = os.path.join(path_raw_dataset, 'tissue images')
path_msks = os.path.join(path_raw_dataset, 'Imagj_zips')
dataset = {'Tile Names': [], 'Images': [], 'Masks': []}
for tile_name in os.listdir(path_imgs):
    if not tile_name.endswith('.tif'):
        continue
    stem = tile_name[:-4]  # tile name without the '.tif' suffix
    img_path = os.path.join(path_imgs, tile_name)
    zip_path = os.path.join(path_msks, stem + '.zip')
    img = imread(img_path).astype(np.uint8)
    rois = read_roi_zip(zip_path)
    # Mask has the image's (rows, cols) shape; 0 is background.
    msk = np.zeros(img.shape[:2], dtype=np.uint16)
    # ROIs are numbered from 1 in archive order; where two ROIs overlap the
    # later one overwrites the earlier one.
    for roi_id, roi in enumerate(rois.values(), start=1):
        # ROI 'x' is the horizontal (column) coordinate and 'y' the vertical
        # (row) coordinate, while draw.polygon takes (rows, cols). Passing
        # (y, x) keeps the clipping against msk.shape on the correct axes and
        # makes the former transpose unnecessary, so non-square tiles are
        # handled correctly as well.
        rr, cc = draw.polygon(roi['y'], roi['x'], msk.shape)
        msk[rr, cc] = roi_id
    dataset['Tile Names'].append(stem)
    dataset['Images'].append(img)
    dataset['Masks'].append(msk)
   
# Write the collected CryoNuSeg tiles as TIFFs: <name>.tif in 'images' and a
# same-named label mask in 'masks'.
path_40x_dataset = os.path.join(path_40x, 'CryoNuSeg')
path_imgs = os.path.join(path_40x_dataset, 'images')
path_msks = os.path.join(path_40x_dataset, 'masks')
# Make sure the output folders exist so saving also works on a fresh output
# root; existing folders are left untouched.
os.makedirs(path_imgs, exist_ok=True)
os.makedirs(path_msks, exist_ok=True)
for name, img, msk in zip(dataset['Tile Names'], dataset['Images'], dataset['Masks']):
    imsave(os.path.join(path_imgs, name + '.tif'), img)
    # Label masks are expected to look low-contrast, so skip skimage's check.
    imsave(os.path.join(path_msks, name + '.tif'), msk, check_contrast=False)

In [26]:
# MoNuSeg
# Gather every tile from the 'MoNuSeg_Train' and 'MoNuSeg_Test' folders into
# memory: the image comes from <tile>.tif and the instance mask is rasterised
# from the polygon annotations in the sibling <tile>.xml.
path_raw_dataset = os.path.join(path_raw, 'MoNuSeg')
dataset = {'Tile Names': [], 'Images': [], 'Masks': []}
for item_name in os.listdir(path_raw_dataset):
    if item_name not in ('MoNuSeg_Train', 'MoNuSeg_Test'):
        continue  # anything that is not one of the two split folders is ignored
    path_test_train = os.path.join(path_raw_dataset, item_name)
    for tile_name in os.listdir(path_test_train):
        if not tile_name.endswith('.tif'):
            continue
        stem = tile_name[:-4]  # tile name without the '.tif' suffix
        path_img = os.path.join(path_test_train, tile_name)
        img = imread(path_img).astype(np.uint8)
        # Mask has the image's (rows, cols) shape; 0 is background.
        msk = np.zeros(img.shape[:2], dtype=np.uint16)
        tree = ET.parse(os.path.join(path_test_train, stem + '.xml')).getroot()
        # Each 'Vertices' element sits exactly four levels below the root and
        # outlines one nucleus; the path visits them in document order, as the
        # nested loops it replaces did.
        # Labels now start at 1: the old code incremented the counter and then
        # wrote count + 1, so label 1 was never used.
        for count, vertices in enumerate(tree.findall('./*/*/*/Vertices'), start=1):
            x = np.array([float(vertex.attrib['X']) for vertex in vertices], dtype=float)
            y = np.array([float(vertex.attrib['Y']) for vertex in vertices], dtype=float)
            # 'X' is the horizontal (column) coordinate and 'Y' the vertical
            # (row) coordinate, while draw.polygon takes (rows, cols). Passing
            # (y, x) keeps the clipping against msk.shape on the correct axes
            # and makes the former transpose unnecessary. Where outlines
            # overlap, the later one overwrites the earlier one.
            rr, cc = draw.polygon(y, x, msk.shape)
            msk[rr, cc] = count
        dataset['Tile Names'].append(stem)
        dataset['Images'].append(img)
        dataset['Masks'].append(msk)

# Write the collected MoNuSeg tiles as TIFFs: <name>.tif in 'images' and a
# same-named label mask in 'masks'.
path_40x_dataset = os.path.join(path_40x, 'MoNuSeg')
path_imgs = os.path.join(path_40x_dataset, 'images')
path_msks = os.path.join(path_40x_dataset, 'masks')
# Make sure the output folders exist so saving also works on a fresh output
# root; existing folders are left untouched.
os.makedirs(path_imgs, exist_ok=True)
os.makedirs(path_msks, exist_ok=True)
for name, img, msk in zip(dataset['Tile Names'], dataset['Images'], dataset['Masks']):
    imsave(os.path.join(path_imgs, name + '.tif'), img)
    # Label masks are expected to look low-contrast, so skip skimage's check.
    imsave(os.path.join(path_msks, name + '.tif'), msk, check_contrast=False)

In [29]:
# TNBC
# Gather every TNBC tile into memory: images come from the 'Slide_<num>'
# folders and the matching ground-truth masks from 'GT_<num>', under the same
# file name.
path_raw_dataset = os.path.join(path_raw, 'TNBC')
dataset = {'Tile Names': [], 'Images': [], 'Masks': []}
for item_name in os.listdir(path_raw_dataset):
    # partition() never raises, unlike unpacking split('_'), so stray entries
    # without exactly one underscore are skipped instead of aborting the cell.
    base, sep, num = item_name.partition('_')
    if base != 'Slide' or not sep:
        continue
    path_imgs = os.path.join(path_raw_dataset, item_name)
    path_msks = os.path.join(path_raw_dataset, 'GT_' + num)
    for tile_name in os.listdir(path_imgs):
        # Require the full '.png' suffix, matching the four characters that
        # are stripped from the tile name below.
        if not tile_name.endswith('.png'):
            continue
        img_path = os.path.join(path_imgs, tile_name)
        msk_path = os.path.join(path_msks, tile_name)
        # The source PNGs are RGBA with A=255, so only the first three
        # channels are kept.
        img = imread(img_path)[:, :, 0:3].astype(np.uint8)
        # Turn the ground-truth mask into instance labels: every connected
        # non-zero region gets its own id, so nuclei that touch in the mask
        # share one label.
        # NOTE(review): assumes the GT PNG is single-channel -- confirm.
        labelled, n_components = label(imread(msk_path))
        msk = np.asarray(labelled).astype(np.uint16)
        dataset['Tile Names'].append(tile_name[:-4])
        dataset['Images'].append(img)
        dataset['Masks'].append(msk)

# Write the collected TNBC tiles as TIFFs: <name>.tif in 'images' and a
# same-named label mask in 'masks'.
path_40x_dataset = os.path.join(path_40x, 'TNBC')
path_imgs = os.path.join(path_40x_dataset, 'images')
path_msks = os.path.join(path_40x_dataset, 'masks')
# Make sure the output folders exist so saving also works on a fresh output
# root; existing folders are left untouched.
os.makedirs(path_imgs, exist_ok=True)
os.makedirs(path_msks, exist_ok=True)
for name, img, msk in zip(dataset['Tile Names'], dataset['Images'], dataset['Masks']):
    imsave(os.path.join(path_imgs, name + '.tif'), img)
    # Label masks are expected to look low-contrast, so skip skimage's check.
    imsave(os.path.join(path_msks, name + '.tif'), msk, check_contrast=False)