# Imports and parameters

In [1]:
import ast

import imageio
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from PIL import Image
from tensorflow.keras import layers, models
from tqdm import tqdm

In [2]:
# Ask TensorFlow which GPUs it can see; the cell displays the resulting list.
tf.config.list_physical_devices(device_type="GPU")

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [3]:
# Enable on-demand GPU memory allocation on every visible GPU.
gpus = tf.config.list_physical_devices(device_type="GPU")
if len(gpus) > 0:
    try:
        # The memory-growth setting has to be identical on all GPUs.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices("GPU")
        print(f"{len(gpus)} Physical GPUs, {len(logical_gpus)} Logical GPUs")
    except RuntimeError as e:
        # Memory growth can only be set before the GPUs have been initialised;
        # report the problem instead of aborting the notebook.
        print(e)

1 Physical GPUs, 1 Logical GPUs


# YOUR PARAMETERS

In [4]:
# USER SETTINGS - please adapt the values below to your own setup.

SIZE = 410  # width and height of the (square) crops
N_THREADS = 32  # number of threads

# Locations that are read from.
csv_path = "data/train_bboxes.csv"
images_folder = "data/train"
cell_masks_folder = "data/hpa_cell_mask"
nuclei_masks_folder = "data/hpa_nuclei_mask/"

# Location the crops are written to.
destination_folder = "C:/cell_crops"

#### THANK YOU

In [5]:
# Make every folder setting end with a trailing "/" because the loaders build
# file paths by plain string concatenation. Re-running this cell is harmless:
# a path that already ends with "/" is left untouched.
images_folder = images_folder + ("/" if images_folder[-1] != "/" else "")
cell_masks_folder = cell_masks_folder + ("/" if cell_masks_folder[-1] != "/" else "")
nuclei_masks_folder = nuclei_masks_folder + ("/" if nuclei_masks_folder[-1] != "/" else "")
# The result used to be stored under the misspelled name `desitnation_folder`,
# which left `destination_folder` itself un-normalised.
destination_folder = destination_folder + ("/" if destination_folder[-1] != "/" else "")

In [6]:
# Human-readable class names. The position of a name in this list appears to
# be its numeric label (e.g. "Negative" sits at index 18) - confirm against
# the label column of the CSV.
classes = [
    "Nucleoplasm",
    "Nuclear membrane",
    "Nucleoli",
    "Nucleoli fibrillar center",
    "Nuclear speckles",
    "Nuclear bodies",
    "Endoplasmic reticulum",
    "Golgi apparatus",
    "Intermediate filaments",
    "Actin filaments",
    "Microtubules",
    "Mitotic spindle",
    "Centrosome",
    "Plasma membrane",
    "Mitochondria",
    "Aggresome",
    "Cytosol",
    "Vesicles and punctate cytosolic patterns",
    "Negative",
]

# Load the bounding-box CSV

In [7]:
# Read the bounding-box table with the "ID" column as index, then expose the
# column pandas labelled "Unnamed: 0" under the name "new_index".
df = pd.read_csv(csv_path, index_col="ID")
df = df.rename(columns={"Unnamed: 0": "new_index"})

In [8]:
# Display the loaded table (indexed by image ID) as a quick sanity check.
df

Unnamed: 0_level_0,new_index,old_index,Label,image_height,image_width,boxes_height,boxes_width,boxes
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
5e22a522-bb99-11e8-b2b9-ac1f6b6435d0,0,5,0,3072,3072,"[217, 379, 207, 351, 307, 743, 399, 231, 255, ...","[583, 273, 367, 331, 139, 283, 383, 219, 499, ...","[[0, 217, 1798, 2381], [114, 493, 0, 273], [64..."
5f79a114-bb99-11e8-b2b9-ac1f6b6435d0,1,6,14,2048,2048,"[433, 599, 165, 558, 735, 771, 669, 583, 767, ...","[605, 606, 699, 609, 617, 447, 452, 469, 610, ...","[[0, 433, 0, 605], [0, 599, 568, 1174], [0, 16..."
5c801c04-bb99-11e8-b2b9-ac1f6b6435d0,2,9,14,3072,3072,"[249, 285, 315, 335, 306, 399, 415, 289, 277, ...","[331, 543, 537, 339, 275, 195, 319, 137, 276, ...","[[0, 249, 42, 373], [0, 285, 1894, 2437], [226..."
5e9afd56-bb99-11e8-b2b9-ac1f6b6435d0,3,10,0,2048,2048,"[146, 261, 288, 368, 269, 211, 156, 208, 195, ...","[205, 209, 283, 129, 231, 179, 210, 162, 126, ...","[[0, 146, 455, 660], [0, 261, 635, 844], [5, 2..."
5f1af6b4-bb99-11e8-b2b9-ac1f6b6435d0,4,11,3,2048,2048,"[365, 525, 162, 273, 279, 323, 469, 310, 316, ...","[884, 415, 582, 708, 485, 429, 557, 286, 475, ...","[[0, 365, 0, 884], [0, 525, 766, 1181], [0, 16..."
...,...,...,...,...,...,...,...,...
d9d99186-bbca-11e8-b2bc-ac1f6b6435d0,10407,21798,3,2048,2048,"[845, 173, 299, 455, 587, 1127, 659, 811, 395,...","[1093, 607, 437, 655, 741, 387, 483, 265, 422,...","[[0, 845, 0, 1093], [0, 173, 1122, 1729], [70,..."
daa22470-bbca-11e8-b2bc-ac1f6b6435d0,10408,21799,0,2048,2048,"[1149, 909, 439, 623, 940, 721, 625]","[463, 815, 735, 1057, 536, 223, 451]","[[0, 1149, 370, 833], [0, 909, 666, 1481], [75..."
dc261180-bbca-11e8-b2bc-ac1f6b6435d0,10409,21800,6,2048,2048,"[165, 288, 329, 275, 260, 259, 358, 431, 387, ...","[378, 182, 291, 411, 266, 157, 259, 291, 365, ...","[[0, 165, 78, 456], [0, 288, 459, 641], [0, 32..."
dd0989c4-bbca-11e8-b2bc-ac1f6b6435d0,10410,21801,14,2048,2048,"[357, 325, 425, 345, 515, 527, 251, 424, 343, ...","[399, 512, 591, 695, 461, 437, 333, 507, 615, ...","[[0, 357, 0, 399], [0, 325, 249, 761], [0, 425..."


# Label data

In [9]:
# One-hot encode the integer class label of every row.
# The depth comes from `classes`, so column k always corresponds to classes[k],
# even if some class never occurs in the CSV (pd.get_dummies would silently
# drop that column). The dtype is pinned to uint8 because pd.get_dummies
# returns booleans on recent pandas versions.
one_hot_labels = tf.one_hot(df.Label.to_numpy(), depth=len(classes), dtype=tf.uint8)
one_hot_labels

<tf.Tensor: shape=(10412, 19), dtype=uint8, numpy=
array([[1, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)>

In [10]:
# The "boxes" column stores each image's boxes as the text of a nested list.
# ast.literal_eval parses that text but, unlike eval, only accepts Python
# literals, so a malformed or malicious CSV cell cannot execute code.
bboxes = df.boxes.apply(ast.literal_eval).to_list()
# Preview only the first images; dumping every box bloats the notebook.
bboxes[:3]

[[[0, 217, 1798, 2381],
  [114, 493, 0, 273],
  [646, 853, 1954, 2321],
  [1382, 1733, 194, 525],
  [1778, 2085, 1986, 2125],
  [1810, 2553, 806, 1089],
  [2122, 2521, 1314, 1697],
  [2418, 2649, 98, 317],
  [2522, 2777, 1178, 1677],
  [2662, 2901, 0, 121]],
 [[0, 433, 0, 605],
  [0, 599, 568, 1174],
  [0, 165, 878, 1577],
  [83, 641, 1088, 1697],
  [374, 1109, 524, 1141],
  [482, 1253, 0, 447],
  [527, 1196, 247, 699],
  [534, 1117, 1578, 2047],
  [578, 1345, 1042, 1652],
  [1120, 1633, 114, 725],
  [1126, 1957, 377, 1109],
  [1214, 1805, 1218, 1901],
  [1303, 2047, 688, 1309],
  [1502, 2047, 1826, 2047],
  [1514, 2047, 0, 508]],
 [[0, 249, 42, 373],
  [0, 285, 1894, 2437],
  [226, 541, 2534, 3071],
  [298, 633, 1714, 2053],
  [746, 1052, 1050, 1325],
  [834, 1233, 1594, 1789],
  [842, 1257, 2670, 2989],
  [966, 1255, 974, 1111],
  [984, 1261, 1031, 1307],
  [994, 1469, 2026, 2333],
  [2162, 2429, 1434, 1733],
  [2174, 2461, 506, 693],
  [2230, 2537, 2574, 2925],
  [2270, 2665, 54, 33

In [11]:
# Image IDs in CSV order; the loaders use them as file-name stems.
filenames = list(df.index)
filenames

['5e22a522-bb99-11e8-b2b9-ac1f6b6435d0',
 '5f79a114-bb99-11e8-b2b9-ac1f6b6435d0',
 '5c801c04-bb99-11e8-b2b9-ac1f6b6435d0',
 '5e9afd56-bb99-11e8-b2b9-ac1f6b6435d0',
 '5f1af6b4-bb99-11e8-b2b9-ac1f6b6435d0',
 '5fb9edb4-bb99-11e8-b2b9-ac1f6b6435d0',
 '60bfc742-bb99-11e8-b2b9-ac1f6b6435d0',
 '5f005cfa-bb99-11e8-b2b9-ac1f6b6435d0',
 '5beb608c-bb99-11e8-b2b9-ac1f6b6435d0',
 '636e164c-bb99-11e8-b2b9-ac1f6b6435d0',
 '5b99d3e8-bb99-11e8-b2b9-ac1f6b6435d0',
 '6074a6a4-bb99-11e8-b2b9-ac1f6b6435d0',
 '64ab5506-bb99-11e8-b2b9-ac1f6b6435d0',
 '643f73a4-bb99-11e8-b2b9-ac1f6b6435d0',
 '66552404-bb99-11e8-b2b9-ac1f6b6435d0',
 '670096d6-bb99-11e8-b2b9-ac1f6b6435d0',
 '68ad8444-bb99-11e8-b2b9-ac1f6b6435d0',
 '6a70893e-bb99-11e8-b2b9-ac1f6b6435d0',
 '6d856b08-bb99-11e8-b2b9-ac1f6b6435d0',
 '6afe63b2-bb99-11e8-b2b9-ac1f6b6435d0',
 '7072246e-bb99-11e8-b2b9-ac1f6b6435d0',
 '6f2690b8-bb99-11e8-b2b9-ac1f6b6435d0',
 '739453ba-bb99-11e8-b2b9-ac1f6b6435d0',
 '6f303dfc-bb99-11e8-b2b9-ac1f6b6435d0',
 '6fa7b67a-bb99-

In [12]:
# Dataset with one scalar string element per image ID.
filelist_ds = tf.data.Dataset.from_tensor_slices(tensors=filenames)

In [13]:
# Inspect the element type of the dataset (display only).
filelist_ds.element_spec

TensorSpec(shape=(), dtype=tf.string, name=None)

# Pre-process

In [14]:
# Index of the "Negative" class, looked up by name so it stays correct if
# `classes` is ever reordered or extended (it used to be hard-coded as 18).
NEGATIVE = classes.index("Negative")
# One-hot row for the "Negative" class, shaped (1, number of classes).
NEGATIVE_tensor = tf.reshape(tf.one_hot(NEGATIVE, len(classes), dtype=tf.uint8), (1, -1))

def get_bboxes(file_path):
    """
    Return the bounding boxes of one image as a tensor.

    :param file_path: Scalar string tensor holding the image ID (an index value of `df`).
    :return: The parsed content of the image's "boxes" cell, converted to a tensor.
    :rtype: tensorflow.Tensor
    """
    image_id = file_path.numpy().decode("UTF-8")
    # literal_eval only accepts Python literals, so the CSV text cannot run
    # arbitrary code the way eval() would.
    return tf.convert_to_tensor(ast.literal_eval(df.loc[image_id, "boxes"]))
    
def get_label(file_path):
    """
    Return the one-hot label row of one image.

    :param file_path: Scalar string tensor holding the image ID (an index value of `df`).
    :return: The row of `one_hot_labels` selected by the image's "new_index" value.
    """
    image_id = file_path.numpy().decode("UTF-8")
    row = df.loc[image_id, "new_index"]
    return one_hot_labels[row]

def get_nuc_mask(file_path):
    """
    Load the nuclei mask of one image.

    Reads "<nuclei_masks_folder><image ID>.npz" and returns its "arr_0" array
    as a uint8 tensor.

    :param file_path: Scalar string tensor holding the image ID.
    :return: The mask as a tensor.
    :rtype: tensorflow.Tensor (dtype=tf.uint8)
    """
    image_id = file_path.numpy().decode('UTF-8')
    # Open the archive in a `with` block so its file handle is closed right
    # away; this function runs once per image and would otherwise leave
    # thousands of handles open until garbage collection.
    with np.load(nuclei_masks_folder + image_id + ".npz") as archive:
        # NOTE(review): the uint8 cast assumes mask values never exceed 255 - confirm.
        mask = archive["arr_0"].astype(np.uint8)
    return tf.convert_to_tensor(mask)

def get_cell_mask(file_path):
    """
    Load the cell mask of one image.

    Reads "<cell_masks_folder><image ID>.npz" and returns its "arr_0" array
    as a uint8 tensor.

    :param file_path: Scalar string tensor holding the image ID.
    :return: The mask as a tensor.
    :rtype: tensorflow.Tensor (dtype=tf.uint8)
    """
    image_id = file_path.numpy().decode('UTF-8')
    # Open the archive in a `with` block so its file handle is closed right
    # away; this function runs once per image and would otherwise leave
    # thousands of handles open until garbage collection.
    with np.load(cell_masks_folder + image_id + ".npz") as archive:
        # NOTE(review): the uint8 cast assumes mask values never exceed 255 - confirm.
        mask = archive["arr_0"].astype(np.uint8)
    return tf.convert_to_tensor(mask)

def get_image(file_path):
    """
    Load the green-channel PNG of one image.

    :param file_path: Scalar string tensor holding the image ID.
    :return: Content of "<images_folder><image ID>_green.png" as a tensor.
    :rtype: tensorflow.Tensor
    """
    image_id = file_path.numpy().decode('UTF-8')
    png_path = images_folder + image_id + "_green.png"
    pixels = imageio.imread(png_path)
    return tf.convert_to_tensor(pixels)
    
def crop_tensor(tensor: tf.Tensor, bbox: list, filter_in : int = None):
    """
    Cut a rectangular window out of a tensor, optionally keeping a single value.

    :param tensor: Tensor to crop; its first two axes are sliced.
    :param bbox: Four indices: start and stop along the first axis, then start
        and stop along the second axis.
    :param filter_in: When given, every element of the crop that differs from
        this value is replaced by 0. ``None`` (the default) disables filtering.
    :return: The cropped (and possibly filtered) tensor.
    :rtype: tensorflow.Tensor
    """
    crop = tensor[bbox[0]:bbox[1], bbox[2]: bbox[3]]
    # Compare against None rather than relying on truthiness, so that a
    # filter value of 0 is honoured instead of being read as "no filter".
    if filter_in is not None:
        crop = tf.where(crop == filter_in, crop, 0)
    return crop

def add_padding(crop: tf.Tensor, size: int):
    """
    Surround an image with a border so that it becomes a ``size`` x ``size`` square.

    The original content is kept as is (no rescaling) and is centred; when the
    missing amount is odd, the extra row/column goes to the bottom/right.

    :param crop: A 3D tensor, aka an image, to enlarge.
    :type crop: tensorflow.Tensor (ndims=3)
    :param size: Target width and height.
    :return: The padded tensor.
    :rtype: tensorflow.Tensor
    """
    height, width = crop.shape[:2]

    top = (size - height) // 2
    left = (size - width) // 2
    bottom = size - height - top
    right = size - width - left

    # The last axis (channels) is never padded.
    paddings = ((top, bottom), (left, right), (0, 0))
    return tf.pad(crop, paddings)


def resize_crop(crop: tf.Tensor, size: int):
    """
    Bring a 3D tensor to a fixed ``size`` x ``size`` square without distorting it.

    An image that already fits inside the square is only padded. An image
    that is too tall or too wide is shrunk with area interpolation and padded
    to the square by ``tf.image.resize_with_pad``.

    :param crop: A 3D tensor, aka an image, to resize.
    :type crop: tensorflow.Tensor (dtype=tf.uint8, ndims=3)
    :param size: Target width and height.
    :return: A resized tensor.
    :rtype: tensorflow.Tensor
    """
    too_large = tf.shape(crop)[:2] > size

    # Fits already: keep the pixels untouched and just add a border.
    if not too_large.numpy().any():
        return add_padding(crop, size)

    return tf.image.resize_with_pad(crop, size, size, method=tf.image.ResizeMethod.AREA)


In [15]:
def get_crops(image_path: tf.Tensor, wipe_other_cells: bool = True, wipe_other_green: bool = False, padding: float = 0.01):
    """
    Build one SIZE x SIZE three-channel crop per bounding box of an image.

    Channel layout of every crop: cell mask without the nucleus, green image,
    nucleus mask. Crops are returned in bounding-box order, so entry ``i``
    always belongs to box ``i``.

    :param image_path: Scalar string tensor holding the image ID.
    :param wipe_other_cells: Keep only the mask value ``i + 1`` in the two mask
        channels of box ``i``. This assumes box ``i`` belongs to the cell
        labelled ``i + 1`` in the masks - TODO confirm.
    :param wipe_other_green: Together with ``wipe_other_cells``, also blank the
        green pixels that lie on another cell.
    :param padding: Currently unused; kept for interface compatibility.
    :return: uint8 tensor of shape (number of boxes, SIZE, SIZE, 3), or a single
        all-zero crop when the image has no boxes.
    :rtype: tensorflow.Tensor
    """
    cell_tensor = get_cell_mask(image_path)
    nuclei_tensor = get_nuc_mask(image_path)
    cell_no_nuclei_tensor = tf.math.subtract(cell_tensor, nuclei_tensor)
    green = get_image(image_path)

    # Collect the crops in a list and concatenate once at the end. Growing a
    # tensor with tf.concat on every iteration copies all previous crops again.
    crops = []
    for i, bbox in enumerate(get_bboxes(image_path)):
        cell_id = i + 1
        keep = cell_id if wipe_other_cells else None

        red_crop = crop_tensor(cell_no_nuclei_tensor, bbox, keep)
        blue_crop = crop_tensor(nuclei_tensor, bbox, keep)
        green_crop = crop_tensor(green, bbox)

        if wipe_other_cells and wipe_other_green:
            total_cell_crop = crop_tensor(cell_tensor, bbox)
            on_other_cell = tf.math.logical_and(total_cell_crop > 0, total_cell_crop != cell_id)
            green_crop = tf.where(on_other_cell, 0, green_crop)

        # NOTE(review): the masks are uint8 and hold label values, so
        # `label * 255` exceeds the uint8 range for labels above 1. If a clean
        # 0/255 mask is intended, binarise the masks first - confirm.
        final_im = tf.stack([red_crop*255, green_crop, blue_crop*255], 2)
        final_im = tf.dtypes.cast(resize_crop(final_im, SIZE), tf.uint8)
        final_im = tf.expand_dims(final_im, axis=0)

        if final_im.shape[1] != SIZE or final_im.shape[2] != SIZE:
            print(f"Weird shape : {final_im.shape} for image : {image_path}.")
            # Substitute a blank crop for this box only. Overwriting the whole
            # result here would drop every crop collected so far and shift the
            # index of the crops that follow.
            final_im = tf.zeros([1, SIZE, SIZE, 3], dtype=tf.uint8)

        crops.append(final_im)

    if not crops:
        return tf.zeros([1, SIZE, SIZE, 3], dtype=tf.uint8)

    return tf.concat(crops, axis=0)

# Process

In [16]:
# Turn every image ID into its stack of crops. get_crops needs eager tensors
# (it calls .numpy()), so it is wrapped in tf.py_function. The two True values
# are its wipe_other_cells and wipe_other_green arguments.
# Parallelism follows the N_THREADS setting instead of a hard-coded 32.
ds_batched = filelist_ds.map(
    lambda x: tf.py_function(func=get_crops, inp=[x, True, True], Tout=tf.uint8),
    num_parallel_calls=N_THREADS,
).prefetch(1000)

In [17]:
# Create the output folder up front (no-op if it already exists) so the first
# save does not fail on a missing directory.
tf.io.gfile.makedirs(destination_folder)
# Drop any trailing "/" so the paths below never contain "//".
output_root = destination_folder.rstrip("/")

# Write every crop as "<image ID>_<crop index>.png".
# NOTE(review): pairing by zip() assumes ds_batched yields elements in the
# same order as `filenames` - confirm for the map settings in use.
for image, filename in tqdm(zip(ds_batched, filenames), total=len(filenames)):
    for i, crop in enumerate(image):
        tf.keras.preprocessing.image.save_img(f"{output_root}/{filename}_{i}.png", crop)

100%|██████████| 10412/10412 [1:16:56<00:00,  2.26it/s]


# RESIZE ONLY

In [None]:
filelist_ds = tf.data.Dataset.from_tensor_slices(filenames)

def resize_custom(img: tf.Tensor, size: int):
    """
    Shrink an image so that it fits a ``size`` x ``size`` square.

    An image that is taller or wider than ``size`` is resized (ratio preserved,
    padded to the square) with area interpolation. An image that already fits
    is returned at its own size. The result is always uint8 with a channel axis.

    :param img: Image tensor of rank 2 (height, width) or rank 3 (height, width, channels).
    :param size: Maximum width and height.
    :return: The resized image.
    :rtype: tensorflow.Tensor (dtype=tf.uint8, ndims=3)
    """
    # tf.image.resize_with_pad and save_img both need a channel axis.
    if len(img.shape) == 2:
        img = img[..., tf.newaxis]

    if (tf.shape(img)[:2] > size).numpy().any():
        img = tf.image.resize_with_pad(img, size, size, method=tf.image.ResizeMethod.AREA)

    # Resizing yields floats, but the dataset declares uint8 elements.
    return tf.cast(img, tf.uint8)


def _load_and_resize(file_path):
    """Load the green image of one image ID and resize it to at most SIZE x SIZE."""
    # NOTE(review): the dataset yields image IDs, not pixels, so the image is
    # loaded here. Resizing the green image is an assumption - confirm.
    return resize_custom(get_image(file_path), SIZE)


ds_batched = filelist_ds.map(
    lambda x: tf.py_function(func=_load_and_resize, inp=[x], Tout=tf.uint8),
    num_parallel_calls=N_THREADS,
).prefetch(1000)

# Create the output folder up front (no-op if it already exists).
tf.io.gfile.makedirs(destination_folder)
# Drop any trailing "/" so the paths below never contain "//".
output_root = destination_folder.rstrip("/")

for image, filename in tqdm(zip(ds_batched, filenames), total=len(filenames)):
    tf.keras.preprocessing.image.save_img(f"{output_root}/{filename}.png", image)