In [17]:
import random

import os
import sys

import numpy as np
import torch

from PIL import Image

from skimage import io
import matplotlib.pyplot as plt

In [18]:
# Root folder of the dataset, relative to the current working directory.
data_path = os.path.join(os.curdir, "data", "Dataset011_Cell")

# Training images and their labels live in sibling folders under the root.
train_image_dir_path = os.path.join(data_path, "imagesTr")
train_label_dir_path = os.path.join(data_path, "labelsTr")

# Two further image folders: the "Internal" and "External" sets.
internal_image_dir_path = os.path.join(data_path, "imagesTs-Internal")
external_image_dir_path = os.path.join(data_path, "imagesTs-External")

In [25]:
# os.listdir() returns entries in arbitrary order, but the rest of the notebook
# pairs train_images_paths[i] with train_labels_paths[i] purely by position.
# Sorting every listing makes the order deterministic across runs and machines.
# NOTE(review): this assumes an image and its label sort to the same position
# (e.g. they share a file-name stem) -- verify against the dataset.
train_images = sorted(os.listdir(train_image_dir_path))
train_images_paths = [os.path.join(train_image_dir_path, image) for image in train_images]

train_labels = sorted(os.listdir(train_label_dir_path))
train_labels_paths = [os.path.join(train_label_dir_path, label) for label in train_labels]

external_images = sorted(os.listdir(external_image_dir_path))
external_images_paths = [os.path.join(external_image_dir_path, image) for image in external_images]

internal_images = sorted(os.listdir(internal_image_dir_path))
internal_images_paths = [os.path.join(internal_image_dir_path, image) for image in internal_images]

# Rename the images to be compatible with the nnUNet framework.

In [20]:
def _rename_to_sequence(paths, name_template):
    """Rename each file in ``paths`` to ``name_template.format(index)`` + its extension.

    All renamed files are placed in the directory of ``paths[0]``. The rename is
    done in two passes through temporary names, so a target name that is
    currently held by another file of the same batch is never overwritten.

    Raises:
        FileExistsError: if a target (or temporary) name is already taken by a
            file that is not part of this batch. Nothing is renamed in that case.
    """
    temp_suffix = ".renaming"
    target_dir = os.path.dirname(paths[0])

    moves = []
    for index, source in enumerate(paths):
        # splitext only inspects the file name, so dots in directory names
        # (such as a leading "./") are never mistaken for an extension.
        extension = os.path.splitext(source)[1]
        destination = os.path.join(target_dir, name_template.format(index) + extension)
        # A file that already carries its final name needs no rename.
        if os.path.abspath(source) != os.path.abspath(destination):
            moves.append((source, destination))

    # Validate everything up front so a failure cannot leave a half-renamed folder.
    batch = {os.path.abspath(source) for source, _ in moves}
    for _, destination in moves:
        if os.path.exists(destination) and os.path.abspath(destination) not in batch:
            raise FileExistsError(f"Refusing to overwrite existing file: {destination}")
        if os.path.exists(destination + temp_suffix):
            raise FileExistsError(
                f"Leftover temporary file from an earlier run: {destination + temp_suffix}"
            )

    # Pass 1: move every source out of the way. Pass 2: give it its final name.
    parked = []
    for source, destination in moves:
        temporary = destination + temp_suffix
        os.rename(source, temporary)
        parked.append((temporary, destination))
    for temporary, destination in parked:
        os.rename(temporary, destination)


def rename_images(image_paths, label_paths=None):
    """Rename images (and optionally their labels) to sequential ``cell_*`` names.

    The image at position ``i`` becomes ``cell_<i>_0000.<ext>`` and, when
    ``label_paths`` is given, the label at position ``i`` becomes
    ``cell_<i>.<ext>``. ``<i>`` is zero-padded to three digits and each file
    keeps its own extension. Images and labels are matched purely by position.

    Args:
        image_paths: paths of the image files to rename, in the desired order.
        label_paths: optional paths of the label files, in the same order as
            ``image_paths``.

    Raises:
        ValueError: if ``label_paths`` is given but its length differs from
            ``image_paths``.
        FileExistsError: if a rename would overwrite a file outside the batch.
    """
    if len(image_paths) == 0:
        return

    if label_paths and len(label_paths) != len(image_paths):
        raise ValueError(
            f"Got {len(image_paths)} images but {len(label_paths)} labels; "
            "they must correspond one-to-one."
        )

    _rename_to_sequence(image_paths, "cell_{:03d}_0000")
    if label_paths:
        _rename_to_sequence(label_paths, "cell_{:03d}")

rename_images(train_images_paths, train_labels_paths)
rename_images(external_images_paths)
rename_images(internal_images_paths)

# The files now have new names on disk, so the path lists built before the
# rename point at files that no longer exist. Re-list the folders here so the
# following cells work on a fresh "Restart & Run All" without re-running an
# earlier cell by hand. Sorting keeps image i and label i aligned as long as
# the index fits in three digits (fewer than 1000 files per folder).
train_images = sorted(os.listdir(train_image_dir_path))
train_images_paths = [os.path.join(train_image_dir_path, name) for name in train_images]

train_labels = sorted(os.listdir(train_label_dir_path))
train_labels_paths = [os.path.join(train_label_dir_path, name) for name in train_labels]

external_images = sorted(os.listdir(external_image_dir_path))
external_images_paths = [os.path.join(external_image_dir_path, name) for name in external_images]

internal_images = sorted(os.listdir(internal_image_dir_path))
internal_images_paths = [os.path.join(internal_image_dir_path, name) for name in internal_images]

# Convert from tiff to png to standardize them for the nnUNet framework, as it expects all images to be of the same format.
### Note: If the alpha channel from tiff were important here, we could find ways of converting from png to tiff, as opposed to the other way around.
### Also note: If we were not using nnUNet, we could possibly process both image formats using two different input layers, keeping everything in the backbone the same.

In [22]:
def convert_to_png(image_paths, convert_from="tiff"):
    """Convert matching images to RGB PNG files and delete the originals.

    A file is converted when ``convert_from`` occurs in its extension
    (case-insensitive), or unconditionally when ``convert_from`` is ``"all"``.
    The PNG is written into the directory of ``image_paths[0]`` under the
    original file name with its extension replaced by ``.png``.

    Args:
        image_paths: paths of the candidate image files.
        convert_from: extension (fragment) selecting which files to convert,
            e.g. ``"tiff"`` or ``"jpg"``; ``"all"`` converts every file.
    """
    if len(image_paths) == 0:
        return

    output_dir = os.path.dirname(image_paths[0])
    wanted = convert_from.lower()

    for image_path in image_paths:
        # Split off only the last extension so names containing several dots
        # keep the rest of their name intact.
        stem, extension = os.path.splitext(os.path.basename(image_path))
        # Match against the extension only; a name that merely contains the
        # format string (e.g. "tiff_overview.png") must not be selected.
        if wanted != "all" and wanted not in extension.lower():
            continue

        png_path = os.path.join(output_dir, stem + ".png")

        # convert() returns a fully loaded copy, so the source file can be
        # closed (even on error) before anything is written.
        with Image.open(image_path) as img:
            rgb_image = img.convert("RGB")
        rgb_image.save(png_path, "PNG")

        # With convert_from="all" an input may already be the PNG that was just
        # written; deleting it would destroy the only copy.
        if not os.path.samefile(png_path, image_path):
            os.remove(image_path)

# Convert both jpg and tiff images to png.
convert_to_png(train_images_paths, convert_from="jpg")
convert_to_png(train_images_paths, convert_from="tiff")

convert_to_png(external_images_paths, convert_from="all")
convert_to_png(internal_images_paths, convert_from="all")

# Converted files have a new extension and their originals are deleted, so the
# path lists built earlier are stale. Re-list the folders so the train /
# validation split further down operates on files that actually exist.
# Sorting keeps image i and label i aligned as long as the index fits in three
# digits (fewer than 1000 files per folder).
train_images = sorted(os.listdir(train_image_dir_path))
train_images_paths = [os.path.join(train_image_dir_path, name) for name in train_images]

train_labels = sorted(os.listdir(train_label_dir_path))
train_labels_paths = [os.path.join(train_label_dir_path, name) for name in train_labels]

external_images = sorted(os.listdir(external_image_dir_path))
external_images_paths = [os.path.join(external_image_dir_path, name) for name in external_images]

internal_images = sorted(os.listdir(internal_image_dir_path))
internal_images_paths = [os.path.join(internal_image_dir_path, name) for name in internal_images]

./data/Dataset011_Cell/imagesTr
./data/Dataset011_Cell/imagesTr


# Split data into train and validation.
### **Only do this after nnUNet data preprocessing.**

In [26]:
# Folders that receive the validation images and labels.
val_images_path = os.path.join(data_path, "imagesVl")
val_labels_path = os.path.join(data_path, "labelsVl")

# Create both folders; an already existing folder is not an error.
for val_dir in (val_images_path, val_labels_path):
    os.makedirs(val_dir, exist_ok=True)

In [27]:
import shutil

# Fraction of the training files that is moved into the validation folders.
VAL_FRACTION = 0.2

n_images = len(train_images_paths)
# Images and labels are selected with the same indices, so the lists must line up.
assert n_images == len(train_labels_paths), "every training image needs a matching label"

n_val = int(n_images * VAL_FRACTION)

# Pick n_val evenly spaced indices in [0, n_images). linspace with
# endpoint=False always yields exactly n_val values, whereas arange with a
# float step can emit one extra, out-of-range index. It also returns an empty
# selection instead of dividing by zero when there are fewer than 5 images.
val_indices_list = np.linspace(0, n_images, n_val, endpoint=False).round().astype("int")
val_images_paths = np.array(train_images_paths)[val_indices_list]
val_labels_paths = np.array(train_labels_paths)[val_indices_list]

# Move the selected image/label files out of the training folders.
for path in val_images_paths:
    shutil.move(path, val_images_path)

for path in val_labels_paths:
    shutil.move(path, val_labels_path)