In [3]:
import os
import warnings
from glob import glob
from shutil import copyfile

import imageio
import matplotlib.pyplot as plt
import numpy as np
import tifffile
from imageio import imread
from imageio import volread
from skimage.transform import rescale
from tqdm import tqdm

In [24]:
# Collect the paths of every image and label file.
# glob() returns files in arbitrary (filesystem-dependent) order, so both lists
# are sorted to make the order reproducible and to keep images and labels
# aligned by file name.
imgs_paths = sorted(glob('../dl-course-2022/Project5_Drosophila_microCT_segmentation/images/*.tif'))
labels_paths = sorted(glob('../dl-course-2022/Project5_Drosophila_microCT_segmentation/labels/*.tif'))

# Load every image and its labels


In [25]:
# Read every image file into memory, showing a progress bar while loading.
images = [imread(path) for path in tqdm(imgs_paths)]
print('Number of images:' , len(images))

# Same for the label files.
labels = [imread(path) for path in tqdm(labels_paths)]
print('Number of labels:' , len(labels))

100%|██████████| 81/81 [00:48<00:00,  1.67it/s]


Number of images: 81


100%|██████████| 81/81 [00:48<00:00,  1.67it/s]

Number of labels: 81





In [26]:
# Stack the per-file arrays along a new leading axis (one entry per file).
images = np.stack(images, axis=0)
labels = np.asarray(labels)

# Rescale the dataset

In [45]:
def to_target_shape(data , target_shape):
    """Crop and/or zero-pad `data` so that its shape equals `target_shape`.

    Axes that are too long are cropped at their end (the leading `tsh`
    entries are kept); axes that are too short are padded at their end with
    zeros (the default of `np.pad`).

    Parameters
    ----------
    data : np.ndarray
        Array to bring to the target shape.
    target_shape : sequence of int
        Desired size of every axis. Any sequence (tuple, list, ...) is
        accepted; it must have exactly `data.ndim` entries.

    Returns
    -------
    np.ndarray
        `data` itself if it already has the target shape, otherwise the
        cropped and/or padded array.

    Raises
    ------
    ValueError
        If `target_shape` does not have one entry per axis of `data`.
    """
    # Normalise to a tuple of ints so that the comparisons with `data.shape`
    # (always a tuple) also work when a list is passed.
    target_shape = tuple(int(size) for size in target_shape)
    if data.ndim != len(target_shape):
        raise ValueError(
            f"target_shape {target_shape} has {len(target_shape)} dimensions "
            f"but data has shape {data.shape}"
        )
    if data.shape == target_shape:
        return data

    # Crop every axis that is longer than requested.
    crop = tuple(slice(0, min(sh , tsh)) for sh , tsh in zip(data.shape , target_shape))
    data = data[crop]

    # Pad (at the end, with zeros) every axis that is still too short.
    padding = [(0, tsh - sh) for sh , tsh in zip(data.shape , target_shape)]
    if any(after > 0 for _ , after in padding):
        data = np.pad(data , padding)

    assert data.shape == target_shape
    return data

def rescale_volume(im_path , scale_factor , target_shape , im_output_folder , lbl_output_folder):
    """Rescale one image volume together with its label volume and save both.

    The label file is looked up next to the image: in the image's directory
    with "images" replaced by "labels" in the directory name, and under the
    image's file name with "Rec" replaced by "Rec_labels".

    Parameters
    ----------
    im_path : str
        Path of the image volume to process.
    scale_factor : float or tuple of float
        Passed as `scale` to `skimage.transform.rescale`.
    target_shape : sequence of int
        Shape both volumes are cropped/padded to after rescaling.
    im_output_folder : str
        Existing folder the rescaled image is written to.
    lbl_output_folder : str
        Existing folder the rescaled labels are written to.

    Returns
    -------
    None
        Nothing is written (and a warning is emitted) when the image and the
        label volume do not have the same shape.
    """
    # Only rewrite the last directory component and the file name. Applying
    # str.replace to the whole path would also alter unrelated parent
    # directories that happen to contain "images" or "Rec".
    im_dir , im_name = os.path.split(im_path)
    parent_dir , im_dir_name = os.path.split(im_dir)
    lbl_name = im_name.replace("Rec" , "Rec_labels")
    label_path = os.path.join(parent_dir , im_dir_name.replace("images" , "labels") , lbl_name)

    volume = imageio.volread(im_path)
    labels = imageio.volread(label_path)
    if volume.shape != labels.shape:
        # Keep the best-effort behaviour (skip the pair) but make it visible.
        warnings.warn(
            f"Skipping {im_path}: image shape {volume.shape} does not match "
            f"label shape {labels.shape}"
        )
        return

    volume = rescale(volume , scale=scale_factor , preserve_range=True)
    # order=0 without anti-aliasing so that label ids are never interpolated;
    # the result is cast back to the original label dtype.
    labels = rescale(labels , scale=scale_factor , order=0 , preserve_range=True, anti_aliasing=False).astype(labels.dtype)

    volume = to_target_shape(volume , target_shape)
    labels = to_target_shape(labels , target_shape)

    # Save both volumes under their original file names.
    imageio.volwrite(os.path.join(im_output_folder , im_name) ,  volume)
    imageio.volwrite(os.path.join(lbl_output_folder , lbl_name) ,  labels)
             
def rescaled_data(input_folder , dataset_folder , scale_factor=(1. / 8, 1. / 4, 1. / 4) , target_shape=(128,) * 3):
    """Rescale every volume of a dataset and store the result on disk.

    Creates `<input_folder>/rescaled/images` and
    `<input_folder>/rescaled/labels` and fills them by calling
    `rescale_volume` on every `*.tif` file in `<dataset_folder>/images`.

    Parameters
    ----------
    input_folder : str
        Folder in which the `rescaled` output folder is created.
    dataset_folder : str
        Folder containing the `images` sub-folder of the original dataset.
    scale_factor : float or tuple of float, optional
        Scale factor forwarded to `rescale_volume`.
    target_shape : sequence of int, optional
        Final shape of every saved volume, forwarded to `rescale_volume`.

    Returns
    -------
    str
        Path of the `rescaled` output folder.
    """
    output_folder = os.path.join(input_folder , 'rescaled')
    im_resc_folder = os.path.join(output_folder , 'images')
    labels_resc_folder = os.path.join(output_folder , 'labels')
    os.makedirs(im_resc_folder , exist_ok=True)
    os.makedirs(labels_resc_folder , exist_ok=True)

    # glob() order is filesystem-dependent; sort so runs are reproducible.
    image_paths = sorted(glob(os.path.join(dataset_folder, "images", "*.tif")))
    for im_path in tqdm(image_paths):
        rescale_volume(im_path , scale_factor , target_shape , im_resc_folder , labels_resc_folder)
    return output_folder

In [46]:
# Rescale the original dataset; the output goes to a 'rescaled' folder
# created inside the current working directory.
dataset_path = '../dl-course-2022/Project5_Drosophila_microCT_segmentation/'
input_folder = os.getcwd()
output_folder = rescaled_data(input_folder=input_folder , dataset_folder=dataset_path)

100%|██████████| 81/81 [26:58<00:00, 19.98s/it]


In [44]:
import shutil
# Optional cleanup, disabled by default: uncommenting the next line deletes
# the whole rescaled output folder and everything inside it.
#shutil.rmtree(output_folder)

# Make splits

In [48]:
# Make split
def _make_split(output_folder , train_fraction=0.7 , train_val_fraction=0.8 , seed=0):
    """Copy the rescaled images and labels into train/validation/test folders.

    Every `*.tif` file in `<output_folder>/images` is copied, together with
    its label file from `<output_folder>/labels` (same file name with
    "Rec.tif" replaced by "Rec_labels.tif"), into
    `<output_folder>/<split>/images` and `<output_folder>/<split>/labels`,
    where `<split>` is "train", "validation" or "test".

    Files already present in the split folders are not removed, so the split
    folders should be empty when the split parameters are changed.

    Parameters
    ----------
    output_folder : str
        Folder containing the `images` and `labels` sub-folders.
    train_fraction : float, optional
        Fraction of the files assigned to the training set.
    train_val_fraction : float, optional
        Fraction of the files assigned to training and validation together;
        the remaining files form the test set.
    seed : int or None, optional
        Seed of the random permutation applied to the sorted file list.
        With `None` the files are assigned in sorted order.

    Raises
    ------
    ValueError
        If the fractions do not satisfy
        0 <= train_fraction <= train_val_fraction <= 1.
    """
    if not 0.0 <= train_fraction <= train_val_fraction <= 1.0:
        raise ValueError(
            "expected 0 <= train_fraction <= train_val_fraction <= 1, got "
            f"{train_fraction} and {train_val_fraction}"
        )

    image_dir = os.path.join(output_folder , "images")
    label_dir = os.path.join(output_folder , "labels")

    # glob() returns files in arbitrary order; sort first so that the seeded
    # permutation below always yields the same split.
    image_paths = sorted(glob(os.path.join(image_dir , "*.tif")))
    if seed is not None:
        order = np.random.default_rng(seed).permutation(len(image_paths))
        image_paths = [image_paths[i] for i in order]

    n_images = len(image_paths)
    n_train = int(n_images * train_fraction)
    n_train_val = int(n_images * train_val_fraction)

    for split in ("train" , "validation" , "test"):
        os.makedirs(os.path.join(output_folder , split , "images") , exist_ok=True)
        os.makedirs(os.path.join(output_folder , split , "labels") , exist_ok=True)

    for index , im_path in enumerate(image_paths):
        # Build the label path from the file name only, so that directory
        # names containing "images" are never rewritten.
        im_name = os.path.basename(im_path)
        lbl_name = im_name.replace("Rec.tif" , "Rec_labels.tif")
        lbl_path = os.path.join(label_dir , lbl_name)

        if index < n_train:
            split = "train"
        elif index < n_train_val:
            split = "validation"
        else:
            split = "test"

        copyfile(im_path , os.path.join(output_folder , split , "images" , im_name))
        copyfile(lbl_path , os.path.join(output_folder , split , "labels" , lbl_name))

        

# Split the rescaled dataset into train / validation / test folders.
_make_split(output_folder)