# 1st NOTEBOOK: Data preparation notebook

1st notebook to train a Unet
Use this notebook to prepare your data for training the Unet

Create ROIs from your analysis, then create an RGB image of the same size as the input image in which the cryoinjury is "red", the ventricle is "green" and the background is "blue".

![20220607__69_czi__s10.tif](attachment:c85616e2-b6a3-4558-8d29-b6923b16f713.tif)

Make sure that your images and their corresponding annotations have identical file names!

Steps to perform before using this notebook:
1. Create a folder "Data"
2. In "Data" create one folder "Raw_Images" and one folder "Raw_Annotations"
3. Copy your images into the folder ".../Data/Raw_Images"
4. Copy your annotations into the folder ".../Data/Raw_Annotations"

You can either create a new training set or add data to an already existing training set

# Load the packages

In [1]:
%load_ext autoreload

In [2]:
%autoreload
import random
import os
import filecmp
import numpy as np
import shutil

from skimage.io import imread,imshow,imsave
from skimage.transform import resize
import matplotlib.pyplot as plt


import sys
sys.path.append(r'.\Modules')

import trainDataLoader as TDL

# 1. Enter the path to "Data"

In [3]:
# Root folder "Data" that contains "Raw_Images" and "Raw_Annotations" -- adapt this path to your machine
path_data = r"D:\Prateek\Nick\DeepLearningTraining"

# Both raw sub-folders are derived from the root folder, annotations first.
path_raw_annotation, path_raw_images = (
    os.path.join(path_data, raw_name) for raw_name in ("Raw_Annotations", "Raw_Images")
)
# Echo the resolved paths so a wrong root folder is spotted immediately.
print(path_raw_annotation, path_raw_images, sep="\n")

D:\Prateek\Nick\DeepLearningTraining\Raw_Annotations
D:\Prateek\Nick\DeepLearningTraining\Raw_Images


In [4]:
# Compare the two raw folders: every image needs an annotation with the same file name and vice versa.
# `result` is reused by the deletion cell below.
result = filecmp.dircmp(path_raw_images, path_raw_annotation)

if result.left_only:
    print("These images have no annotation: ",result.left_only)
if result.right_only:
    print("These annotations have no images: ",result.right_only)
# Report a match only when NEITHER side has unmatched files; a plain `else` on the
# second `if` would also fire when only images without annotation were found.
if not (result.left_only or result.right_only):
    print("Annotations and Images match")

Annotations and Images match


# 2. Delete non-corresponding images

In [5]:
# WARNING: running this cell permanently deletes every image that has no annotation
# and every annotation that has no image (as listed by the folder comparison above).
unmatched_by_folder = (
    (path_raw_images, result.left_only),       # images without annotation
    (path_raw_annotation, result.right_only),  # annotations without image
)
for parent_folder, unmatched_names in unmatched_by_folder:
    for unmatched_name in unmatched_names:
        os.remove(os.path.join(parent_folder, unmatched_name))

# 3. If the folders "Annotations" or "Images" do not exist, they will be created here

In [6]:
# Parent folder -> sub-folders for the training, validation and test split.
new_folders = {"Annotations": ["Annotations","Annotations_Validation","Annotations_Test"],
                "Images":["Images", "Images_Validation", "Images_Test"]
                }
for parent_name, subfolder_names in new_folders.items():
    folder = os.path.join(path_data, parent_name)
    created = []
    # Check every sub-folder on its own: the parent may already exist while one of
    # its sub-folders is still missing (e.g. after a partial or manual setup).
    for subfolder_name in subfolder_names:
        subfolder = os.path.join(folder, subfolder_name)
        if not os.path.isdir(subfolder):
            # makedirs also creates the parent folder when it does not exist yet
            os.makedirs(subfolder, exist_ok=True)
            created.append(subfolder_name)
    if created:
        print("The folders were created:", (parent_name, created))
    else:
        print("The folder exists already:", folder)

The folders were created: ('Annotations', ['Annotations', 'Annotations_Validation', 'Annotations_Test'])
The folders were created: ('Images', ['Images', 'Images_Validation', 'Images_Test'])


# 4. Distribute the raw_images and raw_annotations to the folders

In [7]:
# Enter the percentage of data you want to use for validation and testing
random.seed(42)
percent_validation = 0.25
percent_test = 0.1

if percent_validation < 0 or percent_test < 0 or percent_validation + percent_test > 1:
    raise ValueError("percent_validation and percent_test must be >= 0 and sum to at most 1")

# Cumulative fractions of the shuffled file list at which the training part
# (`valid`) and the validation part (`validation_end`) end.
valid = 1 - percent_test - percent_validation
validation_end = 1 - percent_test

# Create a list of all files and shuffle them before distributing them.
# os.listdir returns the names in arbitrary order, so sort first; otherwise the
# seeded shuffle would not give the same split on every machine / run.
files = sorted(os.listdir(path_raw_images))
random.shuffle(files)

# Cut the shuffled list into three consecutive parts: train | validate | test
train, validate, test = np.split(files, [int(len(files)*valid), int(len(files)*validation_end)])

# The same file names are used for annotations and images, so both stay paired.
distribute_data = {"Annotations": {"Annotations": train,"Annotations_Validation":validate,"Annotations_Test": test},
                    "Images":{"Images": train, "Images_Validation": validate, "Images_Test": test}
                    }

# Execute to transfer data

In [8]:
#Execute to transfer data
# For every split folder: read each raw file, resize it with TDL.resize_trainingdata
# and write the result into <path_data>/<data>/<folders>/<file name>.
for data in distribute_data:
    # The raw folder paths already contain path_data; joining them with path_data
    # again would duplicate the prefix whenever path_data is a relative path.
    ori = path_raw_annotation if data == "Annotations" else path_raw_images

    for folders in distribute_data[data]:
        print("Copying the raw {} to folder: {}".format(data,folders))

        file_names = distribute_data[data][folders]
        if len(file_names) == 0:
            # An empty split (very small data sets) has nothing to transfer or
            # summarise; skipping avoids printing undefined / stale values below.
            print("No files assigned to folder: {}".format(folders))
            continue

        for images in file_names:
            data_ori = os.path.join(ori, images)
            data_dest = os.path.join(path_data, data, folders, images)

            # Read straight from the raw folder; the destination file is written
            # once with the resized data, so no intermediate copy is needed.
            im = imread(data_ori)
            # `data` ("Annotations" / "Images") is forwarded as the `mask` argument
            im_out = TDL.resize_trainingdata(im, mask=data)

            imsave(data_dest, im_out, check_contrast=False)

        # Summary of the last file processed for this folder
        print(im.shape, im.max(), im_out.shape, im_out.max())
        print("{} || {}".format(data_ori, data_dest))
        
        

Copying the raw Annotations to folder: Annotations
(6001, 6001, 3) 255 (512, 512, 3) 1
D:\Prateek\Nick\DeepLearningTraining\Raw_Annotations\2022-09-06%2017.47.04_3.tif || D:\Prateek\Nick\DeepLearningTraining\Annotations\Annotations\2022-09-06%2017.47.04_3.tif
Copying the raw Annotations to folder: Annotations_Validation
(6001, 6001, 3) 255 (512, 512, 3) 1
D:\Prateek\Nick\DeepLearningTraining\Raw_Annotations\2022-09-07%2016.03.39_6.tif || D:\Prateek\Nick\DeepLearningTraining\Annotations\Annotations_Validation\2022-09-07%2016.03.39_6.tif
Copying the raw Annotations to folder: Annotations_Test
(6001, 6001, 3) 255 (512, 512, 3) 1
D:\Prateek\Nick\DeepLearningTraining\Raw_Annotations\2022-09-06%2020.11.38_2.tif || D:\Prateek\Nick\DeepLearningTraining\Annotations\Annotations_Test\2022-09-06%2020.11.38_2.tif
Copying the raw Images to folder: Images
(6001, 6001, 3) 255 (512, 512, 3) 254
D:\Prateek\Nick\DeepLearningTraining\Raw_Images\2022-09-06%2017.47.04_3.tif || D:\Prateek\Nick\DeepLearningTr

# 5. Check whether there are images with a different size in the folders.

In [9]:
# Run TDL.check_images on every split folder and echo whatever it returns.
# NOTE(review): presumably check_images reports images with an unexpected size -- confirm in trainDataLoader.
for group_name, split_folders in distribute_data.items():
    for split_name in split_folders:
        print("In :", split_name)
        check = TDL.check_images(os.path.join(path_data, group_name, split_name))
        print(check)

In : Annotations
None
In : Annotations_Validation
None
In : Annotations_Test
None
In : Images
None
In : Images_Validation
None
In : Images_Test
None


# If you used this tool to transfer files, no images should have the wrong shape.
If some are wrong anyway, the <b>TDL.resize_trainingdata(image)</b> function can be used to convert them.

# 6. Convert masks and images to the correct datatype

In [None]:
# Re-process every file in the split folders that does not yet have the expected
# shape or, for annotations, whose maximum value is not 1.
for data in distribute_data:
    for folders in distribute_data[data]:
        folder_path = os.path.join(path_data, data, folders)
        for images in os.listdir(folder_path):

            data_ = os.path.join(folder_path, images)

            im = imread(data_)

            wrong_shape = im.shape != (512,512,3)
            # Logical `and` (not bitwise `&`): only annotations are checked for max == 1
            wrong_mask_values = data == "Annotations" and im.max() != 1

            if not (wrong_shape or wrong_mask_values):
                continue

            # Convert and save once, even when both checks fail for the same file.
            im_out = TDL.resize_trainingdata(im, mask=data)
            print(data_)
            print(im.max(),np.unique(im))
            print(im_out.max(),np.unique(im_out))
            imsave(data_, im_out, check_contrast=False)

# Now proceed to the next notebook <b>SegModels_Training.ipynb</b>