In [10]:
#Note:
#In this tutorial you will learn how to use the CAC Python library for image segmentation with U-Net
#We provide data consisting of 3 original images and 3 binary labels, each with a size of 512 x 512 pixels

In [11]:
#Import external libraries
import numpy
import os
import shutil
import time

In [12]:
#Import CAC python library
from cac import model, trainer, utilities

In [13]:
#Before building the training and test sets, verify that the image and its label have the same size
#
#An image is checked with: utilities.read_single_image(filename=None, forced_size=None, scaled=False, info=False)
#  filename    : the image file to load; the supported formats follow the current opencv capabilities
#  forced_size : resizes the loaded image using the opencv tool
#  scaled      : scales the values of the image to 0 - 1
#  info        : provides information regarding the resizing result
#
#The same function can be reused later for prediction or visualization
#The size does not need to change for this check, hence forced_size=None
checked_image = utilities.read_single_image(
    filename='data/images/0.png',
    forced_size=None,
    scaled=False,
    info=True,
)

In [14]:
#A label is checked with: utilities.read_single_label(filename=None, forced_size=None, scaled=False, info=False)
#  filename    : the label file to load; the supported formats follow the current opencv capabilities
#  forced_size : resizes the loaded label using the opencv tool
#  scaled      : scales the loaded values to 0 - 1
#  info        : provides information regarding the resizing result
#
#The same function can be reused later for prediction or visualization
#The size does not need to change for this check, hence forced_size=None
checked_label = utilities.read_single_label(
    filename='data/labels/0.png',
    forced_size=None,
    scaled=False,
    info=True,
)

In [15]:
#Once the quality of the images and labels has been confirmed, create an image pool and a label pool
#
#The image pool is created with: utilities.read_images_from_folder(folder_dir=None, forced_size=None, scaled=True)
#  folder_dir  : the folder containing all images for training; we recommend naming the images by enumeration
#  forced_size : resizes the loaded images using the opencv tool
#  scaled      : scales the values of the images to 0 - 1
#
#The images are resized to reduce the computational burden later; 64 x 64 is used here just for tutorial purposes!!!
#Remember, the size of the image should be divisible by 2 (a power of 2 such as 64 is a safe choice)
image_pool = utilities.read_images_from_folder(
    folder_dir='data/images',
    forced_size=(64, 64),
    scaled=True,
)

In [16]:
#The label pool is created with: utilities.read_labels_from_folder(folder_dir=None, forced_size=None, scaled=True)
#  folder_dir  : the folder containing all labels for training; we recommend naming the labels by enumeration
#  forced_size : resizes the loaded labels using the opencv tool
#  scaled      : scales the loaded values to 0 - 1
#
#The labels are resized to reduce the computational burden later; 64 x 64 is used here just for tutorial purposes!!!
#Remember, the size of the label should be divisible by 2 (a power of 2 such as 64 is a safe choice)
label_pool = utilities.read_labels_from_folder(
    folder_dir='data/labels',
    forced_size=(64, 64),
    scaled=True,
)

In [17]:
#With both pools available, the training, test, and prediction sets can be created
#
#This is done with: utilities.create_set(pool_image_array=None, pool_label_array=None, no_training_samples=None, no_test_samples=None, documentation_path=None, random_seed=1234567)
#  pool_image_array    : the image pool variable created before --> image_pool
#  pool_label_array    : the label pool variable created before --> label_pool
#  no_training_samples : the number of training samples
#  no_test_samples     : the number of test samples
#  documentation_path  : the function produces an excel file containing details of which images go to the
#                        training, test, or prediction samples (presumably written to this path)
#  random_seed         : ensures reproducibility (not passed below, so the default shown above applies)
#
#The function randomly splits the images into training, test, and prediction sets
#If you only fill "no_training_samples", you will only get a training and a test set
#If you fill both "no_training_samples" and "no_test_samples", you will get training, test, and prediction sets
#Note that the number of training samples + test samples + prediction samples = your total images
#
#The tutorial data has 3 images: 1 is used as training sample, 1 as test sample, and the remaining 1 as prediction sample
(
    training_images, training_labels,
    test_images, test_labels,
    prediction_images, prediction_labels,
) = utilities.create_set(
    pool_image_array=image_pool,
    pool_label_array=label_pool,
    no_training_samples=1,
    no_test_samples=1,
    documentation_path='data',
)

In [18]:
#With the training, test, and prediction samples prepared, the U-Net model can be created
#The CAC Python library only provides a built-in vanilla U-Net architecture
#If you want a different architecture, you can replace this code section with any Tensorflow architecture
#
#A new U-Net model is defined with: model.create_vanilla_unet(training_images, training_labels, encoder_depth=5, number_filters_at_first_encoder=64,
#pooling_size=(2,2), kernel_size=(3,3), batchnorm_axis=-1, learning_rate=1E-3, distributed=False, info=False)
#This function allows you to define your own pooling size, kernel size, batchnorm axis, learning rate, etc.
#We recommend leaving pooling_size, kernel_size, and batchnorm_axis untouched !!!
#  distributed : with the GPU version of this library installed, "distributed=True" activates the Horovod library to allow training with GPU
#                without the GPU version installed, "distributed=True" will give you an error
#  info        : "info=True" gives you a summary of the architecture
#Make sure the output of this function is assigned to a variable --> this variable is passed to the trainer
#Here encoder_depth=4 is used instead of the default of 5 shown in the signature above
unet = model.create_vanilla_unet(
    training_images,
    training_labels,
    encoder_depth=4,
    number_filters_at_first_encoder=64,
    learning_rate=1e-3,
    distributed=False,
    info=True,
)

In [20]:
#Once the model is defined, proceed to the training
#
#Training is done with: trainer.train_with_generator(training_images, training_labels, model, batch_size, epochs, distributed=False, save_path=None)
#  batch_size, epochs : can be modified here
#  distributed        : with the GPU version of this library installed, "distributed=True" activates the Horovod library to allow training with GPU
#                       without the GPU version installed, "distributed=True" will give you an error
#  save_path          : for saving the trained model
#
#With train_with_generator, the library internally creates a generator for you, which helps you perform augmentation
trained_unet = trainer.train_with_generator(
    training_images,
    training_labels,
    unet,
    batch_size=1,
    epochs=10,
    distributed=False,
    save_path='results',
)

In [21]:
#This section helps you select good hyperparameters
#The selection is based on the accuracy in predicting the test set
#
#The metric is computed with: trainer.calculate_image_metric(test_images, test_labels, model, save_path=None)
#  save_path : for saving the test metric --> it will produce a text file with the name "test_error.txt"
#
#Note that the error is very high here; this run is only for tutorial purposes
test_error = trainer.calculate_image_metric(
    test_images,
    test_labels,
    trained_unet,
    save_path='results',
)

In [22]:
#Once all the training is done, the model can be used to make predictions
#
#The prediction is scored with: trainer.calculate_jaccard_score(test_images, test_labels, model, reference_label=None, save_path=None)
#  save_path       : for saving the prediction metric --> it will produce a text file with the name "jaccard.txt"
#  reference_label : if given, your prediction result is superposed with the reference label
#                    (knowing that the geometry of the chip stays the same)
#
#If you do not have any prediction label for the accuracy calculation, you can skip this section
prediction_error = trainer.calculate_jaccard_score(
    prediction_images,
    prediction_labels,
    trained_unet,
    save_path='results',
)