# iQ Winter School 2018 on Machine Learning Applied to Quantitative Analysis of Medical Images
## Hands-on Session 2 - Challenge

### Your turn!

Here you have the chance to beat the good old simplistic image processing techniques by applying what you've learned in the tutorial. We provide you with several helper functions that you may use as you wish. Although these functions point you in the direction of using a patch-based CNN, feel free to implement your own ideas on how to approach the problem!

In [None]:
# Some imports
import numpy as np
from random import randint
import os
import timeit

from keras import metrics, optimizers
import tensorflow as tf
from keras.models import Sequential, load_model
from keras.layers import Conv2D, MaxPooling2D, Activation, Dropout, Flatten, Dense
from keras.utils import plot_model
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import TensorBoard
import keras
from keras import backend as K

import skimage.segmentation
import sklearn.feature_extraction.image
import sklearn.model_selection
from sklearn.metrics import roc_auc_score
from pylab import *

import warnings
warnings.filterwarnings("ignore",category=FutureWarning)

In [None]:
# Patch extraction functions. 

# Use these if you'd like to follow a patchwise classification approach:
# the images are subdivided into patches, which are then fed into a classifier (eg, a CNN) 
# together with the label of its center pixel.
        
def Get_Patch(center_x,center_y,patch_s,image):
    """
    Gets the patch with size patch_s at position (center_x, center_y) in the image.

    The patch starts patch_s//2 pixels before the center on each axis and spans
    exactly patch_s pixels. Odd sizes are therefore symmetric around the center;
    even sizes hold one more pixel before the center than after it.

    No bounds checking is performed: the caller must keep the center far enough
    from the image borders, otherwise the slice is truncated or (for a negative
    start index) wraps around.

    Input:
        center_x: line position of the center pixel
        center_y: col position of the center pixel
        patch_s: patch size
        image: whole image to extract patch from (img_size, img_size)
    Output:
        new_patch: an array of size [patch_s,patch_s]. It is obtained by plain
                   slicing, so for a NumPy array it is a view on image, not a copy.
    """

    half_patch = patch_s // 2

    start_x = center_x - half_patch
    start_y = center_y - half_patch

    # The end is derived from the start so that the extent is always patch_s,
    # also when patch_s is even
    end_x = start_x + patch_s
    end_y = start_y + patch_s

    return image[start_x:end_x, start_y:end_y]

def Extract_Patches(path_save,patch_s,X,Y):
    """
    Extracts a class-balanced set of patches from the images and saves them in
    different batches, one per image (makes it easier to load to memory).

    For every image, one patch is extracted for each pixel with label 0 (the
    minority class) and the same number of patches is drawn at random, with
    replacement, from the pixels with a non-zero label (the majority class).
    Only pixels at least half a patch away from every border are used as patch
    centers. If an image has no majority-class pixel among those centers, only
    the minority patches are saved and a message is printed.

    Inputs:
        path_save = path to save the extracted patches
        patch_s = size of the extracted (squared) patches
        X = images (n_images, img_size, img_size)
        Y = labels (n_images, img_size, img_size)
    Outputs:
        Returns an empty tuple. For image k the files "batch_<k>.npy" (patches)
        and "labels_<k>.npy" (0 = minority, 1 = majority) are written to path_save.
    """

    num_images=X.shape[0]
    lines_s=X.shape[1]
    cols_s=X.shape[2]

    # accounts for the image borders
    half_patch=int(patch_s/2)
    end_l=lines_s-half_patch
    end_c=cols_s-half_patch

    for k in range(0,num_images):
        print("Processing Image: ",k)
        image=X[k,:,:]

        # labels of all the pixels that can be the center of a full patch
        interior=np.asarray(Y[k,half_patch:end_l,half_patch:end_c])

        # np.argwhere lists coordinates line by line, i.e. in the same order as
        # a nested loop over lines and columns; the offset maps them back to
        # coordinates in the full image
        minority=np.argwhere(interior==0)+half_patch
        majority=np.argwhere(interior!=0)+half_patch

        # one patch for each position where there is a membrane (label = 0)
        patches=[Get_Patch(i,j,patch_s,image) for i,j in minority]
        patches_labels=[0]*len(patches)

        total_patches=len(patches) # total number of patches extracted from the minority class
        if total_patches and len(majority)==0:
            # Nothing to balance against; sampling by rejection would never end here
            print("No majority-class pixel in image",k,"- only minority patches are saved")
        elif total_patches:
            # we extract the same number of patches for the majority class.
            # NOTE: np.random is called explicitly because the bare name randint
            # may be shadowed by the file's later `from pylab import *`; its upper
            # bound is exclusive, so every majority pixel can be picked.
            picks=np.random.randint(0,len(majority),size=total_patches)
            for i,j in majority[picks]:
                patches.append(Get_Patch(i,j,patch_s,image))
            patches_labels.extend([1]*total_patches)

        name_patch=os.path.join(path_save,"batch_"+str(k)+".npy")
        name_labels=os.path.join(path_save,"labels_"+str(k)+".npy")
        patches=np.array(patches)
        patches_labels=np.array(patches_labels)
        np.save(name_patch,patches)
        np.save(name_labels,patches_labels)
    return()
       
    
def Extract_Patches_Test(path_save,patch_s,X,Y):
    """
    Extracts every possible patch from the images (one for each pixel that is
    at least half a patch away from the borders), together with a binary label.
    This function can be used to visualize the results and compute the DICE score.
    Inputs:
        path_save = path to save the extracted patches
        patch_s = size of the extracted (squared) patches
        X = images (n_images, img_size, img_size)
        Y = labels (n_images, img_size, img_size)
    Outputs:
        No outputs. For image k the files "batch_test<k>.npy" and
        "labels_test<k>.npy" (both float32) are written to path_save.
    """

    n_imgs, n_rows, n_cols = X.shape[0], X.shape[1], X.shape[2]

    # margin that keeps every patch fully inside the image
    margin = int(patch_s/2)
    row_stop = n_rows - margin
    col_stop = n_cols - margin

    for idx in range(n_imgs):
        print("Processing Image: ",idx)
        current = X[idx,:,:]

        # patch centers, line by line
        centers = [(r, c)
                   for r in range(margin, row_stop)
                   for c in range(margin, col_stop)]

        patch_list = [Get_Patch(r, c, patch_s, current) for r, c in centers]
        # label 0 stays 0, any other label value becomes 1
        label_list = [0 if Y[idx,r,c]==0 else 1 for r, c in centers]

        patch_file = os.path.join(path_save,"batch_test"+str(idx)+".npy")
        label_file = os.path.join(path_save,"labels_test"+str(idx)+".npy")
        np.save(patch_file, np.array(patch_list,dtype='float32'))
        np.save(label_file, np.array(label_list,dtype='float32'))

# If you run into memory problems, you may want to downsample the original images
def resize_image(image, final_size):
    """
    Downsamples an image to final_size after smoothing it with a Gaussian filter.
    Input:
        image: array to resize
        final_size: target shape, with one entry per dimension of image
    Output:
        the smoothed image resized to final_size by skimage.transform.resize
    """
    # Imported locally: the file-level imports bring in neither scipy.ndimage
    # nor skimage.transform
    from scipy import ndimage
    import skimage.transform

    # per-axis size ratio between the input and the output, used as the filter width
    ratio = np.array(image.shape)/np.array(final_size)
    smoothed = ndimage.gaussian_filter(image, sigma = ratio) # smooth the image first
    return skimage.transform.resize(smoothed, final_size)
        
# Performance metrics
def Dice(ground_truth,segmentation):
    """
    Computes (and prints) the Dice coefficient between two segmentations:
    2*sum(segmentation where ground_truth==1) / (sum(ground_truth)+sum(segmentation))
    Input:
        ground_truth: reference labels (1 = foreground), any array-like
        segmentation: segmentation to evaluate, same shape as ground_truth
    Output:
        dice_coef: the Dice coefficient; 1.0 when both inputs sum to zero
                   (two empty segmentations agree perfectly)
    """
    ground_truth = np.asarray(ground_truth)
    segmentation = np.asarray(segmentation)

    denominator = np.sum(ground_truth)+np.sum(segmentation)
    if denominator == 0:
        # avoids 0/0 (NaN), which would also turn any average over images into NaN
        dice_coef = 1.0
    else:
        intersection = np.sum(segmentation[ground_truth==1]*2.0)
        dice_coef = intersection/denominator
    print("dice coef: ",dice_coef)
    return(dice_coef)
    
def Pixel_Error(ground_truth,segmentation):
    """
    Computes (and prints) the fraction of pixels whose binarized prediction
    differs from the ground truth.
    Input:
        ground_truth: array of reference labels
        segmentation: array of predictions; values below .5 count as 0, all others as 1
    Output:
        pix_err: 1 - (number of matching pixels / number of pixels)
    """
    hard_labels = np.where(segmentation < .5, 0, 1)
    n_correct = (hard_labels == ground_truth).sum()
    pix_err = 1 - (n_correct/ground_truth.size)
    print("pixel error: ",pix_err )
    return pix_err
    
def Score_AUC(ground_truth,segmentation):
    """
    Computes (and prints) the area under the ROC curve of a segmentation.
    Both inputs are flattened before being passed to roc_auc_score.
    Input:
        ground_truth: reference labels
        segmentation: predicted scores, same number of elements as ground_truth
    Output:
        auc: value returned by roc_auc_score
    """
    flat_truth = np.ravel(ground_truth)
    flat_scores = np.ravel(segmentation)
    auc = roc_auc_score(flat_truth, flat_scores)
    print("AUC: ",auc)
    return auc

With the code below you can extract patches from the images. Make sure that the path to the shared folder is set properly. This is important because the VM doesn't have enough memory to store the patches you are going to extract.

In [None]:
# Folder layout: <shared_folder>/challenge holds the data, and the patches are
# written to <shared_folder>/challenge/Patches
shared_folder = r'/home/user/Desktop/Shared_Windows/winter_school_ml' # change this accordingly
path_challenge = os.path.join(shared_folder, 'challenge')
path_patches = os.path.join(path_challenge, 'Patches')

# Make sure the output folder for the patches is there
if not os.path.exists(path_patches):
    os.makedirs(path_patches)

# Patch size in pixels - this is something you can play with
patch_s = 15

# Images and their labels
X, Y = (np.load(os.path.join(path_challenge, file_name))
        for file_name in ("data.npy", "labels.npy"))

# Balanced patches from all images, later used for training and validating
Extract_Patches(path_patches, patch_s, X, Y)

# All patches of a few images, for later evaluation and visualization
# (you shouldn't use them for training)
test_images = slice(20, 25)
Extract_Patches_Test(path_patches, patch_s, X[test_images], Y[test_images])

You have created 25 batches of about 100k patches each. You can now train a classifier on them (we advise you to keep 20 for the actual training and 5 for testing, where you can evaluate hyperparameters, etc.). 

For example, you can use a CNN like the one you explored in [Tutorial-Part2](../tutorial/tutorial_part2-final.ipynb):

In [None]:
num_batches = 20 # number of training batches (you created one per original image)
batch_size = 100 # number of training patches seen at each iteration of the optimizer
epochs = 1 # number of iterations over the whole training set
num_classes = 2 # number of classes

# shape of each input patch (patch_size, patch_size, 1) - 1 channel for grayscale images
patch_shape = (patch_s, patch_s, 1)

#Here you can create your own CNN architecture (or your own approach)
model = Sequential()


# Training is performed in batches: for each epoch, read a batch and feed it to the network
for e in range(epochs):
    for i in range(0,num_batches):
        x = np.load(os.path.join(path_patches,"batch_"+str(i)+".npy"))
        y = np.load(os.path.join(path_patches,"labels_"+str(i)+".npy"))
        y=keras.utils.to_categorical(y,num_classes) # one-hot encoding of the labels
        x=np.expand_dims(x,axis=3) # adds the channel axis
        # The outer loop already iterates over the epochs, so each call makes a
        # single pass over the current batch file (passing `epochs` here would
        # train epochs*epochs passes in total). `epochs` is the Keras 2 name of
        # the former `nb_epoch` argument.
        model.fit(x, y, batch_size=batch_size, epochs=1, verbose=1, validation_split=0.2)

Now that you have trained your model, you can apply it to make predictions on the test set images

In [None]:
img_ind = 0 # pick image index
x = np.load(os.path.join(path_patches,"batch_test"+str(img_ind)+".npy"))
x = np.expand_dims(x,axis=3)

# The test batch holds one patch per valid center pixel of a square image, so
# the side of the label map is the square root of the number of patches.
# Deriving it (instead of hard-coding it) keeps the cell working when the
# patch size or the image size is changed.
img_side = int(np.sqrt(x.shape[0]) + 0.5)

prob_segmentation=model.predict(x)
# keeps the probability of class 1 for every pixel
prob_segmentation=np.reshape(prob_segmentation,(img_side,img_side,-1))[:,:,1]

# ground truth labels
y = np.load(os.path.join(path_patches,"labels_test"+str(img_ind)+".npy"))
y = keras.utils.to_categorical(y,num_classes)
y = np.reshape(y, (img_side,img_side,-1))[:,:,1]

# binarize the probability map
threshold = 0.5
bin_segmentation = prob_segmentation > threshold

# Calculates the performance metrics
pix_er = Pixel_Error(y, bin_segmentation)
dice = Dice(y, bin_segmentation)
auc = Score_AUC(y, prob_segmentation) # for the AUC, the actual probabilities are used

You can also visualize the results:

In [None]:
# Reconstruct the image from the patches: each patch is centered on one pixel,
# so the center pixel of every patch, laid out on the grid of the label map,
# gives the image region that was segmented (any other pixel of the patch
# would give an image shifted with respect to the labels)
center = x.shape[1] // 2
img = np.reshape(x[:, center, center, 0], y.shape)

contours_ground_truth = skimage.segmentation.mark_boundaries(img, y > 0)
contours_segmentation = skimage.segmentation.mark_boundaries(img, bin_segmentation)

figure(figsize=(16,16))
subplot(221)
title('segmentation result', fontsize=18)
imshow(bin_segmentation, cmap='gray')
axis('off')
subplot(222)
title('ground truth', fontsize=18)
imshow(y, cmap='gray')
axis('off')

show()

That's it! You have developed a segmentation method which will hopefully generalize well enough to be applicable to the validation data. If you're happy with it you can save it by running the following:

In [None]:
# Write the trained model into the challenge folder; path_model is used again
# later to load it back
model_file_name = 'my_amazing_model.h5'
path_model = os.path.join(path_challenge, model_file_name)
model.save(path_model)

See you tomorrow for the final competition!

## Hands-on Session 3 - Award ceremony day
By now you should have received the password to unlock the validation data. It is now time to evaluate your method and rank it against the other participants'. 

Good luck!

In [None]:
# Function to test the trained model on the validation set
def Test_Validation(path_validation, model, n_samples=5, threshold=0.5):
    """
    Tests a trained model on the validation set and collects the performance
    metrics of each validation image.

    For every image i the files "batch_test<i>.npy" and "labels_test<i>.npy"
    are read from path_validation and the model is applied to all patches.
    The metrics are computed over all pixels at once, so no assumption is made
    on the image size or on the patch size.

    Relies on the module-level num_classes, Pixel_Error, Dice and Score_AUC.

    Inputs:
        path_validation = folder with the validation patches and labels
        model = trained model; predict() must return at least two values per
                patch, the second one being the score of class 1
        n_samples = number of validation images to evaluate
        threshold = scores above it count as class 1 when computing the Dice
    Outputs:
        pix, dice, auc = arrays of size n_samples with the pixel error, the
                         Dice coefficient and the AUC of every image
    """
    pix=np.zeros(n_samples)
    dice=np.zeros(n_samples)
    auc=np.zeros(n_samples)

    for i in range(n_samples):
        x = np.load(os.path.join(path_validation,"batch_test"+str(i)+".npy"))
        y = np.load(os.path.join(path_validation,"labels_test"+str(i)+".npy"))
        x = np.expand_dims(x,axis=3) # adds the channel axis

        # column 1 of the one-hot encoding: 1 where the pixel belongs to class 1
        y = keras.utils.to_categorical(y,num_classes)[:,1]

        # one row per patch; column 1 is the score of class 1
        segmentation = np.reshape(model.predict(x),(x.shape[0],-1))[:,1]

        pix[i] = Pixel_Error(y,segmentation) # binarizes the scores itself
        # the Dice is computed on the binarized segmentation, as it is done
        # when the model is evaluated on the test images
        dice[i] = Dice(y,segmentation > threshold)
        auc[i] = Score_AUC(y,segmentation) # the AUC uses the actual scores
    return(pix,dice,auc)

# Validation data is read from <path_challenge>/validation_data and its
# patches are written to <path_patches>/validation_data
validation_subfolder = "validation_data"
path_validation = os.path.join(path_challenge, validation_subfolder)
path_patches_validation = os.path.join(path_patches, validation_subfolder)

# Creates the output folder, if it isn't there yet
if not os.path.exists(path_patches_validation):
    os.makedirs(path_patches_validation)

# Loads the validation images and labels (this replaces the previous X and Y)
X, Y = (np.load(os.path.join(path_validation, file_name))
        for file_name in ("data_validation.npy", "labels_validation.npy"))

# One patch per pixel, so that whole images can be evaluated
Extract_Patches_Test(path_patches_validation,patch_s,X,Y)


In [None]:
# Reload the saved model and evaluate it on the validation patches
trained_model = load_model(path_model)
pix,dice,auc = Test_Validation(path_patches_validation, trained_model)

# Report every metric as mean +/- standard deviation over the validation images
for template, scores in (
    ("pixel error: %0.2f +/- %0.2f", pix),
    ("Dice coefficient: %0.2f +/- %0.2f", dice),
    ("Area Under the Curve: %0.2f +/- %0.2f", auc),
):
    print(template % (scores.mean(), scores.std()))

Please let us know your results so that we can rank the groups!