# Notebook on segmentation of cluster of cells

Author : Aaron MAMANN

## Libraries

In [1]:
import os
import PIL
from PIL import Image, ImageOps, ImageEnhance, __version__ as PILLOW_VERSION
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy
import re
import random
from natsort import natsorted
import glob
from skimage import img_as_ubyte
import skimage 
from skimage.transform import rescale, resize, downscale_local_mean
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision 
from torchvision import transforms
#from torchmetrics import Dice 
from scipy.spatial import distance 
import shutil
#import cv2
import csv
from skimage.measure import label, regionprops, regionprops_table
import tifffile
import tifffile as tiff
from pathlib import Path
from tqdm.notebook import tqdm


In [7]:
import torch
from torchmetrics import JaccardIndex

preds  = torch.tensor([0, 0, 1, 0])
target = torch.tensor([1, 1, 1, 0])

Jac=JaccardIndex(task="binary",average='none',num_classes=2)
Jac(preds, target)
#dice = Dice(average=None,num_classes=2)

tensor(0.3333)

## Main Function : Galbor Filter with specific Preprocessing and PostProcessing Functions 

In [8]:

def create_masks(file_dir,image_files_name,mask_files_name,gabor_parameter,fill_cells,experiment_name='CNV',save_predicted_mask=True,compute_score=True,ratio_first_thresh=300/(1920*1440),ratio_small_object_thresh=3700/(1920*1440),ratio_thresh_big_objects=30000/(1920*1440),ratio_canal_diameter=100/1440 ):

    experiment_types_dir = [  "".join([file_dir,'/',f]) for f in natsorted(os.listdir(file_dir)) if experiment_name in f]  
    
    if compute_score : 
        score_experiment=dict()
    
    for type_dir in experiment_types_dir:
        l=0
        conditions_experiment= [  f for f in natsorted(os.listdir(type_dir) ) if experiment_name in f]    
        for condition in conditions_experiment : 
            sequences=[ "".join([type_dir,'/',condition,'/',image_files_name,'/',f]) for f in  natsorted(os.listdir( "".join([type_dir,'/',condition,'/',image_files_name]) )) if experiment_name in f]
            if save_predicted_mask :         
                Path("".join([type_dir,'/',condition,'/masks_predicted/'])).mkdir(parents=True, exist_ok=True)
                                              
            for sequence in sequences:
          
                images_in_tiff = tifffile.imread(sequence)              
                if np.ndim(images_in_tiff)==2 :
                    images_in_tiff = np.expand_dims(images_in_tiff, axis=0)
                nb_images_in_tiff = len(images_in_tiff)
                predicted_mask=list()
            
                if compute_score : 
                    masks_in_tiff = tifffile.imread( "".join([type_dir,'/',condition,'/',mask_files_name,'/',os.path.basename(sequence).replace(".tif", "_mask.tif") ]) )
                    if np.ndim(masks_in_tiff)==2 :
                        masks_in_tiff = np.expand_dims(masks_in_tiff, axis=0)
                    score_sequence=0
            
                j=0         
                for index_one_image in range(nb_images_in_tiff): 
                    nb_pixels=images_in_tiff[index_one_image].shape[0]*images_in_tiff[index_one_image].shape[1]
                    nb_vertical_pixels=images_in_tiff[index_one_image].shape[0]
                    thresh_small_objects=ratio_small_object_thresh*nb_pixels
                    first_thresh=ratio_first_thresh*nb_pixels
                    im_original=skimage.morphology.remove_small_objects(scipy.ndimage.binary_fill_holes(skimage.morphology.binary_closing(skimage.morphology.remove_small_objects(scipy.ndimage.binary_fill_holes(skimage.filters.gabor(img_as_ubyte(skimage.exposure.rescale_intensity(images_in_tiff[index_one_image])),gabor_parameter)[1]),min_size=first_thresh, connectivity=1),footprint=np.ones((fill_cells,fill_cells)))),min_size=thresh_small_objects,connectivity=1)
                    im_original = img_as_ubyte(im_original)
                    labeled_im = label(im_original)

                    canal_diameter=ratio_canal_diameter*nb_vertical_pixels 
                    props = regionprops(labeled_im)
                    for i in range(len(props)) :
                        if abs(props[i].bbox[2]-props[i].bbox[0])>canal_diameter :
                            im_original[props[i].bbox[0]:props[i].bbox[2]+1,props[i].bbox[1]:props[i].bbox[3]+1] = 0
        
                    im_original=img_as_ubyte(skimage.morphology.remove_small_objects(np.array(im_original,dtype=bool),min_size=thresh_small_objects,connectivity=1))
                    new_image=img_as_ubyte(np.pad(im_original, (50, 50),'constant',constant_values=255))  
                    thresh_big_objects=ratio_thresh_big_objects*nb_pixels 
                    im_big_objects=img_as_ubyte(skimage.morphology.remove_small_objects(np.array(np.pad(im_original, (50, 50), 'constant', constant_values=255),dtype=bool),min_size=thresh_big_objects)) 
                    new_image = new_image - im_big_objects
                    shape_im = new_image.shape
                    new_image=new_image[50:shape_im[0]-50,50:shape_im[1]-50]   
                    if compute_score : 
                        true_mask = img_as_ubyte(masks_in_tiff[index_one_image])
                        #dice = Dice(average=None,num_classes=2)
                        #dice = Dice(average=None)
                        #dice = Dice()
                        Jac=JaccardIndex(task="binary",average='none',num_classes=2)
                        score_sequence = score_sequence + Jac(torch.where(torch.from_numpy(new_image)> 0, 1, 0), torch.where(torch.from_numpy(true_mask)> 0, 1, 0)).numpy()    
                                                                         
                    if j==0:
                        predicted_mask=[np.array(new_image, dtype=np.uint16)]                 
                    else:
                        predicted_mask=np.append(predicted_mask,[np.array(new_image, dtype=np.uint16)],axis=0)
                    j=j+1
                 
                if save_predicted_mask :       
                    tiff.imsave("".join([type_dir,'/',condition,'/masks_predicted/',os.path.basename(sequence)]),predicted_mask)
            
                if compute_score : 
                    score_sequence = score_sequence/nb_images_in_tiff
                    score_experiment[sequence] = score_sequence
                l=l+1
                if compute_score : 
                    print("".join([os.path.basename(type_dir),',',condition,': Tiff ',str(os.path.basename(sequence).replace(".tif", " ")),'segmented (Tiff n°', str(l) ,'), gabor param ',str(gabor_parameter), ', fill param ', str(fill_cells), ', score: ', str(round(score_sequence, 4)) ]))        
    if compute_score : 
        return score_experiment

                            

In [4]:
# Clémence you have to simulate that, the following part is the training and the test which allowed to find the value of gabor parameter (equal to 0.78)
# and the value of the fill cell parameter (equal to 5) 

file_dir='/Users/aaronmamann/Desktop/Cleaned_Datasets_Clemence/DT2' # change this into your own path
image_files_name = 'Source images'
mask_files_name = 'Masks'

create_masks(file_dir,image_files_name,mask_files_name,gabor_parameter=0.78,fill_cells=5,save_predicted_mask=True,compute_score=True)

  tiff.imsave("".join([type_dir,'/',condition,'/masks_predicted/',os.path.basename(sequence)]),predicted_mask)


CNV1,CNV1_Coll_Ct: Tiff CNV006_20210430_782_t8 segmented (Tiff n°1), gabor param 0.78, fill param 5, score: 0.8529
CNV1,CNV1_Coll_Ct: Tiff CNV006_20210430_787_t8 segmented (Tiff n°2), gabor param 0.78, fill param 5, score: 0.7381
CNV1,CNV1_Coll_Ct: Tiff CNV006_20210430_791_t8 segmented (Tiff n°3), gabor param 0.78, fill param 5, score: 0.856
CNV1,CNV1_Coll_Ct: Tiff CNV006_20210430_793_t8 segmented (Tiff n°4), gabor param 0.78, fill param 5, score: 0.8054
CNV1,CNV1_Coll_Ct: Tiff CNV010_20210606_065_t1 segmented (Tiff n°5), gabor param 0.78, fill param 5, score: 0.8955
CNV1,CNV1_Coll_Ct: Tiff CNV010_20210606_065_t25 segmented (Tiff n°6), gabor param 0.78, fill param 5, score: 0.848
CNV1,CNV1_Coll_Ct: Tiff CNV010_20210606_066_t1 segmented (Tiff n°7), gabor param 0.78, fill param 5, score: 0.8395
CNV1,CNV1_Coll_Ct: Tiff CNV010_20210606_066_t24 segmented (Tiff n°8), gabor param 0.78, fill param 5, score: 0.9027
CNV1,CNV1_Coll_Ct: Tiff CNV010_20210606_067_t1 segmented (Tiff n°9), gabor param

KeyboardInterrupt: 

# Allocate randomly .tif file into the training set and the test set

In [3]:
# This function moves some tif files into a file used for training (tuning the gabor parameter, 
# fill parameter, minimum number of pixels parameter) and into another file used for testing

def create_training_and_test_files_for_each_experiment(file_dir,image_files_name,mask_files_name,experiment_name='CNV'):
                 
    l=0
    sequences=[ "".join([file_dir,'/',image_files_name,'/',f]) for f in  natsorted(os.listdir( "".join([file_dir,'/',image_files_name]) )) if experiment_name in f]
    training_set=random.sample(sequences, k=int(len(sequences)*0.7))
    test_set=list(set(sequences) - set(training_set))
    
    Path("".join([file_dir,'/training/CNV_mutiples/Source images/'])).mkdir(parents=True, exist_ok=True)
    Path("".join([file_dir,'/training/CNV_mutiples/Masks/'])).mkdir(parents=True, exist_ok=True)
    
    for sequence in training_set:
        os.replace(sequence, "".join([file_dir,'/training/CNV_mutiples/Source images/',os.path.basename(sequence)]))
        os.replace( "".join([file_dir,'/',mask_files_name,'/',os.path.basename(sequence).replace(".tif", "_mask.tif") ]), "".join([file_dir,'/training/CNV_mutiples/Masks/',os.path.basename(sequence).replace(".tif", "_mask.tif") ]))
        
    Path("".join([file_dir,'/test/CNV_mutiples/Source images/'])).mkdir(parents=True, exist_ok=True)
    Path("".join([file_dir,'/test/CNV_mutiples/Masks/'])).mkdir(parents=True, exist_ok=True)
    
    for sequence in test_set:
        os.replace(sequence, "".join([file_dir,'/test/CNV_mutiples/Source images/',os.path.basename(sequence)]))
        os.replace( "".join([file_dir,'/',mask_files_name,'/',os.path.basename(sequence).replace(".tif", "_mask.tif") ]), "".join([file_dir,'/test/CNV_mutiples/Masks/',os.path.basename(sequence).replace(".tif", "_mask.tif") ]))
     
    shutil.rmtree("".join([file_dir,'/',image_files_name]) ) 
    shutil.rmtree("".join([file_dir,'/',mask_files_name]) ) 
       
         


In [10]:
# We execute this function to create a training file (with images and masks) and a test file (with images and masks)

file_dir='/Users/aaronmamann/Desktop/Cleaned_Datasets_Clemence/Validation' # change this into your own path
create_training_and_test_files_for_each_experiment(file_dir=file_dir,image_files_name='Source images',mask_files_name='Masks',experiment_name='CNV')


## Tuning hyperparameters (gabor, fill, number pixels for mask) on training set

In [5]:
file_dir = '/Users/clemence/Documents_Clémence/Analysis/Tracking algorithm/Tracking-seg_Aaron-Gus_CNV/230113-CNV-optimization/training' # change this into your own path
image_files_name = 'Source images'
mask_files_name = 'Masks'

# We are going to tune jointly the gabor parameter and the fill parameter on the training set

hyperparameter_candidates = [[0.76,5],[0.77,5],[0.78,5],[0.79,5],[0.76,10],[0.77,10],[0.78,10],[0.79,10],[0.76,15],[0.77,15],[0.78,15],[0.79,15],[0.76,20],[0.77,20],[0.78,20],[0.79,20] ]
dice_score_hyperparameters = list(map(lambda x: [x[0],x[1],np.array(list(create_masks(file_dir,image_files_name,mask_files_name,gabor_parameter=x[0],fill_cells=x[1],save_predicted_mask=False).values())).mean() ] , hyperparameter_candidates))
df_dice_scores=pd.DataFrame(dice_score_hyperparameters)
#df_dice_scores.columns = ['gabor_param', 'fill_param', 'Min_pixels', 'Dice scores']
df_dice_scores.columns = ['gabor_param', 'fill_param', 'Dice scores']
df_dice_scores

NameError: name 'JaccardIndex' is not defined

In [11]:
file_dir = '/Users/clemence/Documents_Clémence/Analysis/Tracking algorithm/Tracking-seg_Aaron-Gus_CNV/230113-CNV-optimization/training' # change this into your own path
image_files_name = 'Source images'
mask_files_name = 'Masks'

# We are going to tune jointly the gabor parameter and the fill parameter on the training set

hyperparameter_candidates = [[0.78,3],[0.785,3],[0.79,3],[0.78,5],[0.785,5],[0.79,5],[0.78,7],[0.785,7],[0.79,7],[0.78,10],[0.785,10],[0.79,10],[0.78,12],[0.785,12],[0.79,12] ]
dice_score_hyperparameters = list(map(lambda x: [x[0],x[1],np.array(list(create_masks(file_dir,image_files_name,mask_files_name,gabor_parameter=x[0],fill_cells=x[1],save_predicted_mask=False).values())).mean() ] , hyperparameter_candidates))
df_dice_scores=pd.DataFrame(dice_score_hyperparameters)
#df_dice_scores.columns = ['gabor_param', 'fill_param', 'Min_pixels', 'Dice scores']
df_dice_scores.columns = ['gabor_param', 'fill_param', 'Dice scores']
df_dice_scores

  return _convert(image, np.uint8, force_copy)


CNV_mutiples,CNV_1: Tiff CNV006_20210430_782_t8 segmented (Tiff n°1), gabor param 0.78, fill param 3, score: 0.7618
CNV_mutiples,CNV_1: Tiff CNV006_20210430_787_t8 segmented (Tiff n°2), gabor param 0.78, fill param 3, score: 0.576
CNV_mutiples,CNV_1: Tiff CNV006_20210430_793_t8 segmented (Tiff n°3), gabor param 0.78, fill param 3, score: 0.6746
CNV_mutiples,CNV_1: Tiff CNV010_20210606_065_t1 segmented (Tiff n°4), gabor param 0.78, fill param 3, score: 0.8151
CNV_mutiples,CNV_1: Tiff CNV010_20210606_066_t24 segmented (Tiff n°5), gabor param 0.78, fill param 3, score: 0.823
CNV_mutiples,CNV_1: Tiff CNV010_20210606_067_t1 segmented (Tiff n°6), gabor param 0.78, fill param 3, score: 0.8531
CNV_mutiples,CNV_1: Tiff CNV010_20210606_095_t31 segmented (Tiff n°7), gabor param 0.78, fill param 3, score: 0.822
CNV_mutiples,CNV_1: Tiff CNV010_20210606_095_t49 segmented (Tiff n°8), gabor param 0.78, fill param 3, score: 0.8476
CNV_mutiples,CNV_1: Tiff CNV010_20210606_111_t31 segmented (Tiff n°9), g

Unnamed: 0,gabor_param,fill_param,Dice scores
0,0.78,3,0.711559
1,0.785,3,0.71205
2,0.79,3,0.711206
3,0.78,5,0.71185
4,0.785,5,0.711992
5,0.79,5,0.711666
6,0.78,7,0.709555
7,0.785,7,0.709594
8,0.79,7,0.709627
9,0.78,10,0.706074


In [15]:
# after having tuned the gabor and fill parameter, we are going to tune the number of pixels threshold (every mask below that number of pixels is removed) 

hyperparameter_candidates = [3600,3650,3700,3750,3800]
#dice_score_pixels_nb = list(map(lambda x: [0.785,5,x,np.array(list(create_masks(file_dir,image_files_name,mask_files_name,gabor_parameter=0.785,fill_cells=5, thresh_small_objects = x ,save_predicted_mask=False).values())).mean() ] , hyperparameter_candidates))
dice_score_pixels_nb = list(map(lambda x: [0.785,5,x,np.array(list(create_masks(file_dir,image_files_name,mask_files_name,gabor_parameter=0.785,fill_cells=5, save_predicted_mask=False).values())).mean() ] , hyperparameter_candidates))
dice_score_pixels_nb=pd.DataFrame(dice_score_pixels_nb)
dice_score_pixels_nb.columns = ['gabor_param', 'fill_param', 'Min_pixels', 'Dice scores']
dice_score_pixels_nb

  return _convert(image, np.uint8, force_copy)


CNV_mutiples,CNV_1: Tiff CNV006_20210430_782_t8 segmented (Tiff n°1), gabor param 0.785, fill param 5, score: 0.744
CNV_mutiples,CNV_1: Tiff CNV006_20210430_787_t8 segmented (Tiff n°2), gabor param 0.785, fill param 5, score: 0.5856
CNV_mutiples,CNV_1: Tiff CNV006_20210430_793_t8 segmented (Tiff n°3), gabor param 0.785, fill param 5, score: 0.6736
CNV_mutiples,CNV_1: Tiff CNV010_20210606_065_t1 segmented (Tiff n°4), gabor param 0.785, fill param 5, score: 0.8112
CNV_mutiples,CNV_1: Tiff CNV010_20210606_066_t24 segmented (Tiff n°5), gabor param 0.785, fill param 5, score: 0.8228
CNV_mutiples,CNV_1: Tiff CNV010_20210606_067_t1 segmented (Tiff n°6), gabor param 0.785, fill param 5, score: 0.8528
CNV_mutiples,CNV_1: Tiff CNV010_20210606_095_t31 segmented (Tiff n°7), gabor param 0.785, fill param 5, score: 0.8137
CNV_mutiples,CNV_1: Tiff CNV010_20210606_095_t49 segmented (Tiff n°8), gabor param 0.785, fill param 5, score: 0.8426
CNV_mutiples,CNV_1: Tiff CNV010_20210606_111_t31 segmented (Ti

Unnamed: 0,gabor_param,fill_param,Min_pixels,Dice scores
0,0.785,5,3600,0.711992
1,0.785,5,3650,0.711992
2,0.785,5,3700,0.711992
3,0.785,5,3750,0.711992
4,0.785,5,3800,0.711992


## Predict on Test set with best hyperparameters

In [17]:
file_dir = '/Users/clemence/Documents_Clémence/Analysis/Tracking algorithm/Tracking-seg_Aaron-Gus_CNV/230113-CNV-optimization/test' # change this into your own path
image_files_name = 'Source images'
mask_files_name = 'Masks'
#dice_scores = np.array(list(create_masks(file_dir,image_files_name,mask_files_name,gabor_parameter=0.78,fill_cells=5,thresh_small_objects=3700,save_predicted_mask=True,compute_score=True).values()))
dice_scores = np.array(list(create_masks(file_dir,image_files_name,mask_files_name,gabor_parameter=0.785,fill_cells=5,thresh_small_objects=3700,save_predicted_mask=True,compute_score=True).values()))

print('The mean of all the dice scores on the test set is :')
print(dice_scores.mean())
print ('The standard deviation of all the dice scores on the test set is')
print(dice_scores.std())


FileNotFoundError: [Errno 2] No such file or directory: '/Users/aaronmamann/Desktop/Cleaned_Datasets_Clemence/Validation/test'