# MOrgAna workflow for advanced Python users
This workflow is intended for users with a programming background who want to analyse multiple image folders at once. Users can also use this notebook to select and adapt modules/functions specific to their purpose. This workflow follows the order of the scripts in python_example_scripts and explains the code shown in those scripts in more detail.

## Generate Masks
This section makes use of the code from the following scripts:
* '01_create_model_folder.py'
* '02_create_ground_truth.py'
* '03_train_networks.py'
* '04_predict_masks.py'
* '05_select_final_mask_method.py'

### 01_create_model_folder.py
The following code chooses images from the acquired dataset to form the training dataset for the generation of the model.



In [1]:
import os, glob, shutil
from tqdm.notebook import tqdm
import numpy as np
from numpy.random import default_rng

In [None]:
# select folder containing all image folders to be analysed
# (the commented lines are alternative example locations)
# parent_folder = os.path.join('test_data','2020-09-22_conditions')
# parent_folder = os.path.join('/','Users','jialelim', 'Desktop', 'example_dataset_ipynb', 'condB')
parent_folder = os.path.join('Y:',os.sep,'Jia_Le_Lim','morgana_example_datasets','gastruloids','condA')

print('Image subfolders found in: ' + parent_folder)
# check if the path exists; a missing folder is reported here instead of
# failing later when the subfolders are listed with os.walk
if os.path.exists(parent_folder):
    print('Path exists! Proceed!')
else:
    print('Warning! Path not found! Check parent_folder before proceeding.')

In [None]:
# ---- settings used to pick the training images ----

# names of subfolders that must not be treated as image folders
exclude_folder = ['model_']

# True: create one model for all folders; False: create one model for each image subfolder
combine_subfolders = True

# sampling step: every dN-th image goes into the training dataset;
# dN = 0 switches to a random selection of images
dN = 0

# offset into the sorted image list; increase it to leave out the first images
start = 0

In [None]:
def initialize_model_folder(folder, dN=30, start=0, combine=True):
    """Create the model folder for `folder` and copy training images into it.

    The model folder is created in the parent directory of `folder`: it is
    named ``model_`` when `combine` is true and ``model_<name of folder>``
    otherwise. A ``trainingset`` subfolder is created inside it, and a
    selection of the ``*.tif`` files found in `folder` is copied there. Each
    copy is prefixed with the name of `folder`; files that already exist in
    the training set are left untouched.

    Parameters
    ----------
    folder : str
        Image folder to sample training images from.
    dN : int
        Sampling step. Every dN-th image (counted from `start`, in sorted
        order) is used. With ``dN = 0`` about one tenth of the candidate
        images, and at least one, is drawn at random without replacement.
    start : int
        Number of leading images (in sorted order) to leave out. It is
        honoured in both the regular and the random selection mode.
    combine : bool
        True: share one ``model_`` folder between all image folders.
        False: use one model folder per image folder.

    Returns
    -------
    None
    """
    parent_dir, folder_name = os.path.split(folder)

    # create folders (model folder and its trainingset subfolder)
    model_name = 'model_' if combine else 'model_' + folder_name
    trainingset_folder = os.path.join(parent_dir, model_name, 'trainingset')
    os.makedirs(trainingset_folder, exist_ok=True)

    # collect images and extract trainingset file names
    flist = sorted(glob.glob(os.path.join(folder, '*.tif')))
    if dN:
        flist = flist[start::dN]
    else:
        # random mode: skip the excluded leading images first, then draw
        candidates = flist[start:]
        if candidates:
            rng = default_rng()
            n_pick = max(len(candidates) // 10, 1)
            picked = rng.choice(len(candidates), size=n_pick, replace=False)
            flist = [candidates[i] for i in picked]
        else:
            # rng.choice cannot draw from an empty population
            flist = []

    if not flist:
        print('Warning! No *.tif images selected from "' + folder + '". Nothing copied to the trainingset.')
        return

    # copy images to trainingset folder
    for f in flist:
        newf = os.path.join(trainingset_folder, folder_name + '_' + os.path.basename(f))
        if not os.path.exists(newf):
            shutil.copy(f, newf)

In [None]:
# compute parent folder as absolute path
parent_folder = os.path.abspath(parent_folder)

# find out all image subfolders in parent_folder (first directory level only)
folder_names = next(os.walk(parent_folder))[1]

# exclude folders in exclude_folder, plus every 'model_*' folder: the model
# folders are created inside parent_folder, so on a re-run with
# combine_subfolders = False an exact-name test alone would treat
# 'model_<subfolder>' as image data
folder_names = [
    g for g in folder_names
    if g not in exclude_folder and not g.startswith('model_')
]

for folder_name in tqdm(folder_names):
    folder_path = os.path.join(parent_folder, folder_name)

    # for every image subfolder, generate model folder and the trainingset
    initialize_model_folder(folder_path, dN=dN, start=start, combine=combine_subfolders)

### 02_create_ground_truth.py
This script creates binary masks (ground truths) for images copied into the trainingset folder.

In [None]:
# Run cell if starting from 02_create_ground_truth.py
import os, glob
from tqdm.notebook import tqdm
# select folder containing all image folders to be analysed
# (the commented lines are alternative example locations)
# parent_folder = os.path.join('test_data','2020-09-22_conditions')
# parent_folder = os.path.join('/','Users','jialelim', 'Desktop', 'example_dataset_ipynb', 'condA')
parent_folder = os.path.join('Y:',os.sep,'Jia_Le_Lim','morgana_example_datasets','gastruloids','condA')

print('Image subfolders found in: ' + parent_folder)
# check if the path exists; a missing folder is reported here instead of
# silently yielding no model folders in the following cells
if os.path.exists(parent_folder):
    print('Path exists! Proceed!')
else:
    print('Warning! Path not found! Check parent_folder before proceeding.')

In [None]:
from morgana.GUIs.manualmask import makeManualMask
from morgana.DatasetTools import io

In [None]:
import PyQt5.QtWidgets
import sys

In [None]:
def create_GT_mask(model_folder, app):
    """Open the manual-mask editor for every training image lacking a mask.

    Images are read from ``<model_folder>/trainingset``. Files whose name
    contains ``_GT`` are the ground-truth masks, all others are the input
    images. When the two lists differ in length, each image without a
    ``<name>_GT<ext>`` file is shown in `makeManualMask` and the Qt event loop
    runs until that editor is closed.

    Parameters
    ----------
    model_folder : str
        Model folder that contains the ``trainingset`` subfolder.
    app :
        Qt application object. The value passed in is replaced by the current
        ``QApplication`` instance (or a newly created one) before use.

    Returns
    -------
    None
    """
    trainingset_folder = os.path.join(model_folder, 'trainingset')

    ### both the model folder and its trainingset have to be present
    requirements = (
        (model_folder, 'Model folder not created! Skipping this subfolder.'),
        (trainingset_folder, 'Trainingset images not found! Skipping this subfolder.'),
    )
    for required_path, complaint in requirements:
        if not os.path.exists(required_path):
            print('Warning!')
            print(model_folder,':')
            print(complaint)
            return

    ### trainingset images and the ground truth masks made so far
    images = io.get_image_list(trainingset_folder, string_filter='_GT', mode_filter='exclude')
    images.sort()
    masks = io.get_image_list(trainingset_folder, string_filter='_GT', mode_filter='include')
    masks.sort()

    ### nothing to annotate when the trainingset holds no images
    if len(images) == 0:
        print('\n\nWarning, no trainingset!','Selected "'+model_folder+'" but no trainingset *data* detected. Transfer some images in the "trainingset" folder.')
        return

    ### same number of images and masks: nothing left to do
    if len(images) == len(masks):
        return

    ### otherwise prompt the user to draw every missing mask
    print('\n\nWarning, trainingset incomplete!','Selected "'+model_folder+'" but not all masks have been created.\nPlease provide manually annotated masks.')

    widgets = PyQt5.QtWidgets
    for image_path in images:
        stem, suffix = os.path.splitext(image_path)
        mask_path = stem + '_GT' + suffix
        if os.path.exists(mask_path):
            continue

        # take the running QApplication, or start one if there is none
        app = widgets.QApplication.instance() or widgets.QApplication(sys.argv)
        editor = makeManualMask(image_path, subfolder='', fn=mask_path, wsize=(2000, 2000))
        editor.show()
        app.exec_()


In [None]:
model_folders = glob.glob(os.path.join(parent_folder,'model_*'))

### convert the model folders to absolute paths
model_folders = [os.path.abspath(i) for i in model_folders]

### reuse the running QApplication when there is one (e.g. when this cell is
### run again in the same kernel); only one instance may exist per process
app = PyQt5.QtWidgets.QApplication.instance()
if app is None:
    app = PyQt5.QtWidgets.QApplication(sys.argv)

for model_folder in tqdm(model_folders):
    create_GT_mask(model_folder, app)

app.quit()
print('All binary masks/ground truth images found. Move to the next step.')

### 03_train_networks.py
This script trains the model used for the subsequent image segmentation.

In [2]:
# Run cell if starting from 03_train_networks.py
import os, glob
from tqdm.notebook import tqdm
import numpy as np

# select folder containing all image folders to be analysed
# (the commented lines are alternative example locations)
# parent_folder = os.path.join('test_data','2020-09-22_conditions')
# parent_folder = os.path.join('/','Users','jialelim', 'Desktop', 'example_dataset_ipynb', 'condA')
parent_folder = os.path.join('Y:',os.sep,'Jia_Le_Lim','morgana_example_datasets','gastruloids','condA')

print('Image subfolders found in: ' + parent_folder)
# check if the path exists; a missing folder is reported here instead of
# silently yielding an empty list of model folders below
if os.path.exists(parent_folder):
    print('Path exists! Proceed!')
else:
    print('Warning! Path not found! Check parent_folder before proceeding.')

model_folders = glob.glob(os.path.join(parent_folder,'model_*'))
### convert the model folders to absolute paths
model_folders = [os.path.abspath(i) for i in model_folders]

# NOTE: the assignment below replaces the list found above with one fixed
# model folder (the one that appears in the recorded outputs of this section).
# Remove or edit it to train the models found in parent_folder.
model_folders = [os.path.join('Y:',os.sep,'Jia_Le_Lim','morgana_example_datasets','brain_organoids','adjusted_input','model_LR')]

Image subfolders found in: Y:\Jia_Le_Lim\morgana_example_datasets\gastruloids\condA
Path exists! Proceed!


In [3]:
from skimage.io import imread
import time
from morgana.DatasetTools import io as ioDT
from morgana.MLModel import io as ioML
from morgana.MLModel import train

In [4]:
### parameters for feature generation and network training

# classifier type -- True: deep learning with Multi Layer Perceptrons; False: Logistic regression
deep = False

# feature set, forwarded as `feature_mode`
# NOTE(review): the previous comment listed "or 'ilastik'" as the alternative,
# which repeats the current value; confirm the other accepted mode(s) against
# morgana.MLModel.train
feature_type = 'ilastik'

# forwarded as `sigmas`
sigmas = [1.0, 5.0, 15.0]

# forwarded as `down_shape`
downscaling = 0.25

# forwarded as `edge_size`
edge_size = 5

# forwarded as `fraction` and `bias`
pxl_extract_fraction = 0.25
pxl_extract_bias = 0.4


In [5]:
### Build the training set (features X, labels Y, weights w, scaler) from the
### images and ground truth masks in <model_folder>/trainingset.
### NOTE(review): X, Y, w and scaler are rebound on every iteration, so after
### the loop they hold the result for the LAST entry of model_folders only.

### convert the model folders to absolute paths
model_folders = [os.path.abspath(i) for i in model_folders]

for model_folder in model_folders:
    print('-------------'+model_folder+'------------')

    training_folder = os.path.join(model_folder, 'trainingset')

    ### load images (every file whose name does not contain '_GT')
    flist_in = ioDT.get_image_list(
                                              training_folder, 
                                              string_filter='_GT', 
                                              mode_filter='exclude'
                                              )
    img_train = []
    for f in flist_in:
        img = imread(f)
        # 2D images get a leading axis so that img[0] below is the full plane
        if len(img.shape)==2:
            img = np.expand_dims(img,0)
        # if the last axis is the smallest one, move it to the front
        # (presumably a channels-last image -- TODO confirm)
        if img.shape[-1] == np.min(img.shape):
            img = np.moveaxis(img, -1, 0)
        # only the first plane along axis 0 is used for training
        img_train.append( img[0] )

    ## load ground truth (every file whose name contains '_GT')
    flist_gt = ioDT.get_image_list(
                                            training_folder, 
                                            string_filter='_GT', 
                                            mode_filter='include'
                                            )
    gt_train = [ imread(f) for f in flist_gt ]
    gt_train = [ g.astype(int) for g in gt_train ]

    # NOTE(review): images and masks are listed side by side by position;
    # neither list is sorted here and their lengths are not compared, and zip
    # stops at the shorter list -- verify that get_image_list returns a
    # consistent order
    print('##### Training set:')
    for i,f in enumerate(zip(flist_in,flist_gt)):
        print(i+1,'\t', os.path.split(f[0])[-1],'\t', os.path.split(f[1])[-1])

    ###################################################################
    ### compute features and generate training set and weights

    print('##### Generating training set...')
    # masks are cast to uint8 before being handed over
    X, Y, w, scaler = train.generate_training_set( 
                                    img_train, 
                                    [g.astype(np.uint8) for g in gt_train], 
                                    sigmas = sigmas,
                                    down_shape = downscaling,
                                    edge_size = edge_size,
                                    fraction = pxl_extract_fraction,
                                    feature_mode = feature_type,
                                    bias = pxl_extract_bias 
                                    )

100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 179.06it/s]

-------------Y:\Jia_Le_Lim\morgana_example_datasets\brain_organoids\adjusted_input\model_LR------------
##### Training set:
1 	 tp01.tif 	 tp01_GT.tif
2 	 tp09.tif 	 tp09_GT.tif
3 	 tp11.tif 	 tp11_GT.tif
4 	 tp17.tif 	 tp17_GT.tif
5 	 tp22.tif 	 tp22_GT.tif
##### Generating training set...
Number of images: 5
Number of pixels extracted per image (25%): [1411, 1411, 1552, 1368, 1552]
Number of features per image: 13
Extracting features...





In [6]:
### Train the model (with Multi Layer Perceptrons, please ensure you have cuDNN installed)
# NOTE: X, Y, w and scaler are the values currently bound in the notebook
# namespace; every folder in this loop is trained on, and saved with, that
# same data.
for model_folder in model_folders:
    print('##### Training model...')
    # the timer has its own name: `start` is also the image offset configured
    # in step 01 and must not be overwritten with a timestamp
    t_start = time.time()
    classifier = train.train_classifier( X, Y, w, deep = deep )
    print('Models trained in %.3f seconds.'%(time.time()-t_start))
    if not deep:
        # class labels and coefficients of the logistic regression classifier
        print('classes_: ', classifier.classes_)
        print('coef_: ', classifier.coef_)

    ### Save the model together with the parameters used to build its features
    ioML.save_model(
                model_folder,
                classifier,
                scaler,
                sigmas = sigmas,
                down_shape = downscaling,
                edge_size = edge_size,
                fraction = pxl_extract_fraction,
                feature_mode = feature_type,
                bias = pxl_extract_bias,
                deep = deep
                )
    print('##### Model saved!')

print('All models saved, move to the next step.')
    

##### Training model...
Training of Logistic Regression classifier...
{0.0, 1.0, 2.0}
Models trained in 0.194 seconds.
classes_:  [0. 1. 2.]
coef_:  [[-0.85409424 -0.84752903 -0.70476301 -3.19455644  0.12023304 -0.8631146
   4.17909937 -4.65167898 -0.04176757 -0.70050138 -7.29429794 -0.66663825
   0.49675211]
 [-0.31751994 -0.29268077 -0.27630206  1.77656644  0.45489563 -0.7109808
   2.63987679  0.30368142 -1.91709535  0.9997611   5.33776778  0.50862319
   3.28350853]
 [ 1.17161418  1.1402098   0.98106506  1.41799    -0.57512867  1.5740954
  -6.81897616  4.34799756  1.95886292 -0.29925972  1.95653016  0.15801506
  -3.78026064]]
##### Model saved!
All models saved, move to the next step.


### 04_predict_masks.py
Generate binary masks for image dataset using previously trained model.

In [13]:
# Run cell if starting from 04_predict_masks.py
import os, glob
from tqdm.notebook import tqdm
import numpy as np
from skimage.io import imread

# select folder containing all image folders to be analysed
# (the commented lines are alternative example locations)
# parent_folder = os.path.join('test_data','2020-09-22_conditions')
# parent_folder = os.path.join('/','Users','jialelim', 'Desktop', 'example_dataset_ipynb', 'condA')
parent_folder = os.path.join('Y:',os.sep,'Jia_Le_Lim','morgana_example_datasets','gastruloids','condA')

print('Image subfolders found in: ' + parent_folder)
# check if the path exists; a missing folder is reported here instead of
# failing later when the subfolders are listed with os.walk
if os.path.exists(parent_folder):
    print('Path exists! Proceed!')
else:
    print('Warning! Path not found! Check parent_folder before proceeding.')

# add folders that you want to ignore here
exclude_folder = []
# must match the kind of model that was trained
# (True: Multi Layer Perceptrons; False: Logistic regression)
deep = False

Image subfolders found in: Y:\Jia_Le_Lim\morgana_example_datasets\gastruloids\condA
Path exists! Proceed!


In [14]:
from skimage.io import imsave
import scipy.ndimage as ndi
import multiprocessing
from itertools import repeat
from morgana.DatasetTools import io as ioDT
import morgana.DatasetTools.multiprocessing.istarmap
from morgana.MLModel import io as ioML
from morgana.MLModel import predict

In [15]:
# first-level subfolders of parent_folder
folder_names = next(os.walk(parent_folder))[1]

# model folders are the 'model_*' entries of parent_folder
model_folders = glob.glob(os.path.join(parent_folder,'model_*'))
model_folders_name = [os.path.basename(m) for m in model_folders]

# folder names to leave out in addition to the model folders
exclude_folder = ['']

# every remaining subfolder is an image folder, stored with its full path
image_folders = [
    os.path.join(parent_folder, name)
    for name in folder_names
    if name not in model_folders_name and name not in exclude_folder
]

# the two lists found above are replaced here by one fixed model folder and
# one fixed image folder
model_folders = [os.path.join('Y:',os.sep,'Jia_Le_Lim','morgana_example_datasets','brain_organoids','adjusted_input','model_LR')]
image_folders = [os.path.join('Y:',os.sep,'Jia_Le_Lim','morgana_example_datasets','brain_organoids','adjusted_input','wt2_LR')]

In [16]:
# Predict a mask for every image of every image folder with the trained model.

# multiprocess: use about 80% of the available cores, but at least one
N_cores = max(1, int(0.8 * multiprocessing.cpu_count()))

for i, image_folder in enumerate(image_folders):

    print(image_folder)
    # several model folders: pair them with the image folders by position;
    # a single model folder is shared by all image folders
    if len(model_folders)>1:
        model_folder = model_folders[i]
    else:
        model_folder = model_folders[0]

    print('-------------'+image_folder+'------------')

    print('##### Loading classifier model and parameters...')
    classifier, scaler, params = ioML.load_model( model_folder, deep = deep)
    print('##### Model loaded!')

    #######################################################################
    ### apply classifiers and save images

    result_folder = os.path.join(image_folder, 'result_segmentation')
    os.makedirs(result_folder, exist_ok=True)
    print(result_folder)

    flist_in = ioDT.get_image_list(image_folder)
    flist_in.sort()
    N_img = len(flist_in)

    # Predict in parallel. The pool is used as a context manager so that its
    # worker processes are shut down for each folder instead of piling up;
    # list() has collected every result before the block is left.
    with multiprocessing.Pool(N_cores) as pool:
        _ = list(   tqdm(
                            pool.istarmap(
                                predict.predict_single_image,
                                zip(    flist_in,
                                        repeat(classifier),
                                        repeat(scaler),
                                        repeat(params) ) ),
                                total = N_img ) )

    # sequential alternative, if multiprocessing is not usable:
#     for f in tqdm(flist_in):
#         predict.predict_single_image(f,classifier,scaler,params)

    print('All images done!')


Y:\Jia_Le_Lim\morgana_example_datasets\brain_organoids\adjusted_input\wt2_LR
-------------Y:\Jia_Le_Lim\morgana_example_datasets\brain_organoids\adjusted_input\wt2_LR------------
##### Loading classifier model and parameters...
##### Model loaded!
Y:\Jia_Le_Lim\morgana_example_datasets\brain_organoids\adjusted_input\wt2_LR\result_segmentation


  0%|          | 0/24 [00:00<?, ?it/s]

All images done!


In [15]:
# display the `classifier` object currently bound in the notebook namespace
classifier

In [None]:
|