# MOrgAna workflow for advanced Python users
This workflow is intended for users with a programming background who want to analyse multiple image folders at once. Users can also use this notebook to select and adapt modules/functions specific to their purpose. The workflow follows the order of the scripts in `python_example_scripts` and explains the code shown in those scripts in more detail.

## Generate Masks

### 01_create_model_folder.py
The following code chooses images from the acquired dataset to form the training dataset for the generation of the model.

In [1]:
# import packages required
import os, glob, shutil
from tqdm.notebook import tqdm
import numpy as np
from numpy.random import default_rng

In [2]:
# select folder containing all image folders to be analysed
# (alternative example using a relative path:)
# parent_folder = os.path.join('test_data','2020-09-22_conditions')
parent_folder = os.path.join('/','Users','jialelim', 'Desktop', 'example_dataset_ipynb', 'condB')
print('Image subfolders found in: ' + parent_folder)
# check if the path exists and report the result either way, so that a
# mistyped path is noticed here rather than in a later cell
if os.path.isdir(parent_folder):
    print('Path exists! Proceed!')
else:
    print('Path does not exist or is not a folder! Check parent_folder before proceeding.')

Image subfolders found in: /Users/jialelim/Desktop/example_dataset_ipynb/condB
Path exists! Proceed!


In [3]:
# folders that should be skipped when looking for image subfolders
exclude_folder = ['model_']

# one shared model for all folders (True) or one model per image subfolder (False)
combine_subfolders = True

# training dataset selection: every dN-th image is used, counting from index
# `start`; dN = 0 switches to a random selection of images instead
dN = 0
start = 0

In [4]:
def initialize_model_folder(folder, dN=30, start=0, combine=True):
    """Create a model folder next to `folder` and copy a training subset into it.

    The model folder is created in the parent directory of `folder`. It is
    named 'model_' when `combine` is true and 'model_<folder name>' otherwise.
    A 'trainingset' subfolder is created inside it, and the selected '*.tif'
    images of `folder` are copied there with '<folder name>_' prepended to
    their file name. Files already present in the training set are not
    overwritten.

    Parameters
    ----------
    folder : str
        Path of the image folder to sample from.
    dN : int
        Step between selected images (images are sorted by path). If 0, a
        random sample of one tenth of the images (at least one) is taken.
    start : int
        Index of the first selected image; only used when `dN` is non-zero.
    combine : bool
        True: use the shared 'model_' folder; False: use a model folder
        specific to `folder`.

    Returns
    -------
    None
    """
    # Drop trailing separators: os.path.split('a/b/') returns ('a/b', ''),
    # which would put the model folder inside the image folder and leave the
    # folder name empty.
    folder = os.path.normpath(folder)
    parent_dir, folder_name = os.path.split(folder)

    ### create folders
    model_name = 'model_' if combine else 'model_' + folder_name
    trainingset_folder = os.path.join(parent_dir, model_name, 'trainingset')
    # creates the model folder and the trainingset folder; no error if present
    os.makedirs(trainingset_folder, exist_ok=True)

    ### count images and extract trainingset file names
    # escape the folder so that characters such as '[' in its name are not
    # interpreted as glob patterns
    flist = sorted(glob.glob(os.path.join(glob.escape(folder), '*.tif')))
    if dN:
        flist = flist[start::dN]
    elif flist:
        rng = default_rng()
        n_samples = max(len(flist) // 10, 1)
        random_choice = rng.choice(len(flist), size=n_samples, replace=False)
        flist = [flist[i] for i in random_choice]
    # (no images and dN == 0: nothing to sample; drawing one item from an
    # empty population would raise a ValueError)

    ### copy images to trainingset folder
    for f in flist:
        newf = os.path.join(trainingset_folder, folder_name + '_' + os.path.basename(f))
        if not os.path.exists(newf):
            shutil.copy(f, newf)

In [5]:


    ### compute parent folder as absolute path
    parent_folder = os.path.abspath(parent_folder)
    if not os.path.isdir(parent_folder):
        ### os.walk yields nothing for a missing folder, so next() below would
        ### fail with a bare StopIteration; raise a clear error instead
        raise FileNotFoundError('Image parent folder not found: ' + parent_folder)

    ### find out all image subfolders in parent_folder (sorted for a stable order)
    folder_names = sorted(next(os.walk(parent_folder))[1])

    ### exclude folders in exclude_folder, as well as model folders
    ### ('model_<subfolder>') created by an earlier run with one model per subfolder
    folder_names = [g for g in folder_names
                    if g not in exclude_folder and not g.startswith('model_')]

    for folder_name in tqdm(folder_names):
        folder_path = os.path.join(parent_folder, folder_name)

        ### for every image subfolder, generate model folder and the trainingset
        initialize_model_folder(folder_path, dN=dN, start=start, combine=combine_subfolders)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))


