### Tutorial to set up the parameter dictionary for large-scale inference using the 3D U-Net.
Assumes that training, evaluation, and stitching and blending of images have been completed through this pipeline.

In [None]:
#import necessary modules
import os, sys, shutil
#navigate one folder back up to be in the main repo - can do this a number of ways
#NOTE(review): hard-coded, machine-specific path - point it at your own checkout of the repo;
#presumably the project-local `tools` imports below rely on this being the working directory
os.chdir("/jukebox/wang/zahra/python/lightsheet_py3")
print(os.getcwd())
import argparse   
#project-local helpers: parameter loading, patch/memmap pre-processing, cell statistics, patch checker
from tools.utils.io import load_kwargs
from tools.conv_net.utils.preprocessing.preprocess import get_dims_from_folder, make_indices, make_memmap_from_tiff_list, generate_patch, reconstruct_memmap_array_from_tif_dir
from tools.conv_net.utils.postprocessing.cell_stats import calculate_cell_measures, consolidate_cell_measures
from tools.conv_net.utils.preprocessing.check import check_patchlist_length_equals_patches    
import pandas as pd, numpy as np

Set up the main run function for CPU-based pre- and post-processing.

In [None]:
def main(**args):
    """
    run a single CPU-based pre- or post-processing step of the 3D U-Net pipeline

    args are forwarded unchanged to fill_params() (expt_name, stepid, jobid) to build
    the parameter dictionary; only params is used below.

    steps, selected by params["stepid"]:
        0  - make params["data_dir"], save the params and convert the tiff folder to a memmap array
        1  - generate patches from the memmap array
        11 - check that patching was successful
        21 - initialise the (empty) reconstructed array on disk
        2  - reconstruct the array from the cnn output; deletes params["cnn_dir"] if params["cleanup"]
        3  - save the params and calculate cell measures
        4  - consolidate cell measures from the array job
    any other stepid runs nothing and is reported on stdout.
    """

    #args should be the info you need to specify the params
    # for a given experiment, but only params should be used below
    params = fill_params(**args)
    stepid = params["stepid"]

    if stepid == 0:
        #######################################PRE-PROCESSING FOR CNN INPUT --> MAKING INPUT ARRAY######################################################

        #make directory to store patches
        #makedirs(exist_ok=True) also creates missing parent folders and does not fail
        #if another job created the directory between an existence check and the mkdir
        os.makedirs(params["data_dir"], exist_ok=True)
        #save params to .csv file
        save_params(params, params["data_dir"])

        #convert full size data folder into memmap array
        make_memmap_from_tiff_list(params["cellch_dir"], params["data_dir"],
                                   params["cores"], params["dtype"], params["verbose"])

    elif stepid == 1:
        #######################################PRE-PROCESSING FOR CNN INPUT --> PATCHING###################################################

        #generate memmap array of patches
        patch_dst = generate_patch(**params)
        sys.stdout.write("\nmade patches in {}\n".format(patch_dst)); sys.stdout.flush()

    elif stepid == 11:
        #######################################CHECK TO SEE WHETHER PATCHING WAS SUCCESSFUL###################################################

        #run checker
        check_patchlist_length_equals_patches(**params)
        sys.stdout.write("\nready for inference!"); sys.stdout.flush()

    elif stepid == 21:
        ####################################POST CNN --> INITIALISING RECONSTRUCTED ARRAY FOR ARRAY JOB####################################

        sys.stdout.write("\ninitialising reconstructed array...\n"); sys.stdout.flush()
        #mode "w+" creates (or overwrites) the .npy file; the returned memmap is not needed here
        np.lib.format.open_memmap(params["reconstr_arr"], mode="w+", shape = params["inputshape"], dtype = params["dtype"])
        sys.stdout.write("done :]\n"); sys.stdout.flush()

    elif stepid == 2:
        #####################################POST CNN --> RECONSTRUCTION AFTER RUNNING INFERENCE ON TIGER2#################################

        #reconstruct
        sys.stdout.write("\nstarting reconstruction...\n"); sys.stdout.flush()
        reconstruct_memmap_array_from_tif_dir(**params)
        #optionally remove the cnn output patches once they have been reconstructed
        if params["cleanup"]: shutil.rmtree(params["cnn_dir"])

    elif stepid == 3:
        ##############################################POST CNN --> FINDING CELL CENTERS#####################################################

        save_params(params, params["data_dir"])

        #find cell centers, measure sphericity, perimeter, and z span of a cell
        csv_dst = calculate_cell_measures(**params)
        sys.stdout.write("\ncell coordinates and measures saved in {}\n".format(csv_dst)); sys.stdout.flush()

    elif stepid == 4:
        ##################################POST CNN --> CONSOLIDATE CELL CENTERS FROM ARRAY JOB##############################################

        #part 1 - check to make sure all jobs that needed to run have completed; part 2 - make pooled results
        consolidate_cell_measures(**params)

    else:
        #previously an unknown step id did nothing without any feedback; say so explicitly
        sys.stdout.write("\nunrecognised stepid {!r}; nothing was run (valid: 0, 1, 11, 21, 2, 3, 4)\n".format(stepid)); sys.stdout.flush()

##### Parameter dictionary setup function.
For the pre-processing parameters:
1. "float32" is the recommended data type for `params["dtype"]`.
2. Modify `params["cores"]` according to your local desktop or cluster capabilities (6 or higher recommended).
3. The `params["cleanup"]` boolean deletes memory-mapped arrays after patches have been generated. Recommend `False` for testing.
4. `params["patchsz"]` is entirely dependent on the image dimensions. Recommend 60 planes in the z dimension and dividing the `x` and `y` dimensions by 2 and adding 32 for best performance for 4x or tiled LBVT data and adding 32 to the `x` and `y` dimensions for single-tiled images.
5. Specify window used for inference in `params["window"]`. This does not get used but is saved to the parameter dictionary copy generated in the lightsheet data directory.

The post-processing parameters can also be changed after pre-processing. Note:
1. `params["threshold"]` is important; it is the threshold by which cells will be segmented and centers generated. This can be determined by sweeping for the threshold in your validation data during training and picking the threshold with the best performance.
2. `params["zsplt"]` and `params["ovlp_plns"]` relate to the number of planes considered at a time for 3D connected component analysis. We recommend 30 for both for 4x LBVT cellular-resolution data, for memory efficiency and best performance.

In [None]:
def fill_params(expt_name, stepid, jobid, scratch_dir="/jukebox/scratch/zmd"):
    """
    build the parameter dictionary used by every step in main()

    inputs:
        expt_name   = path handed in by the caller; its parent folder is used to load the
                      lightsheet parameter dictionary and to name the experiment, and the
                      path itself is stored as params["output_dir"]
        stepid      = step to run, stored as params["stepid"]
        jobid       = array job id, stored as params["jobid"]
        scratch_dir = folder that patches and reconstructed arrays are saved under;
                      defaults to the previously hard-coded location
    returns:
        params      = dictionary of slurm, path, pre-processing and post-processing parameters
    raises:
        IndexError if the loaded kwargs contain no volume with ch_type == "cellch"
        AssertionError if the cell channel full size data folder does not exist
    """

    params = {}

    #slurm params
    params["stepid"]        = stepid
    params["jobid"]         = jobid

    #experiment params
    expt_dir = os.path.dirname(expt_name) #going one folder up to get to fullsizedata
    params["expt_name"]     = os.path.basename(os.path.abspath(expt_dir))

    #find cell channel tiff directory from parameter dict
    kwargs = load_kwargs(expt_dir)
    print("\n\n loading params for: {}".format(expt_dir))
    #first volume flagged as the cell channel
    cellch_vol = [v for v in kwargs["volumes"] if v.ch_type == "cellch"][0]
    src = cellch_vol.full_sizedatafld_vol
    assert os.path.isdir(src), "nonexistent data directory"
    print("\n\n data directory: {}".format(src))

    params["cellch_dir"]    = src
    params["scratch_dir"]   = scratch_dir #whatever path you are saving patches and reconstructed arrays to
    params["data_dir"]      = os.path.join(params["scratch_dir"], params["expt_name"])

    #changed paths after cnn run
    params["cnn_data_dir"]  = os.path.join(params["scratch_dir"], params["expt_name"])
    params["cnn_dir"]       = os.path.join(params["cnn_data_dir"], "output_chnks") #set cnn patch directory
    params["reconstr_arr"]  = os.path.join(params["cnn_data_dir"], "reconstructed_array.npy")
    params["output_dir"]    = expt_name

    #pre-processing params
    params["dtype"]         = "float32"
    params["cores"]         = 8
    params["verbose"]       = True
    params["cleanup"]       = False

    params["patchsz"]       = (60, 3840, 3328) #cnn window size for lightsheet = typically 20, 192, 192 for 4x, 20, 32, 32 for 1.3x
    params["stridesz"]      = (40, 3648, 3136)
    params["window"]        = (20, 192, 192)

    #the patch list is derived from the dimensions of the cell channel folder and the stride
    params["inputshape"]    = get_dims_from_folder(src)
    params["patchlist"]     = make_indices(params["inputshape"], params["stridesz"])

    #post-processing params
    params["threshold"]     = (0.45,1) #h129 = 0.6; prv = 0.48
    params["zsplt"]         = 30
    params["ovlp_plns"]     = 30

    return params

After pre- and post-processing the data for cell detection, the code saves out the parameters you picked per sample as a `.csv` in the `3dunet_output` folder in the main data directory.

In [None]:
def save_params(params, dst):
    """
    write the cnn specific parameter dictionary to "cnn_param_dict.csv" inside dst

    each key of params becomes one row (the key is the index column) and no header row
    is written; the file is kept for reconstruction/postprocessing and can be discarded
    later if need be. the location of the saved file is printed to stdout.
    """
    csv_pth = os.path.join(dst, "cnn_param_dict.csv")

    #one row per parameter
    param_table = pd.DataFrame.from_dict(data=params, orient="index")
    param_table.to_csv(csv_pth, header=False)

    sys.stdout.write("\nparameters saved in: {}".format(csv_pth))
    sys.stdout.flush()

Run locally for testing.

In [None]:
#run
if __name__ == "__main__":

    #init - keyword arguments that main() forwards to fill_params()
    args = {
        "stepid": 0, #Step ID to run patching, reconstructing, or cell counting
        "jobid": 0, #Job ID to run as an array job, useful only for step 1,2,3; see slurm files for more info
        "expt_name": "/jukebox/wang/pisano/tracing_output/antero_4x/20160823_tp_bl6_cri_500r_02/lightsheet", #Tracing output directory (aka registration output)
    }

    main(**args)