# Image Identification using "pyimagej"

In [1]:
from FIJI_Marco_For_Particle_Identification import process_NF_particles as pnp
import glob
import multiprocessing as mp
import numpy as np 
import os
import pandas as pd
import scipy.io as sio
import shutil
import time 
import gc

## Section under development 

In [12]:
# Parallelisation code set
# ~~~~~~~~~~~~~~~~~~ Create the file architecture ~~~~~~~~~~~~~~~~~~
# Root folder of the measurement series; every experiment is expected in a
# sub-folder whose name ends in "_files".
mainPath = 'D:/binary_flock_density_manipulation/density_0.52_date_28.02.2022'

# Build the glob pattern with os.path.join instead of the literal '\*_files':
# '\*' is an invalid escape sequence (a SyntaxWarning on recent Python versions).
# glob returns matches in arbitrary order, so sort them to make the processing
# order (and any slice taken from the list) deterministic.
experiment_dirs = sorted(glob.glob(os.path.join(os.path.normpath(mainPath), '*_files')))

# Point every experiment at its "Images" sub-folder. The paths use forward
# slashes only and keep a trailing slash, because later code appends file
# patterns such as '*.tif' directly to them.
experimentPath = [os.path.join(d, 'Images').replace('\\', '/') + '/' for d in experiment_dirs]

# Holder for the per-chunk results returned by the worker pool.
results = []

# Process the experiments one at a time: split the frames of an experiment into
# N chunks, let a worker pool run `pnp` on every chunk, stitch the per-chunk
# arrays together and save them as a .mat file next to the images.
# (Under development: only the first experiment is processed.)
for Path in experimentPath[0:1]:

    # ~~~~~~~~~~~ Count the frames within the "Images" folder ~~~~~~~~~~~~~~~~~~~~~~~~~~~
    tif_files = glob.glob(Path + '*.tif')
    # The background image is not a frame. glob gives no ordering guarantee, so
    # look it up by name instead of assuming it is the last entry (which also
    # raised IndexError when the folder contained no .tif file at all).
    has_background = any(os.path.basename(f) == 'ImageBackground.tif' for f in tif_files)
    Total_Frames = len(tif_files) - (1 if has_background else 0)
    if Total_Frames <= 0:
        print(" no frames found in", Path, "... skipping")
        continue

    # ~~~~~~~~~~~ Worker and memory configuration ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    N = 18         # number of worker processes ... choose this judiciously
    Ram_size = 18  # memory budget shared by all workers (presumably in GB, given the "g" suffix below)
    # Per-worker "-Xmx<k>g" option string handed to pnp;
    # N * (per-worker allocation) must not exceed the RAM size - 1.
    memory_allocation = "-Xmx" + str(int(Ram_size / N)) + "g"

    # ~~~~~~~~~~~ First and last frame of every chunk (tested robust) ~~~~~~~~~~~~~~~~~~~
    Chunk = Total_Frames / N
    frame_start = [int(np.floor(i * Chunk) + 1) for i in range(N)]
    frame_finish = [int(np.floor((i + 1) * Chunk + 1)) for i in range(N)]
    inp_for_starmap = [
        (Path, first, last, memory_allocation)
        for first, last in zip(frame_start, frame_finish)
    ]

    # ~~~~~~~~~~~ Run the parallel code under the "__main__" guard ~~~~~~~~~~~~~~~~~~~~~~
    # The guard is required on Windows, where worker processes are spawned rather
    # than forked. The pool itself is created inside the guard for the same reason.
    if __name__ == "__main__":

        temp_dir = 'C:/Temporary'
        master_file = temp_dir + '/master.array'

        # Temporary folder that holds master.array while it is being assembled.
        # os.mkdir fails on purpose if the folder already exists.
        os.mkdir(temp_dir)
        master = None
        chunk_data = None
        dictn = None
        try:
            pool = mp.Pool(processes=N)
            start = time.perf_counter()
            try:
                # Every entry of `results` is indexed as (file name, number of rows):
                # the memory-mapped file written for the chunk and its row count.
                results = pool.starmap(pnp, inp_for_starmap)
                # measure the run time
                finish = time.perf_counter()
            finally:
                # Closing the pool is very important (running the cell again with a
                # stale pool causes problems); joining waits for the workers to exit.
                pool.close()
                pool.join()

            # Concatenate the chunk arrays into a single master array on disk.
            # The master starts with one placeholder row that is dropped at the end.
            master = np.memmap(master_file, dtype='float32', mode='w+', shape=(1, 4))

            for res in results:
                # map the chunk written by the worker
                chunk_data = np.memmap(res[0], dtype='float32', mode='r', shape=(res[1], 4))

                # measure the sizes of the master and of the chunk
                master_len = master.shape[0]
                chunk_len = chunk_data.shape[0]

                # re-map the master with room for the chunk minus its first row
                master = np.memmap(master_file, dtype='float32', mode='r+',
                                   shape=(master_len + chunk_len - 1, 4))

                # copy the chunk (without its first row) behind the existing data
                master[master_len:, :] = chunk_data[1:, :]

            # remove the placeholder first row of the master
            master = master[1:, :]

            # Derive the .mat output path from the image path:
            # <experiment>/Images/ -> <experiment>/Analysis/1_Positions/Positions_NF.mat
            parts = os.path.normpath(Path).split(os.sep)[:-1]
            parts.extend(['Analysis', '1_Positions', 'Positions_NF.mat'])
            mat_file_name = '/'.join(parts)

            # Make sure the output folder exists so the save cannot fail on a
            # missing directory after all the processing has been done.
            os.makedirs(os.path.dirname(mat_file_name), exist_ok=True)

            # Save the data in a .mat file
            dictn = {"XYF": master}
            sio.savemat(mat_file_name, dictn)

            print(" time taken by",Path," is ", round((finish - start)/60),"minutes")
        finally:
            # Drop every reference to the memory-mapped arrays and collect garbage
            # BEFORE deleting the folder (open maps keep the files locked on Windows),
            # and do so even when a step above failed, so that the next run does not
            # trip over a left-over temporary folder.
            results = []
            master = None
            chunk_data = None
            dictn = None
            gc.collect()
            shutil.rmtree(temp_dir)

 time taken by D:/binary_flock_density_manipulation/density_0.52_date_28.02.2022/13h45_2x2x_5exp_150V_200fps_files/Images/  is  1 minutes


# Legacy sections 

In [None]:
# Parallelisation code set
# ~~~~~~~~~~~~~~~~~~ Create the file architecture ~~~~~~~~~~~~~~~~~~
# Root folder of the measurement series; every experiment is expected in a
# sub-folder whose name ends in "_files".
mainPath = 'D:/binary_flock_density_manipulation/density_1.00_date_11.3.2022'

# Build the glob pattern with os.path.join instead of the literal '\*_files':
# '\*' is an invalid escape sequence (a SyntaxWarning on recent Python versions).
# glob returns matches in arbitrary order, so sort them to make the processing
# order deterministic.
experiment_dirs = sorted(glob.glob(os.path.join(os.path.normpath(mainPath), '*_files')))

# Point every experiment at its "Images" sub-folder. The paths use forward
# slashes only and keep a trailing slash, because later code appends file
# patterns such as '*.tif' directly to them.
experimentPath = [os.path.join(d, 'Images').replace('\\', '/') + '/' for d in experiment_dirs]

# Holder for the per-chunk results returned by the worker pool.
results = []

# Legacy version of the processing loop: split the frames of every experiment
# into N chunks, let a worker pool run `pnp` on every chunk, concatenate the
# returned tables in memory and save them as a .mat file next to the images.
# NOTE(review): this cell treats every starmap result as a pandas DataFrame,
# whereas the development cell of this notebook indexes the results as
# (file name, number of rows) pairs -- confirm which version of pnp this
# cell is meant to run against before using it.
for Path in experimentPath:

    # ~~~~~~~~~~~ Count the frames within the "Images" folder ~~~~~~~~~~~~~~~~~~~~~~~~~~~
    tif_files = glob.glob(Path + '*.tif')
    # The background image is not a frame. glob gives no ordering guarantee, so
    # look it up by name instead of assuming it is the last entry (which also
    # raised IndexError when the folder contained no .tif file at all).
    has_background = any(os.path.basename(f) == 'ImageBackground.tif' for f in tif_files)
    Total_Frames = len(tif_files) - (1 if has_background else 0)
    if Total_Frames <= 0:
        print(" no frames found in", Path, "... skipping")
        continue

    # ~~~~~~~~~~~ Worker and memory configuration ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    N = 8          # number of worker processes ... choose this judiciously
    Ram_size = 24  # memory budget shared by all workers (presumably in GB, given the "g" suffix below)
    # Per-worker "-Xmx<k>g" option string handed to pnp;
    # N * (per-worker allocation) must not exceed the RAM size - 1.
    memory_allocation = "-Xmx" + str(int(Ram_size / N)) + "g"

    # ~~~~~~~~~~~ First and last frame of every chunk (tested robust) ~~~~~~~~~~~~~~~~~~~
    Chunk = Total_Frames / N
    frame_start = [int(np.floor(i * Chunk) + 1) for i in range(N)]
    frame_finish = [int(np.floor((i + 1) * Chunk + 1)) for i in range(N)]
    inp_for_starmap = [
        (Path, first, last, memory_allocation)
        for first, last in zip(frame_start, frame_finish)
    ]

    # ~~~~~~~~~~~ Run the parallel code under the "__main__" guard ~~~~~~~~~~~~~~~~~~~~~~
    # The guard is required on Windows, where worker processes are spawned rather
    # than forked. The pool itself is created inside the guard for the same reason.
    if __name__ == "__main__":

        # Temporary folder in the C: drive. os.mkdir fails on purpose if the
        # folder already exists.
        os.mkdir('C:/Temporary')
        try:
            pool = mp.Pool(processes=N)
            start = time.perf_counter()
            try:
                # create N processes and execute them
                results = pool.starmap(pnp, inp_for_starmap)
            finally:
                # Closing the pool is very important (running the cell again with a
                # stale pool causes problems); joining waits for the workers to exit.
                pool.close()
                pool.join()

            # convert every float64 column to float32
            for c in range(N):
                float64_part = results[c].select_dtypes(np.float64)
                results[c][float64_part.columns] = float64_part.astype(np.float32)

            # measure the run time
            finish = time.perf_counter()
        finally:
            # Delete the temporary folder even when a worker failed, so that the
            # next run does not trip over a left-over folder.
            shutil.rmtree('C:/Temporary')

        # Derive the .mat output path from the image path:
        # <experiment>/Images/ -> <experiment>/Analysis/1_Positions/Positions_NF.mat
        parts = os.path.normpath(Path).split(os.sep)[:-1]
        parts.extend(['Analysis', '1_Positions', 'Positions_NF.mat'])
        mat_file_name = '/'.join(parts)

        # Make sure the output folder exists so the save cannot fail on a
        # missing directory after all the processing has been done.
        os.makedirs(os.path.dirname(mat_file_name), exist_ok=True)

        # Save the data in a .mat file
        dictn = {"XYF": np.concatenate(results)}
        sio.savemat(mat_file_name, dictn)

        # Remove the large datasets from memory. A single gc.collect() already
        # runs a full collection, so the per-generation calls are not needed.
        del results
        del dictn
        gc.collect()
        print(" time taken by",Path," is ", round((finish - start)/60),"minutes")