Requirements: 
* Matlab 
* Matlab's Image Processing Toolbox
* Matlab's Optimization Toolbox

Clone AIRLOCALIZE from its GitHub repo:  
In a terminal (in your selected dir), type:

``` 
git clone git@github.com:timotheelionnet/AIRLOCALIZE.git
```

In [1]:
from glob import glob
import os

In [3]:
import sys
import os
import numpy as np
import csv
import copy
from scipy import spatial

import pandas as pd

## Define dirs

In [2]:
# Directory with the simulated images (globbed below for its sub-directories)
input_dir = 'PATH/TO/IMAGES'
# Directory with the embryo images used for the execution-time analysis
input_dir_embryos = 'PATH/TO/IMAGES1'

In [None]:
# Directory where the AIRLOCALIZE repository was cloned
# (the example config file is read from its "examples" sub-directory)
air_dir = 'PATH/TO/DIR/AIRLOCALIZE'

In [None]:
# User choice: directory under which the config-file and result folders are
# created and where the summary files are written
analysis_path = 'PATH/TO/SET'

## Simulated Images
for accuracy comparison

### Setup config files
for the grid search

In [None]:
# Assumes the layout of the "Selected_simulation" dir, where the images are in
# sub-directories: every entry directly under input_dir is collected here.
input_dirs = glob(os.path.join(input_dir, '*'))

#### Read example config file

In [None]:
# Read the example config shipped with AIRLOCALIZE as a list of lines
# (newline included) that serves as the template for all generated configs.
# The context manager closes the file even if reading fails.
with open(os.path.join(air_dir, 'examples/3DsmFISH_humCells.ini'), 'r') as org_conf_file:
    lines = org_conf_file.readlines()

#### Get lines to change

In [None]:
# Display the template lines that are overwritten below, to check the indices
lines[2], lines[5], lines[13], lines[18], lines[23], lines[37], lines[48], lines[51], lines[75], lines[101]

In [None]:
# Map each config key to the (0-based) index of its line in the `lines` list
d = {   ### keys set once to a constant value:
    "fileProcessingMode": 13,
    "inclusionString": 18,
    "exclusionString": 23,
    "maxSpots": 51,
    "outputSpotsImage": 75,
    ## keys set per input (paths and PSF sigma):
    "dataFileName": 2,
    "saveDirName": 5,
    "psfSigma": 37,
    ## keys varied in the grid search:
    "threshLevel": 48,
    "minDistBetweenSpots": 101} 

In [None]:
# Set the lines that keep the same value in every generated config file.
# NOTE(review): the exclusion string presumably skips images whose name
# contains "3000spots" -- confirm against the AIRLOCALIZE documentation.

lines[d["fileProcessingMode"]] = 'fileProcessingMode=batch\n'
lines[d["inclusionString"]] = 'inclusionString=.tif\n'
lines[d["exclusionString"]] = 'exclusionString=3000spots\n'
lines[d["maxSpots"]] = 'maxSpots=400\n'
lines[d["outputSpotsImage"]] = 'outputSpotsImage=0\n'

In [None]:
# Grid-search values: threshold levels 4..13 and minimum spot distances 1..3

thrs = list(range(4, 14))
dists = list(range(1, 4))

In [None]:
# Folder (inside analysis_path) that receives the generated config files.
# exist_ok=True lets the cell be re-run without raising FileExistsError.
conf_path_simul = 'config_files_simulated'
os.makedirs(os.path.join(analysis_path, conf_path_simul), exist_ok=True)

In [None]:
# Folder (inside analysis_path) for the results of the simulated images.
# exist_ok=True lets the cell be re-run without raising FileExistsError.
results_path_simul = 'results_simulated'
os.makedirs(os.path.join(analysis_path, results_path_simul), exist_ok=True)

#### create files

In [None]:
# Write one config file per (input dir, threshold, min distance) combination
# into the config folder created above (the original referenced an undefined
# `conf_path`).
conf_dir_simul = os.path.join(analysis_path, conf_path_simul)

for i, p in enumerate(input_dirs):

    lines[d["dataFileName"]] = f'dataFileName={p}\n'

    # The PSF sigmas are parsed from the directory name, which is expected to
    # contain "Sigxy <xy> SigZ <z>"; in a multi-character xy value "pt" stands
    # for the decimal point.
    sig_xy = p.split('Sigxy ')[1].split(' SigZ')[0]
    sig_xy = sig_xy.replace('pt', '.') if len(sig_xy) > 1 else sig_xy

    sig_z = p.split('SigZ ')[1]

    lines[d["psfSigma"]] = f'psfSigma={sig_xy},{sig_z}\n'

    for thr in thrs:
        for dist in dists:

            lines[d["saveDirName"]] = f'saveDirName={results_path_simul}/{os.path.basename(p)}/{thr}_{dist}\n'

            lines[d["threshLevel"]] = f'threshLevel={thr}\n'
            lines[d["minDistBetweenSpots"]] = f'minDistBetweenSpots={dist}\n'

            # Context manager guarantees the file is flushed and closed
            with open(os.path.join(conf_dir_simul, f'{i}_{thr}_{dist}.ini'), "w") as out_file:
                out_file.writelines(lines)

### Run on Matlab:

Run MATLAB (you can also run `matlab --nodesktop`) from the config file directory.  
In MATLAB:  

```
addpath(genpath(PATH/TO/AIRLOCALIZE));

conf_files = dir;

files = strings;

for c = 3:size(conf_files)
    nam = conf_files(c).name
    startt = tic
    AIRLOCALIZE(nam);
    files(c-2) = nam;
    times(c-2) = toc(startt);
end

T = table(files, times);
writetable(T,'timetable.txt');
```


### Analyze Results

In [None]:
# Leo's code:

def profile_detections(unmod, more_than, min_dist=2):
    """Greedily match detections to ground-truth points and count the misses.

    On every pass the globally closest (ground truth, detection) pair is
    looked up. If its distance is below ``min_dist`` both points are removed
    and the distance is recorded; otherwise matching stops. Ties are resolved
    in favour of the first ground-truth point, in input order.

    Args:
        unmod: ground-truth points, a sequence/array of shape (n, dims).
        more_than: detections from one of the programs, same layout.
        min_dist: a pair only counts as a match when its distance is strictly
            below this value. Defaults to 2, the previously hard-coded value.

    Returns:
        A tuple ``(n_undetected, n_spurious, mean_distance)``: the number of
        ground-truth points left unmatched, the number of detections left
        unmatched, and the mean distance over the matched pairs (``0`` when
        nothing was matched).
    """
    matched_distances = []

    while len(more_than) != 0 and len(unmod) != 0:
        # The tree has to be rebuilt because a detection was removed in the
        # previous pass. np.delete returns a new array, so no copy is needed.
        kdtree = spatial.KDTree(more_than)

        # Nearest remaining detection for every remaining ground-truth point
        distances, indices = kdtree.query(unmod)
        closest = int(np.argmin(distances))  # first minimum, as in a '<' scan

        if not distances[closest] < min_dist:
            # Even the best remaining pair is too far apart: nothing to match
            break

        matched_distances.append(distances[closest])
        more_than = np.delete(more_than, indices[closest], axis=0)
        unmod = np.delete(unmod, closest, axis=0)

    euc_dist = np.mean(np.asarray(matched_distances)) if matched_distances else 0

    return (len(unmod), len(more_than), euc_dist)

In [None]:
# Collect every *.loc4 result file found two directory levels below the
# simulated results folder
all_results_paths = glob(os.path.join(results_path_simul,"*","*","*.loc4"))

In [None]:
# Get images in the format of "orgdir/orgname": keep path components 1 and 3
# of every result path (the original iterated over an undefined `all_results`)
all_results_names = ["/".join(p for idx, p in enumerate(r.split('/')) if idx in (1, 3)) for r in all_results_paths]

In [None]:
# De-duplicate the image names (going through a set does not preserve order)
unique_names = list(set(all_results_names))

In [None]:
# Display: number of distinct images vs. total number of result files
len(unique_names), len(all_results_names)

In [None]:
# Group the result paths per image: one inner list per unique image name,
# holding all result files (one per grid-search setting) of that image.
# (The original indexed an undefined `all_results`.)
result_files_sorted = [
    [path for path, name in zip(all_results_paths, all_results_names) if name == n]
    for n in unique_names
]

#### Run comparison to GT

In [None]:
# For every image, compare each grid-search result with the ground truth and
# keep the result with the fewest errors (false negatives + false positives).
best_results_files = []
best_results = []


for files in result_files_sorted:

    exmpl_file = files[0].split('/')

    # Ground-truth file: <input_dir>/<orgdir>/<result file name minus its
    # last character>, i.e. "*.loc4" -> "*.loc"
    gt_path = os.path.join(input_dir, exmpl_file[1], exmpl_file[3][:-1])

    if not os.path.exists(gt_path):
        print('not found', gt_path)
        continue

    # Raw string: "\s" inside a normal string literal is an invalid escape
    gt_spots = pd.read_csv(gt_path, sep=r"\s+", header=None)
    # Drop the last column and apply the same offset as to the detections
    gt_spots = gt_spots.to_numpy()[:, :-1] - np.array([0.5, 0.5, 1])

    print(files[0])

    # The number of simulated spots is encoded in the file name as
    # "..._<n>spots..."; it is the same for every result file of this image
    n_spots = int(gt_path.split("spots")[0].split("_")[-1])

    # Reset per image so a value from the previous image can never leak in
    best = None
    best_file = None

    for r_f in files:

        df = pd.read_csv(r_f, sep="\t")

        detected_spots = df[["x_in_pix", "y_in_pix", "z_in_pix"]].to_numpy()
        detected_spots = detected_spots - [0.5, 0.5, 1]

        # format diff results as [nspots(GT), FN, FP, euc_dist]
        diff_results = [n_spots, *profile_detections(gt_spots, detected_spots)]

        if best is None or (best[1] + best[2]) > (diff_results[1] + diff_results[2]):
            best = diff_results
            best_file = r_f

        # A perfect result cannot be improved on: stop searching
        if diff_results[1] == 0 and diff_results[2] == 0:
            break

    best_results.append(best)
    best_results_files.append(best_file)
        

In [None]:
# Pair every best result file with its [nspots(GT), FN, FP, euc_dist] entry
zipped = list(zip(best_results_files, best_results))

In [None]:
# Write the best result of every image as one comma-separated line.
# (The original was missing the closing parenthesis of os.path.join, which
# made the cell a syntax error.)
with open(os.path.join(analysis_path, 'airlocalize_results_simul.txt'), 'w') as f:
    for r in best_results:
        # Skip placeholder entries that are not a result list
        if isinstance(r, list):
            f.write(",".join(str(it) for it in r) + '\n')

In [None]:
# Write the path of the best result file of every image, one per line.
# (The original was missing the closing parenthesis of os.path.join, which
# made the cell a syntax error.)
with open(os.path.join(analysis_path, 'airlocalize_result_simul_best_files.txt'), 'w') as f:
    for fi in best_results_files:
        # Skip placeholder entries that are not a path
        if isinstance(fi, str):
            f.write(fi + '\n')

## Embryo images
to analyse execution time

In [None]:
# Every entry directly under the embryo image directory
input_files = glob(os.path.join(input_dir_embryos, "*"))

### Setup config files
for the grid search

#### Read example config file

In [None]:
# Re-read the example config so the embryo configs start from a clean
# template. The context manager closes the file even if reading fails.
with open(os.path.join(air_dir, 'examples/3DsmFISH_humCells.ini'), 'r') as org_conf_file:
    lines = org_conf_file.readlines()

#### Get lines to change

In [None]:
# Map each config key to the (0-based) index of its line in the `lines` list
d = {   ### keys set once to a constant value:
    "fileProcessingMode": 13,
    "inclusionString": 18,
    "exclusionString": 23,
    "maxSpots": 51,
    "outputSpotsImage": 75,
    ## keys set per input (paths and PSF sigma):
    "dataFileName": 2,
    "saveDirName": 5,
    "psfSigma": 37,
    ## keys varied in the grid search:
    "threshLevel": 48,
    "minDistBetweenSpots": 101} 

In [None]:
# Only the inclusion string and the spots-image output are overridden here.
# The commented-out assignments stay disabled, so those settings keep the
# values from the example config.
#lines[d["fileProcessingMode"]] = 'fileProcessingMode=batch\n'
lines[d["inclusionString"]] = 'inclusionString=.tif\n'
#lines[d["exclusionString"]] = 'exclusionString=3000spots\n'
#lines[d["maxSpots"]] = 'maxSpots=5000\n'
lines[d["outputSpotsImage"]] = 'outputSpotsImage=0\n'

In [None]:
# Folder (inside analysis_path) that receives the embryo config files.
# exist_ok=True lets the cell be re-run without raising FileExistsError.
conf_path_embryos = 'config_files_embryos'
os.makedirs(os.path.join(analysis_path, conf_path_embryos), exist_ok=True)

In [None]:
# Folder (inside analysis_path) for the results of the embryo images.
# The original reused 'results_simulated', which collides with the simulated
# results folder (and made os.makedirs fail because it already exists).
# exist_ok=True lets the cell be re-run without raising FileExistsError.
results_path_embryos = 'results_embryos'
os.makedirs(os.path.join(analysis_path, results_path_embryos), exist_ok=True)

In [None]:
# Grid-search values: threshold levels 700..1200 in steps of 50 and minimum
# spot distances 1..3

thrs = list(range(700, 1201, 50))
dists = list(range(1, 4))

### create files

In [None]:
# Write one config file per (embryo image, threshold, min distance)
# combination into the embryo config folder (the original referenced an
# undefined `conf_path`).
conf_dir_embryos = os.path.join(analysis_path, conf_path_embryos)

for i, p in enumerate(input_files):

    lines[d["dataFileName"]] = f'dataFileName={p}\n'

    # Unlike the simulated data, the PSF sigmas are fixed, not parsed from
    # the file name
    sig_xy = 1
    sig_z = 1.4

    # File name without its last four characters (the ".tif" extension)
    str_f = os.path.basename(p)[:-4]

    lines[d["psfSigma"]] = f'psfSigma={sig_xy},{sig_z}\n'

    # The original looped over `thrs_per_file[i]`, which is not defined
    # anywhere in this notebook; the grid-search thresholds are `thrs`.
    for thr in thrs:
        for dist in dists:

            lines[d["saveDirName"]] = f'saveDirName={results_path_embryos}/{thr}_{dist}\n'

            lines[d["threshLevel"]] = f'threshLevel={thr}\n'
            lines[d["minDistBetweenSpots"]] = f'minDistBetweenSpots={dist}\n'

            # Context manager guarantees the file is flushed and closed
            with open(os.path.join(conf_dir_embryos, f'{str_f}_{thr}_{dist}.ini'), "w") as out_file:
                out_file.writelines(lines)

### Run on Matlab
the same way as written above for simulated data

### Analyse Results

In [None]:
# Collect every *.loc4 result file found two directory levels below the
# embryo results folder.
# NOTE(review): the configs set saveDirName to "<results>/<thr>_<dist>", so
# this pattern only matches if the files end up one level deeper -- confirm
# against the actual output layout.
all_results_paths = glob(os.path.join(results_path_embryos,"*","*","*.loc4"))

In [None]:
# For each embryo, the result whose number of points is closest to what
# RSFISH found is compared, so load the RSFISH data here.
# Assumption: more points take longer to analyse.
# The table is read below through its "name" and "n_spots" columns.

df_rs = pd.read_csv('PATH_TO/RSFISH_embryos_npoints_and_times.csv')

In [None]:
# Embryo names from the RSFISH table, used to select the matching result paths
names = df_rs.name.values

In [None]:
# For every embryo, pick the result file whose number of detected spots is
# closest to the number of spots RSFISH found.
lowest_files = []

for i, n in enumerate(names):
    gt_nspots = df_rs.at[i, "n_spots"]

    files = [r for r in all_results_paths if n in r]

    # Reset per embryo: the original kept `lowest_file` from the previous
    # embryo (or left it undefined) when no usable result was found
    lowest_abs = None
    lowest_file = None

    for ff in files:
        nspots_air = pd.read_csv(ff).shape[0]

        if nspots_air == 0:
            # Result files without any detection are deleted from disk
            os.remove(ff)
            continue

        abs_spots = abs(nspots_air - gt_nspots)
        if lowest_abs is None or abs_spots < lowest_abs:
            lowest_abs = abs_spots
            lowest_file = ff

    if lowest_file is None:
        print('no usable result found for', n)
        continue

    lowest_files.append(lowest_file)


#### Get times

In [None]:
# Path of the time table written by the MATLAB loop in the config folder.
# The original built a list by iterating over an undefined `dirs`, which
# pd.read_csv cannot read; a single path is what the next cell needs.
time_file = os.path.join(conf_path_embryos, 'timetable.txt')

In [None]:
# NOTE(review): timetable.txt presumably holds a single data row whose
# columns are named files_* and times_* (MATLAB's writetable expanding the
# row-vector table variables) -- confirm against the actual file.
# Transposing puts those labels in the index and the values in column 0,
# which is the layout the selections below and the later lookups on column 0
# rely on.
df = pd.read_csv(time_file).T

files_mask = df.index.str.contains("files")
times_mask = df.index.str.contains("times")

# .copy() so that adding the "times" column does not write into a slice of df.
# The original assigned to `df_time` but then used `df_times` (NameError).
df_times = df[files_mask].copy()
df_times["times"] = df[times_mask][0].values

df_times = df_times.reset_index()

In [None]:
# Build the per-embryo summary: execution time, embryo name and spot count
df = pd.DataFrame()

for row, loc_path in enumerate(lowest_files):

    path_parts = loc_path.split('/')

    # Config file name: result file name without its last five characters,
    # followed by path component 1 and the ".ini" extension
    fn = path_parts[-1][:-5] + "_" + path_parts[1] + '.ini'

    timing_rows = df_times[df_times[0]==fn]

    df.at[row,"time"] = timing_rows["times"].values[0]
    # Name: the config file name without its last two "_"-separated parts
    df.at[row,"name"] = "_".join((timing_rows[0].values[0]).split('_')[:-2])

    spot_count = pd.read_csv(loc_path).shape[0]
    df.at[row,"n_spots"] = int(spot_count)
    

In [None]:
# Save the summary table. The original was missing the closing parenthesis of
# os.path.join, which made the cell a syntax error and would have passed
# index=False to os.path.join instead of to_csv.
df.to_csv(os.path.join(analysis_path, 'airlocalize_embryos_time.csv'), index=False)