# ClearMap2 parameter sweep on annotated volumes

The goal of this notebook is to run a ClearMap2 parameter sweep over a folder of TIFF volumes that users have annotated, with each volume's annotations saved as a RoiSet.zip file.

## Set up directories and imports

In [None]:
# `os` is needed for the path join below. This cell comes before the main
# imports cell, so import it here to keep Restart Kernel -> Run All working.
import os

# Location of the ClearMap2 checkout (appended to sys.path in the imports cell).
path_to_cm2 = '/home/emilyjanedennis/Desktop/GitHub/rat_BrainPipe/ClearMap2'
# Root folder under which the annotation folder lives.
src = '/Users/emilydennis/Desktop/'
# Folder that is scanned for the tiff volumes and their RoiSet annotations.
ann_folder = os.path.join(src,'mesospim_anns')

In [3]:
# imports 
import copy
import os,sys,json,glob,shutil
import zipfile
import numpy as np
import matplotlib.pyplot as plt
import tifffile as tif
import pandas as pd
import seaborn as sns
from scipy.spatial import distance
sys.path.append(path_to_cm2)

## Define functions

In [54]:
def convert_tiff_to_npy(src,ann_folder,this_ann_folder):
    """Read one tiff volume and save it next to the original as a .npy file.

    Parameters
    ----------
    src, ann_folder, this_ann_folder : str
        Path components joined with ``os.path.join`` to locate the tiff
        file; ``this_ann_folder`` is the tiff file name.

    Returns
    -------
    None
        The array is written to ``<tiff path without extension>.npy``.
    """
    file_loc = os.path.join(src,ann_folder,this_ann_folder)
    imported_tiff = tif.imread(file_loc)
    # splitext removes whatever extension is present ('.tif' or '.tiff').
    # Slicing off a fixed 4 characters left a stray '.' for '.tiff' names.
    base_path, _ext = os.path.splitext(file_loc)
    np.save(base_path + '.npy',imported_tiff)

In [None]:
def get_ann_vals_in_np(src, ann_folder,ann_file):
    """Collect the z, y, x coordinates encoded in the entry names of one annotation set.

    Every entry name must start with three 4-character integers in z, y, x
    order, each separated by a single character (characters 0-3, 5-8 and
    10-13 are parsed), e.g. ``0012-0345-0678.roi``.

    Parameters
    ----------
    src, ann_folder, ann_file : str
        Path components joined with ``os.path.join``. The resulting path
        may be a directory of ROI files or a zip archive of them.

    Returns
    -------
    list of [z, y, x] lists of int, one per entry.

    Raises
    ------
    ValueError
        If an entry name does not have integers at the expected positions.
    """
    ann_path = os.path.join(src,ann_folder,ann_file)
    if zipfile.is_zipfile(ann_path):
        # RoiSet archives: read the member names without unpacking them.
        with zipfile.ZipFile(ann_path) as roi_zip:
            these_anns = [os.path.basename(member)
                          for member in roi_zip.namelist()
                          if not member.endswith('/')]
    else:
        these_anns = os.listdir(ann_path)

    ann_vals=[]
    for ann_name in these_anns:
        z = int(ann_name[0:4])
        y = int(ann_name[5:9])
        x = int(ann_name[10:14])
        ann_vals.append([z,y,x])
    return ann_vals

In [80]:
def pairwise_distance_metrics_given_cdists(
        ground_truth, predicted, y, cutoff=10, verbose=True):
    """
    Greedily pair ground-truth and predicted points from a precomputed
    distance matrix and count true positives, false positives and
    false negatives.

    Candidate pairs are all (truth, predicted) combinations whose distance
    is <= cutoff. They are visited from smallest to largest distance (ties
    keep the row-major order of ``y``) and a pair is accepted only if
    neither of its points has been paired already.

    Inputs:
    -------
    ground_truth, predicted: each iterable
    consisting of ndimensional coordinates. Only their
    lengths are used here.
    y: matrix (array-like, len(ground_truth) x len(predicted))
    of distances between all elements of ground truth and predicted
    cutoff: largest distance at which two points may be paired
    verbose: if True, print the counts and draw a histogram of the
    paired distances on the current matplotlib figure

    Returns:
    -------
    paired: list of (ground_truth's index (from input list),
    predicted's index (from input list), distance)
    tp,fp,fn: statistics on true positives,
    false positives, and false negatives.
    """
    # accept nested lists as well as arrays
    y = np.asarray(y)

    # only keep those distances that are below the cutoff!
    truth_indices, pred_indices = np.where(y <= cutoff)
    candidate_dists = y[truth_indices, pred_indices]

    # visit candidates by smallest dist; the stable sort keeps tie order
    order = np.argsort(candidate_dists, kind="stable")

    used_truth = set()
    used_pred = set()
    paired = []
    for k in order:
        truth_idx = truth_indices[k]
        pred_idx = pred_indices[k]
        if truth_idx not in used_truth and pred_idx not in used_pred:
            paired.append((truth_idx, pred_idx, candidate_dists[k]))
            used_truth.add(truth_idx)
            used_pred.add(pred_idx)
        if len(used_truth) == len(ground_truth) or len(used_pred) == len(predicted):
            # we have used up all the entries from the shorter list
            break

    tp = len(paired)
    fn = len(ground_truth) - len(paired)
    fp = len(predicted) - len(paired)

    if verbose:
        print("TP: {}, FP: {}, FN: {}".format(tp, fp, fn))
        plt.hist([xx[2] for xx in paired], bins=np.max((int(len(paired)/500), 10)))
        plt.title("Histogram of distances - pixel or microns")

    return paired, tp, fp, fn

In [81]:
def filter_cells(source, thresholds):
  """Filter an array of detected cells according to the thresholds.
  
  Arguments
  ---------
  source : array
    The cell data. It must support ``source.shape``, ``source[name]``
    for every name in thresholds, and boolean-mask indexing.
  thresholds : dict
    Dictionary of the form {name : threshold} where name refers to the 
    column in the cell data and threshold can be None (no filtering), a
    number giving a minimal threshold, or a tuple (min,max) where min,max
    can be None or a minimal and maximal threshold value. The minimum is
    inclusive and the maximum is exclusive.
  
  Returns
  -------
  cells_filtered : array
    The rows of source that pass every threshold.
  """
  
  ids = np.ones(source.shape[0], dtype=bool)
  for k,t in thresholds.items():
    # Only None (or an empty tuple/list) means "no threshold". A plain
    # truthiness test would also drop a legitimate threshold of 0.
    if t is None:
      continue
    if not isinstance(t, (tuple, list)):
      t = (t, None)
    if len(t) == 0:
      continue
    if t[0] is not None:
      ids = np.logical_and(ids, t[0] <= source[k])
    if t[1] is not None:
      ids = np.logical_and(ids, t[1] > source[k])
  cells_filtered = source[ids]
  return cells_filtered


In [None]:
def pairwise_distance_metrics(ground_truth, predicted, cutoff=10, verbose=True):
    """
    Compute the euclidean distance between every ground-truth point and
    every predicted point, then hand the matrix to
    pairwise_distance_metrics_given_cdists for pairing and scoring.

    Inputs:
    -------
    ground_truth, predicted: each iterable consisting of
    ndimensional coordinates.
    cutoff, verbose: passed through unchanged to
    pairwise_distance_metrics_given_cdists.

    Returns:
    -------
    paired: list of [ground_truth's index (from input list),
    predicted's index (from input list), distance]
    tp,fp,fn: statistics on true positives, false positives,
    and false negatives.
    """
    if verbose:
        print("\nCalculating pairwise distances...")

    # rows follow ground_truth, columns follow predicted
    cdist_matrix = distance.cdist(ground_truth, predicted, metric="euclidean")
    metrics = pairwise_distance_metrics_given_cdists(
        ground_truth, predicted, cdist_matrix, cutoff, verbose)
    return metrics

## Prepare data for cm2

In [15]:
# Names in the annotation folder, selected by substring: anything containing
# '.tif' counts as a volume, anything containing '.RoiSet' as an annotation set.
listoftiffs = [name for name in os.listdir(ann_folder) if '.tif' in name]
listofanns = [name for name in os.listdir(ann_folder) if '.RoiSet' in name]

In [78]:
# import annotation volumes and reformat for CM2 if in tiff form

In [56]:
# Write a .npy copy of every tiff volume found in the annotation folder.
for tiff_name in listoftiffs:
    convert_tiff_to_npy(src,ann_folder,tiff_name)

TiffPage 0: TypeError: read_bytes() missing 3 required positional arguments: 'dtype', 'count', and 'offsetsize'
TiffPage 0: TypeError: read_bytes() missing 3 required positional arguments: 'dtype', 'count', and 'offsetsize'
TiffPage 0: TypeError: read_bytes() missing 3 required positional arguments: 'dtype', 'count', and 'offsetsize'


In [77]:
# Names in the annotation folder that contain '.npy' (the converted volumes).
listofnpys = [name for name in os.listdir(ann_folder) if '.npy' in name]

## Set up ClearMap2: imports and directories

In [None]:
import ClearMap.IO.Workspace as wsp
import ClearMap.IO.IO as io
import ClearMap.ImageProcessing.Experts.Cells as cells
from itertools import product

In [None]:
# The ClearMap workspace is rooted at the annotation folder, so its file
# names resolve inside that folder.
directory = ann_folder
ws = wsp.Workspace('CellMap', directory=directory)

## Define CM2 parameters to sweep over

In [79]:
# Parameter ranges for the sweep.
# Each background size b is used as the xy tuple (b, b): (5,5), (7,7), (9,9).
background_sizes = list(range(5, 10, 2))
# Shape-detection thresholds: 100 to 450 in steps of 50.
shape_threshold_sizes = list(range(100, 451, 50))

In [None]:
# set up basic, shared cell_detection_parameters
# Deep copy: the nested 'intensity_detection' dict is edited below, and a
# shallow .copy() would write that edit into ClearMap's module-level defaults.
cell_detection_parameter = copy.deepcopy(cells.default_cell_detection_parameter)
cell_detection_parameter['illumination'] = None
cell_detection_parameter['intensity_detection']['measure'] = ['source','background']

# Block-processing options shared by every run of the sweep.
processing_parameter = copy.deepcopy(cells.default_cell_detection_processing_parameter)
processing_parameter.update(
    processes = 'serial',
    size_max = 100, #35,
    size_min = 30, #30,
    overlap  = 15, #10,
    verbose = True
    )

## CM2 Parameter sweep

In [None]:
# Run cell detection on every converted volume for every combination of
# background size and shape threshold. Each run writes its own 'cells' file,
# told apart by the postfix.
for file in listofnpys:
    ws.debug = file
    ws.info()

    for background_size,shape_threshold_size in product(background_sizes,shape_threshold_sizes):
        # Deep copy: the nested dicts are edited per combination, and a
        # shallow .copy() would write those edits into the shared base dict.
        this_cell_parameter_dict = copy.deepcopy(cell_detection_parameter)
        this_cell_parameter_dict['background_correction']['shape'] = (background_size,background_size)
        this_cell_parameter_dict['shape_detection']['threshold'] = shape_threshold_size
        postfix = f'raw_bck{background_size}_shpthresh{shape_threshold_size}'
        # actually detect cells:
        cells.detect_cells(ws.filename('debug'), ws.filename('cells', postfix=postfix),
            cell_detection_parameter=this_cell_parameter_dict,
            processing_parameter=processing_parameter)

## Calculate fp, fn, precision for each volume, save as data frame

# STOPPED HERE

In [None]:
# NOTE(review): this cell does not parse as written. The body under `if filt:`
# is indented far deeper than its header, and the lines after it dedent to
# levels with no matching block. It duplicates the tail of the
# '# maybe filter?' cell further down, so it looks like a partial paste --
# TODO confirm and remove one copy.
# NOTE(review): the visible notebook never defines `filt`, `anns`, `sz`, `sz2`,
# `source`, `alldata`, `trainingfolder` or `alldf` before this cell.
if filt:
                paired, tp, fp, fn = pairwise_distance_metrics(anns,filt,cutoff=30,verbose=False)
                # add output to other outputs
                # NOTE(review): when tp == 0, precision and recall are both 0
                # and the f1 line raises ZeroDivisionError.
                precision = tp/(tp+fp)
                recall = tp/(tp+fn)
                f1 = 2*((precision*recall)/(precision+recall))
                # file[0:-14] drops the last 14 characters of the file name
                datatoadd = file[0:-14], sz, sz2, source, tp, fp, fn, round(precision,2), round(recall,2), round(f1,2)
                alldata.append(datatoadd)
    # when all loops are done, save
    placetosave = os.path.join(trainingfolder,"{}_filt_outputs.npy".format(file[0:-14]))
    np.save(placetosave,alldata)
    df = pd.DataFrame(alldata,columns=['name','sz1','sz2','source','tp','fp','fn','p','r','f1'])
    alldf = pd.concat([alldf,df])
#directories and files
# NOTE(review): `a235` is a one-element list holding a path and is passed as
# `source`. Confirm that ClearMap's filter_cells accepts a list here rather
# than the path string itself.
a235 = ['/home/emilyjanedennis/Desktop/cells_raw.npy']
# 'source' has a minimum of 3; 'size' has a (min, max) pair
thresholds = {'source' : 3,
      'size'   : (30,120)
      }
filetosave= "/home/emilyjanedennis/Desktop/a235_filtered.npy"
cells.filter_cells(source = a235, sink = filetosave, thresholds=thresholds);

In [30]:
# Parse the coordinates of each annotation set in turn. ann_vals is
# overwritten on every pass, so only the last set's values remain afterwards.
for ann_name in listofanns:
    ann_vals = get_ann_vals_in_np(src,ann_folder,ann_name)
    # do bipartite matching

In [None]:
# maybe filter?
# NOTE(review): work in progress -- this cell does not parse as written. The
# first statements are indented but no enclosing block header is visible.
# Presumably this is the body of a per-file loop whose `for` line is
# missing -- TODO confirm.
# NOTE(review): the visible notebook never defines `rawfolder`, `anns`,
# `source`, `trainingfolder` or `alldf`. `file` is only set as the loop
# variable of the parameter-sweep cell.
 # set iters you want -- should move this out of loop and to the top 
    # sz / sz2 become the (min, max) pair of the 'size' threshold below
    sizes = np.arange(10,100,5)
    sizes2 = np.arange(40,140,5)
    # NOTE(review): the list is seeded with an empty tuple that is never
    # removed before np.save / pd.DataFrame below (filtereddata drops its
    # seed with [1:]).
    alldata=[()]
    # for this iter
    for sz in sizes:
        for sz2 in sizes2:
            size = (sz,sz2)
                #set thresholds
            thresholds = {    
                 'source' : 3,
                'size'   : size}
             #filter cells with the notebook's own filter_cells (source in, filtered array out; no sink)
            filtd = filter_cells(source = np.load(os.path.join(rawfolder,file)), 
                       thresholds=thresholds);
            filtereddata=[()]
            #reformat filtered data this is hacky and because I'm being lazy, to change
            # each record is unpacked into five values; the first three are
            # kept, in reversed order
            for l in range(0,np.size(filtd)):
                z, y, x, toss, toss2 = filtd[l]
                vals = x, y, z
                filtereddata.append(vals)
            # drop the empty-tuple seed
            filt = filtereddata[1:]
            #compare filt to ann
            if filt:
                paired, tp, fp, fn = pairwise_distance_metrics(anns,filt,cutoff=30,verbose=False)
                # add output to other outputs
                # NOTE(review): when tp == 0, precision and recall are both 0
                # and the f1 line raises ZeroDivisionError.
                precision = tp/(tp+fp)
                recall = tp/(tp+fn)
                f1 = 2*((precision*recall)/(precision+recall))
                # file[0:-14] drops the last 14 characters of the file name
                datatoadd = file[0:-14], sz, sz2, source, tp, fp, fn, round(precision,2), round(recall,2), round(f1,2)
                alldata.append(datatoadd)
    # when all loops are done, save
    placetosave = os.path.join(trainingfolder,"{}_filt_outputs.npy".format(file[0:-14]))
    np.save(placetosave,alldata)
    df = pd.DataFrame(alldata,columns=['name','sz1','sz2','source','tp','fp','fn','p','r','f1'])
    alldf = pd.concat([alldf,df])
#directories and files
# NOTE(review): `a235` is a one-element list holding a path and is passed as
# `source`. Confirm that ClearMap's filter_cells accepts a list here rather
# than the path string itself.
a235 = ['/home/emilyjanedennis/Desktop/cells_raw.npy']
# 'source' has a minimum of 3; 'size' has a (min, max) pair
thresholds = {'source' : 3,
      'size'   : (30,120)
      }
filetosave= "/home/emilyjanedennis/Desktop/a235_filtered.npy"
cells.filter_cells(source = a235, sink = filetosave, thresholds=thresholds);


In [33]:
#pd.DataFrame(ann_vals,columns=["z","y","x"])

Unnamed: 0,x,y,z
0,23,63,257
1,7,398,137
2,21,159,373
3,4,278,21
4,4,63,383
...,...,...,...
79,1,387,170
80,14,398,71
81,8,384,197
82,2,75,169
