# 4. QUANTIFICATION 📏📐🧮

----------

## OVERVIEW - Quantify organelle morphometrics
Our goal is to quantify the shape, size, and distribution of individual organelle objects identified through segmentation (Notesbooks 4-10) and the interactions between those organelles, or ***organelle contacts***. Quantification takes advantage of [scikit-image](https://scikit-image.org/) regionprops tools to measure size and shape features of organelles and organelle contacts and the *MeasureObjectIntensityDistribution* framework from [CellProfiler](https://cellprofiler.org/) to measure organelle distribution.

This notebook sets up the logic of our quantification functions that quantify the following:
1. Amount, size, and shape of individual organelles
2. Amount, size, and shape of organelle contacts
3. Distribution of organelles or contact in XY and Z, separately

## IMPORTS

In [2]:
# top level imports
from pathlib import Path
import os, sys

import parse

import napari

### import local python functions in ../infer_subc
sys.path.append(os.path.abspath((os.path.join(os.getcwd(), '..'))))

from infer_subc.core.file_io import (read_czi_image,
                                        export_inferred_organelle,
                                        import_inferred_organelle,
                                        export_tiff,
                                        list_image_files)

from infer_subc.core.img import *
from infer_subc.utils.stats import *
from infer_subc.utils.stats import (_my_props_to_dict, _assert_uint16_labels)
from infer_subc.utils.stats_helpers import *

from infer_subc.organelles import * 

from infer_subc.constants import (TEST_IMG_N,
                                    NUC_CH ,
                                    LYSO_CH ,
                                    MITO_CH ,
                                    GOLGI_CH ,
                                    PEROX_CH ,
                                    ER_CH ,
                                    LD_CH ,
                                    RESIDUAL_CH )     

import time
%load_ext autoreload
%autoreload 2

[autoreload of skimage.util.apply_parallel failed: Traceback (most recent call last):
  File "c:\Users\Shannon\anaconda3\envs\infer-subc\lib\site-packages\IPython\extensions\autoreload.py", line 273, in check
    superreload(m, reload, self.old_objects)
  File "c:\Users\Shannon\anaconda3\envs\infer-subc\lib\site-packages\IPython\extensions\autoreload.py", line 471, in superreload
    module = reload(module)
  File "c:\Users\Shannon\anaconda3\envs\infer-subc\lib\importlib\__init__.py", line 169, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 619, in _exec
  File "<frozen importlib._bootstrap_external>", line 883, in exec_module
  File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
  File "c:\Users\Shannon\anaconda3\envs\infer-subc\lib\site-packages\skimage\util\apply_parallel.py", line 3, in <module>
    from .._shared.utils import channel_as_last_axis, deprecate_multichannel_kwarg
ImportError: cannot import name 'chann

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Specify directories and image from testing 
Specifically: the input (raw images, segmentation outputs) and output (.csv files with quantification data) directories and a test image set for processing in this notebook.

In [3]:
test_img_n = TEST_IMG_N

data_root_path = Path(os.path.expanduser("~")) / "Documents/Python Scripts/Infer-subc-2D"

raw_data_path = data_root_path / "raw"

im_type = ".czi"


raw_file_list = list_image_files(raw_data_path,im_type)
test_img_name = raw_file_list[test_img_n]

# adding an additional list of image paths for the output files
seg_data_path = data_root_path / "out"
seg_file_list = list_image_files(seg_data_path, "tiff")

out_data_path = data_root_path / "quant"
if not Path.exists(out_data_path):
    Path.mkdir(out_data_path)
    print(f"making {out_data_path}")

In [4]:
img_data, meta_dict = read_czi_image(test_img_name)

channel_names = meta_dict['name']
img = meta_dict['metadata']['aicsimage']
scale = meta_dict['scale']
channel_axis = meta_dict['channel_axis']

  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


Import all segmentation outputs

In [5]:
# masks
nuc_seg = import_inferred_organelle("nuc", meta_dict, seg_data_path)
cell_seg = import_inferred_organelle("cell", meta_dict, seg_data_path)
cyto_seg = import_inferred_organelle("cyto", meta_dict, seg_data_path)
# mask_seg = import_inferred_organelle("masks", meta_dict, seg_data_path)

#organelles
lyso_seg = import_inferred_organelle("lyso", meta_dict, seg_data_path)
mito_seg = import_inferred_organelle("mito", meta_dict, seg_data_path)
golgi_seg = import_inferred_organelle("golgi", meta_dict, seg_data_path)
perox_seg = import_inferred_organelle("perox", meta_dict, seg_data_path)
LD_seg = import_inferred_organelle("LD", meta_dict, seg_data_path)
ER_seg = import_inferred_organelle("ER", meta_dict, seg_data_path)

loaded  inferred 3D `nuc`  from C:\Users\Shannon\Documents\Python Scripts\Infer-subc-2D\out 
loaded  inferred 3D `cell`  from C:\Users\Shannon\Documents\Python Scripts\Infer-subc-2D\out 
loaded  inferred 3D `cyto`  from C:\Users\Shannon\Documents\Python Scripts\Infer-subc-2D\out 
loaded  inferred 3D `lyso`  from C:\Users\Shannon\Documents\Python Scripts\Infer-subc-2D\out 
loaded  inferred 3D `mito`  from C:\Users\Shannon\Documents\Python Scripts\Infer-subc-2D\out 
loaded  inferred 3D `golgi`  from C:\Users\Shannon\Documents\Python Scripts\Infer-subc-2D\out 
loaded  inferred 3D `perox`  from C:\Users\Shannon\Documents\Python Scripts\Infer-subc-2D\out 
loaded  inferred 3D `LD`  from C:\Users\Shannon\Documents\Python Scripts\Infer-subc-2D\out 
loaded  inferred 3D `ER`  from C:\Users\Shannon\Documents\Python Scripts\Infer-subc-2D\out 


In [6]:
# NEED TO DEPRICATE OR UPDATE THIS SO THAT THESE FUNCTIONS DO NOT CREATE A SEGMENTATION BASED ON HARD CODED STUFF

# ###################
# # CELLMASK, NUCLEI, CYTOPLASM, NUCLEUS
# ###################
# nuclei_obj =  get_nuclei(img_data,meta_dict, out_data_path)
# cellmask_obj = get_cellmask(img_data, nuclei_obj, meta_dict, out_data_path)
# cyto_mask = get_cytoplasm(nuclei_obj , cellmask_obj , meta_dict, out_data_path)

## ** ***WIP*** automate reading in segmentation based on raw image name
Make sure it is not affected by differing names in the segmentation output

In [7]:
file_name = source_file.stem
file_name

NameError: name 'source_file' is not defined

In [37]:
str_len = len(file_name)
str_len

23

In [52]:
seg_file_list = list_image_files(out_data_path, "tiff")
segmentaiton_name = seg_file_list[0].stem
segmentaiton_name

'a24hrs-Ctrl_14_Unmixing-cell'

In [56]:
for raw_img in img_file_list:
    
    for img in seg_file_list:
        if img.stem.endswith("cell"):
            cell_path = img
        if img.stem.endswith("nuc"):
            nuc_path = img
    cell_path, nuc_path

(WindowsPath('C:/Users/Shannon/Documents/Python Scripts/Infer-subc-2D/out/a24hrs-Ctrl_14_Unmixing-cell.tiff'),
 WindowsPath('C:/Users/Shannon/Documents/Python Scripts/Infer-subc-2D/out/a24hrs-Ctrl_14_Unmixing-nuc.tiff'))

In [45]:
identifier = segmentaiton_name[23:]
identifier

'-cell'

In [50]:
if identifier.endswith("cell"):
    cell = segmentaiton_name
elif "nuc":
    nuc = segmentaiton_name

In [51]:
cell

'a24hrs-Ctrl_14_Unmixing-cell'

In [49]:
cell = identifier.endswith("cell")
cell

True

-------------------------
## regionprops

`skimage.measure.regionprops` provides tools quantification of amount, size, and shape. It will be utilized to characterize orangelles and organelle contact sites (regions of overlap between organelles).

> Some measurements are not possible in 3D, so lets first see what works on our 3D image: 
>> Note: the names of the regionprops correspond to the 2D analysis even for those which are well defined in 3D.  i.e. "area" is actually "volume" in 3D, etc.

In [8]:
# labels = label(nuc_seg )
# rp = regionprops(labels, intensity_image=img_data[NUC_CH], spacing=scale)

# supported = [] 
# unsupported = []

# for prop in rp[0]:
#     try:
#         rp[0][prop]
#         supported.append(prop)
#     except NotImplementedError:
#         unsupported.append(prop)

# print("Supported properties:")
# print("  " + "\n  ".join(supported))
# print()
# print("Unsupported properties:")
# print("  " + "\n  ".join(unsupported))

Supported properties:
  area
  area_bbox
  area_convex
  area_filled
  axis_major_length
  axis_minor_length
  bbox
  centroid
  centroid_local
  centroid_weighted
  centroid_weighted_local
  coords
  equivalent_diameter_area
  euler_number
  extent
  feret_diameter_max
  image
  image_convex
  image_filled
  image_intensity
  inertia_tensor
  inertia_tensor_eigvals
  intensity_max
  intensity_mean
  intensity_min
  label
  moments
  moments_central
  moments_normalized
  moments_weighted
  moments_weighted_central
  moments_weighted_normalized
  slice
  solidity

Unsupported properties:
  eccentricity
  moments_hu
  moments_weighted_hu
  orientation
  perimeter
  perimeter_crofton


-----------------
## ORGANELLE AMOUNT AND MORPHOLOGY

#### 1. Build the list of measurements we want to include from regionprops

In [7]:
# start with LABEL
properties = ["label"]

# add position
properties = properties + ["centroid", "bbox"]

# add area
properties = properties + ["area", "equivalent_diameter"] # "num_pixels", 

# add shape measurements
properties = properties + ["extent", "feret_diameter_max", "euler_number", "convex_area", "solidity", "axis_major_length", "axis_minor_length"]

# add intensity values (used for quality checks)
properties = properties + ["max_intensity", "mean_intensity", "min_intensity"]

properties

['label',
 'centroid',
 'bbox',
 'area',
 'equivalent_diameter',
 'extent',
 'feret_diameter_max',
 'euler_number',
 'convex_area',
 'solidity',
 'axis_major_length',
 'axis_minor_length',
 'max_intensity',
 'mean_intensity',
 'min_intensity']

#### 2. Add additional measurements as "extra_properties" with custom functions.

In [8]:
# create a function to include the standard deviation of intensities
def _standard_deviation_intensity(region, intensities):
    return np.std(intensities[region])

extra_properties = [_standard_deviation_intensity]

In [9]:
# creating a function to measure the surface area of each object. This function utilizes "marching_cubes" to generate a mesh (non-pixelated object)
def _surface_area_from_props(labels, props):
    # SurfaceArea
    surface_areas = np.zeros(len(props["label"]))
    # TODO: spacing = [1, 1, 1] # this is where we could deal with anisotropy in Z

    for index, lab in enumerate(props["label"]):
        # this seems less elegant than you might wish, given that regionprops returns a slice,
        # but we need to expand the slice out by one voxel in each direction, or surface area freaks out
        volume = labels[
            max(props["bbox-0"][index] - 1, 0) : min(props["bbox-3"][index] + 1, labels.shape[0]),
            max(props["bbox-1"][index] - 1, 0) : min(props["bbox-4"][index] + 1, labels.shape[1]),
            max(props["bbox-2"][index] - 1, 0) : min(props["bbox-5"][index] + 1, labels.shape[2]),
        ]
        volume = volume == lab
        verts, faces, _normals, _values = marching_cubes(
            volume,
            method="lewiner",
            spacing=(1.0,) * labels.ndim,
            level=0,
        )
        surface_areas[index] = mesh_surface_area(verts, faces)

    return surface_areas

#### 3. Run regionprops and export values as a pandas dataframe

In [10]:
rp = regionprops(label_image=nuc_seg,intensity_image=img_data[0], extra_properties=extra_properties)

props = _my_props_to_dict(rp, nuc_seg, intensity_image=img_data[0], properties=properties, extra_properties=extra_properties)

props_table = pd.DataFrame(props)

props_table.rename(columns={"area": "volume"}, inplace=True)

In [11]:
# props["surface_area"] = surface_area_from_props(nuc_seg, props)
surface_area_tab = pd.DataFrame(_surface_area_from_props(nuc_seg, props))

props_table.insert(11, "surface_area", surface_area_tab)

In [12]:
pd.set_option('display.max_columns', None)

props_table

Unnamed: 0,label,centroid-0,centroid-1,centroid-2,bbox-0,bbox-1,bbox-2,bbox-3,bbox-4,bbox-5,volume,surface_area,equivalent_diameter,extent,feret_diameter_max,euler_number,convex_area,solidity,axis_major_length,axis_minor_length,max_intensity,mean_intensity,min_intensity,_standard_deviation_intensity
0,1,6.852718,395.342064,352.456152,0,285,243,15,503,462,383089,73113.820312,90.108761,0.534943,241.770552,1,436169,0.878304,263.125917,15.700109,8392.0,2096.299672,0.0,1033.459225


#### 4. Define `get_summary_stats_3D()` function

Based on the _prototyping_ above define the function to quantify amount, size, and shape of organelles.

In [24]:
def _get_regionprops_3D(segmentation_img: np.ndarray, intensity_img, mask: np.ndarray) -> Tuple[Any, Any]:
    """
    Parameters
    ------------
    segmentation_img:
        a 3d np.ndarray image of the segemented organelles
    intensity_img:
        a 3d np.ndarray image of the "raw" florescence intensity the segmentation was based on
    mask:
        a 3d np.ndarray image of the cell mask (or other mask of choice); used to create a "single cell" analysis

    Returns
    -------------
    pandas dataframe of containing regionprops measurements (columns) for each object in the segmentation image (rows) and the regionprops object

    Regionprops measurements included:
    ['label',
    'centroid',
    'bbox',
    'area',
    'equivalent_diameter',
    'extent',
    'feret_diameter_max',
    'euler_number',
    'convex_area',
    'solidity',
    'axis_major_length',
    'axis_minor_length',
    'max_intensity',
    'mean_intensity',
    'min_intensity']

    Additional measurement include:
    ['standard_deviation_intensity',
    'surface_area']
    """
    ###################################################
    ## MASK THE ORGANELLE OBJECTS THAT WILL BE MEASURED
    ###################################################
    # in case we sent a boolean mask (e.g. cyto, nucleus, cellmask)
    input_labels = _assert_uint16_labels(segmentation_img)

    # mask
    input_labels = apply_mask(segmentation_img, mask)

    ##########################################
    ## CREATE LIST OF REGIONPROPS MEASUREMENTS
    ##########################################
    # start with LABEL
    properties = ["label"]

    # add position
    properties = properties + ["centroid", "bbox"]

    # add area
    properties = properties + ["area", "equivalent_diameter"] # "num_pixels", 

    # add shape measurements
    properties = properties + ["extent", "feret_diameter_max", "euler_number", "convex_area", "solidity", "axis_major_length", "axis_minor_length"]

    # add intensity values (used for quality checks)
    properties = properties + ["max_intensity", "mean_intensity", "min_intensity"]

    #######################
    ## ADD EXTRA PROPERTIES
    #######################
    def _standard_deviation_intensity(region, intensities):
        return np.std(intensities[region])

    extra_properties = [_standard_deviation_intensity]

    ##################
    ## RUN REGIONPROPS
    ##################
    rp = regionprops(segmentation_img, intensity_image=intensity_img, extra_properties=extra_properties)

    props = _my_props_to_dict(
        rp, segmentation_img, intensity_image=intensity_img, properties=properties, extra_properties=extra_properties
    )

    props_table = pd.DataFrame(props)
    props_table.rename(columns={"area": "volume"}, inplace=True)

    ##################################################################
    ## RUN SURFACE AREA FUNCTION SEPARATELY AND APPEND THE PROPS_TABLE
    ##################################################################
    surface_area_tab = pd.DataFrame(surface_area_from_props(input_labels, props))

    props_table.insert(11, "surface_area", surface_area_tab)

    ################################################################
    ## ADD SKELETONIZATION OPTION FOR MEASURING LENGTH AND BRANCHING
    ################################################################
    #  # ETC.  skeletonize via cellprofiler /Users/ahenrie/Projects/Imaging/CellProfiler/cellprofiler/modules/morphologicalskeleton.py
    #         if x.volumetric:
    #             y_data = skimage.morphology.skeletonize_3d(x_data)
    # /Users/ahenrie/Projects/Imaging/CellProfiler/cellprofiler/modules/measureobjectskeleton.py

    return props_table, rp

In [25]:
org_img = img_data[0]    
org_obj = nuc_seg
cell_mask = cell_seg

nuc_table, rp = _get_regionprops_3D(org_obj, org_img, cell_mask)

In [26]:
nuc_table

Unnamed: 0,label,centroid-0,centroid-1,centroid-2,bbox-0,bbox-1,bbox-2,bbox-3,bbox-4,bbox-5,volume,surface_area,equivalent_diameter,extent,feret_diameter_max,euler_number,convex_area,solidity,axis_major_length,axis_minor_length,max_intensity,mean_intensity,min_intensity,_standard_deviation_intensity
0,1,6.852718,395.342064,352.456152,0,285,243,15,503,462,383089,73113.820312,90.108761,0.534943,241.770552,1,436169,0.878304,263.125917,15.700109,8392.0,2096.299672,0.0,1033.459225


In [27]:
nuc_table.equals(props_table)

True

#### 5. Add prototype function into stats.py

In [28]:
from infer_subc.utils.stats import get_regionprops_3D

nuc_table_TEST, rp = get_regionprops_3D(org_obj, org_img, cell_mask)

In [40]:
nuc_table_TEST

Unnamed: 0,label,centroid-0,centroid-1,centroid-2,bbox-0,bbox-1,bbox-2,bbox-3,bbox-4,bbox-5,volume,surface_area,equivalent_diameter,extent,feret_diameter_max,euler_number,convex_area,solidity,axis_major_length,axis_minor_length,max_intensity,mean_intensity,min_intensity,standard_deviation_intensity
0,1,6.852718,395.342064,352.456152,0,285,243,15,503,462,383089,73113.820312,90.108761,0.534943,241.770552,1,436169,0.878304,263.125917,15.700109,8392.0,2096.299672,0.0,1033.459225


#### We will use ordered lists to align the raw data channels with their corresponding segmentation output and name.

In [None]:
# names of organelles we have
organelle_names = ["nuc","lyso", "mito","golgi","perox","ER","LD"]

# get_methods  = [get_nuclei,
#                 get_lyso,
#                 get_mito,
#                 get_golgi,
#                 get_perox,
#                 get_ER,
#                 get_LD]

# load all the organelle segmentations
# organelles = [meth(img_data,meta_dict, out_data_path) for meth in get_methods]
segmentations = [nuc_seg,
                lyso_seg,
                mito_seg,
                golgi_seg,
                perox_seg,
                ER_seg,
                LD_seg]

# get the intensities
organelle_channels = [NUC_CH, LYSO_CH,MITO_CH,GOLGI_CH,PEROX_CH,ER_CH,LD_CH]

intensities = [img_data[ch] for ch in organelle_channels]

-----------------
## ORGANELLE CONTACTS

The region of overlap between each pair of organelles will be considered the "organelle contact". This can also be done with higher order contacts as well (i.e., three-way, four-way, etc.), but that is not yet implemented here.

> NOTE: Since biological contact sites are smaller than our resolution limit for confocal microscopy, the region of overlap is only an estimation of contacts.
>> It may be helpful to dilate a single organelle (the organelle of interest) before determining the overlap region. This will include any touching, but not overlapping interactions and may improve our ability to detect differences in this subresolution system.

#### 1. Build the list of measurements we want to include from regionprops

In [None]:
# start with LABEL
Xproperties = ["label"]

# add position
Xproperties = Xproperties + ["centroid", "bbox"]

# add area
Xproperties = Xproperties + ["area", "equivalent_diameter"] # "num_pixels", 

# add shape measurements
Xproperties = Xproperties + ["extent", "feret_diameter_max", "euler_number", "convex_area", "solidity", "axis_major_length", "axis_minor_length", "slice"]

Xproperties

#### 2. Create region of overlap between organelle 'a' and organelle 'b'

In [None]:
a = _assert_uint16_labels(lyso_seg)
b = _assert_uint16_labels(mito_seg)

a_int_b = np.logical_and(a > 0, b > 0)

labels = label(apply_mask(a_int_b, cell_seg)).astype("int")

#### 3. Create region of overlap using a "shell" or similar to the membrane of organelle 'a'

In [None]:
a_shell_int_b = np.logical_and(np.logical_xor(a > 0, binary_erosion(a > 0)), b > 0)

shell_labels = label(apply_mask(a_shell_int_b, cell_seg)).astype("int")

In [None]:
props = regionprops_table(labels, intensity_image=None, extra_properties=None)

surface_area_tab = pd.DataFrame(surface_area_from_props(labels, props))

In [None]:
label_a = []
index_ab = []
label_b = []
for index, lab in enumerate(props["label"]):
    # this seems less elegant than you might wish, given that regionprops returns a slice,
    # but we need to expand the slice out by one voxel in each direction, or surface area freaks out
    volume = labels[props["slice"][index]]
    la = a[props["slice"][index]]
    lb = b[props["slice"][index]]
    volume = volume == lab
    la = la[volume]
    lb = lb[volume]

    all_as = np.unique(la[la>0]).tolist()
    all_bs = np.unique(lb[lb>0]).tolist()
    if len(all_as) != 1:
        print(f"we have an error.  as-> {all_as}")
    if len(all_bs) != 1:
        print(f"we have an error.  bs-> {all_bs}")

    label_a.append(all_as[0] )
    label_b.append(all_bs[0] )
    index_ab.append(f"{all_as[0]}_{all_bs[0]}")

In [None]:
props["label_A"] = label_a ## TODO: FIND A WAY TO INSERT ACTUAL ORGANELLE NAME, NOT "a" OR "b"
props["label_b"] = label_b
props_table = pd.DataFrame(props)
props_table.insert(11, "surface_area", surface_area_tab)
props_table.rename(columns={"area": "volume"}, inplace=True)
props_table.drop(columns="slice", inplace=True)
props_table.insert(loc=0,column='label_',value=index_ab)
props_table.insert(loc=0,column='shell',value=use_shell_a)

#### 4. Define `get_aXb_measurements_3D()` function

Based on the _prototyping_ above define the function to quantify amount, size, and shape of organelles.

In [None]:
def _get_aXb_measurements_3D(a, b, mask, use_shell_a=False):
    """
    collect volumentric measurements of organelle `a` intersect organelle `b`

    Parameters
    ------------
    a:
        a 3D np.ndarray image of one segemented organelle
    b:
        a 3D np.ndarray image of a second segemented organelle
    mask:
        a 3d np.ndarray image of the cell mask (or other mask of choice); used to create a "single cell" analysis
    use_shell_a:
        creates a "shell" of organelle a to simulate just the membrane area of the organelle and the performs the overlaps; all the same measurements are carried out of the shell region

    Returns
    -------------
    pandas dataframe of containing regionprops measurements (columns) for each overlap region between a and b (rows)

    Regionprops measurements included:
    ['label',
    'centroid',
    'bbox',
    'area',
    'equivalent_diameter',
    'extent',
    'feret_diameter_max',
    'euler_number',
    'convex_area',
    'solidity',
    'axis_major_length',
    'axis_minor_length']

    Additional measurement include:
    ['surface_area']
 
    """

    ##########################################
    ## CREATE LIST OF REGIONPROPS MEASUREMENTS
    ##########################################
    # start with LABEL
    properties = ["label"]

    # add position
    properties = properties + ["centroid", "bbox"]

    # add area
    properties = properties + ["area", "equivalent_diameter"] # "num_pixels", 

    # add shape measurements
    properties = properties + ["extent", "feret_diameter_max", "euler_number", "convex_area", "solidity", "axis_major_length", "axis_minor_length", "slice"]

    #########################
    ## CREATE OVERLAP REGIONS
    #########################
    a = _assert_uint16_labels(a)
    b = _assert_uint16_labels(b)

    if use_shell_a:
        a_int_b = np.logical_and(np.logical_xor(a > 0, binary_erosion(a > 0)), b > 0)
    else:
        a_int_b = np.logical_and(a > 0, b > 0)

    labels = label(apply_mask(a_int_b, mask)).astype("int")

    ##################
    ## RUN REGIONPROPS
    ##################
    props = regionprops_table(labels, intensity_image=None, extra_properties=None)

    ##################################################################
    ## RUN SURFACE AREA FUNCTION SEPARATELY AND APPEND THE PROPS_TABLE
    ##################################################################
    surface_area_tab = pd.DataFrame(surface_area_from_props(labels, props))

    ######################################################
    ## LIST WHICH ORGANELLES ARE INVOLVED IN THE CONTACTS
    ######################################################
    label_a = []
    index_ab = []
    label_b = []
    for index, lab in enumerate(props["label"]):
        # this seems less elegant than you might wish, given that regionprops returns a slice,
        # but we need to expand the slice out by one voxel in each direction, or surface area freaks out
        volume = labels[props["slice"][index]]
        la = a[props["slice"][index]]
        lb = b[props["slice"][index]]
        volume = volume == lab
        la = la[volume]
        lb = lb[volume]

        all_as = np.unique(la[la>0]).tolist()
        all_bs = np.unique(lb[lb>0]).tolist()
        if len(all_as) != 1:
            print(f"we have an error.  as-> {all_as}")
        if len(all_bs) != 1:
            print(f"we have an error.  bs-> {all_bs}")

        label_a.append(all_as[0] )
        label_b.append(all_bs[0] )
        index_ab.append(f"{all_as[0]}_{all_bs[0]}")


    props["label_A"] = label_a ## TODO: FIND A WAY TO INSERT ACTUAL ORGANELLE NAME, NOT "a" OR "b"
    props["label_b"] = label_b
    props_table = pd.DataFrame(props)
    props_table.insert(11, "surface_area", surface_area_tab)
    props_table.rename(columns={"area": "volume"}, inplace=True)
    props_table.drop(columns="slice", inplace=True)
    props_table.insert(loc=0,column='label_',value=index_ab)
    props_table.insert(loc=0,column='shell',value=use_shell_a)

    return props_table

In [None]:


#mito
b = _assert_uint16_labels(organelles[2])
nmi = organelle_names[2]

cross_tab = get_aXb_stats_3D(org_obj, b, organelle_mask) 
shell_cross_tab = get_aXb_stats_3D(org_obj, b, organelle_mask, use_shell_a=True)

merged_tab = pd.concat([cross_tab,shell_cross_tab])
merged_tab.insert(loc=0,column='organelle_b',value=nmi )


-----------------
## DISTRIBUTION  


### Radial distribution 

### 2D projection of inferred objects (and masks, florescence image)

Segment image in 3D;
sum projection of binary image; 
create 5 concentric rings going from the edge of the nuclie to the edge of the cellmask (ideally these will be morphed to cellmask/nuclei shape as done in CellProfiler); 
measure intensity per ring (include nuclei as the center area to measure from)/ring area; 
the normalized measurement will act as a frequency distribution of that organelle starting from the nuclei bin going out to the cell membrane - 
Measurements needed: mean, median, and standard deviation of the frequency will be calculated

- pre-processing
  1. Make 2D sum projection of binary segmentation
  2. Create 5 (default) bins linearly between edge of the nuclei to the edge of the cellmask - these are somewhat like rings morphed to the shape of the nuclei and cellmask, or more accurately like terrain lines of the normalized radial distance beween teh edge of the nuclei and the edge of the cellmask.
  3. Use nucleus + concentric rings to mask the 2D sum project into radial distribution regions: nuclei = bin 1, ... largest/outter most ring = bin 6. See similar concept in CellProfiler: https://cellprofiler-manual.s3.amazonaws.com/CellProfiler-4.2.5/modules/measurement.html?highlight=distribution#module-cellprofiler.modules.measureobjectintensitydistribution"	
   


The logic was borrowed from CellProfiler, but alorithm somewhate simplified by making assumpitions of doing all estimates over a single cellmask (single cell).   Most of the code should be capable of performing the more complicated multi-object versions as CellProfiler does.  Although this functionality is untested the source code was left in this more complex format in case it might be updated for this functionality in the future.




## Zernicky distributions...
- get the magnitude and phase for the zernike 
- he Zernike features characterize the distribution of intensity across the object. For instance, Zernike 1,1 has a high value if the intensity is low on one side of the object and high on the other. The zernike magnitudes feature records the rotationally invariant degree magnitude of the moment and the zernike phase feature gives the moment’s orientation

`zernike_degree` (default = 9) chooses how many moments to calculate.


The logic was borrowed from CellProfiler, but alorithm greatly simplified by making assumpitions of doing all estimates over a single cellmask (single cell)

In [None]:
# csv_path = out_data_path / f"{o}_{meta_dict["file_name"].split('/')[-1].split('.')[0]}_stats.csv"
Path(meta_dict['file_name']).name

'ZSTACK_PBTOhNGN2hiPSCs_BR1_N19_Unmixed.czi'

In [None]:
organelles[0].shape, organelle_names[0]

((15, 768, 768), 'nuclei')

In [15]:
test_org = 1

# args 
cellmask_obj
nuclei_obj
organelle_mask = cyto_mask
organelle_name = organelle_names[test_org]
org_obj = organelles[test_org]
org_img = intensities[test_org]

n_rad_bins = 5
n_zernike = 9

target = organelle_name

In [16]:
        # now get radial stats
rad_stats,z_stats, bin_index = get_radial_stats(        
                                cellmask_obj,
                                organelle_mask,
                                org_obj,
                                org_img,
                                target,
                                nuclei_obj,
                                n_rad_bins,
                                n_zernike
                                )


In [None]:
viewer = napari.view_image(bin_idx)


### depth - summary
Segment image in 3D;
measure area fraction of each organelle per Z slice;
these measurements will act as a frequency distribution of that organelle starting from the bottom of the cellmask (not including neurites) to the top of the cellmask;
measurements: mean, median, and standard deviation of the frequency distribution	

- pre-processing
  1. subtract nuclei from the cellmask --> cellmask cytoplasm
  2. mask organelle channels with cellmask cytoplasm mask

- per-object measurements
  - For each Z slice in the masked binary image measure:
    1. organelle area
    2. cellmask cytoplasm area

- per-object calculations
  - For each Z slice in the masked binary image: organelle area / cellmask cytoplasm area

- per cell summary
  1. create a frequency table with the z slice number on the x axis and the area fraction on the y axis
  2. Measure the frequency distribution's mean, median, and standard deviation for each cell"

In [None]:
viewer.add_image(cellmask_obj>0)

<Image layer 'Image' at 0x2bf465db0>

In [106]:

d_stats = get_depth_stats(        
                cellmask_obj,
                organelle_mask,
                org_obj,
                org_img,
                target,
                nuclei_obj
                )
  

# putting it all together

`make_organelle_stat_tables` prototype

In [13]:

from infer_subc.utils.stats import _assert_uint16_labels

organelle_to_colname = {"nuc":"NU", "lyso": "LY", "mito":"MT", "golgi":"GL", "perox":"PR", "ER":"ER", "LD":"LD", "cell":"CM", "cyto":"CY", "nucleus": "N1","nuclei":"NU",}

def _make_organelle_stat_tables(
    organelle_names: List[str],
    organelles: List[np.ndarray],
    intensities: List[np.ndarray],
    nuclei_obj:np.ndarray, 
    cellmask_obj:np.ndarray,
    organelle_mask: np.ndarray, 
    out_data_path: Path, 
    source_file: str,
    n_rad_bins: Union[int,None] = None,
    n_zernike: Union[int,None] = None,
) -> int:
    """
    get summary and all cross stats between organelles `a` and `b`
    calls `get_summary_stats_3D`
    """
    count = 0
    org_stats_tabs = []
    for j, target in enumerate(organelle_names):
        org_img = intensities[j]        
        org_obj = _assert_uint16_labels(organelles[j])

        # A_stats_tab, rp = get_simple_stats_3D(A,mask)
        a_stats_tab, rp = get_summary_stats_3D(org_obj, org_img, organelle_mask)
        a_stats_tab.insert(loc=0,column='organelle',value=target )
        a_stats_tab.insert(loc=0,column='ID',value=source_file.stem )

        # add the touches for all other organelles
        # loop over Bs
        merged_tabs = []
        for i, nmi in enumerate(organelle_names):
            if i != j:
                # get overall stats of intersection
                # print(f"  b = {nmi}")
                count += 1
                # add the list of touches
                b = _assert_uint16_labels(organelles[i])

                ov = []
                b_labs = []
                labs = []
                for idx, lab in enumerate(a_stats_tab["label"]):  # loop over A_objects
                    xyz = tuple(rp[idx].coords.T)
                    cmp_org = b[xyz]
                    
                    # total number of overlapping pixels
                    overlap = sum(cmp_org > 0)
                    # overlap?
                    labs_b = cmp_org[cmp_org > 0]
                    b_js = np.unique(labs_b).tolist()

                    # if overlap > 0:
                    labs.append(lab)
                    ov.append(overlap)
                    b_labs.append(b_js)

                cname = organelle_to_colname[nmi]
                # add organelle B columns to A_stats_tab
                a_stats_tab[f"{cname}_overlap"] = ov
                a_stats_tab[f"{cname}_labels"] = b_labs  # might want to make this easier for parsing later

                #####  2  ###########
                # get cross_stats

                cross_tab = get_aXb_stats_3D(org_obj, b, organelle_mask) 
                shell_cross_tab = get_aXb_stats_3D(org_obj, b, organelle_mask, use_shell_a=True)
                            
                # cross_tab["organelle_b"]=nmi
                # shell_cross_tab["organelle_b"]=nmi
                #  Merge cross_tabs and shell_cross_tabs 
                # merged_tab = pd.merge(cross_tab,shell_cross_tab, on="label_")
                merged_tab = pd.concat([cross_tab,shell_cross_tab])
                merged_tab.insert(loc=0,column='organelle_b',value=nmi )

                merged_tabs.append( merged_tab )


        #  Now append the 
        # csv_path = out_data_path / f"{source_file.stem}-{target}_shellX{nmi}-stats.csv"
        # e_stats_tab.to_csv(csv_path)
        # stack these tables for each organelle
        crossed_tab = pd.concat(merged_tabs)
        # csv_path = out_data_path / f"{source_file.stem}-{target}X{nmi}-stats.csv"
        # stats_tab.to_csv(csv_path)
        crossed_tab.insert(loc=0,column='organelle',value=target )
        crossed_tab.insert(loc=0,column='ID',value=source_file.stem )

        # now get radial stats
        rad_stats,z_stats, _ = get_radial_stats(        
                cellmask_obj,
                organelle_mask,
                org_obj,
                org_img,
                target,
                nuclei_obj,
                n_rad_bins,
                n_zernike
                )

        d_stats = get_depth_stats(        
                cellmask_obj,
                organelle_mask,
                org_obj,
                org_img,
                target,
                nuclei_obj
                )
      
        proj_stats = pd.merge(rad_stats, z_stats,on=["organelle","mask"])
        proj_stats = pd.merge(proj_stats, d_stats,on=["organelle","mask"])
        proj_stats.insert(loc=0,column='ID',value=source_file.stem )

        # write out files... 
        # org_stats_tabs.append(A_stats_tab)
        csv_path = out_data_path / f"{source_file.stem}-{target}-stats.csv"
        a_stats_tab.to_csv(csv_path)

        csv_path = out_data_path / f"{source_file.stem}-{target}-cross-stats.csv"
        crossed_tab.to_csv(csv_path)

        csv_path = out_data_path / f"{source_file.stem}-{target}-proj-stats.csv"
        proj_stats.to_csv(csv_path)

        count += 1

    print(f"dumped {count}x3 organelle stats ({organelle_names}) csvs")
    return count

In [107]:
n_rad_bins = 5
n_zernike = 9



from infer_subc.utils.stats_helpers import make_organelle_stat_tables

In [108]:
file_count = make_organelle_stat_tables(organelle_names, 
                                      organelles,
                                      intensities, 
                                      nuclei_obj,
                                      cellmask_obj,
                                      cyto_mask, 
                                      out_data_path, 
                                      source_file,
                                      n_rad_bins=5,
                                      n_zernike=9)



dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


In [109]:
target = organelle_names[1]

csv_path = out_data_path / f"{source_file.stem}-{target}-stats.csv"

mito_table = pd.read_csv(csv_path)
mito_table.head()

Unnamed: 0.1,Unnamed: 0,ID,organelle,label,max_intensity,mean_intensity,min_intensity,volume,equivalent_diameter,centroid-0,...,MT_overlap,MT_labels,GL_overlap,GL_labels,PR_overlap,PR_labels,ER_overlap,ER_labels,LD_overlap,LD_labels
0,0,ZSTACK_PBTOhNGN2hiPSCs_BR1_N19_Unmixed,lyso,2,65535,16752.953982,0,30836,38.906289,6.450642,...,2346,"[1, 2]",2019,[1],77,"[3, 5, 6, 15, 17, 20, 21]",3715,[1],0,[]
1,1,ZSTACK_PBTOhNGN2hiPSCs_BR1_N19_Unmixed,lyso,5,21639,10853.279793,2242,1544,14.340013,2.794041,...,0,[],0,[],0,[],136,[1],0,[]
2,2,ZSTACK_PBTOhNGN2hiPSCs_BR1_N19_Unmixed,lyso,6,5773,2776.759036,480,83,5.412025,1.445783,...,0,[],0,[],0,[],0,[],0,[]
3,3,ZSTACK_PBTOhNGN2hiPSCs_BR1_N19_Unmixed,lyso,8,7080,3002.28125,0,128,6.252741,1.65625,...,0,[],0,[],0,[],0,[],0,[]
4,4,ZSTACK_PBTOhNGN2hiPSCs_BR1_N19_Unmixed,lyso,9,3976,2270.15,863,20,3.367781,1.0,...,0,[],0,[],0,[],3,[1],0,[]


In [110]:
mito_table.volume.mean()

518.016