# PUTATIVE WORKFLOW


## WORKFLOW EDITOR PLUGIN
- FINE-TUNE SEGMENTATIONS
  - export workflow.jsons
    - masks:
      - nuclei
      - cellmask
      - cytoplasm
    - organelles:
      - lyso
      - mito
      - golgi
      - perox
      - ER
      - LD


## BATCHPROCESS WORKFLOW
- BATCH PROCESS
  - load workflow.jsons for: 
  1. masks
    - export: masks .tiff as stack (nuclei, cellmask, cytoplasm)
  2. organelles
    - export individual .tiffs



## NOTEBOOK ~~OR ***FUTURE*** PLUGIN~~
- COLLECT ORGANELLE STATS
  - extract masks.tiffs as individual
    - nuclei, cellmask, cytoplasm
  - collect regionprops for all organelles
    - export .csvs


## NOTEBOOK ~~OR __FUTURE__ PLUGIN~~
- SUMMARIZE STUDY DATA
  - munge .csv to create summary stats across all cells/images




_____________

## TO DO
- add "segmentation name" field instead of copying from workflow.json name


- choose alternate conf_XXX.json location. 
  - strategy:  add to "prebuilt" list from path


  
  ## FILE NAME CONVENTIONS

  raw file name is kept.

  PREFIX = "segmentation name" or regionprop name.  e.g. 
  SUFFIX = "description" i.e. 

In [1]:
# top level imports
from pathlib import Path
import os, sys
from typing import Optional, Union, Dict, List

import numpy as np
import pandas as pd

import napari

### import local python functions in ../infer_subc
sys.path.append(os.path.abspath((os.path.join(os.getcwd(), '..'))))

from infer_subc.core.file_io import (read_czi_image,
                                        export_inferred_organelle,
                                        import_inferred_organelle,
                                        export_tiff,
                                        list_image_files)



from infer_subc.constants import *
from infer_subc.utils.stats import *
from infer_subc.utils.stats_helpers import *



import time
%load_ext autoreload
%autoreload 2



In [2]:
# Example data set used to exercise the pipeline below.
# Every imaging file lives underneath this root directory.
data_root_path = Path(os.path.expanduser("~")) / "Projects/Imaging/data"

# Input side: the linearly unmixed ".czi" images.
im_type = ".czi"
int_data_path = data_root_path / "raw"

# Collect every input image of that type.
img_file_list = list_image_files(int_data_path, im_type)

# Output side: ".tiff" files are written here; create the folder on first use.
out_data_path = data_root_path / "out"

if not out_data_path.exists():
    out_data_path.mkdir()
    print(f"making {out_data_path}")

In [3]:
im_path = Path(img_file_list[0])
im_path

PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed.czi')

## 1. get each unique cell according to filename


### extract ID. e.g.

### process each cell & summarize



## 2. aggregate all cells into a database 

In [4]:
# File name (with extension) of the example image.
full_name = im_path.name

# Unique cell IDs: the part of each file stem before the first "-".
cell_ids = list({Path(fn).stem.split("-")[0] for fn in img_file_list})

# Postfixes used to find the mask stack and the per-organelle segmentations.
masks_postfix = "masks2"
organelle_postfix = ["lyso", "mito", "golgi", "perox", "ER", "LD"]


In [5]:
# MASK process
# 1. get a list of all files based on a "prefix" and "suffix" for a given path
# dump three .tiff files from the multichannel mask tiff
# from tifffile import imwrite, imread#, tiffcomment
from infer_subc.core.img import label_uint16
from infer_subc.core.file_io import export_tiff, read_tiff_image
from typing import Union
from pathlib import Path

def _explode_mask(mask_path: Union[Path,str], postfix: str= "masks", im_type: str = ".tiff") -> bool:
    """ 
    TODO: add loggin instead of printing
        append tiffcomments with provenance
    """
    if isinstance(mask_path, str): mask_path = Path(mask_path)
    # load image 
    full_stem = mask_path.stem
    if full_stem.endswith(postfix):
        stem = full_stem.rstrip(postfix)
        image = read_tiff_image(mask_path)
        assert image.shape[0]==3
        
        # make into np.uint16 labels
        nuclei = label_uint16(image[0])
        # export as np.uint8 (255)
        cellmask = image[1]>0            
        cytoplasm = image[2]>0

        # write wasks
        root_stem = mask_path.parent / stem
        # ret1 = imwrite(f"{root}nuclei{stem}", nuclei)
        ret1 = export_tiff(nuclei, f"{stem}nuc", mask_path.parent, None)
        # ret2 = imwrite(f"{root}cellmask{stem}", cellmask)
        ret2 = export_tiff(cellmask, f"{stem}cell", mask_path.parent, None)
        # ret3 = imwrite(f"{root}cytosol{stem}", cytosol)
        ret3 = export_tiff(cytoplasm, f"{stem}cyto", mask_path.parent, None)

        print(f"wrote {stem}-{{nuc,cell,cyto}}")
        return True
    else:
        return False



def _explode_masks(root_path: Union[Path,str], postfix: str= "masks", im_type: str = ".tiff"):
    """
    Run `_explode_mask` on every file listed by `list_image_files(root_path, im_type, postfix)`.

    Parameters
    ------------
    root_path:
        folder to search (str or Path)
    postfix:
        forwarded to `list_image_files` and `_explode_mask`
    im_type:
        forwarded to `list_image_files` and `_explode_mask`

    Returns
    -------------
    number of files for which `_explode_mask` returned True (0 when no files are found)

    TODO: add logging instead of printing
        append tiffcomments with provenance
    """
    root_path = Path(root_path)
    img_file_list = list_image_files(root_path,im_type, postfix)

    n_files = len(img_file_list)
    if n_files == 0:
        # nothing to do; also avoids dividing by zero in the summary below
        print(f"no {im_type} files with postfix {postfix} found in {root_path}")
        return 0

    wrote_cnt = 0
    for img_f in img_file_list:
        if _explode_mask(img_f, postfix=postfix, im_type=im_type):
            wrote_cnt += 1
        else:
            print(f"failed to explode {img_f}")

    print(f"exploded {wrote_cnt*100./n_files} pct of {n_files} files")
    return wrote_cnt



In [8]:
from infer_subc.utils.batch import explode_masks

cnt = explode_masks(out_data_path, postfix='masks2')
cnt

exploded 100.0 pct of 35 files


35

In [6]:


# all the imaging data goes here.
data_root_path = Path(os.path.expanduser("~")) / "Projects/Imaging/data"
# linearly unmixed ".czi" files are here
raw_data_path = data_root_path / "raw"
# save output ".tiff" files here
int_data_path = data_root_path / "out"
# save stats here
out_data_path = data_root_path / "out"


In [7]:
# Working copies of the three data folders, normalized to Path objects.
raw_path = Path(raw_data_path)
int_path = Path(int_data_path)
out_path = Path(out_data_path)

# All raw ".czi" images to process.
img_file_list = list_image_files(raw_path, ".czi")

# Create the output folder on first use.
if not out_path.exists():
    out_path.mkdir()
    print(f"making {out_path}")



In [9]:
img_file_list

[PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed.czi'),
 PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N15_Unmixed.czi'),
 PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N16_Unmixed.czi'),
 PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N17_Unmixed.czi'),
 PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N18_Unmixed.czi'),
 PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N19_Unmixed.czi'),
 PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N20_Unmixed.czi'),
 PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N21_Unmixed.czi'),
 PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N22_Unmixed.czi'),
 PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N23_Unmixed.czi'),
 PosixPath

In [10]:
from typing import Optional, Union, Dict, List

def _find_segmentation_tiff_files(prototype:Union[Path,str], organelles: List[str], int_path: Union[Path,str]) -> Dict:
    """
    find the nescessary image files based on protype, the organelles involved, and paths
    """

    # raw
    prototype = Path(prototype)
    if not prototype.exists():
        print(f"bad prototype. please choose an existing `raw` file as prototype")
        return dict()
    # make sure protoype ends with czi

    out_files = {"raw":prototype}

    int_path = Path(int_path) 
    # raw
    if not int_path.is_dir():
        print(f"bad path argument. please choose an existing path containing organelle segmentations")
        return out_files
    
    # cyto, cellmask
    cyto_nm = int_path / f"{prototype.stem}-cyto.tiff"
    if cyto_nm.exists():
        out_files["cyto"] = cyto_nm
    else:
        print(f"cytosol mask not found.  We'll try to extract from masks ")
        if explode_mask(int_path / f"{prototype.stem}-masks.tiff"): 
            out_files["cyto"] = cyto_nm
        else: 
            print(f"failed to explode {prototype.stem}-masks.tiff")
            return out_files
    
    cellmask_nm = int_path / f"{prototype.stem}-cellmask.tiff"
    if  cellmask_nm.exists():
        out_files["cellmask"] = cellmask_nm
    else:
        print(f"cellmask file not found in {int_path} returning")
        out_files["cellmask"] = None

    # organelles
    for org_n in organelles:
        org_name = Path(int_path) / f"{prototype.stem}-{org_n}.tiff"
        if org_name.exists(): 
            out_files[org_n] = org_name
        else: 
            print(f"{org_n} .tiff file not found in {int_path} returning")
            out_files[org_n] = None
    
    return out_files

    


In [11]:
from infer_subc.utils.batch import find_segmentation_tiff_files
prototype = '/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed.czi'
organelles = ["nuc","lyso", "mito","golgi","perox","ER","LD"]

filez = find_segmentation_tiff_files(prototype, organelles, out_data_path)



In [12]:
filez

{'raw': PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed.czi'),
 'cyto': PosixPath('/Users/ergonyc/Projects/Imaging/data/out/ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed-cyto.tiff'),
 'cell': PosixPath('/Users/ergonyc/Projects/Imaging/data/out/ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed-cell.tiff'),
 'nuc': PosixPath('/Users/ergonyc/Projects/Imaging/data/out/ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed-nuc.tiff'),
 'lyso': PosixPath('/Users/ergonyc/Projects/Imaging/data/out/ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed-lyso.tiff'),
 'mito': PosixPath('/Users/ergonyc/Projects/Imaging/data/out/ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed-mito.tiff'),
 'golgi': PosixPath('/Users/ergonyc/Projects/Imaging/data/out/ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed-golgi.tiff'),
 'perox': PosixPath('/Users/ergonyc/Projects/Imaging/data/out/ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed-perox.tiff'),
 'ER': PosixPath('/Users/ergonyc/Projects/Imaging/data/out/ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed-ER.

In [13]:
from infer_subc.utils.stats_helpers import make_organelle_stat_tables, dump_all_stats_tables
from infer_subc.constants import *
from infer_subc.organelles import *
from infer_subc.core.file_io import read_tiff_image, read_czi_image

# names of organelles we have
organelle_names = ["nuc","lyso", "mito","golgi","perox","ER","LD"]

# get the intensities
organelle_channels = [NUC_CH, LYSO_CH,MITO_CH,GOLGI_CH,PEROX_CH,ER_CH,LD_CH]



In [14]:
# for a list of "prefixes"  collect stats + cross stats masked by cytosol (including nuclei masked by cellmask)

def _dump_all_stats_tables(int_path: Union[Path,str], 
                   out_path: Union[Path, str], 
                   raw_path: Union[Path,str], 
                   organelle_names: List[str]= ["nuclei","golgi","peroxi"], 
                   organelle_chs: List[int]= [NUC_CH,GOLGI_CH, PEROX_CH], 
                    ) -> int :
    """
    Build the organelle stat tables for every ".czi" image found in `raw_path`.

    For each raw image the matching segmentation files are located in
    `int_path`, loaded, and handed to `make_organelle_stat_tables` together
    with the raw image data indexed by `organelle_chs`.

    Parameters
    ------------
    int_path:
        folder containing the segmentation .tiff files
    out_path:
        folder passed to `make_organelle_stat_tables` as output location; created if missing
    raw_path:
        folder containing the raw ".czi" images
    organelle_names:
        organelle tags; must include "nuc", which is used as the nuclei object
        (NOTE: the default list does not contain "nuc", so callers need to pass their own)
    organelle_chs:
        indices into the raw image, one per entry of `organelle_names`

    Returns
    -------------
    the value returned by `make_organelle_stat_tables` for the last image,
    or 0 when no images are found

    TODO: add logging instead of printing
        append tiffcomments with provenance
    """
    raw_path = Path(raw_path)
    int_path = Path(int_path)
    out_path = Path(out_path)

    img_file_list = list_image_files(raw_path,".czi")

    if not out_path.exists():
        out_path.mkdir(parents=True)
        print(f"making {out_path}")

    # stays 0 when there is nothing to process
    n_files = 0
    for img_f in img_file_list:
        filez = find_segmentation_tiff_files(img_f, organelle_names, int_path)
        img_data, _ = read_czi_image(filez["raw"])

        # load masks
        cyto_mask = read_tiff_image(filez["cyto"])
        cellmask_obj = read_tiff_image(filez["cell"])

        # intensities from the raw image, one per organelle
        intensities = [img_data[ch] for ch in organelle_chs]

        # load organelle segmentations in the same order
        organelles = [read_tiff_image(filez[org]) for org in organelle_names]

        # the nuclei segmentation doubles as the nuclei object
        nuclei_obj = organelles[ organelle_names.index("nuc") ]

        # write to the `out_path` argument, not to a notebook-level global
        n_files = make_organelle_stat_tables(organelle_names, 
                                      organelles,
                                      intensities, 
                                      nuclei_obj,
                                      cellmask_obj,
                                      cyto_mask, 
                                      out_path, 
                                      img_f,
                                      n_rad_bins=5,
                                      n_zernike=9)

    return n_files



In [15]:

# all the imaging data goes here.
data_root_path = Path(os.path.expanduser("~")) / "Projects/Imaging/data"
# linearly unmixed ".czi" files are here
raw_data_path = data_root_path / "raw"
# save output ".tiff" files here
int_data_path = data_root_path / "out"
# save stats here
out_data_path = data_root_path / "out"

# pass each folder in its own role (segmentations, stats output, raw images)
n_files = dump_all_stats_tables(int_data_path, 
                     out_data_path, 
                     raw_data_path, 
                     organelle_names=organelle_names, 
                     organelle_chs=organelle_channels)

n_files

  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  cv_cmsk.append(float(np.mean(radial_cv_cmsk)))  #convert to float to make importing from csv more straightforward
  cv_obj.append(float(np.mean(radial_cv_obj)))
  cv_img.append(float(np.mean(radial_cv_obj)))
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  cv_cmsk.append(float(np.mean(radial_cv_cmsk)))  #convert to float to make importing from csv more straightforward
  cv_obj.append(float(np.mean(radial_cv_obj)))
  cv_img.append(float(np.mean(radial_cv_obj)))
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum(

dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_mea

dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  magnitude 

dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_mea

dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_mea

dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_mea

dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


49

# summary statistics

We now need to merge our files


-----------------
##  SUMMARY STATS  
> WARNING: (🚨🚨🚨🚨 WIP)
### normalizations.

- overlaps, normalized by CYTOPLASM, A, and B
- per cell averages, medians, std, and totals

This is all pandas munging and very straightforward tabular manipulation.


In [16]:

data_root_path = Path(os.path.expanduser("~")) / "Projects/Imaging/data"

# intermediate output files (segmentation ".tiff" files and stats ".csv" files) are here
int_data_path = data_root_path / "out"


In [17]:
# for a list of "prefixes"  collect stats + cross stats masked by cytosol (including nuclei masked by cellmask)

def _summarize_organelle_stats(int_path: Union[Path,str], 
                              organelle_names: List[str]= ["nuclei","golgi","peroxi"]):
    """  
    """
    # write out files... 

    if isinstance(int_path, str): int_path = Path(int_path)

    if not Path.exists(out_path):
        Path.mkdir(out_path)
        print(f"making {out_path}")

    all_stats_df = pd.DataFrame()
    all_cross_stats_df = pd.DataFrame()
    all_proj_stats_df = pd.DataFrame()
    
    for target in organelle_names:
        stat_file_list = sorted( int_path.glob(f"*{target}-stats.csv") )

        stats_df = pd.DataFrame()
        cross_stats_df = pd.DataFrame()
        proj_stats_df = pd.DataFrame()

        for stats_f in stat_file_list:
            stem = stats_f.stem.split("-")[0]
            # stats load the csv
            stats = load_stats_csv(out_path,stem, target)
            # projection stats
            proj = load_proj_stats_csv(out_path,stem, target)
            # cross stats
            cross = load_cross_stats_csv(out_path,stem, target)

            stats_df = pd.concat([stats_df,stats],axis=0, join='outer')
            proj_stats_df = pd.concat([proj_stats_df,proj],axis=0, join='outer')
            cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
        

        ## maybe merge into all the possible files?
        # summary_df = pd.DataFrame(index=[f.stem.split("-")[0] for f in stat_file_list])
        # cross_stats_df = pd.DataFrame(index=[f.stem.split("-")[0] for f in stat_file_list])
        # proj_stats_df = pd.DataFrame(index=[f.stem.split("-")[0] for f in stat_file_list])

        summary_df = create_stats_summary(stats_df)
        summary_df.insert(loc=1,column="organelle",value=target)
        cross_summary_df = summarize_cross_stats(cross_stats_df)
        ## cross_summary_df = pivot_cross_stats(cross_stats_df)  #makes a wide version... but has a bug
        cross_summary_df.insert(loc=1,column="organelle",value=target)

        all_stats_df = pd.concat([all_stats_df,summary_df],axis=0)
        all_proj_stats_df = pd.concat([all_proj_stats_df,proj_stats_df],axis=0)
        all_cross_stats_df = pd.concat([all_cross_stats_df,cross_summary_df],axis=0)
    

    return all_stats_df, all_proj_stats_df, all_cross_stats_df
        



In [18]:
from infer_subc.utils.stats_helpers import summarize_organelle_stats, dump_organelle_summary_tables

# all the imaging data goes here.
data_root_path = Path(os.path.expanduser("~")) / "Projects/Imaging/data"
# linearly unmixed ".czi" files are here
raw_data_path = data_root_path / "raw"
# save output ".tiff" files here could be different than out
int_data_path = data_root_path / "out"
# save stats here
out_data_path = data_root_path / "out"


# use the folder defined in this cell rather than `int_path` left over from an earlier cell
all_stats_df, all_proj_stats_df, all_cross_stats_df = summarize_organelle_stats( int_data_path, organelle_names )


  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_st

In [20]:
def _dump_organelle_summary_tables(
                    int_path: Union[Path,str], 
                    out_path: Union[Path, str], 
                    organelle_names: List[str]= ["nuclei","golgi","peroxi"] ) -> int:
    """
    Write the study-level summary tables as csv files into `out_path`.

    Calls `summarize_organelle_stats(int_path, organelle_names)` and saves its
    three results as "summary-stats.csv", "summary-proj-stats.csv" and
    "summary-cross-stats.csv".

    Parameters
    ------------
    int_path:
        folder forwarded to `summarize_organelle_stats`
    out_path:
        folder the csv files are written to (str or Path); created if missing
    organelle_names:
        organelle tags forwarded to `summarize_organelle_stats`

    Returns
    -------------
    always 1
    """
    # accept str as advertised by the annotation
    out_path = Path(out_path)

    if not out_path.exists():
        out_path.mkdir(parents=True)
        print(f"making {out_path}")

    all_stats_df, all_proj_stats_df, all_cross_stats_df = summarize_organelle_stats( int_path, organelle_names )

    all_stats_df.to_csv(out_path / "summary-stats.csv")
    all_proj_stats_df.to_csv(out_path / "summary-proj-stats.csv")
    all_cross_stats_df.to_csv(out_path / "summary-cross-stats.csv")

    return 1



In [21]:
n_files = dump_organelle_summary_tables(out_data_path, 
                     out_data_path, 
                     organelle_names)

n_files

  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_st

1

Make some wrappers to deal with reading our summary stats into pandas properly.


In [None]:


def load_summary_stats_csv(in_path: Path) -> pd.DataFrame:
    """Read `summary-stats.csv` from the folder `in_path`.

    The first csv column becomes the index.  Every column whose name contains
    "labels" is passed through `fix_int_list_cols`.

    Returns the resulting pandas DataFrame.
    """
    frame = pd.read_csv(in_path / "summary-stats.csv", index_col=0)
    # the *_labels columns need converting after the csv round trip
    label_columns = list(filter(lambda name: "labels" in name, frame.columns))
    return fix_int_list_cols(frame, label_columns)


def load_summary_proj_stats_csv(in_path: Path) -> pd.DataFrame:
    """Read `summary-proj-stats.csv` from the folder `in_path`.

    The first csv column becomes the index.  Three groups of columns are
    passed through `fix_str_list_cols`, `fix_int_list_cols` and
    `fix_float_list_cols` respectively; the remaining columns
    ('ID', 'organelle', 'mask', 'radial_n_bins', 'n_z') are left alone.

    Returns the resulting pandas DataFrame.
    """
    str_cols = ['radial_bins']
    int_cols = [
        'radial_cm_vox_cnt', 'radial_org_vox_cnt', 'radial_org_intensity', 'radial_n_pix',
        'zernike_n', 'zernike_m',
        'z', 'z_cm_vox_cnt', 'z_org_vox_cnt', 'z_org_intensity', 'z_nuc_vox_cnt',
    ]
    float_cols = [
        'radial_cm_cv', 'radial_org_cv', 'radial_img_cv',
        'zernike_cm_mag', 'zernike_cm_phs', 'zernike_obj_mag', 'zernike_obj_phs',
        'zernike_nuc_mag', 'zernike_nuc_phs', 'zernike_img_mag',
    ]

    csv_path = in_path / "summary-proj-stats.csv"
    proj = pd.read_csv(csv_path, index_col=0)
    proj = fix_str_list_cols(proj, str_cols)
    proj = fix_int_list_cols(proj, int_cols)
    proj = fix_float_list_cols(proj, float_cols)
    return proj
        

def load_summary_cross_stats_csv(in_path: Path) -> pd.DataFrame:
    """Read `summary-cross-stats.csv` from the folder `in_path`.

    The first csv column becomes the index.  Columns whose name contains
    "label" are split in two groups: names containing "__" go through
    `fix_str_list_cols`, the others through `fix_int_list_cols`.

    Returns the resulting pandas DataFrame.
    """
    frame = pd.read_csv(in_path / "summary-cross-stats.csv", index_col=0)

    # sort the label columns into the two groups in a single pass
    with_sep, without_sep = [], []
    for name in frame.columns:
        if "label" not in name:
            continue
        if "__" in name:
            with_sep.append(name)
        else:
            without_sep.append(name)

    frame = fix_str_list_cols(frame, with_sep)
    frame = fix_int_list_cols(frame, without_sep)
    return frame
    


In [22]:

#summary_shell.head()
test = load_summary_stats_csv(out_data_path)
test_proj = load_summary_proj_stats_csv(out_data_path)
test_cross = load_summary_cross_stats_csv(out_data_path)