# PUTATIVE WORKFLOW


## WORKFLOW EDITOR PLUGIN
- FINE-TUNE SEGMENTATIONS
  - export workflow.jsons
    - masks:
      - nuclei
      - cellmask
      - cytoplasm
    - organelles:
      - lyso
      - mito
      - golgi
      - perox
      - ER
      - LD


## BATCHPROCESS WORKFLOW
- BATCH PROCESS
  - load workflow.jsons for: 
  1. masks
    - export: masks .tiff as stack (nuclei, cellmask, cytoplasm)
  2. organelles
    - export individual .tiffs



## NOTEBOOK ~~OR ***FUTURE*** PLUGIN~~
- COLLECT ORGANELLE STATS
  - extract masks.tiffs as individual
    - nuclei, cellmask, cytoplasm
  - collect regionprops for all organelles
    - export .csvs


## NOTEBOOK ~~OR __FUTURE__ PLUGIN~~
- SUMMARIZE STUDY DATA
  - munge .csv to create summary stats across all cells/images




_____________

## TO DO
- add "segmentation name" field instead of copying from workflow.json name


- choose alternate conf_XXX.json location. 
  - strategy:  add to "prebuilt" list from path


  
  ## FILE NAME CONVENTIONS

  raw file name is kept.

  PREFIX = "segmentation name" or regionprop name.  e.g. 
  SUFFIX = "description" i.e. 

In [1]:
# top level imports
from pathlib import Path
import os, sys
from typing import Optional, Union, Dict, List

import numpy as np
import pandas as pd

import napari

### import local python functions in ../infer_subc
sys.path.append(os.path.abspath((os.path.join(os.getcwd(), '..'))))

from infer_subc.core.file_io import (read_czi_image,
                                        export_inferred_organelle,
                                        import_inferred_organelle,
                                        export_tiff,
                                        list_image_files)



from infer_subc.constants import *
from infer_subc.utils.stats import *
from infer_subc.utils.stats_helpers import *



import time
%load_ext autoreload
%autoreload 2



In [2]:
# this will be the example image for testing the pipeline below
# build the datapath
# all the imaging data goes here.
data_root_path = Path(os.path.expanduser("~")) / "Projects/Imaging/data"

# linearly unmixed ".czi" files are here
int_data_path = data_root_path / "raw"
im_type = ".czi"

# get the list of all files
img_file_list = list_image_files(int_data_path,im_type)

# save output ".tiff" files here
out_data_path = data_root_path / "out"

if not Path.exists(out_data_path):
    Path.mkdir(out_data_path)
    print(f"making {out_data_path}")

In [3]:
im_path = Path(img_file_list[0])
im_path

PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed.czi')

## 1. get each unique cells accouding to filename


### extract ID. e.g.

### process each cell & summarize



## 2. aggregate all cells into a database 

In [4]:
full_name = im_path.name

cell_ids = [ Path(fn).stem.split("-")[0] for fn in img_file_list]
cell_ids = list(set(cell_ids))

masks_postfix = "masks2"
organelle_postfix = ["lyso", "mito","golgi","perox","ER","LD"]




# function to get masks, and organeles

def gather_segmentations(file_id:str, path_root: Path, masks_postfix: str, organelle_postfix: List[str] ) -> List[np.ndarray]:
    """
    

    """
    pass


    # create filename ID 


    # 

In [5]:
# MASK process
# 1. get a listof all files based on a "prefix" and "suffix" for a given path
# dump three .tiff from teh mask multichannel tiff
# from tifffile import imwrite, imread#, tiffcomment
from infer_subc.core.img import label_uint16
from infer_subc.core.file_io import export_tiff, read_tiff_image
from typing import Union
from pathlib import Path

def _explode_mask(mask_path: Union[Path,str], postfix: str= "masks", im_type: str = ".tiff") -> bool:
    """ 
    TODO: add loggin instead of printing
        append tiffcomments with provenance
    """
    if isinstance(mask_path, str): mask_path = Path(mask_path)
    # load image 
    full_stem = mask_path.stem
    if full_stem.endswith(postfix):
        stem = full_stem.rstrip(postfix)
        image = read_tiff_image(mask_path)
        assert image.shape[0]==3
        
        # make into np.uint16 labels
        nuclei = label_uint16(image[0])
        # export as np.uint8 (255)
        cellmask = image[1]>0            
        cytoplasm = image[2]>0

        # write wasks
        root_stem = mask_path.parent / stem
        # ret1 = imwrite(f"{root}nuclei{stem}", nuclei)
        ret1 = export_tiff(nuclei, f"{stem}nuc", mask_path.parent, None)
        # ret2 = imwrite(f"{root}cellmask{stem}", cellmask)
        ret2 = export_tiff(cellmask, f"{stem}cell", mask_path.parent, None)
        # ret3 = imwrite(f"{root}cytosol{stem}", cytosol)
        ret3 = export_tiff(cytoplasm, f"{stem}cyto", mask_path.parent, None)

        print(f"wrote {stem}-{{nuc,cell,cyto}}")
        return True
    else:
        return False



def _explode_masks(root_path: Union[Path,str], postfix: str= "masks", im_type: str = ".tiff"):
    """  
    TODO: add loggin instead of printing
        append tiffcomments with provenance
    """
    if isinstance(root_path, str): root_path = Path(root_path)
    img_file_list = list_image_files(root_path,im_type, postfix)
    wrote_cnt = 0
    for img_f in img_file_list:
        if _explode_mask(img_f, postfix=postfix, im_type=im_type): wrote_cnt += 1
        else: print(f"failed to explode {img_f}")
    else:
        print(f"how thefark!!! {img_f}")

    print(f"exploded {wrote_cnt*100./len(img_file_list)} pct of {len(img_file_list)} files")
    return wrote_cnt



In [6]:
from infer_subc.utils.batch import explode_masks

cnt = explode_masks(out_data_path, postfix='masks2')
cnt

exploded 100.0 pct of 35 files


35

In [7]:


# all the imaging data goes here.
data_root_path = Path(os.path.expanduser("~")) / "Projects/Imaging/data"
# linearly unmixed ".czi" files are here
raw_data_path = data_root_path / "raw"
# save output ".tiff" files here
int_data_path = data_root_path / "out"
# save stats here
out_data_path = data_root_path / "out"


In [8]:
raw_path = raw_data_path
int_path = int_data_path
out_path = out_data_path


if isinstance(raw_path, str): raw_path = Path(raw_path)
if isinstance(int_path, str): int_path = Path(int_path)
if isinstance(out_path, str): out_path = Path(out_path)

img_file_list = list_image_files(raw_path,".czi")

if not Path.exists(out_path):
    Path.mkdir(out_path)
    print(f"making {out_path}")



In [9]:
img_file_list

[PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed.czi'),
 PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N15_Unmixed.czi'),
 PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N16_Unmixed.czi'),
 PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N17_Unmixed.czi'),
 PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N18_Unmixed.czi'),
 PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N19_Unmixed.czi'),
 PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N20_Unmixed.czi'),
 PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N21_Unmixed.czi'),
 PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N22_Unmixed.czi'),
 PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N23_Unmixed.czi'),
 PosixPath

In [10]:
from typing import Optional, Union, Dict, List

def _find_segmentation_tiff_files(prototype:Union[Path,str], organelles: List[str], int_path: Union[Path,str]) -> Dict:
    """
    find the nescessary image files based on protype, the organelles involved, and paths
    """

    # raw
    prototype = Path(prototype)
    if not prototype.exists():
        print(f"bad prototype. please choose an existing `raw` file as prototype")
        return dict()
    # make sure protoype ends with czi

    out_files = {"raw":prototype}

    int_path = Path(int_path) 
    # raw
    if not int_path.is_dir():
        print(f"bad path argument. please choose an existing path containing organelle segmentations")
        return out_files
    
    # cyto, cellmask
    cyto_nm = int_path / f"{prototype.stem}-cyto.tiff"
    if cyto_nm.exists():
        out_files["cyto"] = cyto_nm
    else:
        print(f"cytosol mask not found.  We'll try to extract from masks ")
        if explode_mask(int_path / f"{prototype.stem}-masks.tiff"): 
            out_files["cyto"] = cyto_nm
        else: 
            print(f"failed to explode {prototype.stem}-masks.tiff")
            return out_files
    
    cellmask_nm = int_path / f"{prototype.stem}-cellmask.tiff"
    if  cellmask_nm.exists():
        out_files["cellmask"] = cellmask_nm
    else:
        print(f"cellmask file not found in {int_path} returning")
        out_files["cellmask"] = None

    # organelles
    for org_n in organelles:
        org_name = Path(int_path) / f"{prototype.stem}-{org_n}.tiff"
        if org_name.exists(): 
            out_files[org_n] = org_name
        else: 
            print(f"{org_n} .tiff file not found in {int_path} returning")
            out_files[org_n] = None
    
    return out_files

    


In [11]:
from infer_subc.utils.batch import find_segmentation_tiff_files
prototype = '/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed.czi'
organelles = ["nuc","lyso", "mito","golgi","perox","ER","LD"]

filez = find_segmentation_tiff_files(prototype, organelles, out_data_path)



In [12]:
filez

{'raw': PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed.czi'),
 'cyto': PosixPath('/Users/ergonyc/Projects/Imaging/data/out/ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed-cyto.tiff'),
 'cell': PosixPath('/Users/ergonyc/Projects/Imaging/data/out/ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed-cell.tiff'),
 'nuc': PosixPath('/Users/ergonyc/Projects/Imaging/data/out/ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed-nuc.tiff'),
 'lyso': PosixPath('/Users/ergonyc/Projects/Imaging/data/out/ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed-lyso.tiff'),
 'mito': PosixPath('/Users/ergonyc/Projects/Imaging/data/out/ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed-mito.tiff'),
 'golgi': PosixPath('/Users/ergonyc/Projects/Imaging/data/out/ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed-golgi.tiff'),
 'perox': PosixPath('/Users/ergonyc/Projects/Imaging/data/out/ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed-perox.tiff'),
 'ER': PosixPath('/Users/ergonyc/Projects/Imaging/data/out/ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed-ER.

In [13]:
from infer_subc.utils.stats_helpers import make_organelle_stat_tables
from infer_subc.constants import *
from infer_subc.organelles import *
from infer_subc.core.file_io import read_tiff_image, read_czi_image

# names of organelles we have
organelle_names = ["nuc","lyso", "mito","golgi","perox","ER","LD"]

# get the intensities
organelle_channels = [NUC_CH, LYSO_CH,MITO_CH,GOLGI_CH,PEROX_CH,ER_CH,LD_CH]



In [15]:
# for a list of "prefixes"  collect stats + cross stats masked by cytosol (including nuclei masked by cellmask)

def dump_all_stats(int_path: Union[Path,str], out_path: Union[Path, str], raw_path: Union[Path,str], organelle_names: List[str]= ["nuclei","golgi","peroxi"], organelle_chs: List[int]= [NUC_CH,GOLGI_CH, PEROX_CH], postfix: str = ".tiff"):
    """  
    TODO: add loggin instead of printing
        append tiffcomments with provenance
    """

    
    if isinstance(raw_path, str): raw_path = Path(raw_path)
    if isinstance(int_path, str): int_path = Path(int_path)
    if isinstance(out_path, str): out_path = Path(out_path)
    
    img_file_list = list_image_files(raw_path,".czi")

    if not Path.exists(out_path):
        Path.mkdir(out_path)
        print(f"making {out_path}")
        
    for img_f in img_file_list:
        filez = find_segmentation_tiff_files(img_f, organelle_names, int_path)
        img_data,meta_dict = read_czi_image(filez["raw"])

        # load organelles and masks
        cyto_mask = read_tiff_image(filez["cyto"])
        cellmask_obj = read_tiff_image(filez["cell"])



        # create intensities from raw as list
        intensities = [img_data[ch] for ch in organelle_chs]

        # load organelles as list
        organelles = [read_tiff_image(filez[org]) for org in organelle_names]
        
        #get mask (cyto_mask)
        nuclei_obj = organelles[ organelle_names.index("nuc") ]

        n_files = make_organelle_stat_tables(organelle_names, 
                                      organelles,
                                      intensities, 
                                      nuclei_obj,
                                      cellmask_obj,
                                      cyto_mask, 
                                      out_data_path, 
                                      img_f,
                                      n_rad_bins=5,
                                      n_zernike=9)

    return n_files



# all the imaging data goes here.
data_root_path = Path(os.path.expanduser("~")) / "Projects/Imaging/data"
# linearly unmixed ".czi" files are here
raw_data_path = data_root_path / "raw"
# save output ".tiff" files here
int_data_path = data_root_path / "out"
# save stats here
out_data_path = data_root_path / "out"

dump_all_stats(out_data_path, 
                     out_data_path, 
                     raw_data_path, 
                     organelle_names=organelle_names, 
                     organelle_chs=organelle_channels)



[PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed.czi'), PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N15_Unmixed.czi'), PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N16_Unmixed.czi'), PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N17_Unmixed.czi'), PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N18_Unmixed.czi'), PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N19_Unmixed.czi'), PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N20_Unmixed.czi'), PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N21_Unmixed.czi'), PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N22_Unmixed.czi'), PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N23_Unmixed.czi'), PosixPath('/Users/e

  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  vi = np.sum(pixels[:,:,np.newaxis]*z.imag, axis=(0,1))


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  vi = np.sum(pixels[:,:,np.newaxis]*z.imag, axis=(0,1))


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  vi = np.sum(pixels[:,:,np.newaxis]*z.imag, axis=(0,1))
  vi = np.sum(pixels[:,:,np.newaxis]*z.imag, axis=(0,1))


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  vi = np.sum(pixels[:,:,np.newaxis]*z.imag, axis=(0,1))


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  vi = np.sum(pixels[:,:,np.newaxis]*z.imag, axis=(0,1))


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  vi = np.sum(pixels[:,:,np.newaxis]*z.imag, axis=(0,1))


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  vi = np.sum(pixels[:,:,np.newaxis]*z.imag, axis=(0,1))


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  vi = np.sum(pixels[:,:,np.newaxis]*z.imag, axis=(0,1))


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  vi = np.sum(pixels[:,:,np.newaxis]*z.imag, axis=(0,1))


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  vi = np.sum(pixels[:,:,np.newaxis]*z.imag, axis=(0,1))


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  vi = np.sum(pixels[:,:,np.newaxis]*z.imag, axis=(0,1))


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  vi = np.sum(pixels[:,:,np.newaxis]*z.imag, axis=(0,1))


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  
  
  
  
  
  
  vi = np.sum(pixels[:,:,np.newaxis]*z.imag, axis=(0,1))
  
  
  
  vi = np.sum(pixels[:,:,np.newaxis]*z.imag, axis=(0,1))
  
  
  
  vi = np.sum(pixels[:,:,np.newaxis]*z.imag, axis=(0,1))
  
  
  
  vi = np.sum(pixels[:,:,np.newaxis]*z.imag, axis=(0,1))
  
  
  
  vi = np.sum(pixels[:,:,np.newaxis]*z.imag, axis=(0,1))
  
  
  
  vi = np.sum(pixels[:,:,np.newaxis]*z.imag, axis=(0,1))


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  vi = np.sum(pixels[:,:,np.newaxis]*z.imag, axis=(0,1))


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  
  
  
  
  
  
  
  
  
  
  
  
  vi = np.sum(pixels[:,:,np.newaxis]*z.imag, axis=(0,1))
  
  
  
  vi = np.sum(pixels[:,:,np.newaxis]*z.imag, axis=(0,1))
  
  
  
  
  
  
  vi = np.sum(pixels[:,:,np.newaxis]*z.imag, axis=(0,1))


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  vi = np.sum(pixels[:,:,np.newaxis]*z.imag, axis=(0,1))
  vi = np.sum(pixels[:,:,np.newaxis]*z.imag, axis=(0,1))


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  vi = np.sum(pixels[:,:,np.newaxis]*z.imag, axis=(0,1))


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  vi = np.sum(pixels[:,:,np.newaxis]*z.imag, axis=(0,1))


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  vi = np.sum(pixels[:,:,np.newaxis]*z.imag, axis=(0,1))


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  vi = np.sum(pixels[:,:,np.newaxis]*z.imag, axis=(0,1))


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  vi = np.sum(pixels[:,:,np.newaxis]*z.imag, axis=(0,1))


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  vi = np.sum(pixels[:,:,np.newaxis]*z.imag, axis=(0,1))


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  vi = np.sum(pixels[:,:,np.newaxis]*z.imag, axis=(0,1))


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  vi = np.sum(pixels[:,:,np.newaxis]*z.imag, axis=(0,1))


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  vi = np.sum(pixels[:,:,np.newaxis]*z.imag, axis=(0,1))


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  vi = np.sum(pixels[:,:,np.newaxis]*z.imag, axis=(0,1))


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  vi = np.sum(pixels[:,:,np.newaxis]*z.imag, axis=(0,1))


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


# summary statistics

We now need to merge our files

In [16]:

data_root_path = Path(os.path.expanduser("~")) / "Projects/Imaging/data"

# linearly unmixed ".czi" files are here
int_data_path = data_root_path / "out"


In [355]:
# for a list of "prefixes"  collect stats + cross stats masked by cytosol (including nuclei masked by cellmask)

def summarize_by_id(stats_in:pd.DataFrame,agg_fn: List) -> pd.DataFrame:
    """ 
    """
    summary = stats_in.groupby(['ID']).agg(agg_fn)
    summary.columns = ["_".join(col_name).rstrip('_') for col_name in summary.columns.to_flat_index()]
    return summary



def create_stats_summary(summary_df:pd.DataFrame) -> pd.DataFrame:
    """
    """

    column_names = ['ID', 'organelle', 'label', 'max_intensity', 'mean_intensity',
        'min_intensity', 'volume', 'equivalent_diameter', 'euler_number', 'extent',
        'standard_deviation_intensity', 'surface_area', 'NU_overlap',
        'NU_labels', 'MY_overlap', 'MY_labels', 'GL_overlap', 'GL_labels',
        'PR_overlap', 'PR_labels', 'ER_overlap', 'ER_labels', 'LD_overlap',
        'LD_labels'] #minus centers and 

    math_cols = ['ID', 'mean_intensity',
        'standard_deviation_intensity',
        'min_intensity','max_intensity', 'equivalent_diameter',
        'euler_number', 'extent']
        
    def fix_list_col(stats:pd.DataFrame) -> pd.DataFrame:
        """ 
        """
        def str_col(x):    
            if isinstance(x,str):
                if x == '[]': return list()
                xstr = x.strip("[]").replace("'", "").split(", ")
                return [int(x) for x in xstr]
            else:
                return x

        
        label_stats = pd.DataFrame() 
        for i,col in enumerate(stats.columns):    
            label_stats[col] = stats[col].apply(str_col) if i>0 else stats[col]
        return label_stats

    def frac(x):
        return (x>0).sum()/x.count() 

    vol_cols = ['ID','volume']
    overlap_cols = ['ID'] + [col for col in column_names if col[3:]=='overlap']
    labels_cols = ['ID'] + [col for col in column_names if col[3:]=='labels']
   
    agg_func_math = ['sum', 'mean', 'median', 'min', 'max', 'std','count']
    agg_func_overlap = ['sum', 'mean', 'median','count',frac]
    agg_func_labels = ['sum']
    agg_func_vol = ['sum', 'mean', 'median', 'min', 'max', 'std', 'var']

    math_summary = summarize_by_id( summary_df[math_cols] , agg_func_math)
    label_stats = fix_list_col(summary_df[labels_cols])
    label_summary = summarize_by_id( label_stats , agg_func_labels)
    overlap_summary = summarize_by_id( summary_df[overlap_cols] ,agg_func_overlap)
    vol_summary = summarize_by_id( summary_df[vol_cols] , agg_func_vol)
    result = pd.concat([math_summary, vol_summary, overlap_summary, label_summary], axis=1)

    result.insert(loc=0,column="ID",value=result.index)
    
    return result



# def create_proj_stats_summary(summary_df:pd.DataFrame) -> pd.DataFrame:
#     """
#     """
#     proj_cols = ['ID', 'organelle', 'mask', 'radial_n_bins', 'radial_bins',
#        'radial_cm_vox_cnt', 'radial_org_vox_cnt', 'radial_org_intensity',
#        'radial_n_pix', 'radial_cm_cv', 'radial_org_cv', 'radial_img_cv',
#        'zernike_n', 'zernike_m', 'zernike_cm_mag', 'zernike_cm_phs',
#        'zernike_obj_mag', 'zernike_obj_phs', 'zernike_nuc_mag',
#        'zernike_nuc_phs', 'zernike_img_mag', 'n_z', 'z', 'z_cm_vox_cnt',
#        'z_org_vox_cnt', 'z_org_intensity', 'z_nuc_vox_cnt']
#     def frac(x):
#         return (x>0).sum()/x.count() 
#     vol_cols = ['ID','volume']
#     overlap_cols = ['ID'] + [col for col in column_names if col[3:]=='overlap']
#     labels_cols = ['ID'] + [col for col in column_names if col[3:]=='labels']  
#     agg_func_math = ['sum', 'mean', 'median', 'min', 'max', 'std','count']
#     agg_func_overlap = ['sum', 'mean', 'median','count',frac]
#     agg_func_labels = ['sum']
#     agg_func_vol = ['sum', 'mean', 'median', 'min', 'max', 'std', 'var']
#     math_summary = summarize_by_id( summary_df[math_cols] , agg_func_math)
#     label_stats = fix_list_col(summary_df[labels_cols])
#     label_summary = summarize_by_id( label_stats , agg_func_labels)
#     overlap_summary = summarize_by_id( summary_df[overlap_cols] ,agg_func_overlap)
#     vol_summary = summarize_by_id( summary_df[vol_cols] , agg_func_vol)
#     result = pd.concat([math_summary, vol_summary, overlap_summary, label_summary], axis=1)
#     return result


def summarize_by_group(stats_in:pd.DataFrame, grp_col:list, agg_fn:list) -> pd.DataFrame:
    """ 
    """
    summary = stats_in.reset_index(drop=True).groupby(grp_col).agg(agg_fn)
    summary.columns = ["_".join(col_name).rstrip('_') for col_name in summary.columns.to_flat_index()]
    return summary


def create_cross_stats_summary(summary_df:pd.DataFrame) -> pd.DataFrame:
    """
    """
    # cross_cols = ['ID', 'organelle', 'organelle_b', 'shell', 'label_', 'label', 'volume',
    #    'equivalent_diameter', 'centroid-0', 'centroid-1', 'centroid-2',
    #    'bbox-0', 'bbox-1', 'bbox-2', 'bbox-3', 'bbox-4', 'bbox-5',
    #    'surface_area', 'label_a', 'label_b']
    cross_cols = ['ID', 'organelle', 'organelle_b', 'shell', 'label_', 'label', 'volume',
       'equivalent_diameter','surface_area', 'label_a', 'label_b']

    group_cols = ['ID','organelle_b', 'shell']
    id_cols = ['label_','label_a', 'label_b'] 
    math_cols = ['volume','equivalent_diameter','surface_area']

    def lst(x):
        return x.to_list()
       
    agg_func_math = ['sum', 'mean', 'median', 'min', 'max', 'std','count']
    agg_func_id = [lst]

    math_summary = summarize_by_group( summary_df[group_cols + math_cols],group_cols, agg_func_math)

    id_summary = summarize_by_group( summary_df[group_cols + id_cols],group_cols, agg_func_id)

    result = pd.concat([math_summary, id_summary], axis=1)
    return result

    # now 





def pivot_cross_stats(summary_df:pd.DataFrame) -> pd.DataFrame:
    """
    """
    xstat_df = pd.DataFrame()
    for i,org_b in enumerate(org_bs):
        org_i = summary_df.loc[summary_df["organelle_b"] == org_b]

        # get shell
        shell_summary_i = create_cross_stats_summary(org_i.loc[org_i["shell"] == False]).reset_index().drop("shell", axis = 1).add_prefix("shell_")
        # rename shell_ID to ID
        shell_summary_i = shell_summary_i.rename(columns={"shell_ID":"ID"})
        # get non-shell
        summary_i = create_cross_stats_summary(org_i.loc[org_i["shell"] == False]).reset_index().drop("shell", axis = 1)
        col_orgb = organelle_to_colname[org_b]

        summary_i = summary_i.merge(shell_summary_i).drop("organelle_b", axis=1).add_suffix(f"_{col_orgb}")
        if i>0:
            xstat_df = pd.concat([xstat_df,summary_i], axis=1)
        else:
            xstat_df = summary_i
            
    id_cols = [col for col in xstat_df.columns if "ID" in col]
    IDcol = xstat_df[id_cols[0]]
    xstat_df = xstat_df.drop(id_cols, axis=1)
    xstat_df.insert(loc=0,column="ID",value=IDcol)

    return xstat_df



def summarize_organelle_stats(int_path: Union[Path,str], 
                              out_path: Union[Path, str], 
                              organelle_names: List[str]= ["nuclei","golgi","peroxi"]):
    """  
    """
    # write out files... 
    # org_stats_tabs.append(A_stats_tab)
    # csv_path = out_data_path / f"{source_file.stem}-{target}-stats.csv"
    # csv_path = out_data_path / f"{source_file.stem}-{target}-cross-stats.csv"
    # csv_path = out_data_path / f"{source_file.stem}-{target}-proj-stats.csv"
    list_proj_cols = ['radial_bins',
       'radial_cm_vox_cnt', 'radial_org_vox_cnt', 'radial_org_intensity',
       'radial_n_pix', 'radial_cm_cv', 'radial_org_cv', 'radial_img_cv',
       'zernike_n', 'zernike_m', 'zernike_cm_mag', 'zernike_cm_phs',
       'zernike_obj_mag', 'zernike_obj_phs', 'zernike_nuc_mag',
       'zernike_nuc_phs', 'zernike_img_mag','z', 'z_cm_vox_cnt',
       'z_org_vox_cnt', 'z_org_intensity', 'z_nuc_vox_cnt'] 
    
    def _load_list_cols(stats:pd.DataFrame, list_cols) -> pd.DataFrame:
        """ 
        """
        def str_col(x):
            if x == '[]':
                return list()
            else:
                xstr = x.strip("[]").replace("'", "").split(", ")
            return [float(x) if x.isnumeric() else x for x in xstr]
            
        _stats = pd.DataFrame() 
        for col in stats.columns:    
            _stats[col] = stats[col].apply(str_col) if col in list_cols else stats[col]
        return _stats


    if isinstance(int_path, str): int_path = Path(int_path)
    if isinstance(out_path, str): out_path = Path(out_path)

    if not Path.exists(out_path):
        Path.mkdir(out_path)
        print(f"making {out_path}")

    for target in organelle_names:

        stat_file_list = sorted( int_path.glob(f"*{target}-stats.csv") )
        


        stats_df = pd.DataFrame()
        cross_stats_df = pd.DataFrame()
        proj_stats_df = pd.DataFrame()

        for stats_f in stat_file_list:
            # stats load the csv
            stats = pd.read_csv(stats_f, index_col=0)
    

            # projection stats
            stem = stats_f.stem.split("-")[0]
            csv_path = int_path / f"{stem}-{target}-proj-stats.csv"
            proj = pd.read_csv(csv_path, index_col=0)
            proj = _load_list_cols(proj, list_proj_cols)
        

            # cross stats
            csv_path = int_path / f"{stem}-{target}-cross-stats.csv"
            cross = pd.read_csv(csv_path, index_col=0)
        

            stats_df = pd.concat([stats_df,stats],axis=0, join='outer')
            proj_stats_df = pd.concat([proj_stats_df,proj],axis=0, join='outer')
            cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
            
            # if cnt > 0:
            #     summary_df = pd.concat([summary_df,stats],axis=0, join='outer')
            #     proj_stats_df = pd.concat([proj_stats_df,proj],axis=0, join='outer')
            #     cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
            # else:
            #     summary_df = stats 
            #     proj_stats_df = proj
            #     cross_stats_df = cross 


        # summary_df = pd.DataFrame(index=[f.stem.split("-")[0] for f in stat_file_list])
        # cross_stats_df = pd.DataFrame(index=[f.stem.split("-")[0] for f in stat_file_list])
        # proj_stats_df = pd.DataFrame(index=[f.stem.split("-")[0] for f in stat_file_list])

        summary_df = create_stats_summary(stats_df)

        cross_summary_df = pivot_cross_stats(cross_stats_df)

        # proj_stats_df.index = proj_stats_df["ID"]
        # cross_stats_df.index = cross_stats_df["ID"]
        # cross_summary_df.index = cross_summary_df["ID"]
        
        # org_summary = pd.concat([summary, proj_stats_df,cross_summary], axis=1).reindex()
        #org_summary = pd.concat([summary, proj_stats_df], axis=1).reindex()
    

        return summary_df, proj_stats_df, cross_summary_df, cross_stats_df
        



In [356]:

# all the imaging data goes here.
data_root_path = Path(os.path.expanduser("~")) / "Projects/Imaging/data"
# linearly unmixed ".czi" files are here
raw_data_path = data_root_path / "raw"
# save output ".tiff" files here
int_data_path = data_root_path / "out"
# save stats here
out_data_path = data_root_path / "out"

summary_df, proj_stats_df, cross_summary_df, cross_stats_df = summarize_organelle_stats(out_data_path, 
                                                    out_data_path, 
                                                    organelle_names=organelle_names[1:])



  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_st

In [357]:
# split cross stats by 
# shell_df = cross_summary_df.loc[cross_summary_df.shell == True]
# full_df = cross_summary_df.loc[cross_summary_df.shell == False]

# summary_shell = create_cross_stats_summary(shell_df)
# summary_full = create_cross_stats_summary(full_df)

org_b = 'ER'
shell = False



#summary_full.xs((org_b,shell), level=[0,1], axis=0)
#summary_full.head()
# summary_full.loc[:,org_b,shell]
# summary_full.loc[:,org_b,shell]


#summary_shell.head()
cross_stats_df.head(25)


Unnamed: 0,ID,organelle,organelle_b,shell,label_,label,volume,equivalent_diameter,centroid-0,centroid-1,centroid-2,bbox-0,bbox-1,bbox-2,bbox-3,bbox-4,bbox-5,surface_area,label_a,label_b
0,ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed,lyso,nuc,False,43_5,1,2,1.563185,0.0,555.5,220.5,0,555,220,1,557,222,6.928203,43.0,5.0
1,ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed,lyso,nuc,False,69_5,2,8,2.481402,1.0,326.25,262.25,1,325,260,2,329,265,41.755173,69.0,5.0
2,ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed,lyso,nuc,False,14_5,3,54,4.689556,1.759259,362.851852,304.407407,1,358,301,4,369,310,192.85495,14.0,5.0
3,ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed,lyso,nuc,False,11_5,4,94,5.641264,2.510638,399.361702,320.617021,1,392,317,6,407,325,305.540283,11.0,5.0
4,ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed,lyso,nuc,False,32_5,5,4,1.96949,1.0,487.0,325.75,1,486,325,2,489,327,21.51326,32.0,5.0
5,ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed,lyso,nuc,False,32_5,6,45,4.413041,1.955556,500.955556,326.333333,1,495,324,5,511,329,174.034088,32.0,5.0
6,ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed,lyso,nuc,False,45_5,7,1,1.240701,1.0,565.0,233.0,1,565,233,2,566,234,6.928203,45.0,5.0
7,ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed,lyso,nuc,False,45_5,8,13,2.917303,1.0,568.0,240.307692,1,567,237,2,570,245,59.068878,45.0,5.0
8,ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed,lyso,nuc,False,89_5,9,3,1.7894,1.0,571.666667,248.333333,1,571,248,2,573,250,17.04916,89.0,5.0
9,ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed,lyso,nuc,False,21_5,10,3,1.7894,2.0,425.0,195.0,2,424,195,3,427,196,18.241911,21.0,5.0


In [272]:
cross_summary_df.index.to_list()[:20]



['ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed',
 'ZSTACK_PBTOhNGN2hiPSCs_BR1_N15_Unmixed',
 'ZSTACK_PBTOhNGN2hiPSCs_BR1_N16_Unmixed',
 'ZSTACK_PBTOhNGN2hiPSCs_BR1_N17_Unmixed',
 'ZSTACK_PBTOhNGN2hiPSCs_BR1_N18_Unmixed',
 'ZSTACK_PBTOhNGN2hiPSCs_BR1_N19_Unmixed',
 'ZSTACK_PBTOhNGN2hiPSCs_BR1_N20_Unmixed',
 'ZSTACK_PBTOhNGN2hiPSCs_BR1_N21_Unmixed',
 'ZSTACK_PBTOhNGN2hiPSCs_BR1_N22_Unmixed',
 'ZSTACK_PBTOhNGN2hiPSCs_BR1_N23_Unmixed',
 'ZSTACK_PBTOhNGN2hiPSCs_BR2_N01_Unmixed',
 'ZSTACK_PBTOhNGN2hiPSCs_BR2_N02_Unmixed',
 'ZSTACK_PBTOhNGN2hiPSCs_BR2_N03_Unmixed',
 'ZSTACK_PBTOhNGN2hiPSCs_BR2_N05_Unmixed',
 'ZSTACK_PBTOhNGN2hiPSCs_BR2_N06_Unmixed',
 'ZSTACK_PBTOhNGN2hiPSCs_BR2_N07_Unmixed',
 'ZSTACK_PBTOhNGN2hiPSCs_BR2_N08_Unmixed',
 'ZSTACK_PBTOhNGN2hiPSCs_BR2_N09_Unmixed',
 'ZSTACK_PBTOhNGN2hiPSCs_BR2_N10_Unmixed',
 'ZSTACK_PBTOhNGN2hiPSCs_BR3_N01_Unmixed']

In [243]:
i = 0
org_b = org_bs[i]


org_i = cross_summary_df.loc[cross_summary_df["organelle_b"] == org_b]

# get shell
shell_summary_i = create_cross_stats_summary(org_i.loc[org_i["shell"] == False]).reset_index().drop("shell", axis = 1).add_prefix("shell_")
# rename shell_ID to ID
shell_summary_i = shell_summary_i.rename(columns={"shell_ID":"ID"})
# get non-shell
summary_i = create_cross_stats_summary(org_i.loc[org_i["shell"] == False]).reset_index().drop("shell", axis = 1)
ID = summary_i["ID"]
col_orgb = organelle_to_colname[org_b]

summary_merge = summary_i.merge(shell_summary_i).drop("organelle_b", axis=1).add_suffix(f"_{col_orgb}")
xstat_df = summary_merge


In [244]:
i = 1
org_b = org_bs[i]


org_i = cross_summary_df.loc[cross_summary_df["organelle_b"] == org_b]

# get shell
shell_summary_i = create_cross_stats_summary(org_i.loc[org_i["shell"] == False]).reset_index().drop("shell", axis = 1).add_prefix("shell_")
# rename shell_ID to ID
shell_summary_i = shell_summary_i.rename(columns={"shell_ID":"ID"})
# get non-shell
summary_i = create_cross_stats_summary(org_i.loc[org_i["shell"] == False]).reset_index().drop("shell", axis = 1)
ID = summary_i["ID"]
col_orgb = organelle_to_colname[org_b]

summary_merge = summary_i.merge(shell_summary_i).drop("organelle_b", axis=1).add_suffix(f"_{col_orgb}")
xstat_df = pd.concat([xstat_df,summary_merge],axis=1)


In [245]:
id_cols = [col for col in xstat_df.columns if "ID" in col]
IDcol= xstat_df[id_cols[0]]
xstat_df = xstat_df.drop(id_cols, axis=1)
xstat_df.insert(loc=0,column="ID",value=IDcol)

In [246]:
xstat_df.tail(35)

Unnamed: 0,ID,volume_sum_NU,volume_mean_NU,volume_median_NU,volume_min_NU,volume_max_NU,volume_std_NU,volume_count_NU,equivalent_diameter_sum_NU,equivalent_diameter_mean_NU,...,shell_surface_area_sum_MY,shell_surface_area_mean_MY,shell_surface_area_median_MY,shell_surface_area_min_MY,shell_surface_area_max_MY,shell_surface_area_std_MY,shell_surface_area_count_MY,shell_label__lst_MY,shell_label_a_lst_MY,shell_label_b_lst_MY
0,ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed,1269,24.882353,12.0,1,120,30.045064,51,156.054542,3.059893,...,14394.032739,106.622465,52.947926,6.292529,849.940613,155.135088,135,"[62_7, 64_1, 65_1, 11_2, 72_2, 11_8, 11_3, 11_...","[62.0, 64.0, 65.0, 11.0, 72.0, 11.0, 11.0, 11....","[7.0, 1.0, 1.0, 2.0, 2.0, 8.0, 3.0, 3.0, 4.0, ..."
1,ZSTACK_PBTOhNGN2hiPSCs_BR1_N15_Unmixed,259,32.375,4.5,1,141,51.721611,8,23.26683,2.908354,...,4001.112537,117.679781,44.322157,6.928203,501.487,150.061146,34,"[24_6, 24_6, 24_5, 24_5, 36_5, 36_5, 24_5, 24_...","[24.0, 24.0, 24.0, 24.0, 36.0, 36.0, 24.0, 24....","[6.0, 6.0, 5.0, 5.0, 5.0, 5.0, 5.0, 6.0, 5.0, ..."
2,ZSTACK_PBTOhNGN2hiPSCs_BR1_N16_Unmixed,1483,296.6,32.0,5,1119,477.524136,5,30.291583,6.058317,...,337.501599,56.250267,43.523121,12.585057,109.389397,36.933842,6,"[91_3, 133_3, 107_3, 35_3, 35_3, 35_3]","[91.0, 133.0, 107.0, 35.0, 35.0, 35.0]","[3.0, 3.0, 3.0, 3.0, 3.0, 3.0]"
3,ZSTACK_PBTOhNGN2hiPSCs_BR1_N17_Unmixed,3834,67.263158,13.0,1,920,160.507068,57,206.184406,3.61727,...,9233.909762,108.634232,78.34024,6.928203,619.803833,114.332031,85,"[46_8, 16_4, 15_4, 16_4, 16_4, 16_4, 16_4, 16_...","[46.0, 16.0, 15.0, 16.0, 16.0, 16.0, 16.0, 16....","[8.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, ..."
4,ZSTACK_PBTOhNGN2hiPSCs_BR1_N18_Unmixed,5690,103.454545,16.0,1,1473,256.232592,55,225.33504,4.097001,...,23373.662732,129.136258,40.019726,3.464102,4807.653809,398.032074,181,"[7_3, 12_3, 1_3, 13_3, 1_3, 1_3, 15_10, 1_3, 1...","[7.0, 12.0, 1.0, 13.0, 1.0, 1.0, 15.0, 1.0, 1....","[3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 10.0, 3.0, 12.0..."
5,ZSTACK_PBTOhNGN2hiPSCs_BR1_N19_Unmixed,2302,37.129032,20.5,1,285,47.463354,62,216.214266,3.487327,...,11754.767349,115.242817,66.644073,6.928203,868.983948,136.139223,102,"[2_2, 17_2, 30_5, 31_5, 37_1, 2_1, 2_1, 2_1, 2...","[2.0, 17.0, 30.0, 31.0, 37.0, 2.0, 2.0, 2.0, 2...","[2.0, 2.0, 5.0, 5.0, 1.0, 1.0, 1.0, 1.0, 2.0, ..."
6,ZSTACK_PBTOhNGN2hiPSCs_BR1_N20_Unmixed,1036,38.37037,13.0,1,351,68.811419,27,93.206826,3.452105,...,7435.157293,51.276947,31.176914,6.928203,388.473877,53.823083,145,"[4_2, 3_2, 3_2, 11_2, 11_2, 12_2, 12_2, 7_10, ...","[4.0, 3.0, 3.0, 11.0, 11.0, 12.0, 12.0, 7.0, 2...","[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 10.0, 5.0,..."
7,ZSTACK_PBTOhNGN2hiPSCs_BR1_N21_Unmixed,2455,53.369565,26.5,1,358,69.686077,46,186.263393,4.049204,...,14919.365318,103.606704,43.926744,6.928203,745.934326,147.927352,144,"[19_2, 10_2, 37_2, 18_2, 39_2, 18_2, 18_2, 45_...","[19.0, 10.0, 37.0, 18.0, 39.0, 18.0, 18.0, 45....","[2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, ..."
8,ZSTACK_PBTOhNGN2hiPSCs_BR1_N22_Unmixed,804,34.956522,14.0,1,212,49.312066,23,75.518959,3.283433,...,10830.785287,121.694217,51.412025,3.464102,2133.559082,252.276181,89,"[4_1, 6_2, 11_2, 11_2, 11_2, 43_6, 43_7, 6_1, ...","[4.0, 6.0, 11.0, 11.0, 11.0, 43.0, 43.0, 6.0, ...","[1.0, 2.0, 2.0, 2.0, 2.0, 6.0, 7.0, 1.0, 1.0, ..."
9,ZSTACK_PBTOhNGN2hiPSCs_BR1_N23_Unmixed,1152,50.086957,24.0,1,193,54.651718,23,91.58452,3.981936,...,17992.457548,103.404928,45.217569,6.928203,1437.687744,165.188761,174,"[10_7, 10_7, 11_1, 4_1, 14_3, 63_4, 75_1, 81_1...","[10.0, 10.0, 11.0, 4.0, 14.0, 63.0, 75.0, 81.0...","[7.0, 7.0, 1.0, 1.0, 3.0, 4.0, 1.0, 1.0, 5.0, ..."


In [223]:
xstat_df.columns[55:],id_cols

(Index(['volume_std_MY', 'volume_count_MY', 'equivalent_diameter_sum_MY',
        'equivalent_diameter_mean_MY', 'equivalent_diameter_median_MY',
        'equivalent_diameter_min_MY', 'equivalent_diameter_max_MY',
        'equivalent_diameter_std_MY', 'equivalent_diameter_count_MY',
        'surface_area_sum_MY', 'surface_area_mean_MY', 'surface_area_median_MY',
        'surface_area_min_MY', 'surface_area_max_MY', 'surface_area_std_MY',
        'surface_area_count_MY', 'label__lst_MY', 'label_a_lst_MY',
        'label_b_lst_MY', 'shell_organelle_b_MY', 'shell_volume_sum_MY',
        'shell_volume_mean_MY', 'shell_volume_median_MY', 'shell_volume_min_MY',
        'shell_volume_max_MY', 'shell_volume_std_MY', 'shell_volume_count_MY',
        'shell_equivalent_diameter_sum_MY', 'shell_equivalent_diameter_mean_MY',
        'shell_equivalent_diameter_median_MY',
        'shell_equivalent_diameter_min_MY', 'shell_equivalent_diameter_max_MY',
        'shell_equivalent_diameter_std_MY',
     

In [None]:

if i>0:
    xstat_df = pd.concat([xstat_df,summary_i])
else:
    xstat_df = summary_i
        
id_cols = [col for col in xstat_df.columns if "ID" in col]
xstat_df.index = xstat_df[id_cols[0]]
xstat_df.drop(id_cols, axis=1)
xstat_df.insert(loc=0,column="ID",value=xstat_df.index)



out = pd.concat([pd.DataFrame(),summary_merge])

out.head()

In [133]:
xstat_df.head()

Unnamed: 0,ID_NU,volume_sum_NU,volume_mean_NU,volume_median_NU,volume_min_NU,volume_max_NU,volume_std_NU,volume_count_NU,equivalent_diameter_sum_NU,equivalent_diameter_mean_NU,...,shell_surface_area_sum_LD,shell_surface_area_mean_LD,shell_surface_area_median_LD,shell_surface_area_min_LD,shell_surface_area_max_LD,shell_surface_area_std_LD,shell_surface_area_count_LD,shell_label__lst_LD,shell_label_a_lst_LD,shell_label_b_lst_LD
0,ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed,1269,24.882353,12.0,1.0,120.0,30.045064,51.0,156.054542,3.059893,...,,,,,,,,,,
1,ZSTACK_PBTOhNGN2hiPSCs_BR1_N15_Unmixed,259,32.375,4.5,1.0,141.0,51.721611,8.0,23.26683,2.908354,...,,,,,,,,,,
2,ZSTACK_PBTOhNGN2hiPSCs_BR1_N16_Unmixed,1483,296.6,32.0,5.0,1119.0,477.524136,5.0,30.291583,6.058317,...,,,,,,,,,,
3,ZSTACK_PBTOhNGN2hiPSCs_BR1_N17_Unmixed,3834,67.263158,13.0,1.0,920.0,160.507068,57.0,206.184406,3.61727,...,,,,,,,,,,
4,ZSTACK_PBTOhNGN2hiPSCs_BR1_N18_Unmixed,5690,103.454545,16.0,1.0,1473.0,256.232592,55.0,225.33504,4.097001,...,,,,,,,,,,


In [61]:
pivot_cols = ['shell','organelle_b']
explode_cols = ['label_', 'label', 'volume', 'equivalent_diameter', 'surface_area', 'label_a', 'label_b']



ValueError: Index contains duplicate entries, cannot reshape

In [148]:

group_cols = ['ID','organelle_b', 'shell']
id_cols = ['label_','label_a', 'label_b'] 
math_cols = ['volume','equivalent_diameter','surface_area']

stats_in = cross_summary_df[group_cols+math_cols]


lst = lambda x: x.to_list()
   
agg_func_math = ['sum', 'mean', 'median', 'min', 'max', 'std','count']
agg_func_id = [lst]


stats_in.reset_index(drop=True)


Unnamed: 0,ID,organelle_b,shell,volume,equivalent_diameter,surface_area
0,ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed,nuclei,False,2,1.563185,6.928203
1,ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed,nuclei,False,8,2.481402,41.755173
2,ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed,nuclei,False,54,4.689556,192.854950
3,ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed,nuclei,False,94,5.641264,305.540283
4,ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed,nuclei,False,4,1.969490,21.513260
...,...,...,...,...,...,...
27288,ZSTACK_PBTOhNGN2hiPSCs_BR3_N16_Unmixed,ER,True,14,2.990266,46.485519
27289,ZSTACK_PBTOhNGN2hiPSCs_BR3_N16_Unmixed,ER,True,7,2.373376,31.170115
27290,ZSTACK_PBTOhNGN2hiPSCs_BR3_N16_Unmixed,ER,True,60,4.857180,160.698975
27291,ZSTACK_PBTOhNGN2hiPSCs_BR3_N16_Unmixed,ER,True,20,3.367781,58.387959


In [153]:

summary = create_cross_stats_summary(cross_summary_df)
summary.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,volume_sum,volume_mean,volume_median,volume_min,volume_max,volume_std,volume_count,equivalent_diameter_sum,equivalent_diameter_mean,equivalent_diameter_median,...,surface_area_sum,surface_area_mean,surface_area_median,surface_area_min,surface_area_max,surface_area_std,surface_area_count,label__lst,label_a_lst,label_b_lst
ID,organelle_b,shell,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
ZSTACK_PBTOhNGN2hiPSCs_BR3_N16_Unmixed,mito,True,4286,28.765101,11.0,1,511,59.102909,149,450.676199,3.024672,2.759294,...,14507.849634,97.368118,50.340225,6.928203,1541.225342,175.862959,149,"[6_1, 8_2, 11_3, 17_4, 9_2, 9_2, 15_2, 9_2, 9_...","[6.0, 8.0, 11.0, 17.0, 9.0, 9.0, 15.0, 9.0, 9....","[1.0, 2.0, 3.0, 4.0, 2.0, 2.0, 2.0, 2.0, 2.0, ..."
ZSTACK_PBTOhNGN2hiPSCs_BR3_N16_Unmixed,nuclei,False,2476,50.530612,41.0,1,314,52.809051,49,201.495147,4.112146,4.278207,...,7490.389992,152.865102,126.118042,3.464102,777.367004,133.684348,49,"[26_3, 6_3, 34_3, 10_3, 37_3, 38_3, 39_3, 42_3...","[26.0, 6.0, 34.0, 10.0, 37.0, 38.0, 39.0, 42.0...","[3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, ..."
ZSTACK_PBTOhNGN2hiPSCs_BR3_N16_Unmixed,nuclei,True,2311,47.163265,41.0,1,264,45.681117,49,198.634114,4.053757,4.278207,...,7373.088646,150.471197,126.118042,3.464102,727.68457,127.176715,49,"[26_3, 6_3, 34_3, 10_3, 37_3, 38_3, 39_3, 42_3...","[26.0, 6.0, 34.0, 10.0, 37.0, 38.0, 39.0, 42.0...","[3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, ..."
ZSTACK_PBTOhNGN2hiPSCs_BR3_N16_Unmixed,perox,False,77,5.923077,5.0,1,19,5.155281,13,27.080008,2.083078,2.121569,...,345.86884,26.605295,24.706015,6.928203,69.797531,17.455892,13,"[17_9, 17_35, 103_42, 15_45, 174_32, 202_71, 2...","[17.0, 17.0, 103.0, 15.0, 174.0, 202.0, 231.0,...","[9.0, 35.0, 42.0, 45.0, 32.0, 71.0, 67.0, 79.0..."
ZSTACK_PBTOhNGN2hiPSCs_BR3_N16_Unmixed,perox,True,71,5.461538,4.0,1,19,5.253814,13,26.136883,2.010529,1.96949,...,326.476534,25.11358,23.898766,6.928203,69.797531,17.657956,13,"[17_9, 17_35, 103_42, 15_45, 174_32, 202_71, 2...","[17.0, 17.0, 103.0, 15.0, 174.0, 202.0, 231.0,...","[9.0, 35.0, 42.0, 45.0, 32.0, 71.0, 67.0, 79.0..."


In [265]:
# stats = stats[['ID','max_intensity', 'mean_intensity',
#        'min_intensity', 'volume', 'equivalent_diameter',
#      'euler_number', 'extent',
#        'standard_deviation_intensity', 'surface_area', 'NU_overlap',
#        'NU_labels', 'MY_overlap', 'MY_labels', 'GL_overlap', 'GL_labels',
#        'PR_overlap', 'PR_labels', 'ER_overlap', 'ER_labels', 'LD_overlap',
#        'LD_labels']]

def load_list_cols(stats:pd.DataFrame) -> pd.DataFrame:
    """ 

    """
    def str_col(x):
        if x == '[]':
            return list()
        else:
            xstr = x.strip("[]").replace("'", "").split(", ")
        return [float(x) if x.isnumeric() else x for x in xstr]
        
    _stats = pd.DataFrame() 
    for i,col in enumerate(stats.columns):    
        _stats[col] = stats[col].apply(str_col) if stats.dtypes[col]=="object" else stats[col]
    return _stats

proj_ = load_list_cols(proj)

In [275]:
proj

Unnamed: 0,ID,organelle,mask,radial_n_bins,radial_bins,radial_cm_vox_cnt,radial_org_vox_cnt,radial_org_intensity,radial_n_pix,radial_cm_cv,...,zernike_obj_phs,zernike_nuc_mag,zernike_nuc_phs,zernike_img_mag,n_z,z,z_cm_vox_cnt,z_org_vox_cnt,z_org_intensity,z_nuc_vox_cnt
0,ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed,lyso,cellmask,5,"['Ctr', '1', '2', '3', '4']","[496425, 105969, 126007, 139503, 203120]","[13326, 10638, 16179, 12941, 6755]","[542977153, 457985099, 585842458, 480429801, 3...","[42848.0, 10835.0, 14321.0, 20060.0, 52407.0]","[0.04944660749010457, 0.2781282386329111, 0.31...",...,[ 1.57079633 2.40995814 -1.57079633 -1.782344...,[1.00000000e+00 1.82873702e-01 8.57799258e-01 ...,[ 1.57079633 0.09375639 -1.57079633 -1.414969...,[ 1.57079633 2.44263556 -1.57079633 -1.649467...,12,"range(0, 12)",[85482 89976 86026 81980 84617 85839 90970 916...,[1071 2378 2891 3818 5038 6402 7971 8455 8757 ...,[ 93191916 119545261 124119091 141010734 19881...,[26206 28659 31060 32431 33723 34142 33738 329...


In [51]:
summary = stats[['ID','max_intensity', 'mean_intensity',
       'min_intensity', 'volume', 'equivalent_diameter',,
     'euler_number', 'extent',
       'standard_deviation_intensity', 'surface_area', 'NU_overlap',
       'NU_labels', 'MY_overlap', 'MY_labels', 'GL_overlap', 'GL_labels',
       'PR_overlap', 'PR_labels', 'ER_overlap', 'ER_labels', 'LD_overlap',
       'LD_labels']].groupby("ID").describe(percentiles=[.5]).rename({"50%":"Median"})
summary.columns = ["_".join(col_name).rstrip('_') for col_name in summary.columns.to_flat_index()]
summary

Unnamed: 0_level_0,label_count,label_mean,label_std,label_min,label_50%,label_max,max_intensity_count,max_intensity_mean,max_intensity_std,max_intensity_min,...,ER_overlap_std,ER_overlap_min,ER_overlap_50%,ER_overlap_max,LD_overlap_count,LD_overlap_mean,LD_overlap_std,LD_overlap_min,LD_overlap_50%,LD_overlap_max
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed,170.0,181.435294,102.392324,1.0,190.5,370.0,170.0,10480.964706,8826.358513,2491.0,...,577.071399,0.0,9.0,6976.0,170.0,0.0,0.0,0.0,0.0,0.0


In [261]:
proj.dtypes=="object"

Unnamed: 0              False
ID                       True
organelle                True
mask                     True
radial_n_bins           False
radial_bins              True
radial_cm_vox_cnt        True
radial_org_vox_cnt       True
radial_org_intensity     True
radial_n_pix             True
radial_cm_cv             True
radial_org_cv            True
radial_img_cv            True
zernike_n                True
zernike_m                True
zernike_cm_mag           True
zernike_cm_phs           True
zernike_obj_mag          True
zernike_obj_phs          True
zernike_nuc_mag          True
zernike_nuc_phs          True
zernike_img_mag          True
n_z                     False
z                        True
z_cm_vox_cnt             True
z_org_vox_cnt            True
z_org_intensity          True
z_nuc_vox_cnt            True
dtype: bool

In [238]:

result.head(15)

# grp_stats = stats[grp3_cols].explode(grp3_cols[1:]).sum()

Unnamed: 0_level_0,mean_intensity_sum,mean_intensity_mean,mean_intensity_median,mean_intensity_min,mean_intensity_max,mean_intensity_std,mean_intensity_count,standard_deviation_intensity_sum,standard_deviation_intensity_mean,standard_deviation_intensity_median,...,LD_overlap_mean,LD_overlap_median,LD_overlap_count,LD_overlap_frac,NU_labels_sum,MY_labels_sum,GL_labels_sum,PR_labels_sum,ER_labels_sum,LD_labels_sum
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed,900141.049688,5294.947351,4594.630061,2008.227273,19213.243085,2798.18144,170,332166.502887,1953.920605,1556.432242,...,0.0,0.0,170,0.0,"[5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, ...","[1, 2, 3, 8, 13, 17, 3, 8, 7, 12, 7, 1, 1, 1, ...","[2, 3, 4, 4, 4, 2, 2, 4, 4, 2, 2, 4, 4, 4, 4]","[15, 25, 34, 61, 62, 4, 5, 43, 44, 53, 70, 46,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",[]


59

AttributeError: 'bool' object has no attribute 'sum'

In [75]:
for col in column_names:
    col[2:]

In [77]:
col[2:]

'tent'