# PUTATIVE WORKFLOW


## WORKFLOW EDITOR PLUGIN
- FINE-TUNE SEGMENTATIONS
  - export workflow.jsons
    - masks:
      - nuclei
      - cellmask
      - cytoplasm
    - organelles:
      - lyso
      - mito
      - golgi
      - perox
      - ER
      - LD


## BATCHPROCESS WORKFLOW
- BATCH PROCESS
  - load workflow.jsons for: 
  1. masks
    - export: masks .tiff as stack (nuclei, cellmask, cytoplasm)
  2. organelles
    - export individual .tiffs



## NOTEBOOK ~~OR ***FUTURE*** PLUGIN~~
- COLLECT ORGANELLE STATS
  - extract masks.tiffs as individual
    - nuclei, cellmask, cytoplasm
  - collect regionprops for all organelles
    - export .csvs


## NOTEBOOK ~~OR __FUTURE__ PLUGIN~~
- SUMMARIZE STUDY DATA
  - munge .csv to create summary stats across all cells/images




_____________

## TO DO
- add "segmentation name" field instead of copying from workflow.json name


- choose alternate conf_XXX.json location. 
  - strategy:  add to "prebuilt" list from path


  
  ## FILE NAME CONVENTIONS

  raw file name is kept.

  PREFIX = "segmentation name" or regionprop name.  e.g. 
  SUFFIX = "description" i.e. 

In [1]:
# top level imports
from pathlib import Path
import os, sys
from typing import Optional, Union, Dict, List

import numpy as np
import pandas as pd

import napari

### import local python functions in ../infer_subc
sys.path.append(os.path.abspath((os.path.join(os.getcwd(), '..'))))

from infer_subc.core.file_io import (read_czi_image,
                                        export_inferred_organelle,
                                        import_inferred_organelle,
                                        export_tiff,
                                        list_image_files)



from infer_subc.constants import *
from infer_subc.utils.stats import *
from infer_subc.utils.stats_helpers import *



import time
%load_ext autoreload
%autoreload 2



In [2]:
# this will be the example image for testing the pipeline below
# build the datapath
# all the imaging data goes here.
data_root_path = Path(os.path.expanduser("~")) / "Projects/Imaging/data"

# linearly unmixed ".czi" files are here
int_data_path = data_root_path / "raw"
im_type = ".czi"

# get the list of all files
img_file_list = list_image_files(int_data_path,im_type)

# save output ".tiff" files here
out_data_path = data_root_path / "out"

if not Path.exists(out_data_path):
    Path.mkdir(out_data_path)
    print(f"making {out_data_path}")

In [3]:
im_path = Path(img_file_list[0])
im_path

PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed.czi')

## 1. get each unique cells accouding to filename


### extract ID. e.g.

### process each cell & summarize



## 2. aggregate all cells into a database 

In [5]:
full_name = im_path.name

cell_ids = [ Path(fn).stem.split("-")[0] for fn in img_file_list]
cell_ids = list(set(cell_ids))

masks_postfix = "masks2"
organelle_postfix = ["lyso", "mito","golgi","perox","ER","LD"]


In [6]:
# MASK process
# 1. get a listof all files based on a "prefix" and "suffix" for a given path
# dump three .tiff from teh mask multichannel tiff
# from tifffile import imwrite, imread#, tiffcomment
from infer_subc.core.img import label_uint16
from infer_subc.core.file_io import export_tiff, read_tiff_image
from typing import Union
from pathlib import Path

def _explode_mask(mask_path: Union[Path,str], postfix: str= "masks", im_type: str = ".tiff") -> bool:
    """ 
    TODO: add loggin instead of printing
        append tiffcomments with provenance
    """
    if isinstance(mask_path, str): mask_path = Path(mask_path)
    # load image 
    full_stem = mask_path.stem
    if full_stem.endswith(postfix):
        stem = full_stem.rstrip(postfix)
        image = read_tiff_image(mask_path)
        assert image.shape[0]==3
        
        # make into np.uint16 labels
        nuclei = label_uint16(image[0])
        # export as np.uint8 (255)
        cellmask = image[1]>0            
        cytoplasm = image[2]>0

        # write wasks
        root_stem = mask_path.parent / stem
        # ret1 = imwrite(f"{root}nuclei{stem}", nuclei)
        ret1 = export_tiff(nuclei, f"{stem}nuc", mask_path.parent, None)
        # ret2 = imwrite(f"{root}cellmask{stem}", cellmask)
        ret2 = export_tiff(cellmask, f"{stem}cell", mask_path.parent, None)
        # ret3 = imwrite(f"{root}cytosol{stem}", cytosol)
        ret3 = export_tiff(cytoplasm, f"{stem}cyto", mask_path.parent, None)

        print(f"wrote {stem}-{{nuc,cell,cyto}}")
        return True
    else:
        return False



def _explode_masks(root_path: Union[Path,str], postfix: str= "masks", im_type: str = ".tiff"):
    """  
    TODO: add loggin instead of printing
        append tiffcomments with provenance
    """
    if isinstance(root_path, str): root_path = Path(root_path)
    img_file_list = list_image_files(root_path,im_type, postfix)
    wrote_cnt = 0
    for img_f in img_file_list:
        if _explode_mask(img_f, postfix=postfix, im_type=im_type): wrote_cnt += 1
        else: print(f"failed to explode {img_f}")
    else:
        print(f"how thefark!!! {img_f}")

    print(f"exploded {wrote_cnt*100./len(img_file_list)} pct of {len(img_file_list)} files")
    return wrote_cnt



In [7]:
from infer_subc.utils.batch import explode_masks

cnt = explode_masks(out_data_path, postfix='masks2')
cnt

exploded 100.0 pct of 35 files


35

In [8]:


# all the imaging data goes here.
data_root_path = Path(os.path.expanduser("~")) / "Projects/Imaging/data"
# linearly unmixed ".czi" files are here
raw_data_path = data_root_path / "raw"
# save output ".tiff" files here
int_data_path = data_root_path / "out"
# save stats here
out_data_path = data_root_path / "out"


In [9]:
raw_path = raw_data_path
int_path = int_data_path
out_path = out_data_path


if isinstance(raw_path, str): raw_path = Path(raw_path)
if isinstance(int_path, str): int_path = Path(int_path)
if isinstance(out_path, str): out_path = Path(out_path)

img_file_list = list_image_files(raw_path,".czi")

if not Path.exists(out_path):
    Path.mkdir(out_path)
    print(f"making {out_path}")



In [10]:
img_file_list

[PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed.czi'),
 PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N15_Unmixed.czi'),
 PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N16_Unmixed.czi'),
 PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N17_Unmixed.czi'),
 PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N18_Unmixed.czi'),
 PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N19_Unmixed.czi'),
 PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N20_Unmixed.czi'),
 PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N21_Unmixed.czi'),
 PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N22_Unmixed.czi'),
 PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N23_Unmixed.czi'),
 PosixPath

In [11]:
from typing import Optional, Union, Dict, List

def _find_segmentation_tiff_files(prototype:Union[Path,str], organelles: List[str], int_path: Union[Path,str]) -> Dict:
    """
    find the nescessary image files based on protype, the organelles involved, and paths
    """

    # raw
    prototype = Path(prototype)
    if not prototype.exists():
        print(f"bad prototype. please choose an existing `raw` file as prototype")
        return dict()
    # make sure protoype ends with czi

    out_files = {"raw":prototype}

    int_path = Path(int_path) 
    # raw
    if not int_path.is_dir():
        print(f"bad path argument. please choose an existing path containing organelle segmentations")
        return out_files
    
    # cyto, cellmask
    cyto_nm = int_path / f"{prototype.stem}-cyto.tiff"
    if cyto_nm.exists():
        out_files["cyto"] = cyto_nm
    else:
        print(f"cytosol mask not found.  We'll try to extract from masks ")
        if explode_mask(int_path / f"{prototype.stem}-masks.tiff"): 
            out_files["cyto"] = cyto_nm
        else: 
            print(f"failed to explode {prototype.stem}-masks.tiff")
            return out_files
    
    cellmask_nm = int_path / f"{prototype.stem}-cellmask.tiff"
    if  cellmask_nm.exists():
        out_files["cellmask"] = cellmask_nm
    else:
        print(f"cellmask file not found in {int_path} returning")
        out_files["cellmask"] = None

    # organelles
    for org_n in organelles:
        org_name = Path(int_path) / f"{prototype.stem}-{org_n}.tiff"
        if org_name.exists(): 
            out_files[org_n] = org_name
        else: 
            print(f"{org_n} .tiff file not found in {int_path} returning")
            out_files[org_n] = None
    
    return out_files

    


In [12]:
from infer_subc.utils.batch import find_segmentation_tiff_files
prototype = '/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed.czi'
organelles = ["nuc","lyso", "mito","golgi","perox","ER","LD"]

filez = find_segmentation_tiff_files(prototype, organelles, out_data_path)



In [13]:
filez

{'raw': PosixPath('/Users/ergonyc/Projects/Imaging/data/raw/ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed.czi'),
 'cyto': PosixPath('/Users/ergonyc/Projects/Imaging/data/out/ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed-cyto.tiff'),
 'cell': PosixPath('/Users/ergonyc/Projects/Imaging/data/out/ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed-cell.tiff'),
 'nuc': PosixPath('/Users/ergonyc/Projects/Imaging/data/out/ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed-nuc.tiff'),
 'lyso': PosixPath('/Users/ergonyc/Projects/Imaging/data/out/ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed-lyso.tiff'),
 'mito': PosixPath('/Users/ergonyc/Projects/Imaging/data/out/ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed-mito.tiff'),
 'golgi': PosixPath('/Users/ergonyc/Projects/Imaging/data/out/ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed-golgi.tiff'),
 'perox': PosixPath('/Users/ergonyc/Projects/Imaging/data/out/ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed-perox.tiff'),
 'ER': PosixPath('/Users/ergonyc/Projects/Imaging/data/out/ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed-ER.

In [14]:
from infer_subc.utils.stats_helpers import make_organelle_stat_tables
from infer_subc.constants import *
from infer_subc.organelles import *
from infer_subc.core.file_io import read_tiff_image, read_czi_image

# names of organelles we have
organelle_names = ["nuc","lyso", "mito","golgi","perox","ER","LD"]

# get the intensities
organelle_channels = [NUC_CH, LYSO_CH,MITO_CH,GOLGI_CH,PEROX_CH,ER_CH,LD_CH]



In [15]:
# for a list of "prefixes"  collect stats + cross stats masked by cytosol (including nuclei masked by cellmask)

def dump_allout_df(int_path: Union[Path,str], out_path: Union[Path, str], raw_path: Union[Path,str], organelle_names: List[str]= ["nuclei","golgi","peroxi"], organelle_chs: List[int]= [NUC_CH,GOLGI_CH, PEROX_CH], postfix: str = ".tiff"):
    """  
    TODO: add loggin instead of printing
        append tiffcomments with provenance
    """

    
    if isinstance(raw_path, str): raw_path = Path(raw_path)
    if isinstance(int_path, str): int_path = Path(int_path)
    if isinstance(out_path, str): out_path = Path(out_path)
    
    img_file_list = list_image_files(raw_path,".czi")

    if not Path.exists(out_path):
        Path.mkdir(out_path)
        print(f"making {out_path}")
        
    for img_f in img_file_list:
        filez = find_segmentation_tiff_files(img_f, organelle_names, int_path)
        img_data,meta_dict = read_czi_image(filez["raw"])

        # load organelles and masks
        cyto_mask = read_tiff_image(filez["cyto"])
        cellmask_obj = read_tiff_image(filez["cell"])



        # create intensities from raw as list
        intensities = [img_data[ch] for ch in organelle_chs]

        # load organelles as list
        organelles = [read_tiff_image(filez[org]) for org in organelle_names]
        
        #get mask (cyto_mask)
        nuclei_obj = organelles[ organelle_names.index("nuc") ]

        n_files = make_organelle_stat_tables(organelle_names, 
                                      organelles,
                                      intensities, 
                                      nuclei_obj,
                                      cellmask_obj,
                                      cyto_mask, 
                                      out_data_path, 
                                      img_f,
                                      n_rad_bins=5,
                                      n_zernike=9)

    return n_files



# all the imaging data goes here.
data_root_path = Path(os.path.expanduser("~")) / "Projects/Imaging/data"
# linearly unmixed ".czi" files are here
raw_data_path = data_root_path / "raw"
# save output ".tiff" files here
int_data_path = data_root_path / "out"
# save stats here
out_data_path = data_root_path / "out"

dump_all_stats(out_data_path, 
                     out_data_path, 
                     raw_data_path, 
                     organelle_names=organelle_names, 
                     organelle_chs=organelle_channels)



  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  cv_cmsk.append(float(np.mean(radial_cv_cmsk)))  #convert to float to make importing from csv more straightforward
  cv_obj.append(float(np.mean(radial_cv_obj)))
  cv_img.append(float(np.mean(radial_cv_obj)))
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  cv_cmsk.append(float(np.mean(radial_cv_cmsk)))  #convert to float to make importing from csv more straightforward
  cv_obj.append(float(np.mean(radial_cv_obj)))
  cv_img.append(float(np.mean(radial_cv_obj)))
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum(

dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_mea

dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  magnitude 

dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_mea

dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_mea

dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  magnitude = np.sqrt(vr * vr + vi * vi) / pixels.sum()


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_means_cmsk = np.ma.masked_array(radial_counts_cmsk / pixel_count, mask)
  radial_means_obj = np.ma.masked_array(radial_counts / pixel_count, mask)
  radial_means_img = np.ma.masked_array(radial_values / pixel_count, mask)
  radial_mea

dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


dumped 49x3 organelle stats (['nuc', 'lyso', 'mito', 'golgi', 'perox', 'ER', 'LD']) csvs


49

# summary statistics

We now need to merge our files


-----------------
##  SUMMARY STATS  
> WARNING: (🚨🚨🚨🚨 WIP)
### normalizations.

- overlaps, normalized by CYTOPLASM, A, and B
- per cell averages, medians, std, and totals

These is all pandas munging and very straightforward tabular manipulation.


In [16]:

data_root_path = Path(os.path.expanduser("~")) / "Projects/Imaging/data"

# linearly unmixed ".czi" files are here
int_data_path = data_root_path / "out"


In [294]:
# for a list of "prefixes"  collect stats + cross stats masked by cytosol (including nuclei masked by cellmask)

def summarize_by_id(stats_in:pd.DataFrame,agg_fn: List) -> pd.DataFrame:
    """ 
    """
    summary = stats_in.groupby(['ID']).agg(agg_fn)
    summary.columns = ["_".join(col_name).rstrip('_') for col_name in summary.columns.to_flat_index()]
    return summary



def create_stats_summary(summary_df:pd.DataFrame) -> pd.DataFrame:
    """
    """
    column_names = summary_df.columns

    def frac(x):
        return (x>0).sum()/x.count() 

    math_cols = ['ID', 'mean_intensity',
        'standard_deviation_intensity',
        'min_intensity','max_intensity', 'equivalent_diameter',
        'euler_number', 'extent']
    vol_cols = ['ID','volume']
    overlap_cols = ['ID'] + [col for col in column_names if col.endswith('_overlap')]
    labels_cols = ['ID'] + [col for col in column_names if col.endswith('_labels')]
   
    agg_func_math = ['sum', 'mean', 'median', 'min', 'max', 'std','count']
    agg_func_overlap = ['sum', 'mean', 'median','count',frac]
    agg_func_labels = ['sum']
    agg_func_vol = ['sum', 'mean', 'median', 'min', 'max', 'std', 'var']

    math_summary = summarize_by_id( summary_df[math_cols] , agg_func_math)
    
    # label_stats = fix_list_col(summary_df[labels_cols])
    label_summary = summarize_by_id( summary_df[labels_cols] , agg_func_labels)
    overlap_summary = summarize_by_id( summary_df[overlap_cols] ,agg_func_overlap)
    vol_summary = summarize_by_id( summary_df[vol_cols] , agg_func_vol)
    result = pd.concat([math_summary, vol_summary, overlap_summary, label_summary], axis=1)

    result.insert(loc=0,column="ID",value=result.index)

    return result


def summarize_by_group(stats_in:pd.DataFrame, grp_col:list, agg_fn:list) -> pd.DataFrame:
    """ 
    """
    summary = stats_in.reset_index(drop=True).groupby(grp_col).agg(agg_fn)
    summary.columns = ["_".join(col_name).rstrip('_') for col_name in summary.columns.to_flat_index()]
    return summary


def create_cross_stats_summary(summary_df:pd.DataFrame) -> pd.DataFrame:
    """
    """
    # cross_cols = ['ID', 'organelle', 'organelle_b', 'shell', 'label_', 'label', 'volume',
    #    'equivalent_diameter','surface_area', 'label_a', 'label_b']

    group_cols = ['ID','organelle_b', 'shell']
    id_cols = ['label_','label_a', 'label_b'] 
    math_cols = ['volume','equivalent_diameter','surface_area']

    def lst(x):
        return x.to_list()
       
    agg_func_math = ['sum', 'mean', 'median', 'min', 'max', 'std','count']
    agg_func_id = [lst]

    math_summary = summarize_by_group( summary_df[group_cols + math_cols],group_cols, agg_func_math)

    id_summary = summarize_by_group( summary_df[group_cols + id_cols],group_cols, agg_func_id)

    result = pd.concat([math_summary, id_summary], axis=1)
    return result

    # now 

def summarize_cross_stats(summary_df:pd.DataFrame) -> pd.DataFrame:
    """
    """
    # get shell
    shell_summary_i = create_cross_stats_summary(summary_df.loc[summary_df["shell"] == True]).reset_index().drop("shell", axis = 1).add_prefix("shell_")
    # rename shell_ID to ID
    shell_summary_i = shell_summary_i.rename(columns={"shell_ID":"ID","shell_organelle_b":"organelle_b"})

    # get non-shell
    summary_i = create_cross_stats_summary(summary_df.loc[summary_df["shell"] == False]).reset_index().drop("shell", axis = 1)

    summary_i = summary_i.merge(shell_summary_i,on=["ID","organelle_b"])

    return summary_i


def pivot_cross_stats(summary_df:pd.DataFrame) -> pd.DataFrame:
    """
    """
    xstat_df = pd.DataFrame()
    for i,org_b in enumerate(org_bs):
        org_i = summary_df.loc[summary_df["organelle_b"] == org_b]

        # get shell
        shell_summary_i = create_cross_stats_summary(org_i.loc[org_i["shell"] == False]).reset_index().drop("shell", axis = 1).add_prefix("shell_")
        # rename shell_ID to ID
        shell_summary_i = shell_summary_i.rename(columns={"shell_ID":"ID","shell_organelle_b":"organelle_b"})
        # get non-shell
        summary_i = create_cross_stats_summary(org_i.loc[org_i["shell"] == False]).reset_index().drop("shell", axis = 1)
        col_orgb = organelle_to_colname[org_b]

        summary_i = summary_i.merge(shell_summary_i,on=["ID","organelle_b"]).drop("organelle_b", axis=1).add_suffix(f"_{col_orgb}")
        if i>0:
            xstat_df = pd.concat([xstat_df,summary_i], axis=1)
        else:
            xstat_df = summary_i
            
    id_cols = [col for col in xstat_df.columns if "ID" in col]
    IDcol = xstat_df[id_cols[0]]
    xstat_df = xstat_df.drop(id_cols, axis=1)
    xstat_df.insert(loc=0,column="ID",value=IDcol)

    return xstat_df


def fix_int_list_cols(in_df:pd.DataFrame, list_cols) -> pd.DataFrame:
    """ 
    """

    def _str_to_list(x):
        if x == '[]':
            return list()
        elif isinstance(x,float): # catch nans
            return x
        else:
            xstr = x.strip("[]").replace("'", "").split(", ")
        return [int(float(x)) for x in xstr]
        
    out_df = pd.DataFrame() 
    for col in in_df.columns:    
        out_df[col] = in_df[col].apply(_str_to_list) if col in list_cols else in_df[col]
    return out_df

def fix_float_list_cols(in_df:pd.DataFrame, list_cols) -> pd.DataFrame:
    """ 
    """
    def _str_to_list(x):
        if x == '[]':
            return list()
        elif isinstance(x,float): # catch nans
            return x
        else:
            xstr = x.strip("[]").replace("'", "").split(", ")
        return [float(x) for x in xstr]
        
    out_df = pd.DataFrame() 
    for col in in_df.columns:    
        out_df[col] = in_df[col].apply(_str_to_list) if col in list_cols else in_df[col]
    return out_df

def fix_str_list_cols(in_df:pd.DataFrame, list_cols) -> pd.DataFrame:
    """ 
    """
    def _str_to_list(x):
        if x == '[]':
            return list()
        elif isinstance(x,float): # catch nans
            return x
        else:
            xstr = x.strip("[]").replace("'", "").split(", ")
        return [x for x in xstr]
        
    out_df = pd.DataFrame() 
    for col in in_df.columns:    
        out_df[col] = in_df[col].apply(_str_to_list) if col in list_cols else in_df[col]
    return out_df


def load_stats_csv(in_path: Path, img_id: str, target_org: str) -> pd.DataFrame:
    """ helper to load the basic stats csv: `img_id`-`target_organelle` -stats.csv
    returns pandas DataFrame """
    csv_path = in_path / f"{img_id}-{target_org}-stats.csv"
    stats = pd.read_csv(csv_path, index_col=0,dtype={"ID":str,"organelle":str})
    # need to convert columns *_labels
    list_cols = [col for col in stats.columns if col.endswith('_labels')]
    stats = fix_int_list_cols(stats,list_cols)
    return stats
        

def load_proj_stats_csv(in_path: Path, img_id: str, target_org: str) -> pd.DataFrame:
    """ helper to load  the projection stats csv: `img_id`-`target_organelle` -proj-stats.csv
    returns pandas DataFrame """
    # obj_cols =  ['ID', 'organelle','radial_n_bins','n_z']  # leave alone
    # str_cols = [ 'radial_bins']
    int_cols = ['radial_cm_vox_cnt', 'radial_org_vox_cnt', 'radial_org_intensity', 'radial_n_pix','zernike_n', 'zernike_m', 'z','z_cm_vox_cnt','z_org_vox_cnt', 'z_org_intensity', 'z_nuc_vox_cnt']
    float_cols = ['radial_cm_cv', 'radial_org_cv', 'radial_img_cv','zernike_cm_mag', 'zernike_cm_phs','zernike_obj_mag', 'zernike_obj_phs', 'zernike_nuc_mag','zernike_nuc_phs', 'zernike_img_mag']

    csv_path = in_path / f"{img_id}-{target_org}-proj-stats.csv"
    proj = pd.read_csv(csv_path, index_col=0)
    proj['radial_bins'] = proj['radial_bins'].values.squeeze().tolist()
    # proj = fix_str_list_cols(proj, str_cols)
    proj = fix_int_list_cols(proj, int_cols)
    proj = fix_float_list_cols(proj, float_cols)
    return proj
        

def load_cross_stats_csv(in_path: Path, img_id: str, target_org: str) -> pd.DataFrame:
    """ helper to load  the cross- stats csv: `img_id`-`target_organelle` -cross-stats.csv
    returns pandas DataFrame """
    csv_path = in_path / f"{img_id}-{target_org}-cross-stats.csv"
    cross = pd.read_csv(csv_path, index_col=0)
    return cross


def summarize_organelle_stats(int_path: Union[Path,str], 
                              out_path: Union[Path, str], 
                              organelle_names: List[str]= ["nuclei","golgi","peroxi"]):
    """  
    """
    # write out files... 

    if isinstance(int_path, str): int_path = Path(int_path)
    if isinstance(out_path, str): out_path = Path(out_path)

    if not Path.exists(out_path):
        Path.mkdir(out_path)
        print(f"making {out_path}")

    all_stats_df = pd.DataFrame()
    all_cross_stats_df = pd.DataFrame()
    all_proj_stats_df = pd.DataFrame()
    
    for target in organelle_names:
        stat_file_list = sorted( int_path.glob(f"*{target}-stats.csv") )

        stats_df = pd.DataFrame()
        cross_stats_df = pd.DataFrame()
        proj_stats_df = pd.DataFrame()

        for stats_f in stat_file_list:
            stem = stats_f.stem.split("-")[0]
            # stats load the csv
            stats = load_stats_csv(out_path,stem, target)
            # projection stats
            proj = load_proj_stats_csv(out_path,stem, target)
            # cross stats
            cross = load_cross_stats_csv(out_path,stem, target)

            stats_df = pd.concat([stats_df,stats],axis=0, join='outer')
            proj_stats_df = pd.concat([proj_stats_df,proj],axis=0, join='outer')
            cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
        

        ## maybe merge into all the possible files?
        # summary_df = pd.DataFrame(index=[f.stem.split("-")[0] for f in stat_file_list])
        # cross_stats_df = pd.DataFrame(index=[f.stem.split("-")[0] for f in stat_file_list])
        # proj_stats_df = pd.DataFrame(index=[f.stem.split("-")[0] for f in stat_file_list])

        summary_df = create_stats_summary(stats_df)
        summary_df.insert(loc=1,column="organelle",value=target)
        cross_summary_df = summarize_cross_stats(cross_stats_df)
        ## cross_summary_df = pivot_cross_stats(cross_stats_df)  #makes a wide version... but has a bug
        cross_summary_df.insert(loc=1,column="organelle",value=target)

        all_stats_df = pd.concat([all_stats_df,summary_df],axis=0)
        all_proj_stats_df = pd.concat([all_proj_stats_df,proj_stats_df],axis=0)
        all_cross_stats_df = pd.concat([all_cross_stats_df,cross_summary_df],axis=0)
    

    return all_stats_df, all_proj_stats_df, all_cross_stats_df
        



In [305]:

# all the imaging data goes here.
data_root_path = Path(os.path.expanduser("~")) / "Projects/Imaging/data"
# linearly unmixed ".czi" files are here
raw_data_path = data_root_path / "raw"
# save output ".tiff" files here
int_data_path = data_root_path / "out"
# save stats here
out_data_path = data_root_path / "out"





all_stats_df, all_proj_stats_df, all_cross_stats_df = summarize_organelle_stats(out_data_path, 
                                                    out_data_path, 
                                                    organelle_names=organelle_names);

nuc


  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_st

lyso


  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_st

mito


  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_st

golgi


  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_st

perox


  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_st

ER


  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_st

LD


  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_stats_df,cross],axis=0, join='outer')
  cross_stats_df = pd.concat([cross_st

In [215]:
all_stats_df.head()

Unnamed: 0_level_0,ID,organelle,mean_intensity_sum,mean_intensity_mean,mean_intensity_median,mean_intensity_min,mean_intensity_max,mean_intensity_std,mean_intensity_count,standard_deviation_intensity_sum,...,GL_labels_sum,PR_labels_sum,ER_labels_sum,LD_labels_sum,NU_overlap_sum,NU_overlap_mean,NU_overlap_median,NU_overlap_count,NU_overlap_frac,NU_labels_sum
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed,ZSTACK_PBTOhNGN2hiPSCs_BR1_N14_Unmixed,nuc,7529.208496,7529.208496,7529.208496,7529.208496,7529.208496,,1,3910.083483,...,[3],"[3, 6, 7, 24, 26, 32, 33, 42, 48, 49, 59, 60, ...",[1],[],,,,,,
ZSTACK_PBTOhNGN2hiPSCs_BR1_N15_Unmixed,ZSTACK_PBTOhNGN2hiPSCs_BR1_N15_Unmixed,nuc,7877.621808,7877.621808,7877.621808,7877.621808,7877.621808,,1,3427.678155,...,[3],[78],[1],[],,,,,,
ZSTACK_PBTOhNGN2hiPSCs_BR1_N16_Unmixed,ZSTACK_PBTOhNGN2hiPSCs_BR1_N16_Unmixed,nuc,8770.506566,8770.506566,8770.506566,8770.506566,8770.506566,,1,5831.848097,...,[],[],[1],[],,,,,,
ZSTACK_PBTOhNGN2hiPSCs_BR1_N17_Unmixed,ZSTACK_PBTOhNGN2hiPSCs_BR1_N17_Unmixed,nuc,7394.871101,7394.871101,7394.871101,7394.871101,7394.871101,,1,3292.971232,...,"[1, 2]","[25, 79, 93, 100, 113, 114]",[1],[],,,,,,
ZSTACK_PBTOhNGN2hiPSCs_BR1_N18_Unmixed,ZSTACK_PBTOhNGN2hiPSCs_BR1_N18_Unmixed,nuc,7486.117821,7486.117821,7486.117821,7486.117821,7486.117821,,1,3499.060182,...,"[1, 6]","[3, 4, 9, 14, 16, 17, 23, 25, 32, 45, 48, 49, ...",[1],[],,,,,,


In [216]:
list_cols = [col for col in all_cross_stats_df.columns if "label" in col] #if col.contains("label")

In [217]:
all_cross_stats_df[list_cols[2]].loc[0].values[0][0]

1.0

In [301]:

csv_path = out_data_path / f"summary-stats.csv"
all_stats_df.to_csv(csv_path)

csv_path = out_data_path / f"summary-proj-stats.csv"
all_proj_stats_df.to_csv(csv_path)

csv_path = out_data_path / f"summary-cross-stats.csv"
all_cross_stats_df.to_csv(csv_path)


In [304]:
# x = test[list_cols[2]][0]

# xstr = x.strip("[]").replace("'", "").split(", ")
# float(xstr[0])
# [int(float(x)) for x in xstr]

# xstr

test.radial_bins.iloc[0][:4]


"['Ct"

Make some wrappers to deal with reading our summary stats into pandas properly.


In [297]:


def load_summary_stats_csv(in_path: Path) -> pd.DataFrame:
    """ helper to load the summary stats csv: summary-stats.csv
    returns pandas DataFrame """
    csv_path = in_path / f"summary-stats.csv"
    summary_df = pd.read_csv(csv_path, index_col=0)
    # need to convert columns *_labels
    list_cols = [col for col in summary_df.columns if "labels" in col] #if col.contains("label")
    summary_df = fix_int_list_cols(summary_df,list_cols)
    return summary_df


def load_summary_proj_stats_csv(in_path: Path) -> pd.DataFrame:
    """ helper to load summary projection stats csv: summary-proj-stats.csv
    returns pandas DataFrame """
    obj_cols =  ['ID', 'organelle','mask','radial_n_bins','n_z']  # leave alone
    str_cols = [ 'radial_bins']
    int_cols = ['radial_cm_vox_cnt', 'radial_org_vox_cnt', 'radial_org_intensity', 'radial_n_pix','zernike_n', 'zernike_m', 'z','z_cm_vox_cnt','z_org_vox_cnt', 'z_org_intensity', 'z_nuc_vox_cnt']
    float_cols = ['radial_cm_cv', 'radial_org_cv', 'radial_img_cv','zernike_cm_mag', 'zernike_cm_phs','zernike_obj_mag', 'zernike_obj_phs', 'zernike_nuc_mag','zernike_nuc_phs', 'zernike_img_mag']

    csv_path = in_path / f"summary-proj-stats.csv"
    proj = pd.read_csv(csv_path, index_col=0)
    proj = fix_str_list_cols(proj, str_cols)
    proj = fix_int_list_cols(proj, int_cols)
    proj = fix_float_list_cols(proj, float_cols)
    return proj
        

def load_summary_cross_stats_csv(in_path: Path) -> pd.DataFrame:
    """ helper to load summary cross- stats csv: summary-cross-stats.csv
    returns pandas DataFrame """

    csv_path = in_path / f"summary-cross-stats.csv"
    summary_df = pd.read_csv(csv_path, index_col=0)

    list_cols = [col for col in summary_df.columns if "label" in col] #if col.contains("label")
    str_list_cols = [col for col in list_cols if "__" in col]
    int_list_cols = [col for col in list_cols if "__" not in col]

    summary_df = fix_str_list_cols(summary_df,str_list_cols)
    summary_df = fix_int_list_cols(summary_df,int_list_cols)

    return summary_df
    


In [298]:

#summary_shell.head()
test = load_summary_stats_csv(out_data_path)
test_proj = load_summary_proj_stats_csv(out_data_path)
test_cross = load_summary_cross_stats_csv(out_data_path)

In [208]:
def fix_int_list_cols(in_df:pd.DataFrame, list_cols) -> pd.DataFrame:
    """ 
    """
    def _str_to_list(x):
        if x == '[]':
            return list()
        else:
            xstr = x.strip("[]").replace("'", "").split(", ")
        return [int(float(x)) for x in xstr]
        
    out_df = pd.DataFrame() 
    for col in in_df.columns:    
        out_df[col] = in_df[col].apply(_str_to_list) if col in list_cols else in_df[col]
    return out_df


def load_stats_csv(in_path: Path, img_id: str, target_org: str) -> pd.DataFrame:
    """ helper to load the basic stats csv: `img_id`-`target_organelle` -stats.csv
    returns pandas DataFrame """
    csv_path = in_path / f"{img_id}-{target_org}-stats.csv"
    stats = pd.read_csv(csv_path, index_col=0,dtype={"ID":str,"organelle":str})
    # need to convert columns *_labels
    list_cols = [col for col in stats.columns if col.endswith('_labels')]
    stats = fix_int_list_cols(stats,list_cols)
    return stats
        

def load_proj_stats_csv(in_path: Path, img_id: str, target_org: str) -> pd.DataFrame:
    """ helper to load  the projection stats csv: `img_id`-`target_organelle` -proj-stats.csv
    returns pandas DataFrame """
    # obj_cols =  ['ID', 'organelle','radial_n_bins','n_z']  # leave alone
    # str_cols = [ 'radial_bins']
    int_cols = ['radial_cm_vox_cnt', 'radial_org_vox_cnt', 'radial_org_intensity', 'radial_n_pix','zernike_n', 'zernike_m', 'z','z_cm_vox_cnt','z_org_vox_cnt', 'z_org_intensity', 'z_nuc_vox_cnt']
    float_cols = ['radial_cm_cv', 'radial_org_cv', 'radial_img_cv','zernike_cm_mag', 'zernike_cm_phs','zernike_obj_mag', 'zernike_obj_phs', 'zernike_nuc_mag','zernike_nuc_phs', 'zernike_img_mag']

    csv_path = in_path / f"{img_id}-{target_org}-proj-stats.csv"
    proj = pd.read_csv(csv_path, index_col=0)
    proj['radial_bins'] = proj['radial_bins'].values.squeeze().tolist()
    
    proj = fix_str_list_cols(proj, str_cols)
    proj = fix_int_list_cols(proj, int_cols)
    proj = fix_float_list_cols(proj, float_cols)
    return proj
        

def load_cross_stats_csv(in_path: Path, img_id: str, target_org: str) -> pd.DataFrame:
    """ helper to load  the cross- stats csv: `img_id`-`target_organelle` -cross-stats.csv
    returns pandas DataFrame """
    csv_path = in_path / f"{img_id}-{target_org}-cross-stats.csv"
    cross = pd.read_csv(csv_path, index_col=0)
    return cross
    
    

In [187]:
out_data_path

PosixPath('/Users/ergonyc/Projects/Imaging/data/out')

In [188]:
stats = load_stats_csv(out_data_path,img_id, target_org)
proj = load_proj_stats_csv(out_data_path,img_id, target_org)
cross = load_cross_stats_csv(out_data_path,img_id, target_org)

In [191]:
obj_cols =  ['ID', 'organelle','mask','radial_n_bins','n_z']  # leave alone
str_cols = [ 'radial_bins']
int_cols = ['radial_cm_vox_cnt', 'radial_org_vox_cnt', 'radial_org_intensity', 'radial_n_pix','zernike_n', 'zernike_m', 'z','z_cm_vox_cnt','z_org_vox_cnt', 'z_org_intensity', 'z_nuc_vox_cnt']
float_cols = ['radial_cm_cv', 'radial_org_cv', 'radial_img_cv','zernike_cm_mag', 'zernike_cm_phs','zernike_obj_mag', 'zernike_obj_phs', 'zernike_nuc_mag','zernike_nuc_phs', 'zernike_img_mag']

csv_path = in_path / f"{img_id}-{target_org}-proj-stats.csv"
proj = pd.read_csv(csv_path, index_col=0)


In [193]:
all_cols = proj.columns
proj['radial_bins'] = proj['radial_bins'].values.squeeze().tolist()


In [203]:
x=proj[int_cols[1]][0]
xstr = x.strip("[]").replace("'", "").split(", ")
[int(float(x)) for x in xstr]

[7125, 8863, 1838, 149, 0]

In [205]:
def _str_to_list(x):
    if x == '[]':
        return list()
    else:
        xstr = x.strip("[]").replace("'", "").split(", ")
    return [int(float(x)) for x in xstr]
    
in_df = proj
out_df = pd.DataFrame() 
for col in in_df.columns:    
    out_df[col] = in_df[col].apply(_str_to_list) if col in list_cols else in_df[col]

# proj = fix_int_list_cols(proj, int_cols)


In [209]:

proj = fix_int_list_cols(proj, int_cols)

In [211]:

proj = fix_float_list_cols(proj, float_cols)