In [1]:
# top level imports
from pathlib import Path
import os, sys
from typing import Optional, Union, Dict, List
import itertools 
import glob

import warnings
import time

import numpy as np
import pandas as pd

import napari

### import local python functions in ../infer_subc
sys.path.append(os.path.abspath((os.path.join(os.getcwd(), '..'))))

from infer_subc.core.file_io import (read_czi_image,
                                        export_inferred_organelle,
                                        import_inferred_organelle,
                                        export_tiff,
                                        list_image_files,
                                        read_tiff_image)



from infer_subc.constants import *
from infer_subc.utils.stats import *
from infer_subc.utils.stats_helpers import *
from infer_subc.utils.stats import _assert_uint16_labels
from infer_subc.core.img import label_uint16


import time
%load_ext autoreload
%autoreload 2

__________________________
## Collecting Summary Stats across multiple experiments - Part 1.6

---------
## **Batch Summary Stats**

### summary of steps

🛠️ BUILD FUNCTION PROTOTYPE

- **`0`** - Establish csv paths *(preliminary step)*

- **`1`** - Read in and categorize csv files

    - read in and categorize csv files for all listed paths
    - Combine comprehensive metrics tables to be summarized and restructured

- **`2`** - Restructure comprehensive organelle two-way interaction metrics table

    - breakdown the interaction table column names
    - group observations by the **first** organelle involved in interactions
    - unstack the grouped table to create a column for every unique organelle interaction type
    - correct column names for unstacked tables to accurately describe each the **first** organelles involved in interaction sites
    - repeat last three substeps for the **second** organelle involved in the interactions
    - combine and merge the data from **both** unstacked tables to include interaction metrics from all organelle objects

- **`3`** - Apply aggregate statistics for summarization

    - determine aggregate statistics to be applied per organelle object
    - summarize metrics between the organelle morphology and interaction tables
    - summarize metrics in the region morphology table
    - summarize additional metrics in the organelle morphology table

- **`4`** - Restructure distribution metrics tables

    - for XY-distribution collect summary statistics for voxel bins and wedges
    - for Z-distribution collect summary statistics for voxel bins and wedges
    - calculate the coefficient of variation for the **mean**, **median**, **standard deviation** for the XY-distribution bin values
    - repeat the first two substeps for the nucleus distribution metrics
    - combine nucleus and organelle distribution tables


- **`5`** - Add normalized metrics

    - calculate fraction of cell area taken up by the organelles
    - calculate fraction of organelle objects involved in specific interorganelle contacts

- **`6`** - Unstack and finalize summary stats tables

    - unstack and reorder organelle morphology summary table columns
    - fill "NaN" values with 0 when necessary to final organelle morphology summary table
    - unstack and reorder organelle interactions summary table columns
    - fill "NaN" values with 0 when necessary to final organelle interactions summary table
    - unstack and reorder distribution measurements summary table columns to create finalized table
    - unstack and reorder region morphology summary table columns
    - add normalization to finalize region morphology summary table
    - combine all four tables to create a complete summary table

- **`7`** - Export summary stats tables as .csv files

⚙️ EXECUTE FUNCTION PROTOTYPE

- Define prototype `_batch_summary_stats` function

- Run prototype `_batch_summary_stats` function

# ***BUILD FUNCTION PROTOTYPE***

## **`0` - Establish csv paths *(preliminary step)***

In [2]:
# List of file paths to be included in the summary
csv_path_list=["F:/Experiments (C2-80 - C2-108)/C2-107/20230919_C2-107_3D-analysis/20240217_C2-107_quantification_10per",
                "F:/Experiments (C2-80 - C2-108)/C2-108/20230920_C2-108_3D-analysis/20240216_C2-108_qantification_10per",
                "D:/Experiments (C2-117 - current)/C2-123/20230922_C2-123_3D-analysis/20240118_C2-123_quantification",
                "D:/Experiments (C2-117 - current)/C2-121/20230921_C2-121_3D-analysis/20240221_C2-121_quantification_10per",
                "D:/Experiments (C2-117 - current)/C2-117/20230922_C2-117_3D-analysis/20240118_C2-117_quantification"]

## **`1` - Read in and categorize csv files**

- read in and categorize csv files for all listed paths

In [None]:
org_tabs = []
contact_tabs = []
dist_tabs = []
region_tabs = []

for loc in csv_path_list:
    ds_count = ds_count + 1
    loc=Path(loc)
    files_store = sorted(loc.glob("*.csv"))
    for file in files_store:
        fl_count = fl_count + 1
        stem = file.stem

        org = "organelles"
        contacts = "contacts"
        dist = "distributions"
        regions = "_regions"

        if org in stem:
            test_orgs = pd.read_csv(file, index_col=0)
            test_orgs.insert(0, "dataset", stem[:-11])
            org_tabs.append(test_orgs)
        if contacts in stem:
            test_contact = pd.read_csv(file, index_col=0)
            test_contact.insert(0, "dataset", stem[:-9])
            contact_tabs.append(test_contact)
        if dist in stem:
            test_dist = pd.read_csv(file, index_col=0)
            test_dist.insert(0, "dataset", stem[:-14])
            dist_tabs.append(test_dist)
        if regions in stem:
            test_regions = pd.read_csv(file, index_col=0)
            test_regions.insert(0, "dataset", stem[:-8])
            region_tabs.append(test_regions)


- Combine comprehensive metrics tables to be summarized and restructured

In [None]:
org_df = pd.concat(org_tabs,axis=0, join='outer')
contacts_df = pd.concat(contact_tabs,axis=0, join='outer')
dist_df = pd.concat(dist_tabs,axis=0, join='outer')
regions_df = pd.concat(region_tabs,axis=0, join='outer')

In [3]:
org_df.head()

Unnamed: 0,dataset,image_name,object,label,scale,centroid-0,centroid-1,centroid-2,bbox-0,bbox-1,...,SA_to_volume_ratio,equivalent_diameter,extent,euler_number,solidity,axis_major_length,min_intensity,max_intensity,mean_intensity,standard_deviation_intensity
0,20240118_C2-123_quant,20230817_C2-123_unconditioned_well 10_cell 1_2...,LD,34,"(0.4106, 0.0799, 0.0799)",5.748316,79.220209,66.087097,14,990,...,30.710824,0.380601,0.916667,1,inf,0.388472,3209.564941,4803.942871,3969.853027,513.974487
1,20240118_C2-123_quant,20230817_C2-123_unconditioned_well 10_cell 1_2...,LD,41,"(0.4106, 0.0799, 0.0799)",6.15891,80.546603,74.23079,15,1007,...,44.560389,0.27166,1.0,1,inf,0.178767,3409.148438,3808.100098,3615.759521,167.172455
2,20240118_C2-123_quant,20230817_C2-123_unconditioned_well 10_cell 1_2...,LD,46,"(0.4106, 0.0799, 0.0799)",6.569504,65.396646,64.373324,16,817,...,32.012333,0.3687,0.833333,1,inf,0.35031,3437.308105,5740.641113,4511.916992,669.799744
3,20240118_C2-123_quant,20230817_C2-123_unconditioned_well 10_cell 1_2...,ER,1,"(0.4106, 0.0799, 0.0799)",7.077805,75.250102,70.922115,8,0,...,4.717567,14.705835,0.007423,-56,0.019041,83.485697,0.882749,9488.813477,1570.648071,1015.394409
4,20240118_C2-123_quant,20230817_C2-123_unconditioned_well 10_cell 1_2...,golgi,1,"(0.4106, 0.0799, 0.0799)",6.518533,71.400411,65.367354,12,855,...,7.743745,2.856022,0.101596,-8,0.327393,8.271302,1037.344482,7995.974609,2276.678223,1043.566162


In [4]:
contacts_df.head()

Unnamed: 0,dataset,image_name,object,label,scale,centroid-0,centroid-1,centroid-2,bbox-0,bbox-1,...,bbox-4,bbox-5,surface_area,volume,SA_to_volume_ratio,equivalent_diameter,extent,euler_number,solidity,axis_major_length
0,20240118_C2-123_quant,20230817_C2-123_unconditioned_well 10_cell 1_2...,LDXER,34_1,"(0.4106, 0.0799, 0.0799)",5.748316,79.174179,66.049547,14,990,...,992,829,0.701645,0.015746,44.560389,0.310973,0.75,1,inf,0.39001
1,20240118_C2-123_quant,20230817_C2-123_unconditioned_well 10_cell 1_2...,LDXER,41_1,"(0.4106, 0.0799, 0.0799)",6.15891,80.546603,74.23079,15,1007,...,1009,930,0.467763,0.010497,44.560389,0.27166,1.0,1,inf,0.178767
2,20240118_C2-123_quant,20230817_C2-123_unconditioned_well 10_cell 1_2...,LDXER,46_1,"(0.4106, 0.0799, 0.0799)",6.569504,65.396646,64.373324,16,817,...,820,808,0.840106,0.026243,32.012333,0.3687,0.833333,1,inf,0.35031
3,20240118_C2-123_quant,20230817_C2-123_unconditioned_well 10_cell 1_2...,LDXlyso,34_4,"(0.4106, 0.0799, 0.0799)",5.748316,79.220209,66.087097,14,990,...,993,829,0.886546,0.028868,30.710824,0.380601,0.916667,1,inf,0.388472
4,20240118_C2-123_quant,20230817_C2-123_unconditioned_well 10_cell 1_2...,ERXgolgi,1_1,"(0.4106, 0.0799, 0.0799)",5.79484,69.793156,67.015573,12,855,...,896,852,31.132222,1.459122,21.336265,1.407228,0.08371,-3,0.18873,4.252173


In [5]:
dist_df.head()

Unnamed: 0,dataset,image_name,object,scale,XY_n_bins,XY_bins,XY_mask_vox_cnt_perbin,XY_obj_vox_cnt_perbin,XY_center_vox_cnt_perbin,XY_n_pix_perbin,...,XY_area_wedges_perbin,Z_n_slices,Z_slices,Z_mask_vox_cnt,Z_obj_vox_cnt,Z_center_vox_cnt,Z_height,Z_mask_volume,Z_obj_volume,Z_center_volume
0,20240118_C2-123_quant,20230817_C2-123_unconditioned_well 10_cell 1_2...,LD,"(0.4106, 0.0799, 0.0799)",5,"[1, 2, 3, 4, 5]","[427967, 140309, 156031, 169187, 919030]","[0, 0, 4, 21, 0]","[146905, 0, 0, 0, 0]","[16331.0, 6640.0, 9089.0, 12684.0, 171328.0]",...,"[[14.508756776430001, 12.380379681033002, 11.1...",39,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[0, 0, 0, 0, 0, 0, 0, 1254, 28021, 58752, 7916...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",16.013166,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.29089842...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1,20240118_C2-123_quant,20230817_C2-123_unconditioned_well 10_cell 1_2...,ER,"(0.4106, 0.0799, 0.0799)",5,"[1, 2, 3, 4, 5]","[427967, 140309, 156031, 169187, 919030]","[111076, 75286, 90475, 94836, 262855]","[146905, 0, 0, 0, 0]","[16331.0, 6640.0, 9089.0, 12684.0, 171328.0]",...,"[[14.508756776430001, 12.380379681033002, 11.1...",39,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[0, 0, 0, 0, 0, 0, 0, 1254, 28021, 58752, 7916...","[0, 0, 0, 0, 0, 0, 0, 0, 128, 2355, 15555, 385...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",16.013166,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.29089842...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.335...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
2,20240118_C2-123_quant,20230817_C2-123_unconditioned_well 10_cell 1_2...,golgi,"(0.4106, 0.0799, 0.0799)",5,"[1, 2, 3, 4, 5]","[427967, 140309, 156031, 169187, 919030]","[3103, 8700, 4532, 987, 0]","[146905, 0, 0, 0, 0]","[16331.0, 6640.0, 9089.0, 12684.0, 171328.0]",...,"[[14.508756776430001, 12.380379681033002, 11.1...",39,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[0, 0, 0, 0, 0, 0, 0, 1254, 28021, 58752, 7916...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 400, 1437...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",16.013166,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.29089842...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
3,20240118_C2-123_quant,20230817_C2-123_unconditioned_well 10_cell 1_2...,lyso,"(0.4106, 0.0799, 0.0799)",5,"[1, 2, 3, 4, 5]","[427967, 140309, 156031, 169187, 919030]","[16963, 7188, 7259, 8103, 9731]","[146905, 0, 0, 0, 0]","[16331.0, 6640.0, 9089.0, 12684.0, 171328.0]",...,"[[14.508756776430001, 12.380379681033002, 11.1...",39,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[0, 0, 0, 0, 0, 0, 0, 1254, 28021, 58752, 7916...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 136, 1517, 3608, 4...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",16.013166,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.29089842...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4,20240118_C2-123_quant,20230817_C2-123_unconditioned_well 10_cell 1_2...,mito,"(0.4106, 0.0799, 0.0799)",5,"[1, 2, 3, 4, 5]","[427967, 140309, 156031, 169187, 919030]","[19049, 11861, 15057, 16599, 58895]","[146905, 0, 0, 0, 0]","[16331.0, 6640.0, 9089.0, 12684.0, 171328.0]",...,"[[14.508756776430001, 12.380379681033002, 11.1...",39,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[0, 0, 0, 0, 0, 0, 0, 1254, 28021, 58752, 7916...","[0, 0, 0, 0, 0, 0, 0, 0, 6, 853, 4861, 9597, 1...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",16.013166,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.29089842...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.015...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
310,20240118_C2-117_quant,20230622_C2-117_unconditioned_well 8_cell 5_un...,golgiXmito,"(0.4106, 0.0799, 0.0799)",5,"[1, 2, 3, 4, 5]","[1029383, 0, 0, 0, 0]","[542, 0, 0, 0, 0]","[914846, 0, 0, 0, 0]","[110214.0, 0.0, 0.0, 0.0, 0.0]",...,"[[71.96854682934001, 82.66795601160601, 68.919...",42,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[0, 0, 755, 3958, 11477, 21049, 27787, 32284, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 84, 97, 8...","[0, 0, 755, 3958, 11477, 21049, 27787, 32284, ...",17.244948,"[0.0, 0.0, 1.9813622917601024, 10.387062186472...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 1.9813622917601024, 10.387062186472..."
311,20240118_C2-117_quant,20230622_C2-117_unconditioned_well 8_cell 5_un...,golgiXperox,"(0.4106, 0.0799, 0.0799)",5,"[1, 2, 3, 4, 5]","[1029383, 0, 0, 0, 0]","[1, 0, 0, 0, 0]","[914846, 0, 0, 0, 0]","[110214.0, 0.0, 0.0, 0.0, 0.0]",...,"[[71.96854682934001, 82.66795601160601, 68.919...",42,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[0, 0, 755, 3958, 11477, 21049, 27787, 32284, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...","[0, 0, 755, 3958, 11477, 21049, 27787, 32284, ...",17.244948,"[0.0, 0.0, 1.9813622917601024, 10.387062186472...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 1.9813622917601024, 10.387062186472..."
312,20240118_C2-117_quant,20230622_C2-117_unconditioned_well 8_cell 5_un...,lysoXmito,"(0.4106, 0.0799, 0.0799)",5,"[1, 2, 3, 4, 5]","[1029383, 0, 0, 0, 0]","[12325, 0, 0, 0, 0]","[914846, 0, 0, 0, 0]","[110214.0, 0.0, 0.0, 0.0, 0.0]",...,"[[71.96854682934001, 82.66795601160601, 68.919...",42,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[0, 0, 755, 3958, 11477, 21049, 27787, 32284, ...","[0, 0, 0, 0, 0, 5, 75, 114, 118, 191, 468, 109...","[0, 0, 755, 3958, 11477, 21049, 27787, 32284, ...",17.244948,"[0.0, 0.0, 1.9813622917601024, 10.387062186472...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.013121604581192731...","[0.0, 0.0, 1.9813622917601024, 10.387062186472..."
313,20240118_C2-117_quant,20230622_C2-117_unconditioned_well 8_cell 5_un...,lysoXperox,"(0.4106, 0.0799, 0.0799)",5,"[1, 2, 3, 4, 5]","[1029383, 0, 0, 0, 0]","[1338, 0, 0, 0, 0]","[914846, 0, 0, 0, 0]","[110214.0, 0.0, 0.0, 0.0, 0.0]",...,"[[71.96854682934001, 82.66795601160601, 68.919...",42,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[0, 0, 755, 3958, 11477, 21049, 27787, 32284, ...","[0, 0, 0, 0, 0, 0, 0, 19, 11, 0, 52, 179, 139,...","[0, 0, 755, 3958, 11477, 21049, 27787, 32284, ...",17.244948,"[0.0, 0.0, 1.9813622917601024, 10.387062186472...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.04986209...","[0.0, 0.0, 1.9813622917601024, 10.387062186472..."


In [6]:
regions_df.head()

Unnamed: 0,dataset,image_name,object,label,scale,centroid-0,centroid-1,centroid-2,bbox-0,bbox-1,...,mean_intensity-golgi_ch,mean_intensity-lyso_ch,mean_intensity-mito_ch,mean_intensity-perox_ch,standard_deviation_intensity-LD_ch,standard_deviation_intensity-ER_ch,standard_deviation_intensity-golgi_ch,standard_deviation_intensity-lyso_ch,standard_deviation_intensity-mito_ch,standard_deviation_intensity-perox_ch
0,20240118_C2-123_quant,20230817_C2-123_unconditioned_well 10_cell 1_2...,nuc,1,"(0.4106, 0.0799, 0.0799)",9.656524,73.476817,72.015354,15,848,...,0.00398,0.033965,10.3276,16.790661,0.011877,0.4352,0.003469,0.192766,10.107121,15.170704
1,20240118_C2-123_quant,20230817_C2-123_unconditioned_well 10_cell 1_2...,cell,1,"(0.4106, 0.0799, 0.0799)",7.245385,74.949189,71.010845,7,0,...,32.623714,57.594608,133.466919,376.963348,72.220337,934.518188,244.761063,292.651947,293.67984,786.370483
2,20240118_C2-123_quant,20230817_C2-123_unconditioned_well 10_cell 2_2...,nuc,1,"(0.4106, 0.0799, 0.0799)",10.791034,67.889576,85.661055,14,782,...,0.110757,0.092745,51.535671,113.979195,0.161533,9.4659,0.246002,0.151073,36.946209,64.928368
3,20240118_C2-123_quant,20230817_C2-123_unconditioned_well 10_cell 2_2...,cell,1,"(0.4106, 0.0799, 0.0799)",6.552921,66.637646,74.770419,1,0,...,234.530655,39.489193,450.387329,1322.078247,138.427841,5498.334961,1012.422791,167.863388,830.095337,2822.268066
4,20240118_C2-123_quant,20230817_C2-123_unconditioned_well 10_cell 3_2...,nuc,1,"(0.4106, 0.0799, 0.0799)",10.186749,60.882566,68.357664,15,697,...,0.027503,0.056172,47.277412,195.726944,0.064345,3.224035,0.063066,0.091853,32.378677,78.06469


## **`2` - Restructure comprehensive two-way organelle interaction metrics table**

> ###### **📝 Please note that in the following steps, a specific procedure will be repeated to ensure that all unique organelle objects are described by their interaction metrics, regardless of whether they are the first organelle (A) or the second organelle (B) involved in the two-way contact.**

- breakdown the interaction table column names

In [7]:
contact_cnt = contacts_df[["dataset", "image_name", "object", "label", "volume"]]
contact_cnt[["orgA", "orgB"]] = contact_cnt["object"].str.split('X', expand=True)
contact_cnt[["A_ID", "B_ID"]] = contact_cnt["label"].str.split('_', expand=True)
contact_cnt["A"] = contact_cnt["orgA"] +"_" + contact_cnt["A_ID"].astype(str)
contact_cnt["B"] = contact_cnt["orgB"] +"_" + contact_cnt["B_ID"].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  contact_cnt[["orgA", "orgB"]] = contact_cnt["object"].str.split('X', expand=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  contact_cnt[["orgA", "orgB"]] = contact_cnt["object"].str.split('X', expand=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  contact_cnt[["A_ID", "B_ID"]] = contact_c

- group observations by the **first** organelle involved in interactions

In [None]:
contact_cnt_percell = contact_cnt[["dataset", "image_name", "orgA", "A_ID", "object", "volume"]].groupby(["dataset", "image_name", "orgA", "A_ID", "object"]).agg(["count", "sum"])
contact_cnt_percell.columns = ["_".join(col_name).rstrip('_') for col_name in contact_cnt_percell.columns.to_flat_index()]

- unstack the grouped table to create a column for every unique organelle interaction type

In [None]:
unstacked = contact_cnt_percell.unstack(level='object')
unstacked.columns = ["_".join(col_name).rstrip('_') for col_name in unstacked.columns.to_flat_index()]
unstacked = unstacked.reset_index()

- correct column names for unstacked tables to accurately describe each the **first** organelles involved in interaction sites

In [None]:
# Fixes the count and volume metrics
for col in unstacked.columns:
    if col.startswith("volume_count_"):
        newname = col.split("_")[-1] + "_count"
        unstacked.rename(columns={col:newname}, inplace=True)
    if col.startswith("volume_sum_"):
        newname = col.split("_")[-1] + "_volume"
        unstacked.rename(columns={col:newname}, inplace=True)

# first organelle is simply referred to as object
# label of first organelle is simply reffered to as label
unstacked.rename(columns={"orgA":"object", "A_ID":"label"}, inplace=True)
unstacked.set_index(['dataset', 'image_name', 'object', 'label'])


- repeat last three substeps for the **second** organelle involved in the interactions

In [None]:
contact_percellB = contact_cnt[["dataset", "image_name", "orgB", "B_ID", "object", "volume"]].groupby(["dataset", "image_name", "orgB", "B_ID", "object"]).agg(["count", "sum"])
contact_percellB.columns = ["_".join(col_name).rstrip('_') for col_name in contact_percellB.columns.to_flat_index()]
unstackedB = contact_percellB.unstack(level='object')
unstackedB.columns = ["_".join(col_name).rstrip('_') for col_name in unstackedB.columns.to_flat_index()]
unstackedB = unstackedB.reset_index()
for col in unstackedB.columns:
    if col.startswith("volume_count_"):
        newname = col.split("_")[-1] + "_count"
        unstackedB.rename(columns={col:newname}, inplace=True)
    if col.startswith("volume_sum_"):
        newname = col.split("_")[-1] + "_volume"
        unstackedB.rename(columns={col:newname}, inplace=True)
unstackedB.rename(columns={"orgB":"object", "B_ID":"label"}, inplace=True)
unstackedB.set_index(['dataset', 'image_name', 'object', 'label'])

- combine and merge the data from **both** unstacked tables to include interaction metrics from all organelle objects

In [None]:
contact_cnt = pd.concat([unstacked, unstackedB], axis=0).sort_index(axis=0)
contact_cnt = contact_cnt.groupby(['dataset', 'image_name', 'object', 'label']).sum().reset_index()
contact_cnt['label']=contact_cnt['label'].astype("Int64")

org_df = pd.merge(org_df, contact_cnt, how='left', on=['dataset', 'image_name', 'object', 'label'], sort=True)
org_df[contact_cnt.columns] = org_df[contact_cnt.columns].fillna(0)

In [10]:
contact_cnt_percell

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,volume_count,volume_sum
dataset,image_name,orgA,A_ID,object,Unnamed: 5_level_1,Unnamed: 6_level_1
20240118_C2-117_quant,20230622_C2-117_unconditioned_well 11_cell 3_25nM TG_Linear unmixing_0_cmle.ome,ER,1,ERXgolgi,104,58.950121
20240118_C2-117_quant,20230622_C2-117_unconditioned_well 11_cell 3_25nM TG_Linear unmixing_0_cmle.ome,ER,1,ERXlyso,77,57.029118
20240118_C2-117_quant,20230622_C2-117_unconditioned_well 11_cell 3_25nM TG_Linear unmixing_0_cmle.ome,ER,1,ERXmito,874,110.140125
20240118_C2-117_quant,20230622_C2-117_unconditioned_well 11_cell 3_25nM TG_Linear unmixing_0_cmle.ome,ER,1,ERXperox,297,24.907430
20240118_C2-117_quant,20230622_C2-117_unconditioned_well 11_cell 3_25nM TG_Linear unmixing_0_cmle.ome,LD,209,LDXER,1,0.002624
...,...,...,...,...,...,...
20240118_C2-123_quant,20230817_C2-123_unconditioned_well 4_cell 1_untreated_Linear unmixing_0_cmle.ome,mito,86,mitoXperox,1,0.015746
20240118_C2-123_quant,20230817_C2-123_unconditioned_well 4_cell 1_untreated_Linear unmixing_0_cmle.ome,mito,87,mitoXperox,5,0.136465
20240118_C2-123_quant,20230817_C2-123_unconditioned_well 4_cell 1_untreated_Linear unmixing_0_cmle.ome,mito,89,mitoXperox,1,0.002624
20240118_C2-123_quant,20230817_C2-123_unconditioned_well 4_cell 1_untreated_Linear unmixing_0_cmle.ome,mito,93,mitoXperox,1,0.018370


In [9]:
unstacked

Unnamed: 0,dataset,image_name,object,label,ERXgolgi_count,ERXlyso_count,ERXmito_count,ERXperox_count,LDXER_count,LDXgolgi_count,...,LDXgolgi_volume,LDXlyso_volume,LDXmito_volume,LDXperox_volume,golgiXlyso_volume,golgiXmito_volume,golgiXperox_volume,lysoXmito_volume,lysoXperox_volume,mitoXperox_volume
0,20240118_C2-117_quant,20230622_C2-117_unconditioned_well 11_cell 3_2...,ER,1,104.0,77.0,874.0,297.0,,,...,,,,,,,,,,
1,20240118_C2-117_quant,20230622_C2-117_unconditioned_well 11_cell 3_2...,LD,209,,,,,1.0,,...,,,,,,,,,,
2,20240118_C2-117_quant,20230622_C2-117_unconditioned_well 11_cell 3_2...,LD,23,,,,,2.0,,...,,,,,,,,,,
3,20240118_C2-117_quant,20230622_C2-117_unconditioned_well 11_cell 3_2...,LD,295,,,,,1.0,,...,,,,,,,,,,
4,20240118_C2-117_quant,20230622_C2-117_unconditioned_well 11_cell 3_2...,LD,320,,,,,1.0,,...,,,0.01837,0.023619,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27661,20240118_C2-123_quant,20230817_C2-123_unconditioned_well 4_cell 1_un...,mito,86,,,,,,,...,,,,,,,,,,0.015746
27662,20240118_C2-123_quant,20230817_C2-123_unconditioned_well 4_cell 1_un...,mito,87,,,,,,,...,,,,,,,,,,0.136465
27663,20240118_C2-123_quant,20230817_C2-123_unconditioned_well 4_cell 1_un...,mito,89,,,,,,,...,,,,,,,,,,0.002624
27664,20240118_C2-123_quant,20230817_C2-123_unconditioned_well 4_cell 1_un...,mito,93,,,,,,,...,,,,,,,,,,0.018370


In [8]:
contact_cnt

Unnamed: 0,dataset,image_name,object,label,ERXgolgi_count,ERXlyso_count,ERXmito_count,ERXperox_count,LDXER_count,LDXgolgi_count,...,LDXgolgi_volume,LDXlyso_volume,LDXmito_volume,LDXperox_volume,golgiXlyso_volume,golgiXmito_volume,golgiXperox_volume,lysoXmito_volume,lysoXperox_volume,mitoXperox_volume
0,20240118_C2-117_quant,20230622_C2-117_unconditioned_well 11_cell 3_2...,ER,1,104.0,77.0,874.0,297.0,9.0,0.0,...,0.0,0.0,0.00000,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000
1,20240118_C2-117_quant,20230622_C2-117_unconditioned_well 11_cell 3_2...,LD,209,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.00000,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000
2,20240118_C2-117_quant,20230622_C2-117_unconditioned_well 11_cell 3_2...,LD,23,0.0,0.0,0.0,0.0,2.0,0.0,...,0.0,0.0,0.00000,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000
3,20240118_C2-117_quant,20230622_C2-117_unconditioned_well 11_cell 3_2...,LD,295,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.00000,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000
4,20240118_C2-117_quant,20230622_C2-117_unconditioned_well 11_cell 3_2...,LD,320,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.01837,0.023619,0.0,0.0,0.0,0.0,0.0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102088,20240118_C2-123_quant,20230817_C2-123_unconditioned_well 4_cell 1_un...,perox,95,0.0,0.0,0.0,2.0,0.0,0.0,...,0.0,0.0,0.00000,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000
102089,20240118_C2-123_quant,20230817_C2-123_unconditioned_well 4_cell 1_un...,perox,96,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.00000,0.000000,0.0,0.0,0.0,0.0,0.0,0.002624
102090,20240118_C2-123_quant,20230817_C2-123_unconditioned_well 4_cell 1_un...,perox,97,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.00000,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000
102091,20240118_C2-123_quant,20230817_C2-123_unconditioned_well 4_cell 1_un...,perox,98,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.00000,0.000000,0.0,0.0,0.0,0.0,0.0,0.002624


In [11]:
org_df

Unnamed: 0,dataset,image_name,object,label,scale,centroid-0,centroid-1,centroid-2,bbox-0,bbox-1,...,LDXgolgi_volume,LDXlyso_volume,LDXmito_volume,LDXperox_volume,golgiXlyso_volume,golgiXmito_volume,golgiXperox_volume,lysoXmito_volume,lysoXperox_volume,mitoXperox_volume
0,20240118_C2-117_quant,20230622_C2-117_unconditioned_well 11_cell 3_2...,ER,1,"(0.4106, 0.0799, 0.0799)",6.109634,66.835593,63.892707,2,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000
1,20240118_C2-117_quant,20230622_C2-117_unconditioned_well 11_cell 3_2...,LD,23,"(0.4106, 0.0799, 0.0799)",2.247069,9.690667,57.380142,5,113,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000
2,20240118_C2-117_quant,20230622_C2-117_unconditioned_well 11_cell 3_2...,LD,46,"(0.4106, 0.0799, 0.0799)",1.826724,58.178574,24.628571,4,724,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000
3,20240118_C2-117_quant,20230622_C2-117_unconditioned_well 11_cell 3_2...,LD,98,"(0.4106, 0.0799, 0.0799)",2.131930,34.215779,72.164467,5,424,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000
4,20240118_C2-117_quant,20230622_C2-117_unconditioned_well 11_cell 3_2...,LD,99,"(0.4106, 0.0799, 0.0799)",2.052970,34.756387,71.737585,5,430,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
104834,20240118_C2-123_quant,20230817_C2-123_unconditioned_well 4_cell 1_un...,perox,351,"(0.4106, 0.0799, 0.0799)",10.874520,67.504340,68.323191,26,842,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.005249,0.002624
104835,20240118_C2-123_quant,20230817_C2-123_unconditioned_well 4_cell 1_un...,perox,352,"(0.4106, 0.0799, 0.0799)",11.219927,63.613480,69.818063,27,793,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000
104836,20240118_C2-123_quant,20230817_C2-123_unconditioned_well 4_cell 1_un...,perox,353,"(0.4106, 0.0799, 0.0799)",11.275543,66.946388,64.956937,27,836,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.005249
104837,20240118_C2-123_quant,20230817_C2-123_unconditioned_well 4_cell 1_un...,perox,354,"(0.4106, 0.0799, 0.0799)",11.788370,64.504606,67.654097,28,804,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.010497


## **`3` - Apply aggregate statistics for summarization**

- determine aggregate statistics to be applied per organelle object

In [12]:
###################
# summary stat group
###################

# ensure summary statistics are applied on a per organelle object level
group_by = ['dataset', 'image_name', 'object']

# metrics to be observed
sharedcolumns = ["SA_to_volume_ratio", "equivalent_diameter", "extent", "euler_number", "solidity", "axis_major_length"]

# statistical functions to be performed on the metrics
ag_func_standard = ['mean', 'median', 'std']

- summarize shared metrics between the organelle morphology and interaction tables

In [None]:
###################
# summarize shared measurements between org_df and contacts_df
###################
org_cont_tabs = []
for tab in [org_df, contacts_df]:
    tab1 = tab[group_by + ['volume']].groupby(group_by).agg(['count', 'sum'] + ag_func_standard)
    tab2 = tab[group_by + ['surface_area']].groupby(group_by).agg(['sum'] + ag_func_standard)
    tab3 = tab[group_by + sharedcolumns].groupby(group_by).agg(ag_func_standard)
    shared_metrics = pd.merge(tab1, tab2, 'outer', on=group_by)
    shared_metrics = pd.merge(shared_metrics, tab3, 'outer', on=group_by)
    org_cont_tabs.append(shared_metrics)

org_summary = org_cont_tabs[0]
contact_summary = org_cont_tabs[1]

- summarize metrics in the region morphology table

In [None]:
###################
# group metrics from regions_df similar to the above
###################
regions_summary = regions_df[group_by + ['volume', 'surface_area'] + sharedcolumns].set_index(group_by)

- summarize additional metrics in the organelle morphology table

In [None]:
###################
# summarize extra metrics from org_df
###################
columns2 = [col for col in org_df.columns if col.endswith(("_count", "_volume"))]
contact_counts_summary = org_df[group_by + columns2].groupby(group_by).agg(['sum'] + ag_func_standard)
org_summary = pd.merge(org_summary, contact_counts_summary, 'outer', on=group_by)#left_on=group_by, right_on=True)

In [15]:
org_summary

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,volume,volume,volume,volume,volume,surface_area,surface_area,surface_area,surface_area,SA_to_volume_ratio,SA_to_volume_ratio,SA_to_volume_ratio,equivalent_diameter,equivalent_diameter,equivalent_diameter,extent,extent,extent,euler_number,euler_number,euler_number,solidity,solidity,solidity,axis_major_length,axis_major_length,axis_major_length,ERXgolgi_count,ERXgolgi_count,ERXgolgi_count,ERXgolgi_count,ERXlyso_count,ERXlyso_count,ERXlyso_count,ERXlyso_count,ERXmito_count,ERXmito_count,ERXmito_count,ERXmito_count,ERXperox_count,ERXperox_count,ERXperox_count,ERXperox_count,LDXER_count,LDXER_count,LDXER_count,LDXER_count,LDXgolgi_count,LDXgolgi_count,LDXgolgi_count,LDXgolgi_count,LDXlyso_count,LDXlyso_count,LDXlyso_count,LDXlyso_count,LDXmito_count,LDXmito_count,LDXmito_count,LDXmito_count,LDXperox_count,LDXperox_count,LDXperox_count,LDXperox_count,golgiXlyso_count,golgiXlyso_count,golgiXlyso_count,golgiXlyso_count,golgiXmito_count,golgiXmito_count,golgiXmito_count,golgiXmito_count,golgiXperox_count,golgiXperox_count,golgiXperox_count,golgiXperox_count,lysoXmito_count,lysoXmito_count,lysoXmito_count,lysoXmito_count,lysoXperox_count,lysoXperox_count,lysoXperox_count,lysoXperox_count,mitoXperox_count,mitoXperox_count,mitoXperox_count,mitoXperox_count,ERXgolgi_volume,ERXgolgi_volume,ERXgolgi_volume,ERXgolgi_volume,ERXlyso_volume,ERXlyso_volume,ERXlyso_volume,ERXlyso_volume,ERXmito_volume,ERXmito_volume,ERXmito_volume,ERXmito_volume,ERXperox_volume,ERXperox_volume,ERXperox_volume,ERXperox_volume,LDXER_volume,LDXER_volume,LDXER_volume,LDXER_volume,LDXgolgi_volume,LDXgolgi_volume,LDXgolgi_volume,LDXgolgi_volume,LDXlyso_volume,LDXlyso_volume,LDXlyso_volume,LDXlyso_volume,LDXmito_volume,LDXmito_volume,LDXmito_volume,LDXmito_volume,LDXperox_volume,LDXperox_volume,LDXperox_volume,LDXperox_volume,golgiXlyso_volume,golgiXlyso_volume,golgiXlyso_volume,golgiXlyso_volume,golgiXmito_volume,golgiXmito_volume,golgiXmito_volume,golgiXmito_volume,golgiXperox_volume,golgiXperox_volume,golgiXperox_volume,golgiXperox_volume,lysoXmito_volume,lysoXmito_volume,lysoXmito_volume,lysoXmito_volume,lysoXperox_volume,lysoXperox_volume,lysoXperox_volume,lysoXperox_volume,mitoXperox_volume,mitoXperox_volume,mitoXperox_volume,mitoXperox_volume
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,sum,mean,median,std,sum,mean,median,std,mean,median,std,mean,median,std,mean,median,std,mean,median,std,mean,median,std,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std
dataset,image_name,object,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2,Unnamed: 46_level_2,Unnamed: 47_level_2,Unnamed: 48_level_2,Unnamed: 49_level_2,Unnamed: 50_level_2,Unnamed: 51_level_2,Unnamed: 52_level_2,Unnamed: 53_level_2,Unnamed: 54_level_2,Unnamed: 55_level_2,Unnamed: 56_level_2,Unnamed: 57_level_2,Unnamed: 58_level_2,Unnamed: 59_level_2,Unnamed: 60_level_2,Unnamed: 61_level_2,Unnamed: 62_level_2,Unnamed: 63_level_2,Unnamed: 64_level_2,Unnamed: 65_level_2,Unnamed: 66_level_2,Unnamed: 67_level_2,Unnamed: 68_level_2,Unnamed: 69_level_2,Unnamed: 70_level_2,Unnamed: 71_level_2,Unnamed: 72_level_2,Unnamed: 73_level_2,Unnamed: 74_level_2,Unnamed: 75_level_2,Unnamed: 76_level_2,Unnamed: 77_level_2,Unnamed: 78_level_2,Unnamed: 79_level_2,Unnamed: 80_level_2,Unnamed: 81_level_2,Unnamed: 82_level_2,Unnamed: 83_level_2,Unnamed: 84_level_2,Unnamed: 85_level_2,Unnamed: 86_level_2,Unnamed: 87_level_2,Unnamed: 88_level_2,Unnamed: 89_level_2,Unnamed: 90_level_2,Unnamed: 91_level_2,Unnamed: 92_level_2,Unnamed: 93_level_2,Unnamed: 94_level_2,Unnamed: 95_level_2,Unnamed: 96_level_2,Unnamed: 97_level_2,Unnamed: 98_level_2,Unnamed: 99_level_2,Unnamed: 100_level_2,Unnamed: 101_level_2,Unnamed: 102_level_2,Unnamed: 103_level_2,Unnamed: 104_level_2,Unnamed: 105_level_2,Unnamed: 106_level_2,Unnamed: 107_level_2,Unnamed: 108_level_2,Unnamed: 109_level_2,Unnamed: 110_level_2,Unnamed: 111_level_2,Unnamed: 112_level_2,Unnamed: 113_level_2,Unnamed: 114_level_2,Unnamed: 115_level_2,Unnamed: 116_level_2,Unnamed: 117_level_2,Unnamed: 118_level_2,Unnamed: 119_level_2,Unnamed: 120_level_2,Unnamed: 121_level_2,Unnamed: 122_level_2,Unnamed: 123_level_2,Unnamed: 124_level_2,Unnamed: 125_level_2,Unnamed: 126_level_2,Unnamed: 127_level_2,Unnamed: 128_level_2,Unnamed: 129_level_2,Unnamed: 130_level_2,Unnamed: 131_level_2,Unnamed: 132_level_2,Unnamed: 133_level_2,Unnamed: 134_level_2,Unnamed: 135_level_2,Unnamed: 136_level_2,Unnamed: 137_level_2,Unnamed: 138_level_2,Unnamed: 139_level_2,Unnamed: 140_level_2,Unnamed: 141_level_2,Unnamed: 142_level_2,Unnamed: 143_level_2,Unnamed: 144_level_2,Unnamed: 145_level_2,Unnamed: 146_level_2,Unnamed: 147_level_2,Unnamed: 148_level_2,Unnamed: 149_level_2
20240118_C2-117_quant,20230622_C2-117_unconditioned_well 11_cell 3_25nM TG_Linear unmixing_0_cmle.ome,ER,1,1259.650421,1259.650421,1259.650421,,5954.872147,5954.872147,5954.872147,,4.727401,4.727401,,13.399352,13.399352,,0.004319,0.004319,,-9.000000,-9.0,,0.013972,0.013972,,90.510874,90.510874,,104.0,104.000000,104.0,,77.0,77.000000,77.0,,874.0,874.000000,874.0,,297.0,297.000000,297.0,,9.0,9.000000,9.0,,0.0,0.000000,0.0,,0.0,0.000000,0.0,,0.0,0.000000,0.0,,0.0,0.000000,0.0,,0.0,0.000000,0.0,,0.0,0.000000,0.0,,0.0,0.000000,0.0,,0.0,0.000000,0.0,,0.0,0.000000,0.0,,0.0,0.000000,0.0,,58.950121,58.950121,58.950121,,57.029118,57.029118,57.029118,,110.140125,110.140125,110.140125,,24.907430,24.90743,24.907430,,0.157459,0.157459,0.157459,,0.000000,0.000000,0.0,,0.000000,0.000000,0.0,,0.000000,0.000000,0.0,,0.000000,0.000000,0.0,,0.000000,0.000000,0.000000,,0.000000,0.000000,0.000000,,0.000000,0.000000,0.0,,0.000000,0.000000,0.000000,,0.000000,0.000000,0.0,,0.000000,0.000000,0.000000,
20240118_C2-117_quant,20230622_C2-117_unconditioned_well 11_cell 3_25nM TG_Linear unmixing_0_cmle.ome,LD,14,1.243928,0.088852,0.041989,0.146990,24.863251,1.775946,1.155915,2.177455,34.269611,26.199740,19.282674,0.457205,0.431046,0.224537,0.740389,0.841667,0.283388,1.142857,1.0,0.534522,,inf,,0.621417,0.512314,0.527286,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,9.0,0.642857,0.5,0.744946,1.0,0.071429,0.0,0.267261,1.0,0.071429,0.0,0.267261,1.0,0.071429,0.0,0.267261,1.0,0.071429,0.0,0.267261,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.157459,0.011247,0.001312,0.023256,0.002624,0.000187,0.0,0.000701,0.002624,0.000187,0.0,0.000701,0.018370,0.001312,0.0,0.004910,0.023619,0.001687,0.0,0.006312,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000
20240118_C2-117_quant,20230622_C2-117_unconditioned_well 11_cell 3_25nM TG_Linear unmixing_0_cmle.ome,golgi,75,79.734742,1.063130,0.118094,5.916377,596.196861,7.949291,2.067932,36.789805,19.110737,17.472123,8.451447,0.760169,0.608710,0.581177,0.564059,0.570248,0.197049,0.946667,1.0,0.461880,,0.916667,,1.175097,0.911100,1.296454,104.0,1.386667,1.0,2.680729,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,1.0,0.013333,0.0,0.115470,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,53.0,0.706667,0.0,2.363994,137.0,1.826667,1.0,7.635927,42.0,0.560000,0.0,2.360943,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,58.950121,0.786002,0.099724,4.138630,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.002624,0.000035,0.0,0.000303,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,25.448040,0.339307,0.000000,2.191560,3.162307,0.042164,0.002624,0.186222,0.978872,0.013052,0.0,0.061349,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000
20240118_C2-117_quant,20230622_C2-117_unconditioned_well 11_cell 3_25nM TG_Linear unmixing_0_cmle.ome,lyso,57,66.663000,1.169526,0.173205,5.599211,543.714592,9.538852,2.899388,35.847076,18.084921,17.146584,7.923687,0.845905,0.691599,0.585465,0.419182,0.428571,0.140429,0.736842,1.0,1.727160,,0.800000,,1.687937,1.332782,1.366172,0.0,0.000000,0.0,0.000000,77.0,1.350877,1.0,1.202545,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,1.0,0.017544,0.0,0.132453,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,53.0,0.929825,1.0,2.381923,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,160.0,2.807018,1.0,10.664424,44.0,0.771930,0.0,3.207233,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,57.029118,1.000511,0.146962,4.949856,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.002624,0.000046,0.0,0.000348,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,25.448040,0.446457,0.034116,2.243222,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,8.964680,0.157275,0.023619,0.672297,1.102215,0.019337,0.0,0.101425,0.000000,0.000000,0.000000,0.000000
20240118_C2-117_quant,20230622_C2-117_unconditioned_well 11_cell 3_25nM TG_Linear unmixing_0_cmle.ome,mito,457,238.443174,0.521757,0.204697,1.226899,3275.444656,7.167275,3.141593,15.261926,17.019415,15.328325,5.622062,0.837970,0.731202,0.349802,0.402071,0.416667,0.168114,0.875274,1.0,0.757840,,0.814815,,1.819279,1.451167,1.112839,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,874.0,1.912473,1.0,3.805726,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,1.0,0.002188,0.0,0.046778,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,137.0,0.299781,0.0,0.991112,0.0,0.000000,0.0,0.000000,160.0,0.350109,0.0,1.332771,0.0,0.000000,0.0,0.000000,249.0,0.544858,0.0,2.066509,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,110.140125,0.241007,0.110221,0.520770,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.018370,0.000040,0.0,0.000859,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,3.162307,0.006920,0.000000,0.032338,0.000000,0.000000,0.0,0.000000,8.964680,0.019616,0.000000,0.101550,0.000000,0.000000,0.0,0.000000,4.739524,0.010371,0.000000,0.051541
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20240118_C2-123_quant,20230817_C2-123_unconditioned_well 4_cell 1_untreated_Linear unmixing_0_cmle.ome,LD,17,0.558980,0.032881,0.018370,0.046188,14.644310,0.861430,0.680771,0.808391,43.297162,37.058370,17.339279,0.336667,0.327370,0.149991,0.834320,1.000000,0.209847,1.000000,1.0,0.000000,,inf,,0.332167,0.270270,0.283924,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,5.0,0.294118,0.0,0.469668,0.0,0.000000,0.0,0.000000,2.0,0.117647,0.0,0.332106,2.0,0.117647,0.0,0.332106,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.039365,0.002316,0.000000,0.005786,0.000000,0.000000,0.0,0.000000,0.005249,0.000309,0.0,0.000872,0.007873,0.000463,0.0,0.001387,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000
20240118_C2-123_quant,20230817_C2-123_unconditioned_well 4_cell 1_untreated_Linear unmixing_0_cmle.ome,golgi,87,85.188081,0.979173,0.099724,5.659629,733.118287,8.426647,1.977620,38.938999,21.158273,20.387615,8.290973,0.744922,0.575352,0.565635,0.515791,0.521429,0.221074,0.885057,1.0,0.868376,,0.867925,,1.645027,0.903063,3.229610,152.0,1.747126,1.0,5.302957,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,125.0,1.436782,1.0,3.820694,216.0,2.482759,1.0,11.803490,50.0,0.574713,0.0,1.499086,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,41.322557,0.474972,0.070857,2.444326,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,13.580861,0.156102,0.068232,0.541189,4.038830,0.046423,0.005249,0.242042,1.511609,0.017375,0.0,0.066861,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000
20240118_C2-123_quant,20230817_C2-123_unconditioned_well 4_cell 1_untreated_Linear unmixing_0_cmle.ome,lyso,213,199.996873,0.938952,0.236189,2.668875,1571.991500,7.380242,3.469454,14.867220,16.504594,14.449669,8.710405,0.906029,0.766926,0.523834,0.449229,0.444444,0.173721,0.924883,1.0,0.438684,,0.814815,,1.591670,1.307996,1.363472,0.0,0.000000,0.0,0.000000,234.0,1.098592,1.0,0.610172,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,2.0,0.009390,0.0,0.096672,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,125.0,0.586854,0.0,1.565614,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,478.0,2.244131,1.0,3.862803,89.0,0.417840,0.0,1.532374,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,138.409309,0.649809,0.154835,1.900375,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.005249,0.000025,0.0,0.000254,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,13.580861,0.063760,0.000000,0.261585,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,40.970898,0.192352,0.047238,0.543824,2.369762,0.011126,0.0,0.052907,0.000000,0.000000,0.000000,0.000000
20240118_C2-123_quant,20230817_C2-123_unconditioned_well 4_cell 1_untreated_Linear unmixing_0_cmle.ome,mito,353,491.501191,1.392355,0.191575,11.687704,6685.839277,18.940055,3.432852,145.018530,20.345043,17.705478,7.037690,0.874724,0.715232,0.591904,0.326103,0.320513,0.185053,0.053824,1.0,12.641694,,0.716049,,2.796367,1.427905,4.637126,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,564.0,1.597734,1.0,3.880633,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,2.0,0.005666,0.0,0.075164,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,216.0,0.611898,0.0,9.102920,0.0,0.000000,0.0,0.000000,478.0,1.354108,0.0,14.022986,0.0,0.000000,0.0,0.000000,371.0,1.050992,0.0,9.995892,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,367.772333,1.041848,0.115470,9.443933,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.007873,0.000022,0.0,0.000312,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,4.038830,0.011441,0.000000,0.165878,0.000000,0.000000,0.0,0.000000,40.970898,0.116065,0.000000,1.432483,0.000000,0.000000,0.0,0.000000,9.038161,0.025604,0.000000,0.248840


In [14]:
pd.set_option('display.max_columns', None)

## **`4` - Restructure distribution metrics tables**

- for XY-distribution collect summary statistics for voxel bins and wedges

- for Z-distribution collect summary statistics for voxel bins and wedges

> ###### Statistics collected: **mean, median, mode, minimum, maximum, range, standard deviation, skew, kurtosis and variance**

- calculate the coefficient of variation for the **mean**, **median**, **standard deviation** for the XY-distribution bin values

In [47]:
hist_dfs = []
for ind in range(0,1):#len(dist_df.index)):
    selection = dist_df.iloc[[ind]] #    selection = dist_df.loc[[ind]]
    bins_df = pd.DataFrame()
    wedges_df = pd.DataFrame()
    Z_df = pd.DataFrame()
    CV_df = pd.DataFrame()

    bins_df[['bins', 'masks', 'obj']] = selection[['XY_bins', 'XY_mask_vox_cnt_perbin', 'XY_obj_vox_cnt_perbin']]
    wedges_df[['bins', 'masks', 'obj']] = selection[['XY_wedges', 'XY_mask_vox_cnt_perwedge', 'XY_obj_vox_cnt_perwedge']]
    Z_df[['bins', 'masks', 'obj']] = selection[['Z_slices', 'Z_mask_vox_cnt', 'Z_obj_vox_cnt']]

    dfs = [selection[['dataset', 'image_name', 'object']].reset_index()]
    for df, prefix in zip([bins_df, wedges_df, Z_df], ["XY_bins_", "XY_wedges_", "Z_slices_"]):
        single_df = pd.DataFrame(list(zip(df["bins"].values[0][1:-1].split(", "), 
                                        df["obj"].values[0][1:-1].split(", "), 
                                        df["masks"].values[0][1:-1].split(", "))), columns =['bins', 'obj', 'mask']).astype(int)
        
        if "Z_" in prefix:
            single_df =  single_df.drop(single_df[single_df['mask'] == 0].index)
            single_df['bins'] = (single_df["bins"]/max(single_df.bins)*9.99).apply(np.floor)+1
            single_df = single_df.groupby("bins").agg(['sum']).reset_index()
            single_df.columns = ['bins',"obj","mask"]
    
        single_df['mask_fract'] = single_df['mask']/single_df['mask'].max()
        # single_df['obj_normed_tocell'] = (single_df["obj"]*single_df["mask_fract"]).fillna(0)
        single_df['obj_perc_per_bin'] = (single_df["obj"] / single_df["obj"].sum())*100
        single_df['obj_portion_normed_tobin'] = (single_df["obj_perc_per_bin"]/single_df["mask_fract"]).fillna(0)

        sumstats_df = pd.DataFrame()

        s = single_df['bins'].repeat(single_df['obj_portion_normed_tobin']*100)
        ###################################
        #SUB-STEPS 1 & 2
        ###################################
        sumstats_df['hist_mean']=[s.mean()]
        sumstats_df['hist_median']=[s.median()]
        if single_df['obj_portion_normed_tobin'].sum() != 0: sumstats_df['hist_mode']=[s.mode().iloc[0]]
        else: sumstats_df['hist_mode']=['NaN']
        sumstats_df['hist_min']=[s.min()]
        sumstats_df['hist_max']=[s.max()]
        sumstats_df['hist_range']=[s.max() - s.min()]
        sumstats_df['hist_stdev']=[s.std()]
        sumstats_df['hist_skew']=[s.skew()]
        sumstats_df['hist_kurtosis']=[s.kurtosis()]
        sumstats_df['hist_var']=[s.var()]
        sumstats_df.columns = [prefix+col for col in sumstats_df.columns]
        dfs.append(sumstats_df.reset_index())
    
    ###################################
    #SUB-STEP 3
    ###################################
    CV_df = pd.DataFrame(list(zip(selection["XY_obj_cv_perbin"].values[0][1:-1].split(", "))), columns =['CV']).astype(float)
    sumstats_CV_df = pd.DataFrame()
    sumstats_CV_df['XY_bin_CV_mean'] = CV_df.mean()
    sumstats_CV_df['XY_bin_CV_median'] = CV_df.median()
    sumstats_CV_df['XY_bin_CV_std'] = CV_df.std()
    dfs.append(sumstats_CV_df.reset_index().drop(['index'], axis=1))
    
    ###################################
    # Combine all resulting tables
    ###################################
    combined_df = pd.concat(dfs, axis=1).drop(columns="index")
    hist_dfs.append(combined_df)
dist_org_summary = pd.concat(hist_dfs, ignore_index=True)
dist_org_summary

Unnamed: 0,dataset,image_name,object,XY_bins_hist_mean,XY_bins_hist_median,XY_bins_hist_mode,XY_bins_hist_min,XY_bins_hist_max,XY_bins_hist_range,XY_bins_hist_stdev,XY_bins_hist_skew,XY_bins_hist_kurtosis,XY_bins_hist_var,XY_wedges_hist_mean,XY_wedges_hist_median,XY_wedges_hist_mode,XY_wedges_hist_min,XY_wedges_hist_max,XY_wedges_hist_range,XY_wedges_hist_stdev,XY_wedges_hist_skew,XY_wedges_hist_kurtosis,XY_wedges_hist_var,Z_slices_hist_mean,Z_slices_hist_median,Z_slices_hist_mode,Z_slices_hist_min,Z_slices_hist_max,Z_slices_hist_range,Z_slices_hist_stdev,Z_slices_hist_skew,Z_slices_hist_kurtosis,Z_slices_hist_var,XY_bin_CV_mean,XY_bin_CV_median,XY_bin_CV_std
0,20240220_C2-107_quant,20221027_C2-107_well 1_cell 1_untreated_Linear...,LD,4.286058,5.0,5,1,5,4,1.254602,-1.541087,0.958762,1.574027,3.142645,2.0,2,1,8,7,2.209255,0.929232,-0.685337,4.880806,2.389615,2.0,1.0,1.0,7.0,6.0,1.997943,1.546951,0.928892,3.991776,1.938065,1.835369,0.618022


- repeat the first two substeps for the nucleus distribution metrics

In [53]:
# nucleus distribution
nuc_dist_df = dist_df[["dataset", "image_name", 
                    "XY_bins", "XY_center_vox_cnt_perbin", "XY_mask_vox_cnt_perbin",
                    "XY_wedges", "XY_center_vox_cnt_perwedge", "XY_mask_vox_cnt_perwedge",
                    "Z_slices", "Z_center_vox_cnt", "Z_mask_vox_cnt"]].set_index(["dataset", "image_name"])
nuc_hist_dfs = []
for idx in nuc_dist_df.index.unique():
    selection = nuc_dist_df.loc[idx].iloc[[0]].reset_index()
    bins_df = pd.DataFrame()
    wedges_df = pd.DataFrame()
    Z_df = pd.DataFrame()

    bins_df[['bins', 'center', 'masks']] = selection[['XY_bins', 'XY_center_vox_cnt_perbin', 'XY_mask_vox_cnt_perbin']]
    wedges_df[['bins', 'center', 'masks']] = selection[['XY_wedges', 'XY_center_vox_cnt_perwedge', 'XY_mask_vox_cnt_perwedge']]
    Z_df[['bins', 'center', 'masks']] = selection[['Z_slices', 'Z_center_vox_cnt', 'Z_mask_vox_cnt']]

    dfs = [selection[['dataset', 'image_name']]]
    
    for df, prefix in zip([bins_df, wedges_df, Z_df], ["XY_bins_", "XY_wedges_", "Z_slices_"]):
        single_df = pd.DataFrame(list(zip(df["bins"].values[0][1:-1].split(", "), 
                                        df["masks"].values[0][1:-1].split(", "),
                                        df["center"].values[0][1:-1].split(", "))), columns =['bins', 'mask', 'obj']).astype(int)
        
        if "Z_" in prefix:
            single_df =  single_df.drop(single_df[single_df['mask'] == 0].index)
            single_df['bins'] = (single_df["bins"]/max(single_df.bins)*9.99).apply(np.floor)+1
            single_df = single_df.groupby("bins").agg(['sum']).reset_index()
            single_df.columns = ['bins',"mask","obj"]

        single_df['mask_fract'] = single_df['mask']/single_df['mask'].max()
        # single_df['obj_normed_tocell'] = (single_df["obj"]*single_df["mask_fract"]).fillna(0)
        single_df['obj_perc_per_bin'] = (single_df["obj"] / single_df["obj"].sum())*100
        single_df['obj_portion_normed_tobin'] = (single_df["obj_perc_per_bin"]/single_df["mask_fract"]).fillna(0)

        sumstats_df = pd.DataFrame()

        s = single_df['bins'].repeat(single_df['obj_portion_normed_tobin']*100)
        ###################################
        #SUB-STEPS 1 & 2 FOR NUC
        ###################################
        sumstats_df['hist_mean']=[s.mean()]
        sumstats_df['hist_median']=[s.median()]
        if single_df['obj_portion_normed_tobin'].sum() != 0: sumstats_df['hist_mode']=[s.mode().iloc[0]]
        else: sumstats_df['hist_mode']=['NaN']
        sumstats_df['hist_min']=[s.min()]
        sumstats_df['hist_max']=[s.max()]
        sumstats_df['hist_range']=[s.max() - s.min()]
        sumstats_df['hist_stdev']=[s.std()]
        sumstats_df['hist_skew']=[s.skew()]
        sumstats_df['hist_kurtosis']=[s.kurtosis()]
        sumstats_df['hist_var']=[s.var()]
        sumstats_df.columns = [prefix+col for col in sumstats_df.columns]
        dfs.append(sumstats_df.reset_index())
        
    ###################################
    # Combine all resulting tables
    ###################################
    combined_df = pd.concat(dfs, axis=1).drop(columns="index")
    nuc_hist_dfs.append(combined_df)
dist_center_summary = pd.concat(nuc_hist_dfs, ignore_index=True)
dist_center_summary.insert(2, column="object", value="nuc")
dist_center_summary

  selection = nuc_dist_df.loc[idx].iloc[[0]].reset_index()
  selection = nuc_dist_df.loc[idx].iloc[[0]].reset_index()
  selection = nuc_dist_df.loc[idx].iloc[[0]].reset_index()
  selection = nuc_dist_df.loc[idx].iloc[[0]].reset_index()
  selection = nuc_dist_df.loc[idx].iloc[[0]].reset_index()
  selection = nuc_dist_df.loc[idx].iloc[[0]].reset_index()
  selection = nuc_dist_df.loc[idx].iloc[[0]].reset_index()
  selection = nuc_dist_df.loc[idx].iloc[[0]].reset_index()
  selection = nuc_dist_df.loc[idx].iloc[[0]].reset_index()
  selection = nuc_dist_df.loc[idx].iloc[[0]].reset_index()
  selection = nuc_dist_df.loc[idx].iloc[[0]].reset_index()
  selection = nuc_dist_df.loc[idx].iloc[[0]].reset_index()
  selection = nuc_dist_df.loc[idx].iloc[[0]].reset_index()
  selection = nuc_dist_df.loc[idx].iloc[[0]].reset_index()
  selection = nuc_dist_df.loc[idx].iloc[[0]].reset_index()
  selection = nuc_dist_df.loc[idx].iloc[[0]].reset_index()
  selection = nuc_dist_df.loc[idx].iloc[[0]].reset_index

Unnamed: 0,dataset,image_name,object,XY_bins_hist_mean,XY_bins_hist_median,XY_bins_hist_mode,XY_bins_hist_min,XY_bins_hist_max,XY_bins_hist_range,XY_bins_hist_stdev,...,Z_slices_hist_mean,Z_slices_hist_median,Z_slices_hist_mode,Z_slices_hist_min,Z_slices_hist_max,Z_slices_hist_range,Z_slices_hist_stdev,Z_slices_hist_skew,Z_slices_hist_kurtosis,Z_slices_hist_var
0,20240220_C2-107_quant,20221027_C2-107_well 1_cell 1_untreated_Linear...,nuc,1.0,1.0,1,1,1,0,0.0,...,7.294004,7.0,8.0,4.0,9.0,5.0,1.098648,-0.347350,-0.579546,1.207027
1,20240220_C2-107_quant,20221027_C2-107_well 1_cell 2_untreated_Linear...,nuc,1.0,1.0,1,1,1,0,0.0,...,6.261085,6.0,6.0,4.0,8.0,4.0,1.060288,0.113038,-1.002106,1.124210
2,20240220_C2-107_quant,20221027_C2-107_well 1_cell 3_untreated_Linear...,nuc,1.0,1.0,1,1,1,0,0.0,...,6.603834,7.0,7.0,4.0,9.0,5.0,1.090285,-0.356748,-0.527568,1.188722
3,20240220_C2-107_quant,20221027_C2-107_well 1_cell 4_untreated_Linear...,nuc,1.0,1.0,1,1,1,0,0.0,...,5.893096,6.0,6.0,3.0,8.0,5.0,1.293846,-0.100112,-0.813658,1.674037
4,20240220_C2-107_quant,20221027_C2-107_well 1_cell 5_untreated_Linear...,nuc,1.0,1.0,1,1,1,0,0.0,...,5.950500,6.0,6.0,3.0,8.0,5.0,1.201364,-0.097283,-0.765856,1.443274
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
136,20240118_C2-117_quant,20230622_C2-117_unconditioned_well 7_cell 1_50...,nuc,1.0,1.0,1,1,1,0,0.0,...,6.592731,7.0,7.0,4.0,9.0,5.0,1.042108,-0.248665,-0.533085,1.085988
137,20240118_C2-117_quant,20230622_C2-117_unconditioned_well 7_cell 2_50...,nuc,1.0,1.0,1,1,1,0,0.0,...,6.169485,6.0,6.0,4.0,8.0,4.0,0.858970,-0.411233,-0.278896,0.737829
138,20240118_C2-117_quant,20230622_C2-117_unconditioned_well 8_cell 3_un...,nuc,1.0,1.0,1,1,1,0,0.0,...,6.875607,7.0,7.0,4.0,9.0,5.0,1.085858,-0.179305,-0.516143,1.179087
139,20240118_C2-117_quant,20230622_C2-117_unconditioned_well 8_cell 4_un...,nuc,1.0,1.0,1,1,1,0,0.0,...,5.329227,5.0,1.0,1.0,10.0,9.0,2.970119,0.138550,-1.281238,8.821606


- combine nucleus and organelle distribution tables

In [None]:
dist_summary = pd.concat([dist_org_summary, dist_center_summary], axis=0).set_index(group_by).sort_index()
dist_summary

## **`5` - Add normalized metrics**

- calculate fraction of cell area taken up by the organelles

In [None]:
###################
# add normalization
###################
# organelle area fraction
area_fractions = []
for idx in org_summary.index.unique():
    org_vol = org_summary.loc[idx][('volume', 'sum')]
    cell_vol = regions_summary.loc[idx[:-1] + ('cell',)]["volume"]
    afrac = org_vol/cell_vol
    area_fractions.append(afrac)
org_summary[('volume', 'fraction')] = area_fractions
# TODO: add in line to reorder the level=0 columns here

- calculate fraction of organelle objects involved in specific interorganelle contacts

In [None]:
# contact sites volume normalized
norm_toA_list = []
norm_toB_list = []
for col in contact_summary.index:
    norm_toA_list.append(contact_summary.loc[col][('volume', 'sum')]/org_summary.loc[col[:-1]+(col[-1].split('X')[0],)][('volume', 'sum')])
    norm_toB_list.append(contact_summary.loc[col][('volume', 'sum')]/org_summary.loc[col[:-1]+(col[-1].split('X')[1],)][('volume', 'sum')])
contact_summary[('volume', 'norm_to_A')] = norm_toA_list
contact_summary[('volume', 'norm_to_B')] = norm_toB_list

# number and area of individuals organelle involved in contact
cont_cnt = org_df[group_by]
cont_cnt[[col.split('_')[0] for col in org_df.columns if col.endswith(("_count"))]] = org_df[[col for col in org_df.columns if col.endswith(("_count"))]].astype(bool)
cont_cnt_perorg = cont_cnt.groupby(group_by).agg('sum')
cont_cnt_perorg.columns = pd.MultiIndex.from_product([cont_cnt_perorg.columns, ['count_in']])
for col in cont_cnt_perorg.columns:
    cont_cnt_perorg[(col[0], 'num_fraction_in')] = cont_cnt_perorg[col].values/org_summary[('volume', 'count')].values
cont_cnt_perorg.sort_index(axis=1, inplace=True)
org_summary = pd.merge(org_summary, cont_cnt_perorg, on=group_by, how='outer')

In [19]:
cont_cnt_perorg

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,ERXgolgi,ERXgolgi,ERXlyso,ERXlyso,ERXmito,ERXmito,ERXperox,ERXperox,LDXER,LDXER,LDXgolgi,LDXgolgi,LDXlyso,LDXlyso,LDXmito,LDXmito,LDXperox,LDXperox,golgiXlyso,golgiXlyso,golgiXmito,golgiXmito,golgiXperox,golgiXperox,lysoXmito,lysoXmito,lysoXperox,lysoXperox,mitoXperox,mitoXperox
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count_in,num_fraction_in,count_in,num_fraction_in,count_in,num_fraction_in,count_in,num_fraction_in,count_in,num_fraction_in,count_in,num_fraction_in,count_in,num_fraction_in,count_in,num_fraction_in,count_in,num_fraction_in,count_in,num_fraction_in,count_in,num_fraction_in,count_in,num_fraction_in,count_in,num_fraction_in,count_in,num_fraction_in,count_in,num_fraction_in
dataset,image_name,object,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2
20240118_C2-117_quant,20230622_C2-117_unconditioned_well 11_cell 3_25nM TG_Linear unmixing_0_cmle.ome,ER,1,1.000000,1,1.000000,1,1.000000,1,1.000000,1,1.000000,0,0.000000,0,0.000000,0,0.000000,0,0.000000,0,0.000000,0,0.000000,0,0.000000,0,0.000000,0,0.000000,0,0.000000
20240118_C2-117_quant,20230622_C2-117_unconditioned_well 11_cell 3_25nM TG_Linear unmixing_0_cmle.ome,LD,0,0.000000,0,0.000000,0,0.000000,0,0.000000,7,0.500000,1,0.071429,1,0.071429,1,0.071429,1,0.071429,0,0.000000,0,0.000000,0,0.000000,0,0.000000,0,0.000000,0,0.000000
20240118_C2-117_quant,20230622_C2-117_unconditioned_well 11_cell 3_25nM TG_Linear unmixing_0_cmle.ome,golgi,75,1.000000,0,0.000000,0,0.000000,0,0.000000,0,0.000000,1,0.013333,0,0.000000,0,0.000000,0,0.000000,28,0.373333,41,0.546667,18,0.240000,0,0.000000,0,0.000000,0,0.000000
20240118_C2-117_quant,20230622_C2-117_unconditioned_well 11_cell 3_25nM TG_Linear unmixing_0_cmle.ome,lyso,0,0.000000,53,0.929825,0,0.000000,0,0.000000,0,0.000000,0,0.000000,1,0.017544,0,0.000000,0,0.000000,32,0.561404,0,0.000000,0,0.000000,46,0.807018,16,0.280702,0,0.000000
20240118_C2-117_quant,20230622_C2-117_unconditioned_well 11_cell 3_25nM TG_Linear unmixing_0_cmle.ome,mito,0,0.000000,0,0.000000,448,0.980306,0,0.000000,0,0.000000,0,0.000000,0,0.000000,1,0.002188,0,0.000000,0,0.000000,79,0.172867,0,0.000000,70,0.153173,0,0.000000,96,0.210066
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20240118_C2-123_quant,20230817_C2-123_unconditioned_well 4_cell 1_untreated_Linear unmixing_0_cmle.ome,LD,0,0.000000,0,0.000000,0,0.000000,0,0.000000,5,0.294118,0,0.000000,2,0.117647,2,0.117647,0,0.000000,0,0.000000,0,0.000000,0,0.000000,0,0.000000,0,0.000000,0,0.000000
20240118_C2-123_quant,20230817_C2-123_unconditioned_well 4_cell 1_untreated_Linear unmixing_0_cmle.ome,golgi,76,0.873563,0,0.000000,0,0.000000,0,0.000000,0,0.000000,0,0.000000,0,0.000000,0,0.000000,0,0.000000,77,0.885057,52,0.597701,28,0.321839,0,0.000000,0,0.000000,0,0.000000
20240118_C2-123_quant,20230817_C2-123_unconditioned_well 4_cell 1_untreated_Linear unmixing_0_cmle.ome,lyso,0,0.000000,204,0.957746,0,0.000000,0,0.000000,0,0.000000,0,0.000000,2,0.009390,0,0.000000,0,0.000000,62,0.291080,0,0.000000,0,0.000000,171,0.802817,37,0.173709,0,0.000000
20240118_C2-123_quant,20230817_C2-123_unconditioned_well 4_cell 1_untreated_Linear unmixing_0_cmle.ome,mito,0,0.000000,0,0.000000,346,0.980170,0,0.000000,0,0.000000,0,0.000000,0,0.000000,2,0.005666,0,0.000000,0,0.000000,39,0.110482,0,0.000000,82,0.232295,0,0.000000,88,0.249292


In [17]:
org_summary

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,volume,volume,volume,volume,volume,surface_area,surface_area,surface_area,surface_area,SA_to_volume_ratio,SA_to_volume_ratio,SA_to_volume_ratio,equivalent_diameter,equivalent_diameter,equivalent_diameter,extent,extent,extent,euler_number,euler_number,euler_number,solidity,solidity,solidity,axis_major_length,axis_major_length,axis_major_length,ERXgolgi_count,ERXgolgi_count,ERXgolgi_count,ERXgolgi_count,ERXlyso_count,ERXlyso_count,ERXlyso_count,ERXlyso_count,ERXmito_count,ERXmito_count,ERXmito_count,ERXmito_count,ERXperox_count,ERXperox_count,ERXperox_count,ERXperox_count,LDXER_count,LDXER_count,LDXER_count,LDXER_count,LDXgolgi_count,LDXgolgi_count,LDXgolgi_count,LDXgolgi_count,LDXlyso_count,LDXlyso_count,LDXlyso_count,LDXlyso_count,LDXmito_count,LDXmito_count,LDXmito_count,LDXmito_count,LDXperox_count,LDXperox_count,LDXperox_count,LDXperox_count,golgiXlyso_count,golgiXlyso_count,golgiXlyso_count,golgiXlyso_count,golgiXmito_count,golgiXmito_count,golgiXmito_count,golgiXmito_count,golgiXperox_count,golgiXperox_count,golgiXperox_count,golgiXperox_count,lysoXmito_count,lysoXmito_count,lysoXmito_count,lysoXmito_count,lysoXperox_count,lysoXperox_count,lysoXperox_count,lysoXperox_count,mitoXperox_count,mitoXperox_count,mitoXperox_count,mitoXperox_count,ERXgolgi_volume,ERXgolgi_volume,ERXgolgi_volume,ERXgolgi_volume,ERXlyso_volume,ERXlyso_volume,ERXlyso_volume,ERXlyso_volume,ERXmito_volume,ERXmito_volume,ERXmito_volume,ERXmito_volume,ERXperox_volume,ERXperox_volume,ERXperox_volume,ERXperox_volume,LDXER_volume,LDXER_volume,LDXER_volume,LDXER_volume,LDXgolgi_volume,LDXgolgi_volume,LDXgolgi_volume,LDXgolgi_volume,LDXlyso_volume,LDXlyso_volume,LDXlyso_volume,LDXlyso_volume,LDXmito_volume,LDXmito_volume,LDXmito_volume,LDXmito_volume,LDXperox_volume,LDXperox_volume,LDXperox_volume,LDXperox_volume,golgiXlyso_volume,golgiXlyso_volume,golgiXlyso_volume,golgiXlyso_volume,golgiXmito_volume,golgiXmito_volume,golgiXmito_volume,golgiXmito_volume,golgiXperox_volume,golgiXperox_volume,golgiXperox_volume,golgiXperox_volume,lysoXmito_volume,lysoXmito_volume,lysoXmito_volume,lysoXmito_volume,lysoXperox_volume,lysoXperox_volume,lysoXperox_volume,lysoXperox_volume,mitoXperox_volume,mitoXperox_volume,mitoXperox_volume,mitoXperox_volume,volume,ERXgolgi,ERXgolgi,ERXlyso,ERXlyso,ERXmito,ERXmito,ERXperox,ERXperox,LDXER,LDXER,LDXgolgi,LDXgolgi,LDXlyso,LDXlyso,LDXmito,LDXmito,LDXperox,LDXperox,golgiXlyso,golgiXlyso,golgiXmito,golgiXmito,golgiXperox,golgiXperox,lysoXmito,lysoXmito,lysoXperox,lysoXperox,mitoXperox,mitoXperox
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,sum,mean,median,std,sum,mean,median,std,mean,median,std,mean,median,std,mean,median,std,mean,median,std,mean,median,std,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,sum,mean,median,std,fraction,count_in,num_fraction_in,count_in,num_fraction_in,count_in,num_fraction_in,count_in,num_fraction_in,count_in,num_fraction_in,count_in,num_fraction_in,count_in,num_fraction_in,count_in,num_fraction_in,count_in,num_fraction_in,count_in,num_fraction_in,count_in,num_fraction_in,count_in,num_fraction_in,count_in,num_fraction_in,count_in,num_fraction_in,count_in,num_fraction_in
dataset,image_name,object,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2,Unnamed: 46_level_2,Unnamed: 47_level_2,Unnamed: 48_level_2,Unnamed: 49_level_2,Unnamed: 50_level_2,Unnamed: 51_level_2,Unnamed: 52_level_2,Unnamed: 53_level_2,Unnamed: 54_level_2,Unnamed: 55_level_2,Unnamed: 56_level_2,Unnamed: 57_level_2,Unnamed: 58_level_2,Unnamed: 59_level_2,Unnamed: 60_level_2,Unnamed: 61_level_2,Unnamed: 62_level_2,Unnamed: 63_level_2,Unnamed: 64_level_2,Unnamed: 65_level_2,Unnamed: 66_level_2,Unnamed: 67_level_2,Unnamed: 68_level_2,Unnamed: 69_level_2,Unnamed: 70_level_2,Unnamed: 71_level_2,Unnamed: 72_level_2,Unnamed: 73_level_2,Unnamed: 74_level_2,Unnamed: 75_level_2,Unnamed: 76_level_2,Unnamed: 77_level_2,Unnamed: 78_level_2,Unnamed: 79_level_2,Unnamed: 80_level_2,Unnamed: 81_level_2,Unnamed: 82_level_2,Unnamed: 83_level_2,Unnamed: 84_level_2,Unnamed: 85_level_2,Unnamed: 86_level_2,Unnamed: 87_level_2,Unnamed: 88_level_2,Unnamed: 89_level_2,Unnamed: 90_level_2,Unnamed: 91_level_2,Unnamed: 92_level_2,Unnamed: 93_level_2,Unnamed: 94_level_2,Unnamed: 95_level_2,Unnamed: 96_level_2,Unnamed: 97_level_2,Unnamed: 98_level_2,Unnamed: 99_level_2,Unnamed: 100_level_2,Unnamed: 101_level_2,Unnamed: 102_level_2,Unnamed: 103_level_2,Unnamed: 104_level_2,Unnamed: 105_level_2,Unnamed: 106_level_2,Unnamed: 107_level_2,Unnamed: 108_level_2,Unnamed: 109_level_2,Unnamed: 110_level_2,Unnamed: 111_level_2,Unnamed: 112_level_2,Unnamed: 113_level_2,Unnamed: 114_level_2,Unnamed: 115_level_2,Unnamed: 116_level_2,Unnamed: 117_level_2,Unnamed: 118_level_2,Unnamed: 119_level_2,Unnamed: 120_level_2,Unnamed: 121_level_2,Unnamed: 122_level_2,Unnamed: 123_level_2,Unnamed: 124_level_2,Unnamed: 125_level_2,Unnamed: 126_level_2,Unnamed: 127_level_2,Unnamed: 128_level_2,Unnamed: 129_level_2,Unnamed: 130_level_2,Unnamed: 131_level_2,Unnamed: 132_level_2,Unnamed: 133_level_2,Unnamed: 134_level_2,Unnamed: 135_level_2,Unnamed: 136_level_2,Unnamed: 137_level_2,Unnamed: 138_level_2,Unnamed: 139_level_2,Unnamed: 140_level_2,Unnamed: 141_level_2,Unnamed: 142_level_2,Unnamed: 143_level_2,Unnamed: 144_level_2,Unnamed: 145_level_2,Unnamed: 146_level_2,Unnamed: 147_level_2,Unnamed: 148_level_2,Unnamed: 149_level_2,Unnamed: 150_level_2,Unnamed: 151_level_2,Unnamed: 152_level_2,Unnamed: 153_level_2,Unnamed: 154_level_2,Unnamed: 155_level_2,Unnamed: 156_level_2,Unnamed: 157_level_2,Unnamed: 158_level_2,Unnamed: 159_level_2,Unnamed: 160_level_2,Unnamed: 161_level_2,Unnamed: 162_level_2,Unnamed: 163_level_2,Unnamed: 164_level_2,Unnamed: 165_level_2,Unnamed: 166_level_2,Unnamed: 167_level_2,Unnamed: 168_level_2,Unnamed: 169_level_2,Unnamed: 170_level_2,Unnamed: 171_level_2,Unnamed: 172_level_2,Unnamed: 173_level_2,Unnamed: 174_level_2,Unnamed: 175_level_2,Unnamed: 176_level_2,Unnamed: 177_level_2,Unnamed: 178_level_2,Unnamed: 179_level_2,Unnamed: 180_level_2
20240118_C2-117_quant,20230622_C2-117_unconditioned_well 11_cell 3_25nM TG_Linear unmixing_0_cmle.ome,ER,1,1259.650421,1259.650421,1259.650421,,5954.872147,5954.872147,5954.872147,,4.727401,4.727401,,13.399352,13.399352,,0.004319,0.004319,,-9.000000,-9.0,,0.013972,0.013972,,90.510874,90.510874,,104.0,104.000000,104.0,,77.0,77.000000,77.0,,874.0,874.000000,874.0,,297.0,297.000000,297.0,,9.0,9.000000,9.0,,0.0,0.000000,0.0,,0.0,0.000000,0.0,,0.0,0.000000,0.0,,0.0,0.000000,0.0,,0.0,0.000000,0.0,,0.0,0.000000,0.0,,0.0,0.000000,0.0,,0.0,0.000000,0.0,,0.0,0.000000,0.0,,0.0,0.000000,0.0,,58.950121,58.950121,58.950121,,57.029118,57.029118,57.029118,,110.140125,110.140125,110.140125,,24.907430,24.90743,24.907430,,0.157459,0.157459,0.157459,,0.000000,0.000000,0.0,,0.000000,0.000000,0.0,,0.000000,0.000000,0.0,,0.000000,0.000000,0.0,,0.000000,0.000000,0.000000,,0.000000,0.000000,0.000000,,0.000000,0.000000,0.0,,0.000000,0.000000,0.000000,,0.000000,0.000000,0.0,,0.000000,0.000000,0.000000,,0.379624,1,1.000000,1,1.000000,1,1.000000,1,1.000000,1,1.000000,0,0.000000,0,0.000000,0,0.000000,0,0.000000,0,0.000000,0,0.000000,0,0.000000,0,0.000000,0,0.000000,0,0.000000
20240118_C2-117_quant,20230622_C2-117_unconditioned_well 11_cell 3_25nM TG_Linear unmixing_0_cmle.ome,LD,14,1.243928,0.088852,0.041989,0.146990,24.863251,1.775946,1.155915,2.177455,34.269611,26.199740,19.282674,0.457205,0.431046,0.224537,0.740389,0.841667,0.283388,1.142857,1.0,0.534522,,inf,,0.621417,0.512314,0.527286,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,9.0,0.642857,0.5,0.744946,1.0,0.071429,0.0,0.267261,1.0,0.071429,0.0,0.267261,1.0,0.071429,0.0,0.267261,1.0,0.071429,0.0,0.267261,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.157459,0.011247,0.001312,0.023256,0.002624,0.000187,0.0,0.000701,0.002624,0.000187,0.0,0.000701,0.018370,0.001312,0.0,0.004910,0.023619,0.001687,0.0,0.006312,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000375,0,0.000000,0,0.000000,0,0.000000,0,0.000000,7,0.500000,1,0.071429,1,0.071429,1,0.071429,1,0.071429,0,0.000000,0,0.000000,0,0.000000,0,0.000000,0,0.000000,0,0.000000
20240118_C2-117_quant,20230622_C2-117_unconditioned_well 11_cell 3_25nM TG_Linear unmixing_0_cmle.ome,golgi,75,79.734742,1.063130,0.118094,5.916377,596.196861,7.949291,2.067932,36.789805,19.110737,17.472123,8.451447,0.760169,0.608710,0.581177,0.564059,0.570248,0.197049,0.946667,1.0,0.461880,,0.916667,,1.175097,0.911100,1.296454,104.0,1.386667,1.0,2.680729,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,1.0,0.013333,0.0,0.115470,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,53.0,0.706667,0.0,2.363994,137.0,1.826667,1.0,7.635927,42.0,0.560000,0.0,2.360943,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,58.950121,0.786002,0.099724,4.138630,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.002624,0.000035,0.0,0.000303,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,25.448040,0.339307,0.000000,2.191560,3.162307,0.042164,0.002624,0.186222,0.978872,0.013052,0.0,0.061349,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.024030,75,1.000000,0,0.000000,0,0.000000,0,0.000000,0,0.000000,1,0.013333,0,0.000000,0,0.000000,0,0.000000,28,0.373333,41,0.546667,18,0.240000,0,0.000000,0,0.000000,0,0.000000
20240118_C2-117_quant,20230622_C2-117_unconditioned_well 11_cell 3_25nM TG_Linear unmixing_0_cmle.ome,lyso,57,66.663000,1.169526,0.173205,5.599211,543.714592,9.538852,2.899388,35.847076,18.084921,17.146584,7.923687,0.845905,0.691599,0.585465,0.419182,0.428571,0.140429,0.736842,1.0,1.727160,,0.800000,,1.687937,1.332782,1.366172,0.0,0.000000,0.0,0.000000,77.0,1.350877,1.0,1.202545,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,1.0,0.017544,0.0,0.132453,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,53.0,0.929825,1.0,2.381923,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,160.0,2.807018,1.0,10.664424,44.0,0.771930,0.0,3.207233,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,57.029118,1.000511,0.146962,4.949856,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.002624,0.000046,0.0,0.000348,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,25.448040,0.446457,0.034116,2.243222,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,8.964680,0.157275,0.023619,0.672297,1.102215,0.019337,0.0,0.101425,0.000000,0.000000,0.000000,0.000000,0.020090,0,0.000000,53,0.929825,0,0.000000,0,0.000000,0,0.000000,0,0.000000,1,0.017544,0,0.000000,0,0.000000,32,0.561404,0,0.000000,0,0.000000,46,0.807018,16,0.280702,0,0.000000
20240118_C2-117_quant,20230622_C2-117_unconditioned_well 11_cell 3_25nM TG_Linear unmixing_0_cmle.ome,mito,457,238.443174,0.521757,0.204697,1.226899,3275.444656,7.167275,3.141593,15.261926,17.019415,15.328325,5.622062,0.837970,0.731202,0.349802,0.402071,0.416667,0.168114,0.875274,1.0,0.757840,,0.814815,,1.819279,1.451167,1.112839,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,874.0,1.912473,1.0,3.805726,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,1.0,0.002188,0.0,0.046778,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,137.0,0.299781,0.0,0.991112,0.0,0.000000,0.0,0.000000,160.0,0.350109,0.0,1.332771,0.0,0.000000,0.0,0.000000,249.0,0.544858,0.0,2.066509,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,110.140125,0.241007,0.110221,0.520770,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.018370,0.000040,0.0,0.000859,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,3.162307,0.006920,0.000000,0.032338,0.000000,0.000000,0.0,0.000000,8.964680,0.019616,0.000000,0.101550,0.000000,0.000000,0.0,0.000000,4.739524,0.010371,0.000000,0.051541,0.071860,0,0.000000,0,0.000000,448,0.980306,0,0.000000,0,0.000000,0,0.000000,0,0.000000,1,0.002188,0,0.000000,0,0.000000,79,0.172867,0,0.000000,70,0.153173,0,0.000000,96,0.210066
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20240118_C2-123_quant,20230817_C2-123_unconditioned_well 4_cell 1_untreated_Linear unmixing_0_cmle.ome,LD,17,0.558980,0.032881,0.018370,0.046188,14.644310,0.861430,0.680771,0.808391,43.297162,37.058370,17.339279,0.336667,0.327370,0.149991,0.834320,1.000000,0.209847,1.000000,1.0,0.000000,,inf,,0.332167,0.270270,0.283924,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,5.0,0.294118,0.0,0.469668,0.0,0.000000,0.0,0.000000,2.0,0.117647,0.0,0.332106,2.0,0.117647,0.0,0.332106,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.039365,0.002316,0.000000,0.005786,0.000000,0.000000,0.0,0.000000,0.005249,0.000309,0.0,0.000872,0.007873,0.000463,0.0,0.001387,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000121,0,0.000000,0,0.000000,0,0.000000,0,0.000000,5,0.294118,0,0.000000,2,0.117647,2,0.117647,0,0.000000,0,0.000000,0,0.000000,0,0.000000,0,0.000000,0,0.000000,0,0.000000
20240118_C2-123_quant,20230817_C2-123_unconditioned_well 4_cell 1_untreated_Linear unmixing_0_cmle.ome,golgi,87,85.188081,0.979173,0.099724,5.659629,733.118287,8.426647,1.977620,38.938999,21.158273,20.387615,8.290973,0.744922,0.575352,0.565635,0.515791,0.521429,0.221074,0.885057,1.0,0.868376,,0.867925,,1.645027,0.903063,3.229610,152.0,1.747126,1.0,5.302957,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,125.0,1.436782,1.0,3.820694,216.0,2.482759,1.0,11.803490,50.0,0.574713,0.0,1.499086,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,41.322557,0.474972,0.070857,2.444326,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,13.580861,0.156102,0.068232,0.541189,4.038830,0.046423,0.005249,0.242042,1.511609,0.017375,0.0,0.066861,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.018454,76,0.873563,0,0.000000,0,0.000000,0,0.000000,0,0.000000,0,0.000000,0,0.000000,0,0.000000,0,0.000000,77,0.885057,52,0.597701,28,0.321839,0,0.000000,0,0.000000,0,0.000000
20240118_C2-123_quant,20230817_C2-123_unconditioned_well 4_cell 1_untreated_Linear unmixing_0_cmle.ome,lyso,213,199.996873,0.938952,0.236189,2.668875,1571.991500,7.380242,3.469454,14.867220,16.504594,14.449669,8.710405,0.906029,0.766926,0.523834,0.449229,0.444444,0.173721,0.924883,1.0,0.438684,,0.814815,,1.591670,1.307996,1.363472,0.0,0.000000,0.0,0.000000,234.0,1.098592,1.0,0.610172,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,2.0,0.009390,0.0,0.096672,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,125.0,0.586854,0.0,1.565614,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,478.0,2.244131,1.0,3.862803,89.0,0.417840,0.0,1.532374,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,138.409309,0.649809,0.154835,1.900375,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.005249,0.000025,0.0,0.000254,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,13.580861,0.063760,0.000000,0.261585,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,40.970898,0.192352,0.047238,0.543824,2.369762,0.011126,0.0,0.052907,0.000000,0.000000,0.000000,0.000000,0.043324,0,0.000000,204,0.957746,0,0.000000,0,0.000000,0,0.000000,0,0.000000,2,0.009390,0,0.000000,0,0.000000,62,0.291080,0,0.000000,0,0.000000,171,0.802817,37,0.173709,0,0.000000
20240118_C2-123_quant,20230817_C2-123_unconditioned_well 4_cell 1_untreated_Linear unmixing_0_cmle.ome,mito,353,491.501191,1.392355,0.191575,11.687704,6685.839277,18.940055,3.432852,145.018530,20.345043,17.705478,7.037690,0.874724,0.715232,0.591904,0.326103,0.320513,0.185053,0.053824,1.0,12.641694,,0.716049,,2.796367,1.427905,4.637126,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,564.0,1.597734,1.0,3.880633,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,2.0,0.005666,0.0,0.075164,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,216.0,0.611898,0.0,9.102920,0.0,0.000000,0.0,0.000000,478.0,1.354108,0.0,14.022986,0.0,0.000000,0.0,0.000000,371.0,1.050992,0.0,9.995892,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,367.772333,1.041848,0.115470,9.443933,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.007873,0.000022,0.0,0.000312,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,4.038830,0.011441,0.000000,0.165878,0.000000,0.000000,0.0,0.000000,40.970898,0.116065,0.000000,1.432483,0.000000,0.000000,0.0,0.000000,9.038161,0.025604,0.000000,0.248840,0.106471,0,0.000000,0,0.000000,346,0.980170,0,0.000000,0,0.000000,0,0.000000,0,0.000000,2,0.005666,0,0.000000,0,0.000000,39,0.110482,0,0.000000,82,0.232295,0,0.000000,88,0.249292


## **`6` - Unstack and finalize summary stats tables**

- unstack and reorder organelle morphology summary table columns

In [15]:
###################
# flatten datasheets and combine
# TODO: restructure this so that all of the datasheets and unstacked and then reorded based on shared level 0 columns before flattening
###################
# org flattening
org_final = org_summary.unstack(-1)
for col in org_final.columns:
    if col[1] in ('count_in', 'num_fraction_in') or col[0].endswith(('_count', '_volume')):
        if col[2] not in col[0]:
            org_final.drop(col,axis=1, inplace=True)
new_col_order = ['dataset', 'image_name', 'object', 'volume', 'surface_area', 'SA_to_volume_ratio', 
                'equivalent_diameter', 'extent', 'euler_number', 'solidity', 'axis_major_length', 
                'ERXLD', 'ERXLD_count', 'ERXLD_volume', 'golgiXER', 'golgiXER_count', 'golgiXER_volume', 
                'golgiXLD', 'golgiXLD_count', 'golgiXLD_volume', 'golgiXperox', 'golgiXperox_count', 'golgiXperox_volume', 
                'lysoXER', 'lysoXER_count', 'lysoXER_volume', 'lysoXLD', 'lysoXLD_count', 'lysoXLD_volume', 
                'lysoXgolgi', 'lysoXgolgi_count', 'lysoXgolgi_volume', 'lysoXmito', 'lysoXmito_count', 'lysoXmito_volume', 
                'lysoXperox', 'lysoXperox_count', 'lysoXperox_volume', 'mitoXER', 'mitoXER_count', 'mitoXER_volume', 
                'mitoXLD', 'mitoXLD_count', 'mitoXLD_volume', 'mitoXgolgi', 'mitoXgolgi_count', 'mitoXgolgi_volume', 
                'mitoXperox', 'mitoXperox_count', 'mitoXperox_volume', 'peroxXER', 'peroxXER_count', 'peroxXER_volume', 
                'peroxXLD', 'peroxXLD_count', 'peroxXLD_volume']
new_cols = org_final.columns.reindex(new_col_order, level=0)
org_final = org_final.reindex(columns=new_cols[0])
org_final.columns = ["_".join((col_name[-1], col_name[1], col_name[0])) for col_name in org_final.columns.to_flat_index()]

- fill "NaN" values with 0 when necessary to final organelle morphology summary table

In [None]:
#renaming, filling "NaN" with 0 when needed, and removing ER_std columns
for col in org_final.columns:
    if '_count_in_' or '_fraction_in_' in col:
        org_final[col] = org_final[col].fillna(0)
    if col.endswith(("_count_volume","_sum_volume", "_mean_volume", "_median_volume")):
        org_final[col] = org_final[col].fillna(0)
    if col.endswith("_count_volume"):
        org_final.rename(columns={col:col.split("_")[0]+"_count"}, inplace=True)
    if col.startswith("ER_std_"):
        org_final.drop(columns=[col], inplace=True)
org_final = org_final.reset_index()

- unstack and reorder organelle interactions summary table columns

In [None]:
# contacts flattened
contact_final = contact_summary.unstack(-1)
contact_final.columns = ["_".join((col_name[-1], col_name[1], col_name[0])) for col_name in contact_final.columns.to_flat_index()]

- fill "NaN" values with 0 when necessary to final organelle interactions summary table

In [None]:
#renaming and filling "NaN" with 0 when needed
for col in contact_final.columns:
    if col.endswith(("_count_volume","_sum_volume", "_mean_volume", "_median_volume")):
        contact_final[col] = contact_final[col].fillna(0)
    if col.endswith("_count_volume"):
        contact_final.rename(columns={col:col.split("_")[0]+"_count"}, inplace=True)
contact_final = contact_final.reset_index()

- unstack and reorder distribution measurements summary table columns to create finalized table

In [17]:
# distributions flattened
dist_final = dist_summary.unstack(-1)
dist_final.columns = ["_".join((col_name[1], col_name[0])) for col_name in dist_final.columns.to_flat_index()]
dist_final = dist_final.reset_index()

- unstack and reorder region morphology summary table columns

In [18]:
# regions flattened
regions_final = regions_summary.unstack(-1)
regions_final.columns = ["_".join((col_name[1], col_name[0])) for col_name in regions_final.columns.to_flat_index()]

ValueError: Index contains duplicate entries, cannot reshape

- add normalization to finalize region morphology summary table

In [None]:
# normalization added
regions_final['nuc_area_fraction'] = regions_final['nuc_volume'] / regions_final['cell_volume']
regions_final = regions_final.reset_index()

- combine all four tables to create a complete summary table

In [None]:
# combining them all
combined = pd.merge(org_final, contact_final, on=["dataset", "image_name"], how="outer")
combined = pd.merge(combined, dist_final, on=["dataset", "image_name"], how="outer")
combined = pd.merge(combined, regions_final, on=["dataset", "image_name"], how="outer").set_index(["dataset", "image_name"])
combined.columns = [col.replace('sum', 'total') for col in combined.columns]

## **`7` - Export summary stats tables as .csv files**

In [None]:
###################
# export summary sheets
###################

# location for the final csv files to be exported to
out_path="D:/Paper 1 - Neuron vs astrocyte multispectral/Data/RAW summary data - UPDATED20240712"

# prefix added to summary tables
out_preffix="20240712_10per_neuron_"

org_summary.to_csv(out_path + f"/{out_preffix}per_org_summarystats.csv")
contact_summary.to_csv(out_path + f"/{out_preffix}per_contact_summarystats.csv")
dist_summary.to_csv(out_path + f"/{out_preffix}distribution_summarystats.csv")
regions_summary.to_csv(out_path + f"/{out_preffix}per_region_summarystats.csv")
combined.to_csv(out_path + f"/{out_preffix}summarystats_combined.csv")

# ***EXECUTE FUNCTION PROTOTYPE***

## **Define prototype `_batch_summary_stats` function**

In [55]:
def _batch_summary_stats(csv_path_list: List[str],
                         out_path: str,
                         out_preffix: str):
    """" 
    csv_path_list: List[str],
        A list of path strings where .csv files to analyze are located.
    out_path: str,
        A path string where the summary data file will be output to
    out_preffix: str
        The prefix used to name the output file.    
    """
    ds_count = 0
    fl_count = 0
    ###################
    # Read in the csv files and combine them into one of each type
    ###################
    org_tabs = []
    contact_tabs = []
    dist_tabs = []
    region_tabs = []

    for loc in csv_path_list:
        ds_count = ds_count + 1
        loc=Path(loc)
        files_store = sorted(loc.glob("*.csv"))
        for file in files_store:
            fl_count = fl_count + 1
            stem = file.stem

            org = "organelles"
            contacts = "contacts"
            dist = "distributions"
            regions = "_regions"

            if org in stem:
                test_orgs = pd.read_csv(file, index_col=0)
                test_orgs.insert(0, "dataset", stem[:-11])
                org_tabs.append(test_orgs)
            if contacts in stem:
                test_contact = pd.read_csv(file, index_col=0)
                test_contact.insert(0, "dataset", stem[:-9])
                contact_tabs.append(test_contact)
            if dist in stem:
                test_dist = pd.read_csv(file, index_col=0)
                test_dist.insert(0, "dataset", stem[:-14])
                dist_tabs.append(test_dist)
            if regions in stem:
                test_regions = pd.read_csv(file, index_col=0)
                test_regions.insert(0, "dataset", stem[:-8])
                region_tabs.append(test_regions)
            
    org_df = pd.concat(org_tabs,axis=0, join='outer')
    contacts_df = pd.concat(contact_tabs,axis=0, join='outer')
    dist_df = pd.concat(dist_tabs,axis=0, join='outer')
    regions_df = pd.concat(region_tabs,axis=0, join='outer')

    ###################
    # adding new metrics to the original sheets
    ###################
    # TODO: include these labels when creating the original sheets
    contact_cnt = contacts_df[["dataset", "image_name", "object", "label", "volume"]]
    contact_cnt[["orgA", "orgB"]] = contact_cnt["object"].str.split('X', expand=True)
    contact_cnt[["A_ID", "B_ID"]] = contact_cnt["label"].str.split('_', expand=True)
    contact_cnt["A"] = contact_cnt["orgA"] +"_" + contact_cnt["A_ID"].astype(str)
    contact_cnt["B"] = contact_cnt["orgB"] +"_" + contact_cnt["B_ID"].astype(str)

    contact_cnt_percell = contact_cnt[["dataset", "image_name", "orgA", "A_ID", "object", "volume"]].groupby(["dataset", "image_name", "orgA", "A_ID", "object"]).agg(["count", "sum"])
    contact_cnt_percell.columns = ["_".join(col_name).rstrip('_') for col_name in contact_cnt_percell.columns.to_flat_index()]
    unstacked = contact_cnt_percell.unstack(level='object')
    unstacked.columns = ["_".join(col_name).rstrip('_') for col_name in unstacked.columns.to_flat_index()]
    unstacked = unstacked.reset_index()
    for col in unstacked.columns:
        if col.startswith("volume_count_"):
            newname = col.split("_")[-1] + "_count"
            unstacked.rename(columns={col:newname}, inplace=True)
        if col.startswith("volume_sum_"):
            newname = col.split("_")[-1] + "_volume"
            unstacked.rename(columns={col:newname}, inplace=True)
    unstacked.rename(columns={"orgA":"object", "A_ID":"label"}, inplace=True)
    unstacked.set_index(['dataset', 'image_name', 'object', 'label'])

    contact_percellB = contact_cnt[["dataset", "image_name", "orgB", "B_ID", "object", "volume"]].groupby(["dataset", "image_name", "orgB", "B_ID", "object"]).agg(["count", "sum"])
    contact_percellB.columns = ["_".join(col_name).rstrip('_') for col_name in contact_percellB.columns.to_flat_index()]
    unstackedB = contact_percellB.unstack(level='object')
    unstackedB.columns = ["_".join(col_name).rstrip('_') for col_name in unstackedB.columns.to_flat_index()]
    unstackedB = unstackedB.reset_index()
    for col in unstackedB.columns:
        if col.startswith("volume_count_"):
            newname = col.split("_")[-1] + "_count"
            unstackedB.rename(columns={col:newname}, inplace=True)
        if col.startswith("volume_sum_"):
            newname = col.split("_")[-1] + "_volume"
            unstackedB.rename(columns={col:newname}, inplace=True)
    unstackedB.rename(columns={"orgB":"object", "B_ID":"label"}, inplace=True)
    unstackedB.set_index(['dataset', 'image_name', 'object', 'label'])

    contact_cnt = pd.concat([unstacked, unstackedB], axis=0).sort_index(axis=0)
    contact_cnt = contact_cnt.groupby(['dataset', 'image_name', 'object', 'label']).sum().reset_index()
    contact_cnt['label']=contact_cnt['label'].astype("Int64")

    org_df = pd.merge(org_df, contact_cnt, how='left', on=['dataset', 'image_name', 'object', 'label'], sort=True)
    org_df[contact_cnt.columns] = org_df[contact_cnt.columns].fillna(0)

    ###################
    # summary stat group
    ###################
    group_by = ['dataset', 'image_name', 'object']
    sharedcolumns = ["SA_to_volume_ratio", "equivalent_diameter", "extent", "euler_number", "solidity", "axis_major_length"]
    ag_func_standard = ['mean', 'median', 'std']

    ###################
    # summarize shared measurements between org_df and contacts_df
    ###################
    org_cont_tabs = []
    for tab in [org_df, contacts_df]:
        tab1 = tab[group_by + ['volume']].groupby(group_by).agg(['count', 'sum'] + ag_func_standard)
        tab2 = tab[group_by + ['surface_area']].groupby(group_by).agg(['sum'] + ag_func_standard)
        tab3 = tab[group_by + sharedcolumns].groupby(group_by).agg(ag_func_standard)
        shared_metrics = pd.merge(tab1, tab2, 'outer', on=group_by)
        shared_metrics = pd.merge(shared_metrics, tab3, 'outer', on=group_by)
        org_cont_tabs.append(shared_metrics)

    org_summary = org_cont_tabs[0]
    contact_summary = org_cont_tabs[1]

    ###################
    # group metrics from regions_df similar to the above
    ###################
    regions_summary = regions_df[group_by + ['volume', 'surface_area'] + sharedcolumns].set_index(group_by)

    ###################
    # summarize extra metrics from org_df
    ###################
    columns2 = [col for col in org_df.columns if col.endswith(("_count", "_volume"))]
    contact_counts_summary = org_df[group_by + columns2].groupby(group_by).agg(['sum'] + ag_func_standard)
    org_summary = pd.merge(org_summary, contact_counts_summary, 'outer', on=group_by)#left_on=group_by, right_on=True)

    ###################
    # summarize distribution measurements
    ###################
    # organelle distributions
    hist_dfs = []
    for ind in range(0,len(dist_df.index)):
        selection = dist_df.iloc[[ind]] #    selection = dist_df.loc[[ind]]
        bins_df = pd.DataFrame()
        wedges_df = pd.DataFrame()
        Z_df = pd.DataFrame()
        CV_df = pd.DataFrame()

        bins_df[['bins', 'masks', 'obj']] = selection[['XY_bins', 'XY_mask_vox_cnt_perbin', 'XY_obj_vox_cnt_perbin']]
        wedges_df[['bins', 'masks', 'obj']] = selection[['XY_wedges', 'XY_mask_vox_cnt_perwedge', 'XY_obj_vox_cnt_perwedge']]
        Z_df[['bins', 'masks', 'obj']] = selection[['Z_slices', 'Z_mask_vox_cnt', 'Z_obj_vox_cnt']]

        dfs = [selection[['dataset', 'image_name', 'object']].reset_index()]
        for df, prefix in zip([bins_df, wedges_df, Z_df], ["XY_bins_", "XY_wedges_", "Z_slices_"]):
            single_df = pd.DataFrame(list(zip(df["bins"].values[0][1:-1].split(", "), 
                                            df["obj"].values[0][1:-1].split(", "), 
                                            df["masks"].values[0][1:-1].split(", "))), columns =['bins', 'obj', 'mask']).astype(int)
            
            if "Z_" in prefix:
                single_df =  single_df.drop(single_df[single_df['mask'] == 0].index)
                single_df['bins'] = (single_df["bins"]/max(single_df.bins)*9.99).apply(np.floor)+1
                single_df = single_df.groupby("bins").agg(['sum']).reset_index()
                single_df.columns = ['bins',"obj","mask"]
        
            single_df['mask_fract'] = single_df['mask']/single_df['mask'].max()
            # single_df['obj_normed_tocell'] = (single_df["obj"]*single_df["mask_fract"]).fillna(0)
            single_df['obj_perc_per_bin'] = (single_df["obj"] / single_df["obj"].sum())*100
            single_df['obj_portion_normed_tobin'] = (single_df["obj_perc_per_bin"]/single_df["mask_fract"]).fillna(0)

            sumstats_df = pd.DataFrame()

            s = single_df['bins'].repeat(single_df['obj_portion_normed_tobin']*100)

            sumstats_df['hist_mean']=[s.mean()]
            sumstats_df['hist_median']=[s.median()]
            if single_df['obj_portion_normed_tobin'].sum() != 0: sumstats_df['hist_mode']=[s.mode().iloc[0]]
            else: sumstats_df['hist_mode']=['NaN']
            sumstats_df['hist_min']=[s.min()]
            sumstats_df['hist_max']=[s.max()]
            sumstats_df['hist_range']=[s.max() - s.min()]
            sumstats_df['hist_stdev']=[s.std()]
            sumstats_df['hist_skew']=[s.skew()]
            sumstats_df['hist_kurtosis']=[s.kurtosis()]
            sumstats_df['hist_var']=[s.var()]
            sumstats_df.columns = [prefix+col for col in sumstats_df.columns]
            dfs.append(sumstats_df.reset_index())

        CV_df = pd.DataFrame(list(zip(selection["XY_obj_cv_perbin"].values[0][1:-1].split(", "))), columns =['CV']).astype(float)
        sumstats_CV_df = pd.DataFrame()
        sumstats_CV_df['XY_bin_CV_mean'] = CV_df.mean()
        sumstats_CV_df['XY_bin_CV_median'] = CV_df.median()
        sumstats_CV_df['XY_bin_CV_std'] = CV_df.std()
        dfs.append(sumstats_CV_df.reset_index().drop(['index'], axis=1))

        combined_df = pd.concat(dfs, axis=1).drop(columns="index")
        hist_dfs.append(combined_df)
    dist_org_summary = pd.concat(hist_dfs, ignore_index=True)
    dist_org_summary

    # nucleus distribution
    nuc_dist_df = dist_df[["dataset", "image_name", 
                        "XY_bins", "XY_center_vox_cnt_perbin", "XY_mask_vox_cnt_perbin",
                        "XY_wedges", "XY_center_vox_cnt_perwedge", "XY_mask_vox_cnt_perwedge",
                        "Z_slices", "Z_center_vox_cnt", "Z_mask_vox_cnt"]].set_index(["dataset", "image_name"])
    nuc_hist_dfs = []
    for idx in nuc_dist_df.index.unique():
        selection = nuc_dist_df.loc[idx].iloc[[0]].reset_index()
        bins_df = pd.DataFrame()
        wedges_df = pd.DataFrame()
        Z_df = pd.DataFrame()

        bins_df[['bins', 'center', 'masks']] = selection[['XY_bins', 'XY_center_vox_cnt_perbin', 'XY_mask_vox_cnt_perbin']]
        wedges_df[['bins', 'center', 'masks']] = selection[['XY_wedges', 'XY_center_vox_cnt_perwedge', 'XY_mask_vox_cnt_perwedge']]
        Z_df[['bins', 'center', 'masks']] = selection[['Z_slices', 'Z_center_vox_cnt', 'Z_mask_vox_cnt']]

        dfs = [selection[['dataset', 'image_name']]]
        for df, prefix in zip([bins_df, wedges_df, Z_df], ["XY_bins_", "XY_wedges_", "Z_slices_"]):
            single_df = pd.DataFrame(list(zip(df["bins"].values[0][1:-1].split(", "), 
                                            df["masks"].values[0][1:-1].split(", "),
                                            df["center"].values[0][1:-1].split(", "))), columns =['bins', 'mask', 'obj']).astype(int)

            if "Z_" in prefix:
                single_df =  single_df.drop(single_df[single_df['mask'] == 0].index)
                single_df['bins'] = (single_df["bins"]/max(single_df.bins)*9.99).apply(np.floor)+1
                single_df = single_df.groupby("bins").agg(['sum']).reset_index()
                single_df.columns = ['bins',"mask","obj"]
        
            single_df['mask_fract'] = single_df['mask']/single_df['mask'].max()
            # single_df['obj_normed_tocell'] = (single_df["obj"]*single_df["mask_fract"]).fillna(0)
            single_df['obj_perc_per_bin'] = (single_df["obj"] / single_df["obj"].sum())*100
            single_df['obj_portion_normed_tobin'] = (single_df["obj_perc_per_bin"]/single_df["mask_fract"]).fillna(0)

            sumstats_df = pd.DataFrame()

            s = single_df['bins'].repeat(single_df['obj_portion_normed_tobin']*100)

            sumstats_df['hist_mean']=[s.mean()]
            sumstats_df['hist_median']=[s.median()]
            if single_df['obj_portion_normed_tobin'].sum() != 0: sumstats_df['hist_mode']=[s.mode().iloc[0]]
            else: sumstats_df['hist_mode']=['NaN']
            sumstats_df['hist_min']=[s.min()]
            sumstats_df['hist_max']=[s.max()]
            sumstats_df['hist_range']=[s.max() - s.min()]
            sumstats_df['hist_stdev']=[s.std()]
            sumstats_df['hist_skew']=[s.skew()]
            sumstats_df['hist_kurtosis']=[s.kurtosis()]
            sumstats_df['hist_var']=[s.var()]
            sumstats_df.columns = [prefix+col for col in sumstats_df.columns]
            dfs.append(sumstats_df.reset_index())
        combined_df = pd.concat(dfs, axis=1).drop(columns="index")
        nuc_hist_dfs.append(combined_df)
    dist_center_summary = pd.concat(nuc_hist_dfs, ignore_index=True)
    dist_center_summary.insert(2, column="object", value="nuc")

    dist_summary = pd.concat([dist_org_summary, dist_center_summary], axis=0).set_index(group_by).sort_index()


    ###################
    # add normalization
    ###################
    # organelle area fraction
    area_fractions = []
    for idx in org_summary.index.unique():
        org_vol = org_summary.loc[idx][('volume', 'sum')]
        cell_vol = regions_summary.loc[idx[:-1] + ('cell',)]["volume"]
        afrac = org_vol/cell_vol
        area_fractions.append(afrac)
    org_summary[('volume', 'fraction')] = area_fractions
    # TODO: add in line to reorder the level=0 columns here

    # contact sites volume normalized
    norm_toA_list = []
    norm_toB_list = []
    for col in contact_summary.index:
        norm_toA_list.append(contact_summary.loc[col][('volume', 'sum')]/org_summary.loc[col[:-1]+(col[-1].split('X')[0],)][('volume', 'sum')])
        norm_toB_list.append(contact_summary.loc[col][('volume', 'sum')]/org_summary.loc[col[:-1]+(col[-1].split('X')[1],)][('volume', 'sum')])
    contact_summary[('volume', 'norm_to_A')] = norm_toA_list
    contact_summary[('volume', 'norm_to_B')] = norm_toB_list

    # number and area of individuals organelle involved in contact
    cont_cnt = org_df[group_by]
    cont_cnt[[col.split('_')[0] for col in org_df.columns if col.endswith(("_count"))]] = org_df[[col for col in org_df.columns if col.endswith(("_count"))]].astype(bool)
    cont_cnt_perorg = cont_cnt.groupby(group_by).agg('sum')
    cont_cnt_perorg.columns = pd.MultiIndex.from_product([cont_cnt_perorg.columns, ['count_in']])
    for col in cont_cnt_perorg.columns:
        cont_cnt_perorg[(col[0], 'num_fraction_in')] = cont_cnt_perorg[col].values/org_summary[('volume', 'count')].values
    cont_cnt_perorg.sort_index(axis=1, inplace=True)
    org_summary = pd.merge(org_summary, cont_cnt_perorg, on=group_by, how='outer')


    ###################
    # flatten datasheets and combine
    # TODO: restructure this so that all of the datasheets and unstacked and then reorded based on shared level 0 columns before flattening
    ###################
    # org flattening
    org_final = org_summary.unstack(-1)
    for col in org_final.columns:
        if col[1] in ('count_in', 'num_fraction_in') or col[0].endswith(('_count', '_volume')):
            if col[2] not in col[0]:
                org_final.drop(col,axis=1, inplace=True)
    new_col_order = ['dataset', 'image_name', 'object', 'volume', 'surface_area', 'SA_to_volume_ratio', 
                 'equivalent_diameter', 'extent', 'euler_number', 'solidity', 'axis_major_length', 
                 'ERXLD', 'ERXLD_count', 'ERXLD_volume', 'golgiXER', 'golgiXER_count', 'golgiXER_volume', 
                 'golgiXLD', 'golgiXLD_count', 'golgiXLD_volume', 'golgiXperox', 'golgiXperox_count', 'golgiXperox_volume', 
                 'lysoXER', 'lysoXER_count', 'lysoXER_volume', 'lysoXLD', 'lysoXLD_count', 'lysoXLD_volume', 
                 'lysoXgolgi', 'lysoXgolgi_count', 'lysoXgolgi_volume', 'lysoXmito', 'lysoXmito_count', 'lysoXmito_volume', 
                 'lysoXperox', 'lysoXperox_count', 'lysoXperox_volume', 'mitoXER', 'mitoXER_count', 'mitoXER_volume', 
                 'mitoXLD', 'mitoXLD_count', 'mitoXLD_volume', 'mitoXgolgi', 'mitoXgolgi_count', 'mitoXgolgi_volume', 
                 'mitoXperox', 'mitoXperox_count', 'mitoXperox_volume', 'peroxXER', 'peroxXER_count', 'peroxXER_volume', 
                 'peroxXLD', 'peroxXLD_count', 'peroxXLD_volume']
    new_cols = org_final.columns.reindex(new_col_order, level=0)
    org_final = org_final.reindex(columns=new_cols[0])
    org_final.columns = ["_".join((col_name[-1], col_name[1], col_name[0])) for col_name in org_final.columns.to_flat_index()]

    #renaming, filling "NaN" with 0 when needed, and removing ER_std columns
    for col in org_final.columns:
        if '_count_in_' or '_fraction_in_' in col:
            org_final[col] = org_final[col].fillna(0)
        if col.endswith(("_count_volume","_sum_volume", "_mean_volume", "_median_volume")):
            org_final[col] = org_final[col].fillna(0)
        if col.endswith("_count_volume"):
            org_final.rename(columns={col:col.split("_")[0]+"_count"}, inplace=True)
        if col.startswith("ER_std_"):
            org_final.drop(columns=[col], inplace=True)
    org_final = org_final.reset_index()

    # contacts flattened
    contact_final = contact_summary.unstack(-1)
    contact_final.columns = ["_".join((col_name[-1], col_name[1], col_name[0])) for col_name in contact_final.columns.to_flat_index()]

    #renaming and filling "NaN" with 0 when needed
    for col in contact_final.columns:
        if col.endswith(("_count_volume","_sum_volume", "_mean_volume", "_median_volume")):
            contact_final[col] = contact_final[col].fillna(0)
        if col.endswith("_count_volume"):
            contact_final.rename(columns={col:col.split("_")[0]+"_count"}, inplace=True)
    contact_final = contact_final.reset_index()

    # distributions flattened
    dist_final = dist_summary.unstack(-1)
    dist_final.columns = ["_".join((col_name[1], col_name[0])) for col_name in dist_final.columns.to_flat_index()]
    dist_final = dist_final.reset_index()

    # regions flattened & normalization added
    regions_final = regions_summary.unstack(-1)
    regions_final.columns = ["_".join((col_name[1], col_name[0])) for col_name in regions_final.columns.to_flat_index()]
    regions_final['nuc_area_fraction'] = regions_final['nuc_volume'] / regions_final['cell_volume']
    regions_final = regions_final.reset_index()

    # combining them all
    combined = pd.merge(org_final, contact_final, on=["dataset", "image_name"], how="outer")
    combined = pd.merge(combined, dist_final, on=["dataset", "image_name"], how="outer")
    combined = pd.merge(combined, regions_final, on=["dataset", "image_name"], how="outer").set_index(["dataset", "image_name"])
    combined.columns = [col.replace('sum', 'total') for col in combined.columns]

    ###################
    # export summary sheets
    ###################
    org_summary.to_csv(out_path + f"/{out_preffix}per_org_summarystats.csv")
    contact_summary.to_csv(out_path + f"/{out_preffix}per_contact_summarystats.csv")
    dist_summary.to_csv(out_path + f"/{out_preffix}distribution_summarystats.csv")
    regions_summary.to_csv(out_path + f"/{out_preffix}per_region_summarystats.csv")
    combined.to_csv(out_path + f"/{out_preffix}summarystats_combined.csv")

    print(f"Processing of {fl_count} files from {ds_count} dataset(s) is complete.")
    return f"{fl_count} files from {ds_count} dataset(s) were processed"

## **Run prototype `_batch_summary_stats` function**

In [57]:
out=_batch_summary_stats(csv_path_list=["E:/Experiments (C2-109 - C2-116)/Paper1-data_CHH_VD\ImagingData/VD_data/VD-0428/20230918_VD-0428_3D-analysis/20240328_VD-0428_quant",
                                        "E:/Experiments (C2-109 - C2-116)/Paper1-data_CHH_VD\ImagingData/VD_data/VD-0505/20230920_VD-0505_3D-analysis/20240401_VD-0505_quant",
                                        "E:/Experiments (C2-109 - C2-116)/Paper1-data_CHH_VD\ImagingData/VD_data/VD-0512/20230920_VD-0512_3D-analysis/20240404_VD-0512_quant",
                                        "E:/Experiments (C2-109 - C2-116)/Paper1-data_CHH_VD\ImagingData/VD_data/VD-0609/20230921_VD-0609_3D-analysis/20240415_VD-0609_quant"],
                         out_path="D:/Paper 1 - Neuron vs astrocyte multispectral/Data/RAW summary data - UPDATED20240712",
                         out_preffix="20240712_10per_neuron_")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  contact_cnt[["orgA", "orgB"]] = contact_cnt["object"].str.split('X', expand=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  contact_cnt[["orgA", "orgB"]] = contact_cnt["object"].str.split('X', expand=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  contact_cnt[["A_ID", "B_ID"]] = contact_c

Processing of 20 files from 5 dataset(s) is complete.
