This notebook performs profile aggregation.

In [1]:
import argparse
import os
import pathlib
import sys

import pandas as pd
from arg_parsing_utils import parse_args
from notebook_init_utils import bandicoot_check, init_notebook
from pycytominer import aggregate

# init_notebook() yields the project root and a flag that later cells use to
# choose between hard-coded defaults and command-line arguments.
root_dir, in_notebook = init_notebook()

# Candidate location of the NF1 organoid data under the user's home directory.
bandicoot_mount_path = pathlib.Path(
    os.path.expanduser("~/mnt/bandicoot/NF1_organoid_data")
).resolve()
# NOTE(review): bandicoot_check presumably chooses between the mount and
# root_dir depending on availability — confirm in notebook_init_utils.
profile_base_dir = bandicoot_check(bandicoot_mount_path, root_dir)

In [None]:
if in_notebook:
    # Interactive session: use a fixed example patient and default subfolder.
    patient = "NF0014_T1"
    image_based_profiles_subparent_name = "image_based_profiles"
else:
    # Script execution: both settings come from the parsed arguments.
    args = parse_args()
    patient = args["patient"]
    image_based_profiles_subparent_name = args["image_based_profiles_subparent_name"]

In [3]:
# pathing
# Every input and output for this patient lives under one directory, so build
# that directory once instead of repeating the same prefix for each file.
patient_profiles_dir = pathlib.Path(
    f"{profile_base_dir}/data/{patient}/{image_based_profiles_subparent_name}"
)
feature_selected_dir = patient_profiles_dir / "4.feature_selected_profiles"
aggregated_dir = patient_profiles_dir / "5.aggregated_profiles"

# input paths: strict=True makes resolve() raise right away if a file is missing
sc_fs_path = (feature_selected_dir / "sc_fs.parquet").resolve(strict=True)
organoid_fs_path = (feature_selected_dir / "organoid_fs.parquet").resolve(strict=True)


# output path
sc_agg_well_output_path = (aggregated_dir / "sc_agg_well_level.parquet").resolve()
sc_agg_well_parent_organoid_output_path = (
    aggregated_dir / "sc_agg_parent_organoid_level.parquet"
).resolve()
sc_consensus_output_path = (aggregated_dir / "sc_consensus.parquet").resolve()

organoid_agg_well_output_path = (
    aggregated_dir / "organoid_agg_well_level.parquet"
).resolve()
organoid_consensus_output_path = (
    aggregated_dir / "organoid_consensus.parquet"
).resolve()

# All outputs share the same parent directory, so creating it once is enough.
organoid_consensus_output_path.parent.mkdir(parents=True, exist_ok=True)

In [4]:
# read in the data
# Load the feature-selected single-cell and organoid profiles, in that order.
sc_fs, organoid_fs = (
    pd.read_parquet(parquet_path) for parquet_path in (sc_fs_path, organoid_fs_path)
)

### Aggregate the single cell profiles
We will aggregate the profiles using a few different stratifications:
1. Well
2. Well and parent organoid
3. Treatment - i.e. the consensus profile for each treatment

In [5]:
# Preview the first rows of the single-cell profiles loaded from sc_fs.parquet.
sc_fs.head()

Unnamed: 0,Metadata_patient_tumor,Metadata_object_id,Metadata_unit,Metadata_dose,Metadata_treatment,Metadata_image_set,Metadata_Well,Metadata_parent_organoid,Metadata_Target,Metadata_Class,...,Texture_Cytoplasm_Mito_Difference.Entropy_256.3,Texture_Cytoplasm_Mito_Difference.Variance_256.3,Texture_Cytoplasm_Mito_Entropy_256.3,Texture_Cytoplasm_Mito_Information.Measure.of.Correlation.1_256.3,Texture_Cytoplasm_Mito_Information.Measure.of.Correlation.2_256.3,Texture_Cytoplasm_Mito_Inverse.Difference.Moment_256.3,Texture_Cytoplasm_Mito_Sum.Average_256.3,Texture_Cytoplasm_Mito_Sum.Entropy_256.3,Texture_Cytoplasm_Mito_Sum.Variance_256.3,Texture_Cytoplasm_Mito_Variance_256.3
0,NF0014_T1,255,uM,1,Fimepinostat,E5-2,E5,19,PI3K and HDAC inhibitor,Small Molecule,...,0.239311,-0.127454,0.168712,-1.395844,1.089556,-0.11375,0.598492,0.194094,1.482163,1.461434
1,NF0014_T1,25,uM,1,Fimepinostat,D5-2,D5,-1,PI3K and HDAC inhibitor,Small Molecule,...,-0.007093,0.034392,-0.025312,-0.979456,0.38783,0.035037,0.224652,-0.018084,0.921696,0.942498
2,NF0014_T1,51,uM,1,Fimepinostat,D5-2,D5,-1,PI3K and HDAC inhibitor,Small Molecule,...,-0.151367,0.126512,-0.136218,-0.348196,-0.148967,0.120089,-0.087394,-0.141902,0.149061,0.171092
3,NF0014_T1,63,uM,1,Fimepinostat,D5-2,D5,-1,PI3K and HDAC inhibitor,Small Molecule,...,0.18248,-0.0987,0.130216,-1.293351,0.954294,-0.08787,0.724839,0.139665,2.285482,2.302174
4,NF0014_T1,76,uM,1,Fimepinostat,D5-2,D5,-1,PI3K and HDAC inhibitor,Small Molecule,...,-0.11754,0.105099,-0.11387,-0.336922,-0.066265,0.100326,-0.030359,-0.11677,0.264447,0.300243


In [6]:
# Columns excluded from the feature set: every column whose name contains
# "Metadata", plus the three Area.Size.Shape_Cell_CENTER coordinate columns.
sc_metadata_columns = [
    *(name for name in sc_fs.columns if "Metadata" in name),
    "Area.Size.Shape_Cell_CENTER.X",
    "Area.Size.Shape_Cell_CENTER.Y",
    "Area.Size.Shape_Cell_CENTER.Z",
]
# Whatever remains is treated as a feature column, in original column order.
sc_features_columns = [
    name for name in sc_fs.columns if name not in sc_metadata_columns
]
# errors="ignore" tolerates excluded columns that are absent from the frame.
sc_features_df = sc_fs.drop(errors="ignore", columns=sc_metadata_columns)

In [7]:
# Strata common to all three aggregations: the treatment and its annotations.
sc_treatment_strata = [
    "Metadata_treatment",
    "Metadata_Target",
    "Metadata_Class",
    "Metadata_Therapeutic_Categories",
]

# stratification approach #1: median profile per well
sc_well_agg = aggregate(
    population_df=sc_fs,
    features=sc_features_columns,
    operation="median",
    strata=["Metadata_Well", *sc_treatment_strata],
)
sc_well_agg.to_parquet(sc_agg_well_output_path, index=False)

# stratification approach #2: median profile per well and parent organoid
sc_well_parent_organoid_agg = aggregate(
    population_df=sc_fs,
    features=sc_features_columns,
    operation="median",
    strata=["Metadata_Well", "Metadata_parent_organoid", *sc_treatment_strata],
)
sc_well_parent_organoid_agg.to_parquet(
    sc_agg_well_parent_organoid_output_path, index=False
)

# stratification approach #3: median profile per treatment (a.k.a. consensus)
sc_consensus = aggregate(
    population_df=sc_fs,
    features=sc_features_columns,
    operation="median",
    # pass a fresh list so each call receives its own strata object
    strata=list(sc_treatment_strata),
)
sc_consensus.to_parquet(sc_consensus_output_path, index=False)

### Aggregate the organoid profiles
We will aggregate the profiles using a few different stratifications:
1. Well
2. Treatment - i.e. the consensus profile for each treatment

In [8]:
# Preview the first rows of the organoid profiles loaded from organoid_fs.parquet.
organoid_fs.head()

Unnamed: 0,Metadata_patient_tumor,Metadata_object_id,Metadata_unit,Metadata_dose,Metadata_treatment,Metadata_image_set,Metadata_Well,Metadata_single_cell_count,Metadata_Target,Metadata_Class,...,Texture_Organoid_Mito_Difference.Entropy_256.3,Texture_Organoid_Mito_Difference.Variance_256.3,Texture_Organoid_Mito_Entropy_256.3,Texture_Organoid_Mito_Information.Measure.of.Correlation.1_256.3,Texture_Organoid_Mito_Information.Measure.of.Correlation.2_256.3,Texture_Organoid_Mito_Inverse.Difference.Moment_256.3,Texture_Organoid_Mito_Sum.Average_256.3,Texture_Organoid_Mito_Sum.Entropy_256.3,Texture_Organoid_Mito_Sum.Variance_256.3,Texture_Organoid_Mito_Variance_256.3
0,NF0014_T1,19,uM,1,Fimepinostat,E5-2,E5,1.0,PI3K and HDAC inhibitor,Small Molecule,...,-0.744079,0.929269,-0.876963,-0.149888,-0.453953,0.924721,-1.012954,-0.840821,-0.826647,-0.812516
1,NF0014_T1,15,uM,1,Fimepinostat,D5-2,D5,11.0,PI3K and HDAC inhibitor,Small Molecule,...,-0.018758,0.263903,-0.497285,1.297494,-1.614111,0.248123,-1.166291,-0.38611,-1.094204,-1.08214
2,NF0014_T1,47,uM,1,Ketotifen,G6-1,G6,9.0,histamine H1 receptor antagonist,Small Molecule,...,-0.825789,0.752942,-0.748346,0.050014,-0.465444,0.661318,-0.523751,-0.848608,-0.527859,-0.511635
3,NF0014_T1,40,uM,10,Mirdametinib,G8-1,G8,8.0,MEK1/2 inhibitor,Small Molecule,...,-1.928917,1.998597,-1.865807,-0.72397,-1.48379,2.00297,-1.380694,-1.952115,-0.974235,-0.986668
4,NF0014_T1,37,uM,10,Mirdametinib,C9-2,C9,13.0,MEK1/2 inhibitor,Small Molecule,...,1.306989,-0.62237,1.532605,0.813374,0.531467,-0.458024,-0.573092,1.218386,-0.648673,-0.629151


In [9]:
# Columns excluded from the feature set: every column whose name contains
# "Metadata", plus the three Area.Size.Shape_Organoid_CENTER coordinate columns.
organoid_metadata_columns = [x for x in organoid_fs.columns if "Metadata" in x]
organoid_metadata_columns += [
    "Area.Size.Shape_Organoid_CENTER.X",
    "Area.Size.Shape_Organoid_CENTER.Y",
    "Area.Size.Shape_Organoid_CENTER.Z",
]
# Whatever remains is treated as a feature column. This list was previously
# computed twice by two identical comprehensions; one is enough.
organoid_features_columns = [
    col for col in organoid_fs.columns if col not in organoid_metadata_columns
]
# errors="ignore" tolerates excluded columns that are absent from the frame.
organoid_features_df = organoid_fs.drop(
    columns=organoid_metadata_columns, errors="ignore"
)

In [10]:
# Strata common to both aggregations: the treatment and its annotations.
organoid_treatment_strata = [
    "Metadata_treatment",
    "Metadata_Target",
    "Metadata_Class",
    "Metadata_Therapeutic_Categories",
]

# stratification approach #1: median profile per well
organoid_well_agg = aggregate(
    population_df=organoid_fs,
    features=organoid_features_columns,
    operation="median",
    strata=["Metadata_Well", *organoid_treatment_strata],
)
organoid_well_agg.to_parquet(organoid_agg_well_output_path, index=False)

# stratification approach #2: median profile per treatment (a.k.a. consensus)
organoid_consensus = aggregate(
    population_df=organoid_fs,
    features=organoid_features_columns,
    operation="median",
    # pass a fresh list so each call receives its own strata object
    strata=list(organoid_treatment_strata),
)
organoid_consensus.to_parquet(organoid_consensus_output_path, index=False)

In [11]:
# Preview the first rows of the well-level organoid aggregate.
organoid_well_agg.head()

Unnamed: 0,Metadata_Well,Metadata_treatment,Metadata_Target,Metadata_Class,Metadata_Therapeutic_Categories,Area.Size.Shape_Organoid_VOLUME,Area.Size.Shape_Organoid_EXTENT,Area.Size.Shape_Organoid_EULER.NUMBER,Area.Size.Shape_Organoid_EQUIVALENT.DIAMETER,Area.Size.Shape_Organoid_SURFACE.AREA,...,Texture_Organoid_Mito_Difference.Entropy_256.3,Texture_Organoid_Mito_Difference.Variance_256.3,Texture_Organoid_Mito_Entropy_256.3,Texture_Organoid_Mito_Information.Measure.of.Correlation.1_256.3,Texture_Organoid_Mito_Information.Measure.of.Correlation.2_256.3,Texture_Organoid_Mito_Inverse.Difference.Moment_256.3,Texture_Organoid_Mito_Sum.Average_256.3,Texture_Organoid_Mito_Sum.Entropy_256.3,Texture_Organoid_Mito_Sum.Variance_256.3,Texture_Organoid_Mito_Variance_256.3
0,C10,Trametinib,MEK1/2 inhibitor,Small Molecule,Kinase Inhibitor,-1.907398,-0.774468,-0.091052,-2.910473,-1.359157,...,-2.200828,2.1928,-2.072028,-0.875531,-2.210441,2.119545,-0.947962,-2.229484,-0.216904,-0.223611
1,C11,Staurosporine,Apoptosis induction,Small Molecule,Experimental,-1.958855,0.275607,-0.40883,-2.635003,-1.355216,...,-2.016012,1.980768,-1.941205,-1.121747,-1.327253,1.900736,-0.719686,-2.041573,-0.066488,-0.075136
2,C2,Staurosporine,Apoptosis induction,Small Molecule,Experimental,-2.396214,0.798653,-0.40883,-4.098658,-1.703629,...,-1.294653,1.531299,-1.220701,-0.523885,-0.580973,1.60654,-1.21494,-1.33961,-0.8632,-0.869579
3,C3,Onalespib,HSP90 inhibitor,Small Molecule,Investigational,-0.315227,0.118466,0.206224,-0.885091,0.615943,...,0.356584,-0.423521,0.35367,0.180659,-0.344154,-0.874914,0.388202,0.414876,-0.360947,-0.325891
4,C4,DMSO,Control,Control,Control,0.612802,0.279199,-0.398579,0.662549,0.54784,...,1.080094,-1.139295,1.124611,0.693211,0.390285,-0.840632,-0.735402,1.101735,-1.033262,-1.030406


In [12]:
# Preview the first rows of the treatment-level organoid consensus profiles.
organoid_consensus.head()

Unnamed: 0,Metadata_treatment,Metadata_Target,Metadata_Class,Metadata_Therapeutic_Categories,Area.Size.Shape_Organoid_VOLUME,Area.Size.Shape_Organoid_EXTENT,Area.Size.Shape_Organoid_EULER.NUMBER,Area.Size.Shape_Organoid_EQUIVALENT.DIAMETER,Area.Size.Shape_Organoid_SURFACE.AREA,Colocalization_Organoid_AGP.ER_MEAN.CORRELATION.COEFF,...,Texture_Organoid_Mito_Difference.Entropy_256.3,Texture_Organoid_Mito_Difference.Variance_256.3,Texture_Organoid_Mito_Entropy_256.3,Texture_Organoid_Mito_Information.Measure.of.Correlation.1_256.3,Texture_Organoid_Mito_Information.Measure.of.Correlation.2_256.3,Texture_Organoid_Mito_Inverse.Difference.Moment_256.3,Texture_Organoid_Mito_Sum.Average_256.3,Texture_Organoid_Mito_Sum.Entropy_256.3,Texture_Organoid_Mito_Sum.Variance_256.3,Texture_Organoid_Mito_Variance_256.3
0,Binimetinib,MEK1/2 inhibitor,Small Molecule,Kinase Inhibitor,-1.72812,-0.369556,-0.40883,-2.119682,-1.08392,0.144596,...,-0.950405,1.237115,-0.916919,-0.536033,-0.509656,1.211934,-0.713008,-0.966048,-0.334656,-0.341451
1,Cabozantinib,receptor tyrosine kinase inhibitor,Small Molecule,Kinase Inhibitor,-1.274364,0.175019,-0.40883,-1.38704,-1.05568,0.198763,...,-1.183392,1.307441,-1.107695,-0.330781,-0.135412,1.274671,-0.462022,-1.115253,-0.121476,-0.133688
2,Copanlisib,PI3K inhibitor,Small Molecule,Kinase Inhibitor,-1.711816,-0.426591,-0.388329,-2.128265,-1.137457,0.513982,...,-0.878585,0.888504,-1.097366,-0.06493,-0.428256,0.820093,-0.660566,-1.056182,-0.35308,-0.306369
3,DMSO,Control,Control,Control,-0.107466,-0.075439,-0.40883,0.030752,-0.458887,0.135426,...,-0.19356,0.252646,-0.401586,-0.332516,0.170799,0.179581,-0.215574,-0.313514,-0.446058,-0.429369
4,Digoxin,Na+/K+ pump inhibitor,Small Molecule,Cardiac Glycosides,-1.752131,-0.218225,-0.388329,-2.167267,-1.283841,-0.549066,...,-1.110601,1.091358,-0.894211,-0.555353,-0.090244,1.278373,-1.177798,-1.048933,-0.997766,-0.993813
