This notebook performs profile aggregation.

In [1]:
import argparse
import os
import pathlib
import sys

import pandas as pd
from arg_parsing_utils import parse_args
from notebook_init_utils import bandicoot_check, init_notebook
from pycytominer import aggregate

# init_notebook() supplies the project root and a flag telling us whether this
# code is running interactively (as a notebook) or as a script.
root_dir, in_notebook = init_notebook()

# Expected location of the data on the bandicoot mount. bandicoot_check is given
# both this path and root_dir; whatever it returns is used as the base directory
# for every profile path below (presumably falling back to root_dir when the
# mount is unavailable -- confirm against notebook_init_utils).
bandicoot_data_dir = pathlib.Path(
    os.path.expanduser("~/mnt/bandicoot/NF1_organoid_data")
).resolve()
profile_base_dir = bandicoot_check(bandicoot_data_dir, root_dir)

In [2]:
# Interactive runs use a fixed example patient; script runs take both values
# from the dictionary returned by parse_args().
if in_notebook:
    patient = "NF0014_T1"
    image_based_profiles_subparent_name = "image_based_profiles"
else:
    args = parse_args()
    patient = args["patient"]
    image_based_profiles_subparent_name = args["image_based_profiles_subparent_name"]

In [None]:
# pathing
# Every input and output lives under the same per-patient profile directory, so
# the shared prefix is built once instead of being repeated in each path.
patient_profiles_dir = pathlib.Path(
    f"{profile_base_dir}/data/{patient}/{image_based_profiles_subparent_name}"
)
feature_selected_dir = patient_profiles_dir / "5.feature_selected_profiles"
aggregated_dir = patient_profiles_dir / "6.aggregated_profiles"

# input paths -- strict=True raises FileNotFoundError here if an input is missing,
# rather than failing later when the file is read
sc_fs_path = (feature_selected_dir / "sc_fs.parquet").resolve(strict=True)
organoid_fs_path = (feature_selected_dir / "organoid_fs.parquet").resolve(strict=True)


# output paths -- these files need not exist yet, so they are resolved non-strictly
sc_agg_well_output_path = (aggregated_dir / "sc_agg_well_level.parquet").resolve()
sc_agg_well_parent_organoid_output_path = (
    aggregated_dir / "sc_agg_parent_organoid_level.parquet"
).resolve()
sc_consensus_output_path = (aggregated_dir / "sc_consensus.parquet").resolve()

organoid_agg_well_output_path = (
    aggregated_dir / "organoid_agg_well_level.parquet"
).resolve()
organoid_consensus_output_path = (
    aggregated_dir / "organoid_consensus.parquet"
).resolve()

# all outputs share one directory; create it before anything is written
aggregated_dir.mkdir(parents=True, exist_ok=True)

In [4]:
# Load the feature-selected single-cell and organoid profiles from parquet.
sc_fs = pd.read_parquet(path=sc_fs_path)
organoid_fs = pd.read_parquet(path=organoid_fs_path)

### Aggregate the single cell profiles
We will aggregate the profiles using a few different stratifications:
1. Well
2. Well and parent organoid
3. Treatment - i.e. the consensus profile for each treatment

In [5]:
# Preview the first five rows of the single-cell profiles.
sc_fs.head(n=5)

Unnamed: 0,Metadata_patient_tumor,Metadata_object_id,Metadata_unit,Metadata_dose,Metadata_treatment,Metadata_image_set,Metadata_Well,Metadata_parent_organoid,Metadata_Target,Metadata_Class,...,Texture_Cytoplasm_Mito_Difference.Entropy_256.3,Texture_Cytoplasm_Mito_Difference.Variance_256.3,Texture_Cytoplasm_Mito_Entropy_256.3,Texture_Cytoplasm_Mito_Information.Measure.of.Correlation.1_256.3,Texture_Cytoplasm_Mito_Information.Measure.of.Correlation.2_256.3,Texture_Cytoplasm_Mito_Inverse.Difference.Moment_256.3,Texture_Cytoplasm_Mito_Sum.Average_256.3,Texture_Cytoplasm_Mito_Sum.Entropy_256.3,Texture_Cytoplasm_Mito_Sum.Variance_256.3,Texture_Cytoplasm_Mito_Variance_256.3
0,NF0014_T1,61,uM,1,Fimepinostat,E5-2,E5,-1,PI3K and HDAC inhibitor,Small Molecule,...,-0.853288,0.666296,-0.744992,-1.727884,-0.795478,0.744107,-0.887714,-0.778888,-0.756731,-0.807663
1,NF0014_T1,114,uM,1,Fimepinostat,E5-2,E5,-1,PI3K and HDAC inhibitor,Small Molecule,...,0.234114,-0.146387,0.204914,-0.566253,0.5074,-0.180214,0.69977,0.221479,1.005875,1.00938
2,NF0014_T1,184,uM,1,Fimepinostat,E5-2,E5,-1,PI3K and HDAC inhibitor,Small Molecule,...,-0.292831,0.26572,-0.284591,-0.33757,-0.143982,0.291757,-0.415339,-0.290942,-0.489394,-0.508018
3,NF0014_T1,219,uM,1,Fimepinostat,E5-2,E5,-1,PI3K and HDAC inhibitor,Small Molecule,...,-0.292831,0.26572,-0.284591,-0.33757,-0.143982,0.291757,-0.415339,-0.290942,-0.489394,-0.508018
4,NF0014_T1,255,uM,1,Fimepinostat,E5-2,E5,-1,PI3K and HDAC inhibitor,Small Molecule,...,-0.341044,0.297917,-0.323744,-0.28141,-0.210875,0.324488,-0.010976,-0.334375,0.466926,0.503383


In [6]:
# Split the single-cell columns into annotations and measured features.
# Any column whose name contains "Metadata" is treated as annotation; the three
# CENTER.X/Y/Z columns (presumably object centroid coordinates) are also kept
# out of the feature set.
sc_metadata_columns = [
    column for column in sc_fs.columns if "Metadata" in column
] + [
    "Area.Size.Shape_Cell_CENTER.X",
    "Area.Size.Shape_Cell_CENTER.Y",
    "Area.Size.Shape_Cell_CENTER.Z",
]

# Features are whatever remains, in the frame's original column order.
sc_features_columns = [
    column for column in sc_fs.columns if column not in sc_metadata_columns
]

# errors="ignore" tolerates the CENTER columns being absent from the frame.
sc_features_df = sc_fs.drop(columns=sc_metadata_columns, errors="ignore")

In [7]:
# Treatment-level annotation columns shared by all three stratifications below.
sc_treatment_strata = [
    "Metadata_treatment",
    "Metadata_Target",
    "Metadata_Class",
    "Metadata_Therapeutic_Categories",
]

# stratification approach #1: median profile per well
sc_well_agg = aggregate(
    population_df=sc_fs,
    strata=["Metadata_Well", *sc_treatment_strata],
    features=sc_features_columns,
    operation="median",
)
sc_well_agg.to_parquet(sc_agg_well_output_path, index=False)

# stratification approach #2: median profile per well and parent organoid
sc_well_parent_organoid_agg = aggregate(
    population_df=sc_fs,
    strata=["Metadata_Well", "Metadata_parent_organoid", *sc_treatment_strata],
    features=sc_features_columns,
    operation="median",
)
sc_well_parent_organoid_agg.to_parquet(
    sc_agg_well_parent_organoid_output_path, index=False
)

# stratification approach #3: median profile per treatment, a.k.a. consensus
sc_consensus = aggregate(
    population_df=sc_fs,
    strata=list(sc_treatment_strata),  # pass a copy so the shared list stays untouched
    features=sc_features_columns,
    operation="median",
)
sc_consensus.to_parquet(sc_consensus_output_path, index=False)

### Aggregate the organoid profiles
We will aggregate the profiles using a few different stratifications:
1. Well
2. Treatment - i.e. the consensus profile for each treatment

In [8]:
# Preview the first five rows of the organoid profiles.
organoid_fs.head(n=5)

Unnamed: 0,Metadata_patient_tumor,Metadata_object_id,Metadata_unit,Metadata_dose,Metadata_treatment,Metadata_image_set,Metadata_Well,Metadata_single_cell_count,Metadata_Target,Metadata_Class,...,Texture_Organoid_Mito_Difference.Entropy_256.3,Texture_Organoid_Mito_Difference.Variance_256.3,Texture_Organoid_Mito_Entropy_256.3,Texture_Organoid_Mito_Information.Measure.of.Correlation.1_256.3,Texture_Organoid_Mito_Information.Measure.of.Correlation.2_256.3,Texture_Organoid_Mito_Inverse.Difference.Moment_256.3,Texture_Organoid_Mito_Sum.Average_256.3,Texture_Organoid_Mito_Sum.Entropy_256.3,Texture_Organoid_Mito_Sum.Variance_256.3,Texture_Organoid_Mito_Variance_256.3
0,NF0014_T1,2,uM,1,Fimepinostat,E5-2,E5,,PI3K and HDAC inhibitor,Small Molecule,...,-0.7997,0.727493,-0.707564,1.408761,-0.908726,0.723078,-0.593903,-0.73303,-0.528032,-0.561465
1,NF0014_T1,4,uM,1,Fimepinostat,E5-2,E5,,PI3K and HDAC inhibitor,Small Molecule,...,-0.790156,0.72277,-0.701173,-0.081791,-0.791176,0.718448,-0.589805,-0.725724,-0.523691,-0.555467
2,NF0014_T1,7,uM,1,Fimepinostat,E5-2,E5,,PI3K and HDAC inhibitor,Small Molecule,...,-0.799328,0.727338,-0.707337,1.40876,-0.908707,0.722927,-0.593751,-0.732765,-0.527863,-0.561134
3,NF0014_T1,19,uM,1,Fimepinostat,E5-2,E5,,PI3K and HDAC inhibitor,Small Molecule,...,-0.797577,0.726521,-0.706253,1.309233,-0.902352,0.722125,-0.59309,-0.731511,-0.527244,-0.559971
4,NF0014_T1,23,uM,1,Fimepinostat,E5-2,E5,,PI3K and HDAC inhibitor,Small Molecule,...,0.990027,-0.917486,0.84717,-1.158874,1.342547,-0.94117,0.691929,0.914296,0.408973,0.434582


In [9]:
# Split the organoid columns into annotations and measured features.
# Any column whose name contains "Metadata" is treated as annotation; the three
# CENTER.X/Y/Z columns (presumably object centroid coordinates) are also kept
# out of the feature set.
organoid_metadata_columns = [x for x in organoid_fs.columns if "Metadata" in x]
organoid_metadata_columns += [
    "Area.Size.Shape_Organoid_CENTER.X",
    "Area.Size.Shape_Organoid_CENTER.Y",
    "Area.Size.Shape_Organoid_CENTER.Z",
]

# Features are whatever remains, in the frame's original column order.
# (This list was previously computed twice with identical code; once is enough.)
organoid_features_columns = [
    col for col in organoid_fs.columns if col not in organoid_metadata_columns
]

# errors="ignore" tolerates the CENTER columns being absent from the frame.
organoid_features_df = organoid_fs.drop(
    columns=organoid_metadata_columns, errors="ignore"
)

In [10]:
# Treatment-level annotation columns shared by both stratifications below.
organoid_treatment_strata = [
    "Metadata_treatment",
    "Metadata_Target",
    "Metadata_Class",
    "Metadata_Therapeutic_Categories",
]

# stratification approach #1: median profile per well
organoid_well_agg = aggregate(
    population_df=organoid_fs,
    strata=["Metadata_Well", *organoid_treatment_strata],
    features=organoid_features_columns,
    operation="median",
)
organoid_well_agg.to_parquet(organoid_agg_well_output_path, index=False)

# stratification approach #2: median profile per treatment, a.k.a. consensus
organoid_consensus = aggregate(
    population_df=organoid_fs,
    strata=list(organoid_treatment_strata),  # pass a copy so the shared list stays untouched
    features=organoid_features_columns,
    operation="median",
)
organoid_consensus.to_parquet(organoid_consensus_output_path, index=False)

In [11]:
# Preview the first five rows of the well-level organoid aggregate.
organoid_well_agg.head(n=5)

Unnamed: 0,Metadata_Well,Metadata_treatment,Metadata_Target,Metadata_Class,Metadata_Therapeutic_Categories,Area.Size.Shape_Organoid_VOLUME,Area.Size.Shape_Organoid_EXTENT,Area.Size.Shape_Organoid_EULER.NUMBER,Area.Size.Shape_Organoid_EQUIVALENT.DIAMETER,Colocalization_Organoid_AGP.ER_MEAN.CORRELATION.COEFF,...,Texture_Organoid_Mito_Difference.Entropy_256.3,Texture_Organoid_Mito_Difference.Variance_256.3,Texture_Organoid_Mito_Entropy_256.3,Texture_Organoid_Mito_Information.Measure.of.Correlation.1_256.3,Texture_Organoid_Mito_Information.Measure.of.Correlation.2_256.3,Texture_Organoid_Mito_Inverse.Difference.Moment_256.3,Texture_Organoid_Mito_Sum.Average_256.3,Texture_Organoid_Mito_Sum.Entropy_256.3,Texture_Organoid_Mito_Sum.Variance_256.3,Texture_Organoid_Mito_Variance_256.3
0,C10,Trametinib,MEK1/2 inhibitor,Small Molecule,Kinase Inhibitor,-0.591452,-1.230203,-0.12921,-1.113271,-1.3384,...,-0.798152,0.726798,-0.706602,1.173773,-0.897471,0.722397,-0.593127,-0.731915,-0.527009,-0.559613
1,C11,Staurosporine,Apoptosis induction,Small Molecule,Experimental,-0.501584,0.576996,-0.439983,-0.244405,1.078882,...,-0.536968,0.559472,-0.512897,-0.492253,-0.172738,0.558685,-0.417298,-0.520129,-0.297328,-0.282938
2,C2,Staurosporine,Apoptosis induction,Small Molecule,Experimental,-0.484823,1.111736,-0.439983,-0.187503,0.129828,...,0.776953,-0.674829,0.401309,0.20712,-0.05742,-0.6955,-0.047049,0.459399,-0.244794,-0.151237
3,C3,Onalespib,HSP90 inhibitor,Small Molecule,Investigational,-0.318224,0.132529,-0.422718,0.179282,0.229687,...,0.055229,-0.020141,-0.116865,0.162751,-0.30507,-0.023754,-0.284274,-0.085431,-0.37715,-0.372186
4,C4,DMSO,Control,Control,Control,-0.383653,0.847258,-0.439983,0.068458,-0.714151,...,-0.234563,0.272459,-0.323862,0.165533,-0.433194,0.271977,-0.318435,-0.309889,-0.307572,-0.339127


In [12]:
# Preview the first five rows of the per-treatment organoid consensus profiles.
organoid_consensus.head(n=5)

Unnamed: 0,Metadata_treatment,Metadata_Target,Metadata_Class,Metadata_Therapeutic_Categories,Area.Size.Shape_Organoid_VOLUME,Area.Size.Shape_Organoid_EXTENT,Area.Size.Shape_Organoid_EULER.NUMBER,Area.Size.Shape_Organoid_EQUIVALENT.DIAMETER,Colocalization_Organoid_AGP.ER_MEAN.CORRELATION.COEFF,Colocalization_Organoid_AGP.ER_MEDIAN.CORRELATION.COEFF,...,Texture_Organoid_Mito_Difference.Entropy_256.3,Texture_Organoid_Mito_Difference.Variance_256.3,Texture_Organoid_Mito_Entropy_256.3,Texture_Organoid_Mito_Information.Measure.of.Correlation.1_256.3,Texture_Organoid_Mito_Information.Measure.of.Correlation.2_256.3,Texture_Organoid_Mito_Inverse.Difference.Moment_256.3,Texture_Organoid_Mito_Sum.Average_256.3,Texture_Organoid_Mito_Sum.Entropy_256.3,Texture_Organoid_Mito_Sum.Variance_256.3,Texture_Organoid_Mito_Variance_256.3
0,Binimetinib,MEK1/2 inhibitor,Small Molecule,Kinase Inhibitor,-0.38505,0.336724,-0.439983,0.065562,0.228735,0.228735,...,0.107344,-0.014896,-0.011921,-0.410368,-0.039506,-0.006378,-0.324999,0.021132,-0.34529,-0.325798
1,Cabozantinib,receptor tyrosine kinase inhibitor,Small Molecule,Kinase Inhibitor,-0.373091,0.552631,-0.439983,0.08985,1.001985,1.001985,...,0.131717,-0.060752,-0.010866,-0.878018,0.388157,-0.063349,-0.078009,0.019869,-0.048137,0.017315
2,Copanlisib,PI3K inhibitor,Small Molecule,Kinase Inhibitor,-0.304822,0.300302,-0.43135,0.209167,0.96223,0.96223,...,0.294554,-0.221053,0.107023,-0.214056,0.455153,-0.225346,0.024684,0.146524,-0.066725,-0.00394
3,DMSO,Control,Control,Control,-0.45932,0.264174,-0.422718,-0.111352,0.218597,0.218597,...,-0.408411,0.438545,-0.421052,0.169433,-0.41651,0.439834,-0.445022,-0.425364,-0.417596,-0.413548
4,Digoxin,Na+/K+ pump inhibitor,Small Molecule,Cardiac Glycosides,-0.354641,0.648939,-0.439983,0.124979,0.362138,0.362138,...,0.110393,-0.061719,-0.024501,-0.227871,0.244182,-0.06293,-0.325151,0.004068,-0.396754,-0.372238
