This notebook merges the single-cell (sc) and organoid profiles after they have been aggregated.

In [1]:
import argparse
import os
import pathlib
import sys

import pandas as pd
from arg_parsing_utils import parse_args
from notebook_init_utils import bandicoot_check, init_notebook
from pycytominer import aggregate

# init_notebook supplies root_dir and the in_notebook flag used by later cells.
root_dir, in_notebook = init_notebook()

# Location of the NF1 organoid data under the user's home directory, with "~"
# expanded and the result made absolute.
bandicoot_mount_path = pathlib.Path(
    os.path.expanduser("~/mnt/bandicoot/NF1_organoid_data")
).resolve()

# NOTE(review): bandicoot_check presumably picks between the mount path and
# root_dir depending on whether the mount is available; confirm in
# notebook_init_utils.
profile_base_dir = bandicoot_check(bandicoot_mount_path, root_dir)

In [2]:
if in_notebook:
    # Interactive session: use the hard-coded example values.
    patient = "NF0014_T1"
    image_based_profiles_subparent_name = "image_based_profiles"
else:
    # Non-notebook run: both values come from the parsed arguments.
    args = parse_args()
    patient = args["patient"]
    image_based_profiles_subparent_name = args["image_based_profiles_subparent_name"]

### Merge the single-cell (sc) and organoid profiles after aggregation
1. The single-cell profile aggregated to the parent-organoid level is merged with the feature-selected (fs) organoid profile
2. The well-level profiles are merged together
3. The consensus profiles are merged together


In [3]:
# Locate every input and output parquet file, grouped by the merge that uses it.
# All of them sit below the same per-patient profile directory.
patient_profile_dir = (
    f"{profile_base_dir}/data/{patient}/{image_based_profiles_subparent_name}"
)
feature_selected_dir = pathlib.Path(
    f"{patient_profile_dir}/4.feature_selected_profiles"
)
aggregated_dir = pathlib.Path(f"{patient_profile_dir}/5.aggregated_profiles")
merged_dir = pathlib.Path(f"{patient_profile_dir}/6.merged_profiles")

# Inputs are resolved with strict=True, so a missing file raises in this cell
# rather than later when it is read. Outputs are resolved non-strictly because
# they may not exist yet.

# --- 1. parent-organoid-level single-cell aggregate + feature-selected organoid ---
organoid_fs_path = (feature_selected_dir / "organoid_fs.parquet").resolve(strict=True)
sc_agg_well_parent_organoid_path = (
    aggregated_dir / "sc_agg_parent_organoid_level.parquet"
).resolve(strict=True)
# merged output
organoid_agg_well_parent_organoid_path = (
    merged_dir / "sc-organoid_sc_agg_well_parent_organoid_level.parquet"
).resolve()

# --- 2. well-level profiles ---
sc_agg_well_path = (aggregated_dir / "sc_agg_well_level.parquet").resolve(strict=True)
organoid_agg_well_path = (aggregated_dir / "organoid_agg_well_level.parquet").resolve(
    strict=True
)
# merged output
organoid_agg_well_merge_path = (
    merged_dir / "sc-organoid_agg_well_level.parquet"
).resolve()

# --- 3. consensus profiles ---
sc_consensus_path = (aggregated_dir / "sc_consensus.parquet").resolve(strict=True)
organoid_consensus_path = (aggregated_dir / "organoid_consensus.parquet").resolve(
    strict=True
)
# merged output
organoid_consensus_merge_path = (merged_dir / "sc-organoid_consensus.parquet").resolve()

# All three outputs share the same parent directory, so creating it once is enough.
organoid_consensus_merge_path.parent.mkdir(parents=True, exist_ok=True)

In [4]:
# Merge 1: single-cell profile aggregated per parent organoid, joined to the
# feature-selected organoid profile.
organoid_fs = pd.read_parquet(organoid_fs_path)
sc_agg_well_parent_organoid = pd.read_parquet(sc_agg_well_parent_organoid_path)

# Treatment annotation columns that have the same name in both tables.
parent_organoid_treatment_keys = [
    "Metadata_treatment",
    "Metadata_Target",
    "Metadata_Class",
    "Metadata_Therapeutic_Categories",
]
# The organoid identifier is named differently on each side: the single-cell
# table uses Metadata_parent_organoid, the organoid table uses Metadata_object_id.
sc_side_keys = [
    "Metadata_Well",
    "Metadata_parent_organoid",
    *parent_organoid_treatment_keys,
]
organoid_side_keys = [
    "Metadata_Well",
    "Metadata_object_id",
    *parent_organoid_treatment_keys,
]

# No `how` is given, so pandas performs its default inner join.
sc_agg_well_parent_organoid_merge = pd.merge(
    sc_agg_well_parent_organoid,
    organoid_fs,
    left_on=sc_side_keys,
    right_on=organoid_side_keys,
)

sc_agg_well_parent_organoid_merge.to_parquet(
    organoid_agg_well_parent_organoid_path, index=False
)
sc_agg_well_parent_organoid_merge.head()

Unnamed: 0,Metadata_Well,Metadata_parent_organoid,Metadata_treatment,Metadata_Target,Metadata_Class,Metadata_Therapeutic_Categories,Area.Size.Shape_Nuclei_VOLUME,Area.Size.Shape_Nuclei_EXTENT,Area.Size.Shape_Nuclei_EULER.NUMBER,Area.Size.Shape_Nuclei_EQUIVALENT.DIAMETER,...,Texture_Organoid_Mito_Difference.Entropy_256.3,Texture_Organoid_Mito_Difference.Variance_256.3,Texture_Organoid_Mito_Entropy_256.3,Texture_Organoid_Mito_Information.Measure.of.Correlation.1_256.3,Texture_Organoid_Mito_Information.Measure.of.Correlation.2_256.3,Texture_Organoid_Mito_Inverse.Difference.Moment_256.3,Texture_Organoid_Mito_Sum.Average_256.3,Texture_Organoid_Mito_Sum.Entropy_256.3,Texture_Organoid_Mito_Sum.Variance_256.3,Texture_Organoid_Mito_Variance_256.3
0,C10,44,Trametinib,MEK1/2 inhibitor,Small Molecule,Kinase Inhibitor,,,,,...,1.365951,-1.211246,1.604928,-1.691238,1.896986,-1.16738,1.691978,1.503987,2.337016,2.258017
1,C10,77,Trametinib,MEK1/2 inhibitor,Small Molecule,Kinase Inhibitor,,,,,...,-0.045236,0.174906,-0.050069,-1.438629,0.778702,0.182411,-0.010664,-0.046385,0.261849,0.272558
2,C11,24,Staurosporine,Apoptosis induction,Small Molecule,Experimental,,,,,...,0.824277,-0.697454,0.836412,-1.692977,1.572439,-0.69443,1.124395,0.85301,1.496822,1.454236
3,C2,5,Staurosporine,Apoptosis induction,Small Molecule,Experimental,,,,,...,1.554499,-1.618067,1.087642,0.006212,0.619088,-1.721398,0.500176,1.175875,-0.02065,0.093095
4,C2,5,Staurosporine,Apoptosis induction,Small Molecule,Experimental,,,,,...,0.537459,-0.2032,0.344058,-0.344484,0.576041,-0.168408,-0.163134,0.344895,-0.240919,-0.212026


In [5]:
# Merge 2: well-level single-cell aggregate joined to the well-level organoid
# aggregate.
sc_agg_well = pd.read_parquet(sc_agg_well_path)
organoid_agg_well = pd.read_parquet(organoid_agg_well_path)

# Both tables are keyed by well plus the treatment annotation columns.
well_level_keys = [
    "Metadata_Well",
    "Metadata_treatment",
    "Metadata_Target",
    "Metadata_Class",
    "Metadata_Therapeutic_Categories",
]

# No `how` is given, so pandas performs its default inner join.
sc_agg_well_merge = pd.merge(sc_agg_well, organoid_agg_well, on=well_level_keys)

sc_agg_well_merge.to_parquet(organoid_agg_well_merge_path, index=False)
sc_agg_well_merge.head()

Unnamed: 0,Metadata_Well,Metadata_treatment,Metadata_Target,Metadata_Class,Metadata_Therapeutic_Categories,Area.Size.Shape_Nuclei_VOLUME,Area.Size.Shape_Nuclei_EXTENT,Area.Size.Shape_Nuclei_EULER.NUMBER,Area.Size.Shape_Nuclei_EQUIVALENT.DIAMETER,Area.Size.Shape_Nuclei_SURFACE.AREA,...,Texture_Organoid_Mito_Difference.Entropy_256.3,Texture_Organoid_Mito_Difference.Variance_256.3,Texture_Organoid_Mito_Entropy_256.3,Texture_Organoid_Mito_Information.Measure.of.Correlation.1_256.3,Texture_Organoid_Mito_Information.Measure.of.Correlation.2_256.3,Texture_Organoid_Mito_Inverse.Difference.Moment_256.3,Texture_Organoid_Mito_Sum.Average_256.3,Texture_Organoid_Mito_Sum.Entropy_256.3,Texture_Organoid_Mito_Sum.Variance_256.3,Texture_Organoid_Mito_Variance_256.3
0,C10,Trametinib,MEK1/2 inhibitor,Small Molecule,Kinase Inhibitor,,,,,,...,-0.798152,0.726798,-0.706602,1.173773,-0.897471,0.722397,-0.593127,-0.731915,-0.527009,-0.559613
1,C11,Staurosporine,Apoptosis induction,Small Molecule,Experimental,,,,,,...,-0.536968,0.559472,-0.512897,-0.492253,-0.172738,0.558685,-0.417298,-0.520129,-0.297328,-0.282938
2,C2,Staurosporine,Apoptosis induction,Small Molecule,Experimental,,,,,,...,0.776953,-0.674829,0.401309,0.20712,-0.05742,-0.6955,-0.047049,0.459399,-0.244794,-0.151237
3,C3,Onalespib,HSP90 inhibitor,Small Molecule,Investigational,0.064078,-0.282483,-0.347125,0.281758,-0.1486,...,0.055229,-0.020141,-0.116865,0.162751,-0.30507,-0.023754,-0.284274,-0.085431,-0.37715,-0.372186
4,C4,DMSO,Control,Control,Control,,,,,,...,-0.234563,0.272459,-0.323862,0.165533,-0.433194,0.271977,-0.318435,-0.309889,-0.307572,-0.339127


In [6]:
# Merge 3: single-cell consensus profile joined to the organoid consensus profile.
sc_consensus = pd.read_parquet(sc_consensus_path)
organoid_consensus = pd.read_parquet(organoid_consensus_path)

# Consensus tables are keyed by the treatment annotation columns only (no well).
consensus_keys = [
    "Metadata_treatment",
    "Metadata_Target",
    "Metadata_Class",
    "Metadata_Therapeutic_Categories",
]

# No `how` is given, so pandas performs its default inner join.
sc_consensus_merge = pd.merge(sc_consensus, organoid_consensus, on=consensus_keys)

sc_consensus_merge.to_parquet(organoid_consensus_merge_path, index=False)
sc_consensus_merge.head()

Unnamed: 0,Metadata_treatment,Metadata_Target,Metadata_Class,Metadata_Therapeutic_Categories,Area.Size.Shape_Nuclei_VOLUME,Area.Size.Shape_Nuclei_EXTENT,Area.Size.Shape_Nuclei_EULER.NUMBER,Area.Size.Shape_Nuclei_EQUIVALENT.DIAMETER,Area.Size.Shape_Nuclei_SURFACE.AREA,Colocalization_Nuclei_AGP.ER_MEAN.CORRELATION.COEFF,...,Texture_Organoid_Mito_Difference.Entropy_256.3,Texture_Organoid_Mito_Difference.Variance_256.3,Texture_Organoid_Mito_Entropy_256.3,Texture_Organoid_Mito_Information.Measure.of.Correlation.1_256.3,Texture_Organoid_Mito_Information.Measure.of.Correlation.2_256.3,Texture_Organoid_Mito_Inverse.Difference.Moment_256.3,Texture_Organoid_Mito_Sum.Average_256.3,Texture_Organoid_Mito_Sum.Entropy_256.3,Texture_Organoid_Mito_Sum.Variance_256.3,Texture_Organoid_Mito_Variance_256.3
0,Binimetinib,MEK1/2 inhibitor,Small Molecule,Kinase Inhibitor,,,,,,,...,0.107344,-0.014896,-0.011921,-0.410368,-0.039506,-0.006378,-0.324999,0.021132,-0.34529,-0.325798
1,Cabozantinib,receptor tyrosine kinase inhibitor,Small Molecule,Kinase Inhibitor,-1.354901,0.608619,-0.347125,-1.291328,-0.550939,1.482401,...,0.131717,-0.060752,-0.010866,-0.878018,0.388157,-0.063349,-0.078009,0.019869,-0.048137,0.017315
2,Copanlisib,PI3K inhibitor,Small Molecule,Kinase Inhibitor,-0.082168,0.039398,-0.347125,0.175471,-0.166325,0.244708,...,0.294554,-0.221053,0.107023,-0.214056,0.455153,-0.225346,0.024684,0.146524,-0.066725,-0.00394
3,DMSO,Control,Control,Control,-0.084579,0.306109,-0.347125,0.173661,-0.190014,0.192119,...,-0.408411,0.438545,-0.421052,0.169433,-0.41651,0.439834,-0.445022,-0.425364,-0.417596,-0.413548
4,Digoxin,Na+/K+ pump inhibitor,Small Molecule,Cardiac Glycosides,-0.378245,0.289705,-0.347125,-0.062721,-0.255781,-0.925656,...,0.110393,-0.061719,-0.024501,-0.227871,0.244182,-0.06293,-0.325151,0.004068,-0.396754,-0.372238
