This notebook merges the aggregated single-cell (sc) and organoid profiles.

In [1]:
import argparse
import os
import pathlib
import sys

import pandas as pd
from arg_parsing_utils import parse_args
from notebook_init_utils import bandicoot_check, init_notebook
from pycytominer import aggregate

# init_notebook() hands back the root directory and a flag that the next cell
# uses to choose between CLI arguments and hard-coded defaults.
root_dir, in_notebook = init_notebook()

# Expanded, absolute location of the NF1 organoid data under the user's
# bandicoot mount point.
bandicoot_mount_path = pathlib.Path(
    os.path.expanduser("~/mnt/bandicoot/NF1_organoid_data")
).resolve()

# bandicoot_check() receives both candidates and returns the base directory
# used for all profile paths below (presumably the mount when it is available,
# otherwise root_dir -- confirm against notebook_init_utils).
profile_base_dir = bandicoot_check(bandicoot_mount_path, root_dir)

In [None]:
# Interactive runs fall back to fixed example values; otherwise both settings
# are taken from the mapping returned by parse_args().
if in_notebook:
    patient = "NF0014_T1"
    image_based_profiles_subparent_name = "image_based_profiles"
else:
    args = parse_args()
    patient = args["patient"]
    image_based_profiles_subparent_name = args["image_based_profiles_subparent_name"]

### Merge the sc and organoid profiles after aggregation
1. The single-cell profile aggregated to the parent organoid level is merged with the feature-selected (fs) organoid profile
2. The well level profiles are merged together
3. The consensus profiles are merged together


In [3]:
# All profiles of one patient share a common directory; the numbered
# sub-directories hold the successive processing stages.
patient_profiles_dir = pathlib.Path(
    f"{profile_base_dir}/data/{patient}/{image_based_profiles_subparent_name}"
)
feature_selected_dir = patient_profiles_dir / "4.feature_selected_profiles"
aggregated_dir = patient_profiles_dir / "5.aggregated_profiles"
merged_dir = patient_profiles_dir / "6.merged_profiles"

# Input paths are resolved with strict=True, so a missing input file raises
# FileNotFoundError in this cell. Output paths are resolved non-strictly
# because the files do not have to exist yet.

# ---------------------------------------------------------------------------
# 1. Parent-organoid level: single-cell profile aggregated per parent organoid
#    + feature-selected organoid profile
organoid_fs_path = (feature_selected_dir / "organoid_fs.parquet").resolve(
    strict=True
)
sc_agg_well_parent_organoid_path = (
    aggregated_dir / "sc_agg_parent_organoid_level.parquet"
).resolve(strict=True)
# merged output
organoid_agg_well_parent_organoid_path = (
    merged_dir / "sc-organoid_sc_agg_well_parent_organoid_level.parquet"
).resolve()

# ---------------------------------------------------------------------------
# 2. Well level: single-cell + organoid well-aggregated profiles
sc_agg_well_path = (aggregated_dir / "sc_agg_well_level.parquet").resolve(
    strict=True
)
organoid_agg_well_path = (
    aggregated_dir / "organoid_agg_well_level.parquet"
).resolve(strict=True)
# merged output
organoid_agg_well_merge_path = (
    merged_dir / "sc-organoid_agg_well_level.parquet"
).resolve()

# ---------------------------------------------------------------------------
# 3. Consensus level: single-cell + organoid consensus profiles
sc_consensus_path = (aggregated_dir / "sc_consensus.parquet").resolve(strict=True)
organoid_consensus_path = (aggregated_dir / "organoid_consensus.parquet").resolve(
    strict=True
)
# merged output
organoid_consensus_merge_path = (
    merged_dir / "sc-organoid_consensus.parquet"
).resolve()

# The three outputs live in the same directory, so creating the parent of one
# of them is enough.
organoid_consensus_merge_path.parent.mkdir(parents=True, exist_ok=True)

In [4]:
# Parent-organoid level merge.
organoid_fs = pd.read_parquet(organoid_fs_path)
sc_agg_well_parent_organoid = pd.read_parquet(sc_agg_well_parent_organoid_path)

# Treatment annotation columns that carry the same name in both tables.
treatment_keys = [
    "Metadata_treatment",
    "Metadata_Target",
    "Metadata_Class",
    "Metadata_Therapeutic_Categories",
]

# The organoid identifier is named differently on each side:
# "Metadata_parent_organoid" (single-cell aggregate) is matched against
# "Metadata_object_id" (organoid profile). No `how` is passed, so pandas'
# default join type applies.
sc_agg_well_parent_organoid_merge = pd.merge(
    sc_agg_well_parent_organoid,
    organoid_fs,
    left_on=["Metadata_Well", "Metadata_parent_organoid", *treatment_keys],
    right_on=["Metadata_Well", "Metadata_object_id", *treatment_keys],
)

sc_agg_well_parent_organoid_merge.to_parquet(
    organoid_agg_well_parent_organoid_path, index=False
)
# preview of the merged table
sc_agg_well_parent_organoid_merge.head()

Unnamed: 0,Metadata_Well,Metadata_parent_organoid,Metadata_treatment,Metadata_Target,Metadata_Class,Metadata_Therapeutic_Categories,Area.Size.Shape_Nuclei_VOLUME,Area.Size.Shape_Nuclei_EXTENT,Area.Size.Shape_Nuclei_EULER.NUMBER,Area.Size.Shape_Nuclei_EQUIVALENT.DIAMETER,...,Texture_Organoid_Mito_Difference.Entropy_256.3,Texture_Organoid_Mito_Difference.Variance_256.3,Texture_Organoid_Mito_Entropy_256.3,Texture_Organoid_Mito_Information.Measure.of.Correlation.1_256.3,Texture_Organoid_Mito_Information.Measure.of.Correlation.2_256.3,Texture_Organoid_Mito_Inverse.Difference.Moment_256.3,Texture_Organoid_Mito_Sum.Average_256.3,Texture_Organoid_Mito_Sum.Entropy_256.3,Texture_Organoid_Mito_Sum.Variance_256.3,Texture_Organoid_Mito_Variance_256.3
0,C10,20,Trametinib,MEK1/2 inhibitor,Small Molecule,Kinase Inhibitor,,,,,...,-2.82737,2.712306,-2.76068,-0.768975,-3.965833,2.590806,-1.478506,-2.90744,-0.806448,-0.798866
1,C10,44,Trametinib,MEK1/2 inhibitor,Small Molecule,Kinase Inhibitor,,,,,...,-1.574285,1.673293,-1.383376,-0.982086,-0.455049,1.648283,-0.417419,-1.551527,0.372641,0.351645
2,C11,24,Staurosporine,Apoptosis induction,Small Molecule,Experimental,,,,,...,-2.114656,2.067514,-2.039884,-1.277562,-1.414007,1.985814,-0.788816,-2.131792,-0.13421,-0.146381
3,C11,30,Staurosporine,Apoptosis induction,Small Molecule,Experimental,,,,,...,-1.917369,1.894022,-1.842526,-0.965931,-1.2405,1.815657,-0.650556,-1.951354,0.001235,-0.00389
4,C2,6,Staurosporine,Apoptosis induction,Small Molecule,Experimental,,,,,...,-1.294653,1.531299,-1.220701,-0.523885,-0.580973,1.60654,-1.21494,-1.33961,-0.8632,-0.869579


In [5]:
# Well level merge.
sc_agg_well = pd.read_parquet(sc_agg_well_path)
organoid_agg_well = pd.read_parquet(organoid_agg_well_path)

# Both tables are joined on the well plus its treatment annotation columns.
# No `how` is passed, so pandas' default join type applies.
well_merge_keys = [
    "Metadata_Well",
    "Metadata_treatment",
    "Metadata_Target",
    "Metadata_Class",
    "Metadata_Therapeutic_Categories",
]
sc_agg_well_merge = pd.merge(sc_agg_well, organoid_agg_well, on=well_merge_keys)

sc_agg_well_merge.to_parquet(organoid_agg_well_merge_path, index=False)
# preview of the merged table
sc_agg_well_merge.head()

Unnamed: 0,Metadata_Well,Metadata_treatment,Metadata_Target,Metadata_Class,Metadata_Therapeutic_Categories,Area.Size.Shape_Nuclei_VOLUME,Area.Size.Shape_Nuclei_EXTENT,Area.Size.Shape_Nuclei_EULER.NUMBER,Area.Size.Shape_Nuclei_EQUIVALENT.DIAMETER,Area.Size.Shape_Nuclei_SURFACE.AREA,...,Texture_Organoid_Mito_Difference.Entropy_256.3,Texture_Organoid_Mito_Difference.Variance_256.3,Texture_Organoid_Mito_Entropy_256.3,Texture_Organoid_Mito_Information.Measure.of.Correlation.1_256.3,Texture_Organoid_Mito_Information.Measure.of.Correlation.2_256.3,Texture_Organoid_Mito_Inverse.Difference.Moment_256.3,Texture_Organoid_Mito_Sum.Average_256.3,Texture_Organoid_Mito_Sum.Entropy_256.3,Texture_Organoid_Mito_Sum.Variance_256.3,Texture_Organoid_Mito_Variance_256.3
0,C10,Trametinib,MEK1/2 inhibitor,Small Molecule,Kinase Inhibitor,,,,,,...,-2.200828,2.1928,-2.072028,-0.875531,-2.210441,2.119545,-0.947962,-2.229484,-0.216904,-0.223611
1,C11,Staurosporine,Apoptosis induction,Small Molecule,Experimental,,,,,,...,-2.016012,1.980768,-1.941205,-1.121747,-1.327253,1.900736,-0.719686,-2.041573,-0.066488,-0.075136
2,C2,Staurosporine,Apoptosis induction,Small Molecule,Experimental,,,,,,...,-1.294653,1.531299,-1.220701,-0.523885,-0.580973,1.60654,-1.21494,-1.33961,-0.8632,-0.869579
3,C3,Onalespib,HSP90 inhibitor,Small Molecule,Investigational,-1.445965,1.067737,-0.421967,-2.089193,-0.437222,...,0.356584,-0.423521,0.35367,0.180659,-0.344154,-0.874914,0.388202,0.414876,-0.360947,-0.325891
4,C4,DMSO,Control,Control,Control,,,,,,...,1.080094,-1.139295,1.124611,0.693211,0.390285,-0.840632,-0.735402,1.101735,-1.033262,-1.030406


In [6]:
# Consensus level merge.
sc_consensus = pd.read_parquet(sc_consensus_path)
organoid_consensus = pd.read_parquet(organoid_consensus_path)

# There is no well column in the join keys at this level: the two tables are
# matched on the treatment annotation columns only. No `how` is passed, so
# pandas' default join type applies.
consensus_merge_keys = [
    "Metadata_treatment",
    "Metadata_Target",
    "Metadata_Class",
    "Metadata_Therapeutic_Categories",
]
sc_consensus_merge = pd.merge(
    sc_consensus, organoid_consensus, on=consensus_merge_keys
)

sc_consensus_merge.to_parquet(organoid_consensus_merge_path, index=False)
# preview of the merged table
sc_consensus_merge.head()

Unnamed: 0,Metadata_treatment,Metadata_Target,Metadata_Class,Metadata_Therapeutic_Categories,Area.Size.Shape_Nuclei_VOLUME,Area.Size.Shape_Nuclei_EXTENT,Area.Size.Shape_Nuclei_EULER.NUMBER,Area.Size.Shape_Nuclei_EQUIVALENT.DIAMETER,Area.Size.Shape_Nuclei_SURFACE.AREA,Colocalization_Nuclei_AGP.ER_MEAN.CORRELATION.COEFF,...,Texture_Organoid_Mito_Difference.Entropy_256.3,Texture_Organoid_Mito_Difference.Variance_256.3,Texture_Organoid_Mito_Entropy_256.3,Texture_Organoid_Mito_Information.Measure.of.Correlation.1_256.3,Texture_Organoid_Mito_Information.Measure.of.Correlation.2_256.3,Texture_Organoid_Mito_Inverse.Difference.Moment_256.3,Texture_Organoid_Mito_Sum.Average_256.3,Texture_Organoid_Mito_Sum.Entropy_256.3,Texture_Organoid_Mito_Sum.Variance_256.3,Texture_Organoid_Mito_Variance_256.3
0,Binimetinib,MEK1/2 inhibitor,Small Molecule,Kinase Inhibitor,,,,,,,...,-0.950405,1.237115,-0.916919,-0.536033,-0.509656,1.211934,-0.713008,-0.966048,-0.334656,-0.341451
1,Cabozantinib,receptor tyrosine kinase inhibitor,Small Molecule,Kinase Inhibitor,-0.327364,0.529431,-0.421967,-0.127189,-0.332779,-0.050066,...,-1.183392,1.307441,-1.107695,-0.330781,-0.135412,1.274671,-0.462022,-1.115253,-0.121476,-0.133688
2,Copanlisib,PI3K inhibitor,Small Molecule,Kinase Inhibitor,-0.684585,0.500752,-0.421967,-0.554352,-0.357909,0.474626,...,-0.878585,0.888504,-1.097366,-0.06493,-0.428256,0.820093,-0.660566,-1.056182,-0.35308,-0.306369
3,DMSO,Control,Control,Control,-0.051973,0.286682,-0.421967,0.156901,-0.268933,0.17627,...,-0.19356,0.252646,-0.401586,-0.332516,0.170799,0.179581,-0.215574,-0.313514,-0.446058,-0.429369
4,Digoxin,Na+/K+ pump inhibitor,Small Molecule,Cardiac Glycosides,-0.468722,0.476059,-0.421967,-0.284002,-0.32216,-0.017971,...,-1.110601,1.091358,-0.894211,-0.555353,-0.090244,1.278373,-1.177798,-1.048933,-0.997766,-0.993813
