This notebook merges the aggregated single-cell (sc) profiles with the corresponding organoid profiles.

In [None]:
import argparse
import os
import pathlib
import sys

import pandas as pd
from pycytominer import aggregate

# Locate the repository root: the current working directory itself, or the
# nearest ancestor of it, that contains a ``.git`` directory.
cwd = pathlib.Path.cwd()

root_dir = next(
    (
        candidate
        for candidate in (cwd, *cwd.parents)
        if (candidate / ".git").is_dir()
    ),
    None,
)
if root_dir is None:
    # Fail with an explicit message rather than the opaque TypeError that
    # ``None / "utils"`` would raise below.
    raise FileNotFoundError(
        f"Could not find a .git directory in {cwd} or any of its parents."
    )

# The helper modules imported below live in <root>/utils, so that directory
# has to be on sys.path before the imports run.
sys.path.append(str(root_dir / "utils"))
from arg_parsing_utils import parse_args
from notebook_init_utils import bandicoot_check, init_notebook

# init_notebook() supplies the root directory used from here on, plus a flag
# that later cells use to choose between CLI arguments and notebook defaults.
root_dir, in_notebook = init_notebook()

# NOTE(review): bandicoot_check presumably picks between the mounted data
# directory and root_dir depending on availability -- confirm in utils.
profile_base_dir = bandicoot_check(
    pathlib.Path(os.path.expanduser("~/mnt/bandicoot/NF1_organoid_data")).resolve(),
    root_dir,
)

In [2]:
# Pick the patient to process: a fixed default when run interactively,
# otherwise the value supplied through the parsed arguments.
if in_notebook:
    patient = "NF0014_T1"
else:
    args = parse_args()
    patient = args["patient"]

### Merge the sc and organoid profiles after aggregation
1. The single-cell profile aggregated to the parent-organoid level is merged with the feature-selected (fs) organoid profile
2. The well level profiles are merged together
3. The consensus profiles are merged together


In [3]:
# All input and output locations, grouped by the merge they belong to.
# Inputs are resolved with strict=True so a missing file is reported here,
# before any data is read; outputs are resolved non-strictly because they
# are only created further down.

# ---------------------------------------------------------------------
# Merge 1: single-cell profile aggregated per parent organoid
#          + feature-selected (fs) organoid profile
# ---------------------------------------------------------------------
organoid_fs_path = pathlib.Path(
    f"{profile_base_dir}/data/{patient}/image_based_profiles/4.feature_selected_profiles/organoid_fs.parquet"
)
organoid_fs_path = organoid_fs_path.resolve(strict=True)

sc_agg_well_parent_organoid_path = pathlib.Path(
    f"{profile_base_dir}/data/{patient}/image_based_profiles/5.aggregated_profiles/sc_agg_parent_organoid_level.parquet"
)
sc_agg_well_parent_organoid_path = sc_agg_well_parent_organoid_path.resolve(
    strict=True
)

# merged output
organoid_agg_well_parent_organoid_path = pathlib.Path(
    f"{profile_base_dir}/data/{patient}/image_based_profiles/6.merged_profiles/sc-organoid_sc_agg_well_parent_organoid_level.parquet"
)
organoid_agg_well_parent_organoid_path = (
    organoid_agg_well_parent_organoid_path.resolve(strict=False)
)

# ---------------------------------------------------------------------
# Merge 2: well-level single-cell profile + well-level organoid profile
# ---------------------------------------------------------------------
sc_agg_well_path = pathlib.Path(
    f"{profile_base_dir}/data/{patient}/image_based_profiles/5.aggregated_profiles/sc_agg_well_level.parquet"
)
sc_agg_well_path = sc_agg_well_path.resolve(strict=True)

organoid_agg_well_path = pathlib.Path(
    f"{profile_base_dir}/data/{patient}/image_based_profiles/5.aggregated_profiles/organoid_agg_well_level.parquet"
)
organoid_agg_well_path = organoid_agg_well_path.resolve(strict=True)

# merged output
organoid_agg_well_merge_path = pathlib.Path(
    f"{profile_base_dir}/data/{patient}/image_based_profiles/6.merged_profiles/sc-organoid_agg_well_level.parquet"
)
organoid_agg_well_merge_path = organoid_agg_well_merge_path.resolve(strict=False)

# ---------------------------------------------------------------------
# Merge 3: single-cell consensus profile + organoid consensus profile
# ---------------------------------------------------------------------
sc_consensus_path = pathlib.Path(
    f"{profile_base_dir}/data/{patient}/image_based_profiles/5.aggregated_profiles/sc_consensus.parquet"
)
sc_consensus_path = sc_consensus_path.resolve(strict=True)

organoid_consensus_path = pathlib.Path(
    f"{profile_base_dir}/data/{patient}/image_based_profiles/5.aggregated_profiles/organoid_consensus.parquet"
)
organoid_consensus_path = organoid_consensus_path.resolve(strict=True)

# merged output
organoid_consensus_merge_path = pathlib.Path(
    f"{profile_base_dir}/data/{patient}/image_based_profiles/6.merged_profiles/sc-organoid_consensus.parquet"
)
organoid_consensus_merge_path = organoid_consensus_merge_path.resolve(strict=False)

# The three outputs share one parent directory (6.merged_profiles), so
# creating it once is enough.
organoid_consensus_merge_path.parent.mkdir(parents=True, exist_ok=True)

In [4]:
# Merge 1: join the single-cell profile aggregated per parent organoid with
# the feature-selected organoid profile, then save and preview the result.
organoid_fs = pd.read_parquet(organoid_fs_path)
sc_agg_well_parent_organoid = pd.read_parquet(sc_agg_well_parent_organoid_path)

# Treatment annotation columns that carry the same name in both tables.
treatment_metadata_cols = [
    "Metadata_treatment",
    "Metadata_Target",
    "Metadata_Class",
    "Metadata_Therapeutic_Categories",
]
# The organoid identifier is named differently on each side: the single-cell
# table uses Metadata_parent_organoid, the organoid table Metadata_object_id.
sc_side_keys = ["Metadata_Well", "Metadata_parent_organoid", *treatment_metadata_cols]
organoid_side_keys = ["Metadata_Well", "Metadata_object_id", *treatment_metadata_cols]

# No `how` is given, so pandas applies its default join type.
sc_agg_well_parent_organoid_merge = pd.merge(
    sc_agg_well_parent_organoid,
    organoid_fs,
    left_on=sc_side_keys,
    right_on=organoid_side_keys,
)

sc_agg_well_parent_organoid_merge.to_parquet(
    organoid_agg_well_parent_organoid_path, index=False
)
sc_agg_well_parent_organoid_merge.head()

Unnamed: 0,Metadata_Well,Metadata_parent_organoid,Metadata_treatment,Metadata_Target,Metadata_Class,Metadata_Therapeutic_Categories,Area.Size.Shape_Nuclei_VOLUME,Area.Size.Shape_Nuclei_EXTENT,Area.Size.Shape_Nuclei_EULER.NUMBER,Area.Size.Shape_Nuclei_EQUIVALENT.DIAMETER,...,Metadata_object_id,Metadata_unit,Metadata_dose,Metadata_image_set,Metadata_single_cell_count,Metadata_patient,Metadata_tumor,Area.Size.Shape_Organoid_CENTER.X,Area.Size.Shape_Organoid_CENTER.Y,Area.Size.Shape_Organoid_CENTER.Z
0,C10,20,Trametinib,MEK1/2 inhibitor,Small Molecule,Kinase Inhibitor,,,,,...,20,uM,1,C10-2,2.0,NF0014,T1,842.243713,438.831757,12.009563
1,C10,44,Trametinib,MEK1/2 inhibitor,Small Molecule,Kinase Inhibitor,,,,,...,44,uM,1,C10-1,4.0,NF0014,T1,699.257141,918.959412,22.504555
2,C11,24,Staurosporine,Apoptosis induction,Small Molecule,Experimental,,,,,...,24,nM,10,C11-2,3.0,NF0014,T1,712.351074,520.134216,11.981676
3,C11,30,Staurosporine,Apoptosis induction,Small Molecule,Experimental,,,,,...,30,nM,10,C11-1,5.0,NF0014,T1,407.227875,606.948059,13.108545
4,C2,6,Staurosporine,Apoptosis induction,Small Molecule,Experimental,,,,,...,6,nM,10,C2-2,5.0,NF0014,T1,965.528259,758.865051,2.804999


In [5]:
# Merge 2: join the well-level single-cell and organoid profiles on the well
# and its treatment annotations, then save and preview the result.
sc_agg_well = pd.read_parquet(sc_agg_well_path)
organoid_agg_well = pd.read_parquet(organoid_agg_well_path)

# Columns identifying a well and its treatment; present in both tables.
well_level_keys = [
    "Metadata_Well",
    "Metadata_treatment",
    "Metadata_Target",
    "Metadata_Class",
    "Metadata_Therapeutic_Categories",
]

# No `how` is given, so pandas applies its default join type.
sc_agg_well_merge = pd.merge(sc_agg_well, organoid_agg_well, on=well_level_keys)

sc_agg_well_merge.to_parquet(organoid_agg_well_merge_path, index=False)
sc_agg_well_merge.head()

Unnamed: 0,Metadata_Well,Metadata_treatment,Metadata_Target,Metadata_Class,Metadata_Therapeutic_Categories,Area.Size.Shape_Nuclei_VOLUME,Area.Size.Shape_Nuclei_EXTENT,Area.Size.Shape_Nuclei_EULER.NUMBER,Area.Size.Shape_Nuclei_EQUIVALENT.DIAMETER,Area.Size.Shape_Nuclei_SURFACE.AREA,...,Texture_Cytoplasm_Mito_Difference.Entropy_256.3,Texture_Cytoplasm_Mito_Difference.Variance_256.3,Texture_Cytoplasm_Mito_Entropy_256.3,Texture_Cytoplasm_Mito_Information.Measure.of.Correlation.1_256.3,Texture_Cytoplasm_Mito_Information.Measure.of.Correlation.2_256.3,Texture_Cytoplasm_Mito_Inverse.Difference.Moment_256.3,Texture_Cytoplasm_Mito_Sum.Average_256.3,Texture_Cytoplasm_Mito_Sum.Entropy_256.3,Texture_Cytoplasm_Mito_Sum.Variance_256.3,Texture_Cytoplasm_Mito_Variance_256.3
0,C10,Trametinib,MEK1/2 inhibitor,Small Molecule,Kinase Inhibitor,,,,,,...,,,,,,,,,,
1,C11,Staurosporine,Apoptosis induction,Small Molecule,Experimental,,,,,,...,,,,,,,,,,
2,C2,Staurosporine,Apoptosis induction,Small Molecule,Experimental,,,,,,...,,,,,,,,,,
3,C3,Onalespib,HSP90 inhibitor,Small Molecule,Investigational,-1.45226,0.982458,-0.421967,-2.111804,-0.454122,...,-0.266931,0.209412,-0.211973,-0.076199,-0.533754,0.197449,-0.415571,-0.241595,-0.679991,-0.724884
4,C4,DMSO,Control,Control,Control,,,,,,...,,,,,,,,,,


In [6]:
# Merge 3: join the single-cell and organoid consensus profiles on the
# treatment annotations, then save and preview the result.
sc_consensus = pd.read_parquet(sc_consensus_path)
organoid_consensus = pd.read_parquet(organoid_consensus_path)

# Consensus profiles carry no well column; they are keyed by treatment
# annotations only.
consensus_keys = [
    "Metadata_treatment",
    "Metadata_Target",
    "Metadata_Class",
    "Metadata_Therapeutic_Categories",
]

# No `how` is given, so pandas applies its default join type.
sc_consensus_merge = pd.merge(sc_consensus, organoid_consensus, on=consensus_keys)

sc_consensus_merge.to_parquet(organoid_consensus_merge_path, index=False)
sc_consensus_merge.head()

Unnamed: 0,Metadata_treatment,Metadata_Target,Metadata_Class,Metadata_Therapeutic_Categories,Area.Size.Shape_Nuclei_VOLUME,Area.Size.Shape_Nuclei_EXTENT,Area.Size.Shape_Nuclei_EULER.NUMBER,Area.Size.Shape_Nuclei_EQUIVALENT.DIAMETER,Area.Size.Shape_Nuclei_SURFACE.AREA,Colocalization_Nuclei_AGP.ER_MEAN.CORRELATION.COEFF,...,Texture_Cytoplasm_Mito_Difference.Entropy_256.3,Texture_Cytoplasm_Mito_Difference.Variance_256.3,Texture_Cytoplasm_Mito_Entropy_256.3,Texture_Cytoplasm_Mito_Information.Measure.of.Correlation.1_256.3,Texture_Cytoplasm_Mito_Information.Measure.of.Correlation.2_256.3,Texture_Cytoplasm_Mito_Inverse.Difference.Moment_256.3,Texture_Cytoplasm_Mito_Sum.Average_256.3,Texture_Cytoplasm_Mito_Sum.Entropy_256.3,Texture_Cytoplasm_Mito_Sum.Variance_256.3,Texture_Cytoplasm_Mito_Variance_256.3
0,Binimetinib,MEK1/2 inhibitor,Small Molecule,Kinase Inhibitor,,,,,,,...,,,,,,,,,,
1,Cabozantinib,receptor tyrosine kinase inhibitor,Small Molecule,Kinase Inhibitor,-0.507646,0.591561,-0.421967,-0.330198,-0.345086,0.175563,...,0.001961,0.02474,-0.006267,-0.655633,0.441171,0.026547,-0.083584,-0.00432,-0.218478,-0.219965
2,Copanlisib,PI3K inhibitor,Small Molecule,Kinase Inhibitor,-0.608475,0.129085,-0.421967,-0.454643,-0.316723,0.598941,...,-0.179814,0.1495,-0.15651,0.430072,-0.394568,0.142018,-0.29177,-0.165569,-0.50508,-0.523002
3,DMSO,Control,Control,Control,-0.086,0.367764,-0.421967,0.123959,-0.270009,0.067529,...,-0.164783,0.133793,-0.15325,0.017119,-0.28097,0.129386,-0.21306,-0.159389,-0.282345,-0.297657
4,Digoxin,Na+/K+ pump inhibitor,Small Molecule,Cardiac Glycosides,-0.242325,0.522359,-0.421967,-0.033767,-0.315468,-0.169515,...,-0.157031,0.129289,-0.140101,-0.057806,-0.224133,0.122743,-0.16482,-0.147147,-0.273195,-0.251743
