This notebook merges the aggregated single-cell and organoid image-based profiles for a single patient.

In [None]:
import argparse
import pathlib
import sys

import pandas as pd
from pycytominer import aggregate

# Locate the repository root: the current working directory itself, or its
# nearest ancestor, that contains a ".git" directory.
cwd = pathlib.Path.cwd()
root_dir = next(
    (
        candidate
        for candidate in (cwd, *cwd.parents)
        if (candidate / ".git").is_dir()
    ),
    None,
)
if root_dir is None:
    # Fail with a clear message instead of the TypeError that
    # `None / "utils"` would otherwise raise on the next statement.
    raise FileNotFoundError(f"No git root directory found at or above {cwd}")

# Make the project's utility modules importable before importing from them.
sys.path.append(str(root_dir / "utils"))
from notebook_init_utils import bandicoot_check, init_notebook
from segmentation_init_utils import parse_segmentation_args

# init_notebook() returns the root directory used from here on (replacing the
# value found above) and a flag that later cells use to choose between
# notebook defaults and command-line arguments.
root_dir, in_notebook = init_notebook()

# NOTE(review): the mount point below is a machine-specific absolute path.
# bandicoot_check presumably falls back to root_dir when the mount is not
# available — confirm in notebook_init_utils.
profile_base_dir = bandicoot_check(
    pathlib.Path("/home/lippincm/mnt/bandicoot").resolve(), root_dir
)

In [None]:
# Interactive runs use a fixed example patient; script runs take the patient
# from the parsed command-line arguments.
if in_notebook:
    patient = "SARCO361"
else:
    args = parse_segmentation_args()
    patient = args["patient"]

### Merge the sc and organoid profiles after aggregation
1. The single-cell profile aggregated to the parent-organoid level is merged with the feature-selected (fs) organoid profile
2. The well level profiles are merged together
3. The consensus profiles are merged together


In [None]:
# Group the import paths by the type of aggregation.
# Every input and output lives under this patient's image-based profile
# directory, so build that prefix once and derive each stage directory from it.
image_profiles_dir = pathlib.Path(
    f"{profile_base_dir}/data/{patient}/image_based_profiles"
)
feature_selected_dir = image_profiles_dir / "4.feature_selected_profiles"
aggregated_dir = image_profiles_dir / "5.aggregated_profiles"
merged_dir = image_profiles_dir / "6.merged_profiles"

# Input paths are resolved with strict=True so a missing file raises here,
# before any data is read. Output paths are resolved non-strictly because the
# files do not exist yet.

#######################################################################
# 1. The single-cell parent organoid aggregated profile is merged with the fs organoid profile
organoid_fs_path = (feature_selected_dir / "organoid_fs.parquet").resolve(
    strict=True
)
sc_agg_well_parent_organoid_path = (
    aggregated_dir / "sc_agg_parent_organoid_level.parquet"
).resolve(strict=True)

# output merged path
organoid_agg_well_parent_organoid_path = (
    merged_dir / "sc-organoid_sc_agg_well_parent_organoid_level.parquet"
).resolve()

########################################################################

# 2. The well level profiles are merged together
sc_agg_well_path = (aggregated_dir / "sc_agg_well_level.parquet").resolve(
    strict=True
)

organoid_agg_well_path = (
    aggregated_dir / "organoid_agg_well_level.parquet"
).resolve(strict=True)

# output merged path
organoid_agg_well_merge_path = (
    merged_dir / "sc-organoid_agg_well_level.parquet"
).resolve()

###################################################################################

# 3. The consensus profiles are merged together

sc_consensus_path = (aggregated_dir / "sc_consensus.parquet").resolve(strict=True)

organoid_consensus_path = (aggregated_dir / "organoid_consensus.parquet").resolve(
    strict=True
)

# output merged path
organoid_consensus_merge_path = (
    merged_dir / "sc-organoid_consensus.parquet"
).resolve()

# All three outputs share the same parent directory, so creating it once
# covers every merged file written below.
organoid_consensus_merge_path.parent.mkdir(parents=True, exist_ok=True)

###############################################################################

In [None]:
# Load the feature-selected organoid profiles and the single-cell profiles
# aggregated to the parent-organoid level.
organoid_fs = pd.read_parquet(organoid_fs_path)
sc_agg_well_parent_organoid = pd.read_parquet(sc_agg_well_parent_organoid_path)

# Join keys are paired by position: the single-cell frame's
# "Metadata_parent_organoid" is matched against the organoid frame's
# "Metadata_object_id"; every other key has the same name on both sides.
parent_organoid_left_keys = [
    "Metadata_Well",
    "Metadata_parent_organoid",
    "Metadata_treatment",
    "Metadata_Target",
    "Metadata_Class",
    "Metadata_Therapeutic Categories",
]
parent_organoid_right_keys = [
    "Metadata_Well",
    "Metadata_object_id",
    "Metadata_treatment",
    "Metadata_Target",
    "Metadata_Class",
    "Metadata_Therapeutic Categories",
]

# Default (inner) join: only rows with matching keys in both frames are kept.
sc_agg_well_parent_organoid_merge = pd.merge(
    sc_agg_well_parent_organoid,
    organoid_fs,
    left_on=parent_organoid_left_keys,
    right_on=parent_organoid_right_keys,
)

# Persist the merged profile, then preview it.
sc_agg_well_parent_organoid_merge.to_parquet(
    organoid_agg_well_parent_organoid_path, index=False
)
sc_agg_well_parent_organoid_merge.head()

Unnamed: 0,Well,parent_organoid,treatment,Target,Class,Therapeutic Categories,Colocalization_Nuclei_AGP.ER_MAX.CORRELATION.COEFF,Colocalization_Nuclei_AGP.ER_MAX.OVERLAP.COEFF,Colocalization_Nuclei_AGP.ER_MAX.K1,Colocalization_Nuclei_AGP.ER_MIN.K2,...,Texture_Organoid_DNA_Sum.Variance_256.3,Texture_Organoid_ER_Contrast_256.3,Texture_Organoid_ER_Information.Measure.of.Correlation.2_256.3,Texture_Organoid_ER_Sum.Average_256.3,Texture_Organoid_ER_Variance_256.3,Texture_Organoid_Mito_Contrast_256.3,Texture_Organoid_Mito_Correlation_256.3,Texture_Organoid_Mito_Entropy_256.3,Texture_Organoid_Mito_Sum.Average_256.3,Texture_Organoid_Mito_Variance_256.3
0,C11,20,Staurosporine,Apoptosis induction,Small Molecule,Experimental,1.45929,-0.188,0.112862,-0.748131,...,0.198379,-0.488562,1.112722,0.944171,0.666372,-0.533178,1.208587,0.531436,0.722882,0.700276
1,C2,21,Staurosporine,Apoptosis induction,Small Molecule,Experimental,0.497327,-0.1389,-0.201379,0.469727,...,-0.874566,-0.803983,-0.658658,-0.856086,-0.745645,-0.580098,-1.08858,-0.148757,-0.726909,-0.829121
2,C3,11,Onalespib,HSP90 inhibitor,Small Molecule,Investigational,0.388381,-0.153897,-0.303701,0.3261,...,-0.651099,1.091993,-0.424172,0.023342,-0.079483,0.016535,-0.522679,-0.01555,-0.304982,-0.420016
3,C3,28,Onalespib,HSP90 inhibitor,Small Molecule,Investigational,0.534654,-0.11321,-0.199108,0.335545,...,-0.623361,-0.865889,-0.149747,-0.803374,-0.75929,0.72867,-0.677638,0.602457,0.295029,-0.079904
4,C4,8,DMSO,Control,Control,Control,0.894751,-0.146389,0.302144,-0.23085,...,-0.394264,0.546128,-0.990453,-0.684949,-0.007817,-0.256007,0.09446,-1.009237,-0.845391,-0.469627


In [None]:
# Load the well-level aggregated profiles for single cells and organoids.
sc_agg_well = pd.read_parquet(sc_agg_well_path)
organoid_agg_well = pd.read_parquet(organoid_agg_well_path)

# Metadata columns that identify a well in both frames.
well_level_keys = [
    "Metadata_Well",
    "Metadata_treatment",
    "Metadata_Target",
    "Metadata_Class",
    "Metadata_Therapeutic Categories",
]

# Default (inner) join: only wells present in both frames are kept.
sc_agg_well_merge = pd.merge(sc_agg_well, organoid_agg_well, on=well_level_keys)

# Persist the merged profile, then preview it.
sc_agg_well_merge.to_parquet(organoid_agg_well_merge_path, index=False)
sc_agg_well_merge.head()

Unnamed: 0,Well,treatment,Target,Class,Therapeutic Categories,Colocalization_Nuclei_AGP.ER_MAX.CORRELATION.COEFF,Colocalization_Nuclei_AGP.ER_MAX.OVERLAP.COEFF,Colocalization_Nuclei_AGP.ER_MAX.K1,Colocalization_Nuclei_AGP.ER_MIN.K2,Colocalization_Nuclei_AGP.ER_MEDIAN.MANDERS.COEFF.COSTES.M1,...,Texture_Organoid_DNA_Sum.Variance_256.3,Texture_Organoid_ER_Contrast_256.3,Texture_Organoid_ER_Information.Measure.of.Correlation.2_256.3,Texture_Organoid_ER_Sum.Average_256.3,Texture_Organoid_ER_Variance_256.3,Texture_Organoid_Mito_Contrast_256.3,Texture_Organoid_Mito_Correlation_256.3,Texture_Organoid_Mito_Entropy_256.3,Texture_Organoid_Mito_Sum.Average_256.3,Texture_Organoid_Mito_Variance_256.3
0,C10,Trametinib,MEK1/2 inhibitor,Small Molecule,Kinase Inhibitor,-0.052205,-0.163579,-0.120095,-0.026526,0.009154,...,-0.895035,0.474909,-0.76981,-0.476254,-0.388394,0.617448,-1.096899,-0.61864,-0.543824,-0.304456
1,C11,Staurosporine,Apoptosis induction,Small Molecule,Experimental,1.45929,-0.188,0.112862,-0.748131,0.751168,...,-0.250226,-0.322065,0.517574,-0.003127,0.007014,-0.021853,-0.222301,0.531436,0.437657,0.308168
2,C2,Staurosporine,Apoptosis induction,Small Molecule,Experimental,0.497327,-0.138156,-0.172283,0.469085,0.751336,...,-0.75624,-0.50978,-0.36301,-0.72241,-0.69043,0.035457,-0.88318,-0.473739,-0.470381,-0.592172
3,C3,Onalespib,HSP90 inhibitor,Small Molecule,Investigational,0.392228,-0.12508,-0.138325,0.583364,0.30062,...,-0.917505,-0.408504,-0.955972,-0.856135,-0.622091,0.016535,-0.677638,-0.966458,-0.705885,-0.420016
4,C4,DMSO,Control,Control,Control,0.790491,-0.144793,-0.045662,0.117338,0.751039,...,-0.469886,0.269059,-0.296264,-0.616152,-0.122494,-0.500012,0.279797,-0.385757,-0.707071,-0.495633


In [None]:
# Load the consensus profiles for single cells and organoids.
sc_consensus = pd.read_parquet(sc_consensus_path)
organoid_consensus = pd.read_parquet(organoid_consensus_path)

# Consensus profiles carry no well column, so the join uses only the
# treatment-describing metadata columns.
consensus_keys = [
    "Metadata_treatment",
    "Metadata_Target",
    "Metadata_Class",
    "Metadata_Therapeutic Categories",
]

# Default (inner) join: only key combinations present in both frames are kept.
sc_consensus_merge = pd.merge(sc_consensus, organoid_consensus, on=consensus_keys)

# Persist the merged profile, then preview it.
sc_consensus_merge.to_parquet(organoid_consensus_merge_path, index=False)
sc_consensus_merge.head()

Unnamed: 0,treatment,Target,Class,Therapeutic Categories,Colocalization_Nuclei_AGP.ER_MAX.CORRELATION.COEFF,Colocalization_Nuclei_AGP.ER_MAX.OVERLAP.COEFF,Colocalization_Nuclei_AGP.ER_MAX.K1,Colocalization_Nuclei_AGP.ER_MIN.K2,Colocalization_Nuclei_AGP.ER_MEDIAN.MANDERS.COEFF.COSTES.M1,Colocalization_Nuclei_AGP.Mito_MAX.CORRELATION.COEFF,...,Texture_Organoid_DNA_Sum.Variance_256.3,Texture_Organoid_ER_Contrast_256.3,Texture_Organoid_ER_Information.Measure.of.Correlation.2_256.3,Texture_Organoid_ER_Sum.Average_256.3,Texture_Organoid_ER_Variance_256.3,Texture_Organoid_Mito_Contrast_256.3,Texture_Organoid_Mito_Correlation_256.3,Texture_Organoid_Mito_Entropy_256.3,Texture_Organoid_Mito_Sum.Average_256.3,Texture_Organoid_Mito_Variance_256.3
0,Binimetinib,MEK1/2 inhibitor,Small Molecule,Kinase Inhibitor,-0.327304,-0.169844,-0.111817,-0.273961,0.308977,0.367691,...,-0.515496,-0.379363,-0.270385,-0.473104,-0.292845,-0.105459,0.194937,-0.415425,-0.434973,-0.25278
1,Cabozantinib,receptor tyrosine kinase inhibitor,Small Molecule,Kinase Inhibitor,-0.514006,-0.106318,-0.024729,0.717128,-0.320471,0.381282,...,-0.490876,0.017371,-0.419601,-0.394175,-0.177493,0.060618,0.199733,-0.598814,-0.264256,-0.031876
2,Copanlisib,PI3K inhibitor,Small Molecule,Kinase Inhibitor,0.131832,-0.145366,-0.215399,0.233404,0.635739,0.450845,...,-0.797712,-0.748342,-0.432698,-0.734517,-0.633432,-0.345864,-0.324022,-0.460562,-0.565164,-0.573768
3,DMSO,Control,Control,Control,0.034922,-0.165495,-0.169637,-0.180365,0.497068,0.140034,...,-0.17509,-0.335327,0.100941,-0.075388,-0.237172,-0.433695,0.377637,-0.035983,-0.227,-0.452151
4,Digoxin,Na+/K+ pump inhibitor,Small Molecule,Cardiac Glycosides,-0.571223,-0.152403,0.10318,-0.015262,-0.355886,-1.212844,...,-0.737107,-0.623579,-0.430571,-0.713981,-0.595866,-0.480576,-0.169313,-0.605395,-0.495751,-0.575939
