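This notebook merges the aggregated single-cell (sc) profiles with the organoid profiles at three levels: parent organoid, well, and consensus.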

In [1]:
import argparse
import pathlib

import pandas as pd
from pycytominer import aggregate

# Locate the repository root: the nearest directory, starting at the current
# working directory and walking upward, that contains a ".git" directory.
cwd = pathlib.Path.cwd()

root_dir = next(
    (
        candidate
        for candidate in (cwd, *cwd.parents)
        if (candidate / ".git").is_dir()
    ),
    None,
)

# Abort early when neither cwd nor any ancestor holds a ".git" directory
if root_dir is None:
    raise FileNotFoundError("No Git root directory found.")

# Detect an IPython/Jupyter session: when `get_ipython` is not defined the
# lookup raises NameError and the code is treated as running as a script.
try:
    cfg = get_ipython().config
except NameError:
    in_notebook = False
else:
    in_notebook = True

In [2]:
if in_notebook:
    # Interactive session: fall back to a fixed patient ID
    patient = "SARCO361"

else:
    # Script execution: the patient ID is a required command-line argument
    argparser = argparse.ArgumentParser()
    argparser.add_argument(
        "--patient",
        required=True,
        type=str,
        help="Patient ID to process, e.g. 'P01'",
    )
    args = argparser.parse_args()
    patient = args.patient

### Merge the sc and organoid profiles after aggregation
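1. The single-cell profiles aggregated to the parent-organoid level are merged with the feature-selected (fs) organoid profiles.
2. The well-level aggregated profiles (single-cell and organoid) are merged together.
3. The consensus profiles (single-cell and organoid) are merged together.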


In [3]:
# Group the input and output paths by the type of aggregation.
# Every path lives under the same per-patient profile directory, so the shared
# prefix is built once instead of being repeated in each path string.
profiles_dir = pathlib.Path(f"{root_dir}/data/{patient}/image_based_profiles")
feature_selected_dir = profiles_dir / "4.feature_selected_profiles"
aggregated_dir = profiles_dir / "5.aggregated_profiles"
merged_dir = profiles_dir / "6.merged_profiles"

# Input paths are resolved with strict=True so that a missing input file
# raises FileNotFoundError here, before any data is read. Output paths are
# resolved non-strictly because they may not exist yet.

#######################################################################
# 1. The single-cell parent organoid aggregated profile is merged with the fs organoid profile
organoid_fs_path = (feature_selected_dir / "organoid_fs.parquet").resolve(
    strict=True
)
sc_agg_well_parent_organoid_path = (
    aggregated_dir / "sc_agg_parent_organoid_level.parquet"
).resolve(strict=True)

# output merged path
organoid_agg_well_parent_organoid_path = (
    merged_dir / "sc-organoid_sc_agg_well_parent_organoid_level.parquet"
).resolve()

########################################################################

# 2. The well level profiles are merged together
sc_agg_well_path = (aggregated_dir / "sc_agg_well_level.parquet").resolve(
    strict=True
)

organoid_agg_well_path = (
    aggregated_dir / "organoid_agg_well_level.parquet"
).resolve(strict=True)

# output merged path
organoid_agg_well_merge_path = (
    merged_dir / "sc-organoid_agg_well_level.parquet"
).resolve()

###################################################################################

# 3. The consensus profiles are merged together

sc_consensus_path = (aggregated_dir / "sc_consensus.parquet").resolve(strict=True)

organoid_consensus_path = (aggregated_dir / "organoid_consensus.parquet").resolve(
    strict=True
)

# output merged path
organoid_consensus_merge_path = (
    merged_dir / "sc-organoid_consensus.parquet"
).resolve()

# Create the parent directory of every output file, so that no output depends
# on happening to share a directory with another one.
for output_path in (
    organoid_agg_well_parent_organoid_path,
    organoid_agg_well_merge_path,
    organoid_consensus_merge_path,
):
    output_path.parent.mkdir(parents=True, exist_ok=True)

###############################################################################

In [4]:
# Load the organoid profiles and the single-cell profiles aggregated to the
# parent-organoid level.
organoid_fs = pd.read_parquet(organoid_fs_path)
sc_agg_well_parent_organoid = pd.read_parquet(sc_agg_well_parent_organoid_path)

# Metadata columns that carry the same name in both tables
shared_metadata_keys = ["treatment", "Target", "Class", "Therapeutic Categories"]

# "parent_organoid" on the single-cell side is matched against "object_id" on
# the organoid side; all other join keys are named identically.
sc_agg_well_parent_organoid_merge = pd.merge(
    sc_agg_well_parent_organoid,
    organoid_fs,
    left_on=["Well", "parent_organoid", *shared_metadata_keys],
    right_on=["Well", "object_id", *shared_metadata_keys],
)

# Persist the merged profile without the DataFrame index, then preview it
sc_agg_well_parent_organoid_merge.to_parquet(
    organoid_agg_well_parent_organoid_path,
    index=False,
)
sc_agg_well_parent_organoid_merge.head()

Unnamed: 0,Well,parent_organoid,treatment,Target,Class,Therapeutic Categories,Colocalization_Nuclei_AGP.BF_MEDIAN.CORRELATION.COEFF,Colocalization_Nuclei_AGP.BF_MAX.OVERLAP.COEFF,Colocalization_Nuclei_AGP.BF_MAX.K1,Colocalization_Nuclei_AGP.BF_MIN.MANDERS.COEFF.COSTES.M1,...,Texture_Organoid_DNA_Sum.Variance_256.3,Texture_Organoid_ER_Contrast_256.3,Texture_Organoid_ER_Sum.Average_256.3,Texture_Organoid_ER_Variance_256.3,Texture_Organoid_Mito_Contrast_256.3,Texture_Organoid_Mito_Sum.Average_256.3,Texture_Organoid_Mito_Variance_256.3,Area.Size.Shape_Organoid_EXTENT,Area.Size.Shape_Organoid_EULER.NUMBER,Area.Size.Shape_Organoid_SURFACE.AREA
0,C11,20,Staurosporine,,,,-0.382848,-0.069718,0.774137,-0.128457,...,0.198379,-0.488562,0.944171,0.666372,-0.533178,0.722882,0.700276,2.33373,-0.544115,-0.642648
1,C2,21,Staurosporine,,,,0.626485,-0.183051,-0.380016,0.882015,...,-0.874566,-0.803983,-0.856086,-0.745645,-0.580098,-0.726909,-0.829121,-0.745075,1.240075,0.926309
2,C3,11,Onalespib,HSP90 inhibitor,Small Molecule,Investigational,-0.461062,-0.166454,-0.145549,-1.138929,...,-0.651099,1.091993,0.023342,-0.079483,0.016535,-0.304982,-0.420016,-0.554384,0.110088,0.751681
3,C3,28,Onalespib,HSP90 inhibitor,Small Molecule,Investigational,-0.82228,-0.141761,0.028849,-1.138929,...,-0.623361,-0.865889,-0.803374,-0.75929,0.72867,0.295029,-0.079904,-0.605582,0.288507,1.633384
4,C4,8,DMSO,Control,Control,Control,-0.085559,-0.138928,0.164228,0.882015,...,-0.394264,0.546128,-0.684949,-0.007817,-0.256007,-0.845391,-0.469627,-0.942827,-0.127804,-0.798269


In [5]:
# Load the well-level aggregated profiles for single cells and organoids
sc_agg_well = pd.read_parquet(sc_agg_well_path)
organoid_agg_well = pd.read_parquet(organoid_agg_well_path)

# Join the two tables on the well and its treatment metadata
well_level_keys = ["Well", "treatment", "Target", "Class", "Therapeutic Categories"]
sc_agg_well_merge = pd.merge(sc_agg_well, organoid_agg_well, on=well_level_keys)

# Persist the merged profile without the DataFrame index, then preview it
sc_agg_well_merge.to_parquet(organoid_agg_well_merge_path, index=False)
sc_agg_well_merge.head()

Unnamed: 0,Well,treatment,Target,Class,Therapeutic Categories,Colocalization_Nuclei_AGP.BF_MEDIAN.CORRELATION.COEFF,Colocalization_Nuclei_AGP.BF_MAX.OVERLAP.COEFF,Colocalization_Nuclei_AGP.BF_MAX.K1,Colocalization_Nuclei_AGP.BF_MIN.MANDERS.COEFF.COSTES.M1,Colocalization_Nuclei_AGP.ER_MAX.CORRELATION.COEFF,...,Texture_Organoid_DNA_Sum.Variance_256.3,Texture_Organoid_ER_Contrast_256.3,Texture_Organoid_ER_Sum.Average_256.3,Texture_Organoid_ER_Variance_256.3,Texture_Organoid_Mito_Contrast_256.3,Texture_Organoid_Mito_Sum.Average_256.3,Texture_Organoid_Mito_Variance_256.3,Area.Size.Shape_Organoid_EXTENT,Area.Size.Shape_Organoid_EULER.NUMBER,Area.Size.Shape_Organoid_SURFACE.AREA
0,C10,Trametinib,MEK1/2 inhibitor,Small Molecule,Kinase Inhibitor,0.559485,-0.162935,-0.07312,0.882015,-0.052205,...,-0.895035,0.474909,-0.476254,-0.388394,0.617448,-0.543824,-0.304456,0.589525,-0.276486,-0.218371
1,C11,Staurosporine,,,,-0.382848,-0.069718,0.774137,-0.128457,1.45929,...,-0.250226,-0.322065,-0.003127,0.007014,-0.021853,0.437657,0.308168,2.33373,-0.544115,-0.806159
2,C2,Staurosporine,,,,-0.212091,-0.17093,-0.208814,-1.138929,0.497327,...,-0.75624,-0.50978,-0.72241,-0.69043,0.035457,-0.470381,-0.592172,-0.647005,1.240075,-0.663832
3,C3,Onalespib,HSP90 inhibitor,Small Molecule,Investigational,-0.123717,-0.161116,-0.072546,-1.138929,0.392228,...,-0.917505,-0.408504,-0.856135,-0.622091,0.016535,-0.705885,-0.420016,-0.554384,0.110088,-0.697587
4,C4,DMSO,Control,Control,Control,-0.137224,-0.171965,-0.178687,0.882015,0.790491,...,-0.469886,0.269059,-0.616152,-0.122494,-0.500012,-0.707071,-0.495633,-0.335279,-0.425169,-0.739511


In [6]:
# Load the consensus profiles for single cells and organoids
sc_consensus = pd.read_parquet(sc_consensus_path)
organoid_consensus = pd.read_parquet(organoid_consensus_path)

# Join the two tables on the treatment metadata columns
consensus_keys = ["treatment", "Target", "Class", "Therapeutic Categories"]
sc_consensus_merge = pd.merge(sc_consensus, organoid_consensus, on=consensus_keys)

# Persist the merged profile without the DataFrame index, then preview it
sc_consensus_merge.to_parquet(organoid_consensus_merge_path, index=False)
sc_consensus_merge.head()

Unnamed: 0,treatment,Target,Class,Therapeutic Categories,Colocalization_Nuclei_AGP.BF_MEDIAN.CORRELATION.COEFF,Colocalization_Nuclei_AGP.BF_MAX.OVERLAP.COEFF,Colocalization_Nuclei_AGP.BF_MAX.K1,Colocalization_Nuclei_AGP.BF_MIN.MANDERS.COEFF.COSTES.M1,Colocalization_Nuclei_AGP.ER_MAX.CORRELATION.COEFF,Colocalization_Nuclei_AGP.ER_MAX.OVERLAP.COEFF,...,Texture_Organoid_DNA_Sum.Variance_256.3,Texture_Organoid_ER_Contrast_256.3,Texture_Organoid_ER_Sum.Average_256.3,Texture_Organoid_ER_Variance_256.3,Texture_Organoid_Mito_Contrast_256.3,Texture_Organoid_Mito_Sum.Average_256.3,Texture_Organoid_Mito_Variance_256.3,Area.Size.Shape_Organoid_EXTENT,Area.Size.Shape_Organoid_EULER.NUMBER,Area.Size.Shape_Organoid_SURFACE.AREA
0,Binimetinib,MEK1/2 inhibitor,Small Molecule,Kinase Inhibitor,0.032436,-0.171403,-0.16752,0.882015,-0.327304,-0.169844,...,-0.515496,-0.379363,-0.473104,-0.292845,-0.105459,-0.434973,-0.25278,1.158796,-0.484642,-0.787673
1,Cabozantinib,receptor tyrosine kinase inhibitor,Small Molecule,Kinase Inhibitor,-0.3649,-0.15786,-0.142479,-1.138929,-0.514006,-0.106318,...,-0.490876,0.017371,-0.394175,-0.177493,0.060618,-0.264256,-0.031876,-0.52106,-0.306223,-0.169901
2,Copanlisib,PI3K inhibitor,Small Molecule,Kinase Inhibitor,-0.168606,-0.17404,-0.221283,-1.120369,0.131832,-0.145366,...,-0.797712,-0.748342,-0.734517,-0.633432,-0.345864,-0.565164,-0.573768,0.625863,-0.484642,-0.922188
3,DMSO,Control,Control,Control,0.023943,-0.167206,-0.187586,0.882015,0.034922,-0.165495,...,-0.17509,-0.335327,-0.075388,-0.237172,-0.433695,-0.227,-0.452151,-0.315895,-0.425169,-0.256043
4,Digoxin,Na+/K+ pump inhibitor,Small Molecule,Cardiac Glycosides,-0.177425,-0.164186,-0.128185,-0.128457,-0.571223,-0.152403,...,-0.737107,-0.623579,-0.713981,-0.595866,-0.480576,-0.495751,-0.575939,1.664419,-0.484642,-0.809067
