This notebook merges the single-cell (sc) and organoid profiles after aggregation.

In [1]:
import argparse
import pathlib

import pandas as pd
from pycytominer import aggregate

# Locate the repository root: the nearest directory, starting at the current
# working directory and walking upward, that contains a ".git" directory.
cwd = pathlib.Path.cwd()
root_dir = next(
    (
        candidate
        for candidate in (cwd, *cwd.parents)
        if (candidate / ".git").is_dir()
    ),
    None,
)

# Stop early when no enclosing directory contains a ".git" directory
if root_dir is None:
    raise FileNotFoundError("No Git root directory found.")

# get_ipython is only defined when running under IPython/Jupyter; a NameError
# therefore means the code is being executed as a plain Python script.
try:
    cfg = get_ipython().config
except NameError:
    in_notebook = False
else:
    in_notebook = True

In [2]:
# Choose the patient to process: a fixed ID for interactive notebook sessions,
# otherwise the value of the required --patient command-line argument.
if in_notebook:
    patient = "SARCO361"

else:
    argparser = argparse.ArgumentParser()
    argparser.add_argument(
        "--patient",
        required=True,
        type=str,
        help="Patient ID to process, e.g. 'P01'",
    )
    args = argparser.parse_args()
    patient = args.patient

### Merge the sc and organoid profiles after aggregation
1. The single-cell profile aggregated to the parent-organoid level is merged with the feature-selected (fs) organoid profile.
2. The well-level single-cell and organoid profiles are merged together.
3. The single-cell and organoid consensus profiles are merged together.


In [3]:
# Input and output locations, grouped by the merge they belong to.
# Inputs are resolved with strict=True, so a missing input file raises
# FileNotFoundError here rather than later when the file is read.
profiles_dir = pathlib.Path(f"{root_dir}/data/{patient}/image_based_profiles")
feature_selected_dir = profiles_dir / "4.feature_selected_profiles"
aggregated_dir = profiles_dir / "5.aggregated_profiles"
merged_dir = profiles_dir / "6.merged_profiles"

# ---------------------------------------------------------------------------
# 1. sc profile aggregated to the parent organoid level + fs organoid profile
organoid_fs_path = (feature_selected_dir / "organoid_fs.parquet").resolve(
    strict=True
)
sc_agg_well_parent_organoid_path = (
    aggregated_dir / "sc_agg_parent_organoid_level.parquet"
).resolve(strict=True)

# merged output
organoid_agg_well_parent_organoid_path = (
    merged_dir / "sc-organoid_sc_agg_well_parent_organoid_level.parquet"
).resolve()

# ---------------------------------------------------------------------------
# 2. well level profiles
sc_agg_well_path = (aggregated_dir / "sc_agg_well_level.parquet").resolve(
    strict=True
)
organoid_agg_well_path = (
    aggregated_dir / "organoid_agg_well_level.parquet"
).resolve(strict=True)

# merged output
organoid_agg_well_merge_path = (
    merged_dir / "sc-organoid_agg_well_level.parquet"
).resolve()

# ---------------------------------------------------------------------------
# 3. consensus profiles
sc_consensus_path = (aggregated_dir / "sc_consensus.parquet").resolve(strict=True)
organoid_consensus_path = (aggregated_dir / "organoid_consensus.parquet").resolve(
    strict=True
)

# merged output
organoid_consensus_merge_path = (
    merged_dir / "sc-organoid_consensus.parquet"
).resolve()

# All three merged outputs share one parent directory, so creating the parent
# of the last output path is enough.
organoid_consensus_merge_path.parent.mkdir(parents=True, exist_ok=True)

In [4]:
# Metadata columns that carry the same name in both tables
shared_metadata = ["treatment", "Target", "Class", "Therapeutic Categories"]

organoid_fs = pd.read_parquet(organoid_fs_path)
sc_agg_well_parent_organoid = pd.read_parquet(sc_agg_well_parent_organoid_path)

# Join the two tables on well, organoid identifier and treatment metadata.
# The organoid identifier is matched across differently named columns:
# "parent_organoid" in the sc table and "object_id" in the organoid table.
sc_agg_well_parent_organoid_merge = pd.merge(
    sc_agg_well_parent_organoid,
    organoid_fs,
    left_on=["Well", "parent_organoid", *shared_metadata],
    right_on=["Well", "object_id", *shared_metadata],
)

# Persist the merged table, then preview its first rows
sc_agg_well_parent_organoid_merge.to_parquet(
    organoid_agg_well_parent_organoid_path,
    index=False,
)
sc_agg_well_parent_organoid_merge.head()

Unnamed: 0,Well,parent_organoid,treatment,Target,Class,Therapeutic Categories,Colocalization_Nuclei_AGP.BF_MEDIAN.CORRELATION.COEFF,Colocalization_Nuclei_AGP.BF_MAX.OVERLAP.COEFF,Colocalization_Nuclei_AGP.BF_MIN.MANDERS.COEFF.COSTES.M1,Colocalization_Nuclei_AGP.ER_MEDIAN.CORRELATION.COEFF,...,Texture_Organoid_DNA_Sum.Variance_256.3,Texture_Organoid_ER_Contrast_256.3,Texture_Organoid_ER_Sum.Average_256.3,Texture_Organoid_ER_Variance_256.3,Texture_Organoid_Mito_Contrast_256.3,Texture_Organoid_Mito_Information.Measure.of.Correlation.2_256.3,Texture_Organoid_Mito_Sum.Average_256.3,Texture_Organoid_Mito_Variance_256.3,Area.Size.Shape_Organoid_EXTENT,Area.Size.Shape_Organoid_EULER.NUMBER
0,C11,20,Staurosporine,Apoptosis induction,Small Molecule,Experimental,-0.225521,-0.153075,-0.061436,1.002403,...,0.745819,-1.038989,1.754503,0.74791,-0.311159,2.17373,4.764272,8.666428,2.07028,-0.926212
1,C2,21,Staurosporine,Apoptosis induction,Small Molecule,Experimental,0.766932,-0.311126,0.94054,0.021842,...,-1.149945,-1.458611,-0.96861,-1.285881,-0.658284,-0.701748,-0.401646,-1.769757,-0.801651,11.898257
2,C3,11,Onalespib,HSP90 inhibitor,Small Molecule,Investigational,-0.302427,-0.28798,-1.063412,-0.089211,...,-0.755107,1.063709,0.361635,-0.326378,3.755802,-0.157946,1.101772,1.021862,-0.623774,3.776094
3,C3,28,Onalespib,HSP90 inhibitor,Small Molecule,Investigational,-0.657605,-0.253544,-1.063412,0.059891,...,-0.706097,-1.540967,-0.888877,-1.305535,9.024406,0.128921,3.23974,3.342695,-0.671532,5.058541
4,C4,8,DMSO,Control,Control,Control,0.066796,-0.249594,0.94054,0.426949,...,-0.30131,0.337516,-0.709745,-0.223154,1.739445,-1.157694,-0.823819,0.683332,-0.986116,2.066164


In [5]:
# Columns identifying a well and its treatment in both well-level tables
well_level_keys = ["Well", "treatment", "Target", "Class", "Therapeutic Categories"]

sc_agg_well = pd.read_parquet(sc_agg_well_path)
organoid_agg_well = pd.read_parquet(organoid_agg_well_path)

# Join the sc and organoid well-level tables on the shared key columns
sc_agg_well_merge = pd.merge(sc_agg_well, organoid_agg_well, on=well_level_keys)

# Persist the merged table, then preview its first rows
sc_agg_well_merge.to_parquet(organoid_agg_well_merge_path, index=False)
sc_agg_well_merge.head()

Unnamed: 0,Well,treatment,Target,Class,Therapeutic Categories,Colocalization_Nuclei_AGP.BF_MEDIAN.CORRELATION.COEFF,Colocalization_Nuclei_AGP.BF_MAX.OVERLAP.COEFF,Colocalization_Nuclei_AGP.BF_MIN.MANDERS.COEFF.COSTES.M1,Colocalization_Nuclei_AGP.ER_MEDIAN.CORRELATION.COEFF,Colocalization_Nuclei_AGP.ER_MIN.OVERLAP.COEFF,...,Texture_Organoid_DNA_Sum.Variance_256.3,Texture_Organoid_ER_Contrast_256.3,Texture_Organoid_ER_Sum.Average_256.3,Texture_Organoid_ER_Variance_256.3,Texture_Organoid_Mito_Contrast_256.3,Texture_Organoid_Mito_Information.Measure.of.Correlation.2_256.3,Texture_Organoid_Mito_Sum.Average_256.3,Texture_Organoid_Mito_Variance_256.3,Area.Size.Shape_Organoid_EXTENT,Area.Size.Shape_Organoid_EULER.NUMBER
0,C10,Trametinib,MEK1/2 inhibitor,Small Molecule,Kinase Inhibitor,0.701053,-0.283072,0.94054,-0.538315,-0.414609,...,-1.186113,0.242768,-0.394067,-0.771317,8.201545,-0.782291,0.250727,1.810413,0.443273,0.997459
1,C11,Staurosporine,Apoptosis induction,Small Molecule,Experimental,-0.225521,-0.153075,-0.061436,1.002403,-0.450699,...,-0.046812,-0.817489,0.321596,-0.201793,3.471792,1.12111,3.747953,5.990788,2.07028,-0.926212
2,C2,Staurosporine,Apoptosis induction,Small Molecule,Experimental,-0.05762,-0.294223,-1.063412,0.021842,-0.377039,...,-0.940878,-1.067217,-0.766408,-1.206353,3.895786,-0.250086,0.51242,-0.152883,-0.710172,11.898257
3,C3,Onalespib,HSP90 inhibitor,Small Molecule,Investigational,0.029276,-0.280536,-1.063412,-0.085289,-0.357715,...,-1.225814,-0.932483,-0.968684,-1.107921,3.755802,-1.105964,-0.32673,1.021862,-0.623774,3.776094
4,C4,DMSO,Control,Control,Control,0.015995,-0.295665,0.94054,0.320674,-0.386847,...,-0.434924,-0.031085,-0.605679,-0.388329,-0.065785,-0.153162,-0.330956,0.505876,-0.419391,-0.071247


In [6]:
# Columns identifying a treatment in both consensus tables
consensus_keys = ["treatment", "Target", "Class", "Therapeutic Categories"]

sc_consensus = pd.read_parquet(sc_consensus_path)
organoid_consensus = pd.read_parquet(organoid_consensus_path)

# Join the sc and organoid consensus tables on the shared key columns
sc_consensus_merge = pd.merge(sc_consensus, organoid_consensus, on=consensus_keys)

# Persist the merged table, then preview its first rows
sc_consensus_merge.to_parquet(organoid_consensus_merge_path, index=False)
sc_consensus_merge.head()

Unnamed: 0,treatment,Target,Class,Therapeutic Categories,Colocalization_Nuclei_AGP.BF_MEDIAN.CORRELATION.COEFF,Colocalization_Nuclei_AGP.BF_MAX.OVERLAP.COEFF,Colocalization_Nuclei_AGP.BF_MIN.MANDERS.COEFF.COSTES.M1,Colocalization_Nuclei_AGP.ER_MEDIAN.CORRELATION.COEFF,Colocalization_Nuclei_AGP.ER_MIN.OVERLAP.COEFF,Colocalization_Nuclei_AGP.ER_MEAN.K1,...,Texture_Organoid_DNA_Sum.Variance_256.3,Texture_Organoid_ER_Contrast_256.3,Texture_Organoid_ER_Sum.Average_256.3,Texture_Organoid_ER_Variance_256.3,Texture_Organoid_Mito_Contrast_256.3,Texture_Organoid_Mito_Information.Measure.of.Correlation.2_256.3,Texture_Organoid_Mito_Sum.Average_256.3,Texture_Organoid_Mito_Variance_256.3,Area.Size.Shape_Organoid_EXTENT,Area.Size.Shape_Organoid_EULER.NUMBER
0,DMSO,Control,Control,Control,0.015995,-0.295665,0.94054,0.320674,-0.386847,-0.346198,...,-0.434924,-0.031085,-0.605679,-0.388329,-0.065785,-0.153162,-0.330956,0.505876,-0.419391,-0.071247
1,Everolimus,mTOR inhibitor,Small Molecule,Kinase Inhibitor,0.3195,-0.270872,0.94054,-0.920604,-0.445714,-0.391276,...,2.062207,-1.292088,-0.056668,-0.669209,-1.219586,1.266832,1.334162,1.250306,1.756287,-0.926212
2,Imatinib,tyrosine kinase inhibitor,Small Molecule,Kinase Inhibitor,-0.934827,-0.10336,-1.063412,-2.047326,-0.132182,0.55696,...,-1.566427,-1.6528,-1.30257,-1.344844,-2.685136,-2.637659,-1.828158,-2.646947,1.420775,-0.498729
3,Onalespib,HSP90 inhibitor,Small Molecule,Investigational,0.029276,-0.280536,-1.063412,-0.085289,-0.357715,-0.39663,...,-1.225814,-0.932483,-0.968684,-1.107921,3.755802,-1.105964,-0.32673,1.021862,-0.623774,3.776094
4,Staurosporine,Apoptosis induction,Small Molecule,Experimental,-0.05762,-0.256303,-1.063412,0.609432,-0.446117,-0.299799,...,-0.903557,-0.928239,-0.430579,-0.836819,3.683789,-0.16003,0.896262,2.705209,0.701789,-0.498729
