This notebook performs profile aggregation.

In [1]:
import argparse
import pathlib

import pandas as pd
from pycytominer import aggregate

# Locate the repository root: the closest directory, starting at the current
# working directory and walking up through its parents, that contains ".git".
cwd = pathlib.Path.cwd()
root_dir = next(
    (
        candidate
        for candidate in (cwd, *cwd.parents)
        if (candidate / ".git").is_dir()
    ),
    None,
)

# Without a repository root none of the data paths can be built, so stop here.
if root_dir is None:
    raise FileNotFoundError("No Git root directory found.")

# Decide whether this code runs inside a notebook: if the name get_ipython is
# not defined (NameError), treat the run as a plain script.
try:
    cfg = get_ipython().config
except NameError:
    in_notebook = False
else:
    in_notebook = True

In [2]:
# Pick the patient to process: a fixed ID when running interactively in a
# notebook, otherwise the required --patient command-line argument.
if in_notebook:
    patient = "SARCO361"
else:
    argparser = argparse.ArgumentParser()
    argparser.add_argument(
        "--patient",
        type=str,
        required=True,
        help="Patient ID to process, e.g. 'P01'",
    )
    args = argparser.parse_args()
    patient = args.patient

In [3]:
# pathing
# Every input and output of this notebook lives under the patient's
# image_based_profiles directory, so build that prefix once and join onto it
# instead of repeating the full path for each file.
profiles_dir = pathlib.Path(f"{root_dir}/data/{patient}/image_based_profiles")
feature_selected_dir = profiles_dir / "4.feature_selected_profiles"
aggregated_dir = profiles_dir / "5.aggregated_profiles"

# input paths: strict resolution fails here, before any work is done, if a
# feature-selected profile file is missing
sc_fs_path = (feature_selected_dir / "sc_fs.parquet").resolve(strict=True)
organoid_fs_path = (feature_selected_dir / "organoid_fs.parquet").resolve(
    strict=True
)

# output paths: these files may not exist yet, so resolution is non-strict
sc_agg_well_output_path = (aggregated_dir / "sc_agg_well_level.parquet").resolve()
sc_agg_well_parent_organoid_output_path = (
    aggregated_dir / "sc_agg_parent_organoid_level.parquet"
).resolve()
sc_consensus_output_path = (aggregated_dir / "sc_consensus.parquet").resolve()

organoid_agg_well_output_path = (
    aggregated_dir / "organoid_agg_well_level.parquet"
).resolve()
organoid_consensus_output_path = (
    aggregated_dir / "organoid_consensus.parquet"
).resolve()

# All outputs share one directory; create it explicitly rather than through
# the parent of one particular output file.
aggregated_dir.resolve().mkdir(parents=True, exist_ok=True)

In [4]:
# Load the single-cell and organoid feature-selected profiles from parquet.
sc_fs, organoid_fs = (
    pd.read_parquet(profile_path)
    for profile_path in (sc_fs_path, organoid_fs_path)
)

### Aggregate the single cell profiles
We will aggregate the profiles using a few different stratifications:
1. Well
2. Well and parent organoid
3. Treatment - i.e. the consensus profile for each treatment

In [5]:
# Preview the first rows of the single-cell profiles to check which metadata
# and feature columns are present before aggregating.
sc_fs.head()

Unnamed: 0,patient,object_id,unit,dose,treatment,image_set,Target,Class,Therapeutic Categories,Well,...,Granularity_Cytoplasm_Mito_GRANULARITY.1,Texture_Cytoplasm_BF_Angular.Second.Moment_256.3,Texture_Cytoplasm_BF_Information.Measure.of.Correlation.1_256.3,Texture_Cytoplasm_BF_Sum.Variance_256.3,Texture_Cytoplasm_DNA_Contrast_256.3,Texture_Cytoplasm_ER_Contrast_256.3,Texture_Cytoplasm_Mito_Correlation_256.3,Texture_Cytoplasm_Mito_Variance_256.3,Area.Size.Shape_Cytoplasm_EXTENT,Area.Size.Shape_Cytoplasm_EULER.NUMBER
0,SARCO361,19,uM,1,Everolimus,C5-4,mTOR inhibitor,Small Molecule,Kinase Inhibitor,C5,...,0.506677,0.777007,-0.712073,-0.915372,-0.719147,-0.463082,0.34481,-0.91479,-0.836526,0.12712
1,SARCO361,39,uM,1,Everolimus,C5-4,mTOR inhibitor,Small Molecule,Kinase Inhibitor,C5,...,0.005694,0.757187,-0.436066,-0.608477,-0.596133,1.034804,0.086968,0.654268,-1.300594,-4.544532
2,SARCO361,58,uM,1,Everolimus,C5-4,mTOR inhibitor,Small Molecule,Kinase Inhibitor,C5,...,0.331001,1.170315,-1.135125,-0.903262,-1.096134,-0.902594,0.29377,-0.425628,0.529536,0.230934
3,SARCO361,78,uM,1,Everolimus,C5-4,mTOR inhibitor,Small Molecule,Kinase Inhibitor,C5,...,0.438188,0.471413,-1.288118,-0.705702,-0.295453,0.036339,0.796643,0.908627,-0.478178,-1.430098
4,SARCO361,98,uM,1,Everolimus,C5-4,mTOR inhibitor,Small Molecule,Kinase Inhibitor,C5,...,0.002442,1.051855,0.016516,-0.883929,-1.14358,-0.443873,-0.303698,-0.099879,-0.710484,-1.533912


In [6]:
# Columns of the single-cell table that are treated as metadata; everything
# else is considered a feature.
sc_metadata_columns = [
    "patient", "object_id", "unit", "dose", "treatment",
    "Target", "Class", "Therapeutic Categories",
    "image_set", "Well", "parent_organoid",
]

# Collect the feature column names, preserving their order in sc_fs.
sc_features_columns = []
for column_name in sc_fs.columns:
    if column_name in sc_metadata_columns:
        continue
    sc_features_columns.append(column_name)

# Feature-only view of the table; metadata columns absent from sc_fs are
# skipped silently (errors="ignore").
sc_features_df = sc_fs.drop(sc_metadata_columns, axis="columns", errors="ignore")

In [7]:
# Strata shared by every single-cell aggregation: the treatment together with
# its annotation columns.
sc_treatment_strata = ["treatment", "Target", "Class", "Therapeutic Categories"]


def _sc_median_profile(strata):
    """Median-aggregate the single-cell feature columns of sc_fs per strata group."""
    return aggregate(
        population_df=sc_fs,
        strata=strata,
        features=sc_features_columns,
        operation="median",
    )


# stratification approach #1: well
sc_well_agg = _sc_median_profile(["Well", *sc_treatment_strata])
sc_well_agg.to_parquet(sc_agg_well_output_path, index=False)

# stratification approach #2: well and parent organoid
sc_well_parent_organoid_agg = _sc_median_profile(
    ["Well", "parent_organoid", *sc_treatment_strata]
)
sc_well_parent_organoid_agg.to_parquet(
    sc_agg_well_parent_organoid_output_path, index=False
)

# stratification approach #3: treatment only, a.k.a. consensus
sc_consensus = _sc_median_profile([*sc_treatment_strata])
sc_consensus.to_parquet(sc_consensus_output_path, index=False)

### Aggregate the organoid profiles
We will aggregate the profiles using a few different stratifications:
1. Well
2. Treatment - i.e. the consensus profile for each treatment

In [8]:
# Preview the first rows of the organoid profiles to check which metadata and
# feature columns are present before aggregating.
organoid_fs.head()

Unnamed: 0,patient,object_id,unit,dose,treatment,image_set,Target,Class,Therapeutic Categories,Well,...,Texture_Organoid_DNA_Sum.Variance_256.3,Texture_Organoid_ER_Contrast_256.3,Texture_Organoid_ER_Sum.Average_256.3,Texture_Organoid_ER_Variance_256.3,Texture_Organoid_Mito_Contrast_256.3,Texture_Organoid_Mito_Information.Measure.of.Correlation.2_256.3,Texture_Organoid_Mito_Sum.Average_256.3,Texture_Organoid_Mito_Variance_256.3,Area.Size.Shape_Organoid_EXTENT,Area.Size.Shape_Organoid_EULER.NUMBER
0,SARCO361,45,uM,1,Everolimus,C5-4,mTOR inhibitor,Small Molecule,Kinase Inhibitor,C5,...,1.545744,-0.675457,0.390916,-0.513617,6.944647,1.094299,4.284505,8.313099,-0.274677,0.356235
1,SARCO361,4,nM,10,Staurosporine,C11-2,Apoptosis induction,Small Molecule,Experimental,C11,...,0.701343,-0.817489,0.321596,-1.000235,3.471792,1.12111,3.747953,1.082282,2.066893,0.356235
2,SARCO361,27,uM,1,Everolimus,C5-6,mTOR inhibitor,Small Molecule,Kinase Inhibitor,C5,...,3.267712,-1.38443,-0.226361,-0.824801,-1.341468,1.439365,1.430394,2.258404,2.453575,-0.926212
3,SARCO361,5,uM,1,Onalespib,C3-3,HSP90 inhibitor,Small Molecule,Investigational,C3,...,-1.263533,-0.932483,-0.992392,-1.107921,12.843089,-1.438886,-0.32673,3.720659,-0.451312,-0.071247
4,SARCO361,28,uM,1,Onalespib,C3-4,HSP90 inhibitor,Small Molecule,Investigational,C3,...,-0.706097,-1.540967,-0.888877,-1.305535,9.024406,0.128921,3.23974,3.342695,-0.671532,5.058541


In [9]:
# Columns of the organoid table that are treated as metadata; everything else
# is considered a feature.
organoid_metadata_columns = [
    "patient",
    "object_id",
    "unit",
    "dose",
    "treatment",
    "image_set",
    "Target",
    "Class",
    "Therapeutic Categories",
    "Well",
    "parent_organoid",
    "MOA",
]
# Feature column names, in the order they appear in organoid_fs.
organoidfeatures_columns = [
    col for col in organoid_fs.columns if col not in organoid_metadata_columns
]
# Drop the organoid metadata list (not the single-cell one) so that "MOA" is
# removed as well and this cell does not depend on the single-cell cell.
# Metadata columns absent from organoid_fs are skipped (errors="ignore").
organoid_features_df = organoid_fs.drop(
    columns=organoid_metadata_columns, errors="ignore"
)

In [10]:
# Strata shared by both organoid aggregations: the treatment together with
# its annotation columns.
organoid_treatment_strata = ["treatment", "Target", "Class", "Therapeutic Categories"]


def _organoid_median_profile(strata):
    """Median-aggregate the organoid feature columns of organoid_fs per strata group."""
    return aggregate(
        population_df=organoid_fs,
        strata=strata,
        features=organoidfeatures_columns,
        operation="median",
    )


# stratification approach #1: well
organoid_well_agg = _organoid_median_profile(["Well", *organoid_treatment_strata])
organoid_well_agg.to_parquet(organoid_agg_well_output_path, index=False)

# stratification approach #2: treatment only, a.k.a. consensus
organoid_consensus = _organoid_median_profile([*organoid_treatment_strata])
organoid_consensus.to_parquet(organoid_consensus_output_path, index=False)

In [11]:
# Inspect the well-level organoid aggregate.
# NOTE(review): in the saved output, single_cell_count appears among the
# aggregated feature columns and is NaN for some wells; confirm that treating
# it as a feature (rather than metadata) is intended.
organoid_well_agg.head()

Unnamed: 0,Well,treatment,Target,Class,Therapeutic Categories,single_cell_count,Colocalization_Organoid_AGP.BF_MEAN.CORRELATION.COEFF,Colocalization_Organoid_AGP.BF_MEAN.MANDERS.COEFF.M2,Colocalization_Organoid_AGP.BF_MEDIAN.OVERLAP.COEFF,Colocalization_Organoid_AGP.BF_MEDIAN.MANDERS.COEFF.COSTES.M1,...,Texture_Organoid_DNA_Sum.Variance_256.3,Texture_Organoid_ER_Contrast_256.3,Texture_Organoid_ER_Sum.Average_256.3,Texture_Organoid_ER_Variance_256.3,Texture_Organoid_Mito_Contrast_256.3,Texture_Organoid_Mito_Information.Measure.of.Correlation.2_256.3,Texture_Organoid_Mito_Sum.Average_256.3,Texture_Organoid_Mito_Variance_256.3,Area.Size.Shape_Organoid_EXTENT,Area.Size.Shape_Organoid_EULER.NUMBER
0,C10,Trametinib,MEK1/2 inhibitor,Small Molecule,Kinase Inhibitor,,-0.902985,-0.778913,-0.793028,0.178981,...,-1.186113,0.242768,-0.394067,-0.771317,8.201545,-0.782291,0.250727,1.810413,0.443273,0.997459
1,C11,Staurosporine,Apoptosis induction,Small Molecule,Experimental,6.0,-0.095928,-0.495318,-0.879237,-0.75734,...,-0.046812,-0.817489,0.321596,-0.201793,3.471792,1.12111,3.747953,5.990788,2.07028,-0.926212
2,C2,Staurosporine,Apoptosis induction,Small Molecule,Experimental,1.0,0.33977,-1.448281,-0.997342,-0.484307,...,-0.940878,-1.067217,-0.766408,-1.206353,3.895786,-0.250086,0.51242,-0.152883,-0.710172,11.898257
3,C3,Onalespib,HSP90 inhibitor,Small Molecule,Investigational,6.0,-0.212266,-1.261935,-0.59621,3.514781,...,-1.225814,-0.932483,-0.968684,-1.107921,3.755802,-1.105964,-0.32673,1.021862,-0.623774,3.776094
4,C4,DMSO,Control,Control,Control,6.0,-0.030456,0.13006,-0.455398,-0.609894,...,-0.434924,-0.031085,-0.605679,-0.388329,-0.065785,-0.153162,-0.330956,0.505876,-0.419391,-0.071247


In [12]:
# Inspect the treatment-level (consensus) organoid aggregate.
# NOTE(review): in the saved output, single_cell_count is aggregated like a
# feature (e.g. a value of 3.5) and is NaN for some treatments; confirm that
# this is intended.
organoid_consensus.head()

Unnamed: 0,treatment,Target,Class,Therapeutic Categories,single_cell_count,Colocalization_Organoid_AGP.BF_MEAN.CORRELATION.COEFF,Colocalization_Organoid_AGP.BF_MEAN.MANDERS.COEFF.M2,Colocalization_Organoid_AGP.BF_MEDIAN.OVERLAP.COEFF,Colocalization_Organoid_AGP.BF_MEDIAN.MANDERS.COEFF.COSTES.M1,Colocalization_Organoid_AGP.ER_MIN.CORRELATION.COEFF,...,Texture_Organoid_DNA_Sum.Variance_256.3,Texture_Organoid_ER_Contrast_256.3,Texture_Organoid_ER_Sum.Average_256.3,Texture_Organoid_ER_Variance_256.3,Texture_Organoid_Mito_Contrast_256.3,Texture_Organoid_Mito_Information.Measure.of.Correlation.2_256.3,Texture_Organoid_Mito_Sum.Average_256.3,Texture_Organoid_Mito_Variance_256.3,Area.Size.Shape_Organoid_EXTENT,Area.Size.Shape_Organoid_EULER.NUMBER
0,DMSO,Control,Control,Control,6.0,-0.030456,0.13006,-0.455398,-0.609894,0.574481,...,-0.434924,-0.031085,-0.605679,-0.388329,-0.065785,-0.153162,-0.330956,0.505876,-0.419391,-0.071247
1,Everolimus,mTOR inhibitor,Small Molecule,Kinase Inhibitor,8.0,-0.193188,1.061743,-0.118095,48.871162,0.556567,...,2.062207,-1.292088,-0.056668,-0.669209,-1.219586,1.266832,1.334162,1.250306,1.756287,-0.926212
2,Imatinib,tyrosine kinase inhibitor,Small Molecule,Kinase Inhibitor,,2.261743,6.382486,-0.955312,198.256778,-5.193207,...,-1.566427,-1.6528,-1.30257,-1.344844,-2.685136,-2.637659,-1.828158,-2.646947,1.420775,-0.498729
3,Onalespib,HSP90 inhibitor,Small Molecule,Investigational,6.0,-0.212266,-1.261935,-0.59621,3.514781,-3.245362,...,-1.225814,-0.932483,-0.968684,-1.107921,3.755802,-1.105964,-0.32673,1.021862,-0.623774,3.776094
4,Staurosporine,Apoptosis induction,Small Molecule,Experimental,3.5,-0.035707,-1.323333,-0.93829,-0.672737,-0.253388,...,-0.903557,-0.928239,-0.430579,-0.836819,3.683789,-0.16003,0.896262,2.705209,0.701789,-0.498729
