This notebook performs profile aggregation.

In [1]:
import argparse
import pathlib

import pandas as pd
from pycytominer import aggregate

# Find the repository root: the closest directory, starting at the current
# working directory and walking up through its ancestors, that holds a `.git` folder.
cwd = pathlib.Path.cwd()
root_dir = next(
    (
        candidate
        for candidate in (cwd, *cwd.parents)
        if (candidate / ".git").is_dir()
    ),
    None,
)

# Nothing downstream can be located without the repository root, so stop here.
if root_dir is None:
    raise FileNotFoundError("No Git root directory found.")

# `get_ipython` is looked up as a bare name; when it is not defined the lookup
# raises NameError, which is treated as "running as a plain script".
try:
    cfg = get_ipython().config
except NameError:
    in_notebook = False
else:
    in_notebook = True

In [2]:
# Choose the patient to process: a fixed example ID when run interactively,
# otherwise the mandatory --patient command-line argument.
if in_notebook:
    patient = "SARCO361"

else:
    argparser = argparse.ArgumentParser()
    argparser.add_argument(
        "--patient",
        required=True,
        type=str,
        help="Patient ID to process, e.g. 'P01'",
    )
    args = argparser.parse_args()
    patient = args.patient

In [3]:
# pathing
# Every profile for this patient lives under one directory; build it once and
# derive the input/output locations from it instead of repeating the prefix.
profiles_dir = pathlib.Path(f"{root_dir}/data/{patient}/image_based_profiles")
feature_selected_dir = profiles_dir / "4.feature_selected_profiles"
aggregated_dir = profiles_dir / "5.aggregated_profiles"

# input paths: strict resolution raises FileNotFoundError if a file is missing,
# so the notebook fails here rather than later at read time
sc_fs_path = (feature_selected_dir / "sc_fs.parquet").resolve(strict=True)
organoid_fs_path = (feature_selected_dir / "organoid_fs.parquet").resolve(strict=True)

# all outputs share this directory; create it only after the inputs were found
aggregated_dir.mkdir(parents=True, exist_ok=True)

# output path
sc_agg_well_output_path = (aggregated_dir / "sc_agg_well_level.parquet").resolve()
sc_agg_well_parent_organoid_output_path = (
    aggregated_dir / "sc_agg_parent_organoid_level.parquet"
).resolve()
sc_consensus_output_path = (aggregated_dir / "sc_consensus.parquet").resolve()

organoid_agg_well_output_path = (
    aggregated_dir / "organoid_agg_well_level.parquet"
).resolve()
organoid_consensus_output_path = (
    aggregated_dir / "organoid_consensus.parquet"
).resolve()

In [4]:
# Load both feature-selected profile tables (single-cell first, then organoid).
sc_fs, organoid_fs = (
    pd.read_parquet(parquet_path) for parquet_path in (sc_fs_path, organoid_fs_path)
)

### Aggregate the single cell profiles
We will aggregate using a few different stratifications:
1. Well
2. Well and parent organoid
3. Treatment - i.e. the consensus profile for each treatment

In [5]:
# Preview the first rows of the single-cell profiles before aggregating.
sc_fs.head()

Unnamed: 0,patient,object_id,unit,dose,treatment,image_set,Target,Class,Therapeutic Categories,Well,...,Granularity_Cytoplasm_BF_GRANULARITY.2,Texture_Cytoplasm_AGP_Angular.Second.Moment_256.3,Texture_Cytoplasm_AGP_Sum.Variance_256.3,Texture_Cytoplasm_BF_Information.Measure.of.Correlation.1_256.3,Texture_Cytoplasm_BF_Sum.Variance_256.3,Texture_Cytoplasm_DNA_Contrast_256.3,Texture_Cytoplasm_DNA_Correlation_256.3,Texture_Cytoplasm_DNA_Sum.Variance_256.3,Area.Size.Shape_Cytoplasm_EXTENT,Area.Size.Shape_Cytoplasm_EULER.NUMBER
0,SARCO361,63,uM,1,Nilotinib,F6-4,tyrosine kinase inhibitor,Small Molecule,Investigational,F6,...,0.177022,-0.238446,-0.244723,-0.902567,0.672952,0.00228,0.034022,-0.210443,1.447654,0.029134
1,SARCO361,127,uM,1,Nilotinib,F6-4,tyrosine kinase inhibitor,Small Molecule,Investigational,F6,...,0.181449,-0.815101,0.477238,-1.425577,1.158644,0.226669,1.210875,0.752661,1.797869,-0.810362
2,SARCO361,191,uM,1,Nilotinib,F6-4,tyrosine kinase inhibitor,Small Molecule,Investigational,F6,...,0.166467,0.152044,-0.789297,1.194696,-0.133684,1.315882,-1.100122,-0.027451,0.303446,-0.250698
3,SARCO361,19,uM,1,Everolimus,C5-4,mTOR inhibitor,Small Molecule,Kinase Inhibitor,C5,...,0.186102,0.778623,-0.414778,-0.551235,-0.906984,-0.89662,0.324211,-0.662453,-1.005302,0.308966
4,SARCO361,39,uM,1,Everolimus,C5-4,mTOR inhibitor,Small Molecule,Kinase Inhibitor,C5,...,0.192541,0.761778,-0.081625,-0.278053,-0.662951,-0.77001,0.343953,-0.571636,-1.477648,-3.888512


In [6]:
# Columns that describe a cell (identity, treatment, plate position) rather
# than measure it.
sc_metadata_columns = [
    "patient",
    "object_id",
    "unit",
    "dose",
    "treatment",
    "Target",
    "Class",
    "Therapeutic Categories",
    "image_set",
    "Well",
    "parent_organoid",
]

# Every remaining column is treated as a feature; the mask keeps the
# original column order.
sc_features_columns = sc_fs.columns[
    ~sc_fs.columns.isin(sc_metadata_columns)
].tolist()

# Feature-only view of the table; metadata names absent from the frame are
# skipped rather than raising.
sc_features_df = sc_fs.drop(errors="ignore", columns=sc_metadata_columns)

In [7]:
# Treatment annotations carried along in every stratification.
sc_treatment_strata = ["treatment", "Target", "Class", "Therapeutic Categories"]
# Arguments shared by all three single-cell aggregations: median of each feature.
sc_aggregate_kwargs = {
    "population_df": sc_fs,
    "features": sc_features_columns,
    "operation": "median",
}

# stratification approach #1: one profile per well
sc_well_agg = aggregate(
    strata=["Well", *sc_treatment_strata],
    **sc_aggregate_kwargs,
)
sc_well_agg.to_parquet(sc_agg_well_output_path, index=False)

# stratification approach #2: one profile per parent organoid within each well
sc_well_parent_organoid_agg = aggregate(
    strata=["Well", "parent_organoid", *sc_treatment_strata],
    **sc_aggregate_kwargs,
)
sc_well_parent_organoid_agg.to_parquet(
    sc_agg_well_parent_organoid_output_path, index=False
)

# stratification approach #3: one profile per treatment (a.k.a. consensus)
sc_consensus = aggregate(
    strata=list(sc_treatment_strata),
    **sc_aggregate_kwargs,
)
sc_consensus.to_parquet(sc_consensus_output_path, index=False)

### Aggregate the organoid profiles
We will aggregate using a few different stratifications:
1. Well
2. Treatment - i.e. the consensus profile for each treatment

In [8]:
# Preview the first rows of the organoid profiles before aggregating.
organoid_fs.head()

Unnamed: 0,patient,object_id,unit,dose,treatment,image_set,Target,Class,Therapeutic Categories,Well,...,Texture_Organoid_DNA_Sum.Variance_256.3,Texture_Organoid_ER_Contrast_256.3,Texture_Organoid_ER_Sum.Average_256.3,Texture_Organoid_ER_Variance_256.3,Texture_Organoid_Mito_Contrast_256.3,Texture_Organoid_Mito_Sum.Average_256.3,Texture_Organoid_Mito_Variance_256.3,Area.Size.Shape_Organoid_EXTENT,Area.Size.Shape_Organoid_EULER.NUMBER,Area.Size.Shape_Organoid_SURFACE.AREA
0,SARCO361,3,uM,1,Selumetinib,G10-7,MEK1/2 inhibitor,Small Molecule,Kinase Inhibitor,G10,...,-1.109763,-0.947271,-1.076803,-0.786205,-0.851938,-1.127175,-0.957166,-0.848919,-0.187277,-1.168251
1,SARCO361,12,uM,1,Nilotinib,F6-4,tyrosine kinase inhibitor,Small Molecule,Investigational,F6,...,-1.107423,-0.912819,-1.075108,-0.780288,-0.833558,-1.12575,-0.952059,-1.24354,3.975833,-1.145283
2,SARCO361,3,uM,10,Binimetinib,C8-7,MEK1/2 inhibitor,Small Molecule,Kinase Inhibitor,C8,...,-1.109387,-0.942641,-1.076499,-0.785398,-0.849212,-1.126907,-0.956399,-1.214684,2.7269,-1.155802
3,SARCO361,45,uM,1,Everolimus,C5-4,mTOR inhibitor,Small Molecule,Kinase Inhibitor,C5,...,0.651113,-0.215303,0.0427,-0.209479,0.447558,0.588238,0.648497,-0.180141,-0.365696,2.178146
4,SARCO361,5,uM,10,Trametinib,F10-3,MEK1/2 inhibitor,Small Molecule,Kinase Inhibitor,F10,...,-0.935827,-0.537656,-0.892315,-0.535576,-0.417425,-0.906514,-0.523087,2.951274,-0.544115,-1.131081


In [9]:
# Columns that describe an organoid (identity, treatment, plate position)
# rather than measure it.
# NOTE(review): "single_cell_count" is not listed here, so it is treated as a
# feature and its median appears in the aggregated outputs — confirm that is
# intended.
organoid_metadata_columns = [
    "patient",
    "object_id",
    "unit",
    "dose",
    "treatment",
    "image_set",
    "Target",
    "Class",
    "Therapeutic Categories",
    "Well",
    "parent_organoid",
    "MOA",
]
# Every remaining column is treated as a feature; column order is preserved.
organoidfeatures_columns = [
    col for col in organoid_fs.columns if col not in organoid_metadata_columns
]
# Drop the *organoid* metadata list. This previously dropped the single-cell
# list, which lacks "MOA" and tied this cell to an unrelated variable.
# Metadata names absent from the frame are skipped rather than raising.
organoid_features_df = organoid_fs.drop(
    columns=organoid_metadata_columns, errors="ignore"
)

In [10]:
# Treatment annotations carried along in both stratifications.
organoid_treatment_strata = [
    "treatment",
    "Target",
    "Class",
    "Therapeutic Categories",
]
# Arguments shared by both organoid aggregations: median of each feature.
organoid_aggregate_kwargs = {
    "population_df": organoid_fs,
    "features": organoidfeatures_columns,
    "operation": "median",
}

# stratification approach #1: one profile per well
organoid_well_agg = aggregate(
    strata=["Well", *organoid_treatment_strata],
    **organoid_aggregate_kwargs,
)
organoid_well_agg.to_parquet(organoid_agg_well_output_path, index=False)

# stratification approach #2: one profile per treatment (a.k.a. consensus)
organoid_consensus = aggregate(
    strata=list(organoid_treatment_strata),
    **organoid_aggregate_kwargs,
)
organoid_consensus.to_parquet(organoid_consensus_output_path, index=False)

In [11]:
# Preview the well-level organoid aggregation.
organoid_well_agg.head()

Unnamed: 0,Well,treatment,Target,Class,Therapeutic Categories,single_cell_count,Colocalization_Organoid_AGP.BF_MAX.CORRELATION.COEFF,Colocalization_Organoid_AGP.BF_MIN.MANDERS.COEFF.M2,Colocalization_Organoid_AGP.BF_MIN.OVERLAP.COEFF,Colocalization_Organoid_AGP.BF_MIN.K2,...,Texture_Organoid_DNA_Sum.Variance_256.3,Texture_Organoid_ER_Contrast_256.3,Texture_Organoid_ER_Sum.Average_256.3,Texture_Organoid_ER_Variance_256.3,Texture_Organoid_Mito_Contrast_256.3,Texture_Organoid_Mito_Sum.Average_256.3,Texture_Organoid_Mito_Variance_256.3,Area.Size.Shape_Organoid_EXTENT,Area.Size.Shape_Organoid_EULER.NUMBER,Area.Size.Shape_Organoid_SURFACE.AREA
0,C10,Trametinib,MEK1/2 inhibitor,Small Molecule,Kinase Inhibitor,,-0.702124,-0.111418,-0.378393,0.975233,...,-0.895035,0.474909,-0.476254,-0.388394,0.617448,-0.543824,-0.304456,0.589525,-0.276486,-0.218371
1,C11,Staurosporine,,,,6.0,-0.142355,0.147213,-0.438721,0.772732,...,-0.250226,-0.322065,-0.003127,0.007014,-0.021853,0.437657,0.308168,2.33373,-0.544115,-0.806159
2,C2,Staurosporine,,,,1.0,0.159843,-0.721864,-0.521371,-0.097855,...,-0.75624,-0.50978,-0.72241,-0.69043,0.035457,-0.470381,-0.592172,-0.647005,1.240075,-0.663832
3,C3,Onalespib,HSP90 inhibitor,Small Molecule,Investigational,6.0,-0.223045,-0.551922,-0.24066,0.00251,...,-0.917505,-0.408504,-0.856135,-0.622091,0.016535,-0.705885,-0.420016,-0.554384,0.110088,-0.697587
4,C4,DMSO,Control,Control,Control,6.0,-0.096943,0.717542,-0.14212,-0.385034,...,-0.469886,0.269059,-0.616152,-0.122494,-0.500012,-0.707071,-0.495633,-0.335279,-0.425169,-0.739511


In [12]:
# Preview the treatment-level (consensus) organoid aggregation.
organoid_consensus.head()

Unnamed: 0,treatment,Target,Class,Therapeutic Categories,single_cell_count,Colocalization_Organoid_AGP.BF_MAX.CORRELATION.COEFF,Colocalization_Organoid_AGP.BF_MIN.MANDERS.COEFF.M2,Colocalization_Organoid_AGP.BF_MIN.OVERLAP.COEFF,Colocalization_Organoid_AGP.BF_MIN.K2,Colocalization_Organoid_AGP.BF_MEAN.MANDERS.COEFF.COSTES.M1,...,Texture_Organoid_DNA_Sum.Variance_256.3,Texture_Organoid_ER_Contrast_256.3,Texture_Organoid_ER_Sum.Average_256.3,Texture_Organoid_ER_Variance_256.3,Texture_Organoid_Mito_Contrast_256.3,Texture_Organoid_Mito_Sum.Average_256.3,Texture_Organoid_Mito_Variance_256.3,Area.Size.Shape_Organoid_EXTENT,Area.Size.Shape_Organoid_EULER.NUMBER,Area.Size.Shape_Organoid_SURFACE.AREA
0,Binimetinib,MEK1/2 inhibitor,Small Molecule,Kinase Inhibitor,2.0,0.108556,-0.059387,-0.273709,-0.212329,-0.582425,...,-0.515496,-0.379363,-0.473104,-0.292845,-0.105459,-0.434973,-0.25278,1.158796,-0.484642,-0.787673
1,Cabozantinib,receptor tyrosine kinase inhibitor,Small Molecule,Kinase Inhibitor,6.0,-0.242231,-0.509574,-0.20177,-0.189627,-0.580275,...,-0.490876,0.017371,-0.394175,-0.177493,0.060618,-0.264256,-0.031876,-0.52106,-0.306223,-0.169901
2,Copanlisib,PI3K inhibitor,Small Molecule,Kinase Inhibitor,3.0,0.152561,-0.507652,-0.604403,-0.324748,-0.524391,...,-0.797712,-0.748342,-0.734517,-0.633432,-0.345864,-0.565164,-0.573768,0.625863,-0.484642,-0.922188
3,DMSO,Control,Control,Control,5.5,0.007301,-0.260941,-0.223622,-0.290239,-0.578616,...,-0.17509,-0.335327,-0.075388,-0.237172,-0.433695,-0.227,-0.452151,-0.315895,-0.425169,-0.256043
4,Digoxin,Na+/K+ pump inhibitor,Small Molecule,Cardiac Glycosides,3.5,0.1486,0.371397,-0.452256,0.035579,-0.581458,...,-0.737107,-0.623579,-0.713981,-0.595866,-0.480576,-0.495751,-0.575939,1.664419,-0.484642,-0.809067
