This notebook performs profile aggregation.

In [None]:
import argparse
import pathlib
import sys

import pandas as pd
from pycytominer import aggregate

# Locate the repository root: the current working directory itself, or its
# nearest ancestor, that contains a ".git" directory.
cwd = pathlib.Path.cwd()
root_dir = next(
    (
        candidate
        for candidate in (cwd, *cwd.parents)
        if (candidate / ".git").is_dir()
    ),
    None,
)
if root_dir is None:
    # Fail with an explicit message rather than the opaque TypeError that
    # `None / "utils"` would raise on the next statement.
    raise FileNotFoundError(
        f"Could not find a directory containing '.git' at or above {cwd}"
    )
# Make the helper modules stored in <root>/utils importable.
sys.path.append(str(root_dir / "utils"))
from notebook_init_utils import bandicoot_check, init_notebook
from segmentation_init_utils import parse_segmentation_args

# init_notebook() returns the project root and a flag telling whether this
# code is running interactively in a notebook.
root_dir, in_notebook = init_notebook()

# Candidate mount point handed to bandicoot_check together with the project
# root; the value it returns is used as the base directory for all profile
# paths below.
# NOTE(review): presumably bandicoot_check falls back to root_dir when the
# mount is unavailable - confirm against utils/notebook_init_utils.
bandicoot_mount_path = pathlib.Path("/home/lippincm/mnt/bandicoot").resolve()
profile_base_dir = bandicoot_check(bandicoot_mount_path, root_dir)

In [None]:
# Choose the patient to process: a fixed example when run interactively,
# otherwise the value parsed from the command line.
if in_notebook:
    patient = "SARCO361"
else:
    args = parse_segmentation_args()
    patient = args["patient"]

In [None]:
# pathing
# All inputs and outputs of this notebook live under the patient's
# image_based_profiles directory.
profiles_root = f"{profile_base_dir}/data/{patient}/image_based_profiles"
feature_selected_dir = f"{profiles_root}/4.feature_selected_profiles"
aggregated_dir = f"{profiles_root}/5.aggregated_profiles"

# input paths (strict resolution: raises if a file does not exist)
sc_fs_path = pathlib.Path(feature_selected_dir, "sc_fs.parquet").resolve(strict=True)
organoid_fs_path = pathlib.Path(feature_selected_dir, "organoid_fs.parquet").resolve(
    strict=True
)

# output paths for the single-cell aggregations
sc_agg_well_output_path = pathlib.Path(
    aggregated_dir, "sc_agg_well_level.parquet"
).resolve()
sc_agg_well_parent_organoid_output_path = pathlib.Path(
    aggregated_dir, "sc_agg_parent_organoid_level.parquet"
).resolve()
sc_consensus_output_path = pathlib.Path(aggregated_dir, "sc_consensus.parquet").resolve()

# output paths for the organoid aggregations
organoid_agg_well_output_path = pathlib.Path(
    aggregated_dir, "organoid_agg_well_level.parquet"
).resolve()
organoid_consensus_output_path = pathlib.Path(
    aggregated_dir, "organoid_consensus.parquet"
).resolve()

# every output shares the same parent directory, so creating it once suffices
organoid_consensus_output_path.parent.mkdir(parents=True, exist_ok=True)

In [4]:
# load the feature-selected single-cell and organoid profiles
sc_fs = pd.read_parquet(path=sc_fs_path)
organoid_fs = pd.read_parquet(path=organoid_fs_path)

### Aggregate the single cell profiles
We will aggregate the profiles using a few different stratifications:
1. Well
2. Well and parent organoid
3. Treatment - i.e. the consensus profile for each treatment

In [5]:
# preview the first rows of the single-cell profiles
sc_fs.head(n=5)

Unnamed: 0,patient,object_id,unit,dose,treatment,image_set,Target,Class,Therapeutic Categories,Well,...,Texture_Cytoplasm_ER_Contrast_256.3,Texture_Cytoplasm_ER_Sum.Average_256.3,Texture_Cytoplasm_Mito_Angular.Second.Moment_256.3,Texture_Cytoplasm_Mito_Correlation_256.3,Texture_Cytoplasm_Mito_Entropy_256.3,Texture_Cytoplasm_Mito_Sum.Average_256.3,Area.Size.Shape_Cytoplasm_VOLUME,Area.Size.Shape_Cytoplasm_EXTENT,Area.Size.Shape_Cytoplasm_EULER.NUMBER,Area.Size.Shape_Cytoplasm_SURFACE.AREA
0,SARCO361,63,uM,1,Nilotinib,F6-4,tyrosine kinase inhibitor,Small Molecule,Investigational,F6,...,-0.480502,-0.185199,-0.23843,1.106339,0.462662,0.027847,-0.172863,1.447654,0.029134,-0.517441
1,SARCO361,127,uM,1,Nilotinib,F6-4,tyrosine kinase inhibitor,Small Molecule,Investigational,F6,...,0.796857,1.439825,-0.81506,1.509295,0.932089,-0.245931,0.323172,1.797869,-0.810362,-0.246756
2,SARCO361,191,uM,1,Nilotinib,F6-4,tyrosine kinase inhibitor,Small Molecule,Investigational,F6,...,-0.933175,-0.799619,0.152032,-0.950358,-0.172071,0.150518,-0.528759,0.303446,-0.250698,-0.46411
3,SARCO361,19,uM,1,Everolimus,C5-4,mTOR inhibitor,Small Molecule,Kinase Inhibitor,C5,...,-0.521002,-0.654759,0.778598,0.209751,-0.780719,-1.056773,-0.05598,-1.005302,0.308966,0.941926
4,SARCO361,39,uM,1,Everolimus,C5-4,mTOR inhibitor,Small Molecule,Kinase Inhibitor,C5,...,0.880093,-0.227139,0.761749,-0.051743,-0.798949,-0.329617,-0.047708,-1.477648,-3.888512,3.051615


In [None]:
# Annotation (non-feature) columns of the single-cell profiles. Their names do
# not contain "Metadata", so a substring match alone would leave them in the
# feature list (including the columns later used as aggregation strata).
# NOTE(review): the list is taken from the columns visible in this notebook;
# extend it if the profiles carry further annotation columns.
sc_annotation_columns = [
    "patient",
    "object_id",
    "unit",
    "dose",
    "treatment",
    "image_set",
    "Target",
    "Class",
    "Therapeutic Categories",
    "Well",
    "parent_organoid",
    "Area.Size.Shape_Cell_CENTER.X",
    "Area.Size.Shape_Cell_CENTER.Y",
    "Area.Size.Shape_Cell_CENTER.Z",
]

# Metadata = anything tagged "Metadata" plus the known annotation columns that
# are actually present in the table.
sc_metadata_columns = [
    col
    for col in sc_fs.columns
    if "Metadata" in col or col in sc_annotation_columns
]

# Features are the remaining numeric columns; a median cannot be computed on
# string columns, so any unlisted non-numeric column is excluded as well.
sc_numeric_columns = set(sc_fs.select_dtypes(include="number").columns)
sc_features_columns = [
    col
    for col in sc_fs.columns
    if col not in sc_metadata_columns and col in sc_numeric_columns
]
sc_features_df = sc_fs.loc[:, sc_features_columns]

In [7]:
# stratification approach #1
sc_well_agg = aggregate(
    population_df=sc_fs,
    strata=["Well", "treatment", "Target", "Class", "Therapeutic Categories"],
    features=sc_features_columns,
    operation="median",
)
sc_well_agg.to_parquet(sc_agg_well_output_path, index=False)

# stratification approach #2
sc_well_parent_organoid_agg = aggregate(
    population_df=sc_fs,
    strata=[
        "Well",
        "parent_organoid",
        "treatment",
        "Target",
        "Class",
        "Therapeutic Categories",
    ],
    features=sc_features_columns,
    operation="median",
)
sc_well_parent_organoid_agg.to_parquet(
    sc_agg_well_parent_organoid_output_path, index=False
)
# stratification approach #3
sc_consensus = aggregate(  # a.k.a. consensus
    population_df=sc_fs,
    strata=["treatment", "Target", "Class", "Therapeutic Categories"],
    features=sc_features_columns,
    operation="median",
)
sc_consensus.to_parquet(sc_consensus_output_path, index=False)

### Aggregate the organoid profiles
We will aggregate the profiles using two different stratifications:
1. Well
2. Treatment - i.e. the consensus profile for each treatment

In [8]:
# preview the first rows of the organoid profiles
organoid_fs.head(n=5)

Unnamed: 0,patient,object_id,unit,dose,treatment,image_set,Target,Class,Therapeutic Categories,Well,...,Texture_Organoid_DNA_Sum.Variance_256.3,Texture_Organoid_ER_Contrast_256.3,Texture_Organoid_ER_Information.Measure.of.Correlation.2_256.3,Texture_Organoid_ER_Sum.Average_256.3,Texture_Organoid_ER_Variance_256.3,Texture_Organoid_Mito_Contrast_256.3,Texture_Organoid_Mito_Correlation_256.3,Texture_Organoid_Mito_Entropy_256.3,Texture_Organoid_Mito_Sum.Average_256.3,Texture_Organoid_Mito_Variance_256.3
0,SARCO361,3,uM,1,Selumetinib,G10-7,MEK1/2 inhibitor,Small Molecule,Kinase Inhibitor,G10,...,-1.109763,-0.947271,-1.955246,-1.076803,-0.786205,-0.851938,-2.453619,-1.189122,-1.127175,-0.957166
1,SARCO361,12,uM,1,Nilotinib,F6-4,tyrosine kinase inhibitor,Small Molecule,Investigational,F6,...,-1.107423,-0.912819,-1.949752,-1.075108,-0.780288,-0.833558,-2.431424,-1.188376,-1.12575,-0.952059
2,SARCO361,3,uM,10,Binimetinib,C8-7,MEK1/2 inhibitor,Small Molecule,Kinase Inhibitor,C8,...,-1.109387,-0.942641,-1.952621,-1.076499,-0.785398,-0.849212,-2.415649,-1.188926,-1.126907,-0.956399
3,SARCO361,45,uM,1,Everolimus,C5-4,mTOR inhibitor,Small Molecule,Kinase Inhibitor,C5,...,0.651113,-0.215303,0.487508,0.0427,-0.209479,0.447558,0.540493,0.122834,0.588238,0.648497
4,SARCO361,5,uM,10,Trametinib,F10-3,MEK1/2 inhibitor,Small Molecule,Kinase Inhibitor,F10,...,-0.935827,-0.537656,-1.079954,-0.892315,-0.535576,-0.417425,0.300017,-1.08809,-0.906514,-0.523087


In [None]:
# Annotation (non-feature) columns of the organoid profiles. Their names do
# not contain "Metadata", so a substring match alone would leave them in the
# feature list (including the columns later used as aggregation strata).
# NOTE(review): the list is taken from the columns visible in this notebook;
# extend it if the profiles carry further annotation columns.
organoid_annotation_columns = [
    "patient",
    "object_id",
    "unit",
    "dose",
    "treatment",
    "image_set",
    "Target",
    "Class",
    "Therapeutic Categories",
    "Well",
    "Area.Size.Shape_Organoid_CENTER.X",
    "Area.Size.Shape_Organoid_CENTER.Y",
    "Area.Size.Shape_Organoid_CENTER.Z",
]

# Metadata = anything tagged "Metadata" plus the known annotation columns that
# are actually present in the table.
organoid_metadata_columns = [
    col
    for col in organoid_fs.columns
    if "Metadata" in col or col in organoid_annotation_columns
]

# Features are the remaining numeric columns; a median cannot be computed on
# string columns, so any unlisted non-numeric column is excluded as well.
# (The original cell assigned this list twice with identical code.)
organoid_numeric_columns = set(organoid_fs.select_dtypes(include="number").columns)
organoid_features_columns = [
    col
    for col in organoid_fs.columns
    if col not in organoid_metadata_columns and col in organoid_numeric_columns
]
organoid_features_df = organoid_fs.loc[:, organoid_features_columns]

In [None]:
# stratification approach #1
organoid_well_agg = aggregate(
    population_df=organoid_fs,
    strata=["Well", "treatment", "Target", "Class", "Therapeutic Categories"],
    features=organoid_features_columns,
    operation="median",
)
organoid_well_agg.to_parquet(organoid_agg_well_output_path, index=False)

# stratification approach #2
organoid_consensus = aggregate(  # a.k.a. consensus
    population_df=organoid_fs,
    strata=["treatment", "Target", "Class", "Therapeutic Categories"],
    features=organoid_features_columns,
    operation="median",
)
organoid_consensus.to_parquet(organoid_consensus_output_path, index=False)

In [11]:
# preview the well-level organoid aggregation
organoid_well_agg.head(n=5)

Unnamed: 0,Well,treatment,Target,Class,Therapeutic Categories,single_cell_count,Area.Size.Shape_Organoid_EXTENT,Area.Size.Shape_Organoid_EULER.NUMBER,Area.Size.Shape_Organoid_EQUIVALENT.DIAMETER,Area.Size.Shape_Organoid_SURFACE.AREA,...,Texture_Organoid_DNA_Sum.Variance_256.3,Texture_Organoid_ER_Contrast_256.3,Texture_Organoid_ER_Information.Measure.of.Correlation.2_256.3,Texture_Organoid_ER_Sum.Average_256.3,Texture_Organoid_ER_Variance_256.3,Texture_Organoid_Mito_Contrast_256.3,Texture_Organoid_Mito_Correlation_256.3,Texture_Organoid_Mito_Entropy_256.3,Texture_Organoid_Mito_Sum.Average_256.3,Texture_Organoid_Mito_Variance_256.3
0,C10,Trametinib,MEK1/2 inhibitor,Small Molecule,Kinase Inhibitor,,0.589525,-0.276486,-0.248119,-0.218371,...,-0.895035,0.474909,-0.76981,-0.476254,-0.388394,0.617448,-1.096899,-0.61864,-0.543824,-0.304456
1,C11,Staurosporine,Apoptosis induction,Small Molecule,Experimental,6.0,2.33373,-0.544115,0.369018,-0.806159,...,-0.250226,-0.322065,0.517574,-0.003127,0.007014,-0.021853,-0.222301,0.531436,0.437657,0.308168
2,C2,Staurosporine,Apoptosis induction,Small Molecule,Experimental,1.0,-0.647005,1.240075,-0.59805,-0.663832,...,-0.75624,-0.50978,-0.36301,-0.72241,-0.69043,0.035457,-0.88318,-0.473739,-0.470381,-0.592172
3,C3,Onalespib,HSP90 inhibitor,Small Molecule,Investigational,6.0,-0.554384,0.110088,-0.782054,-0.697587,...,-0.917505,-0.408504,-0.955972,-0.856135,-0.622091,0.016535,-0.677638,-0.966458,-0.705885,-0.420016
4,C4,DMSO,Control,Control,Control,6.0,-0.335279,-0.425169,-0.186652,-0.739511,...,-0.469886,0.269059,-0.296264,-0.616152,-0.122494,-0.500012,0.279797,-0.385757,-0.707071,-0.495633


In [12]:
# preview the treatment-level (consensus) organoid aggregation
organoid_consensus.head(n=5)

Unnamed: 0,treatment,Target,Class,Therapeutic Categories,single_cell_count,Area.Size.Shape_Organoid_EXTENT,Area.Size.Shape_Organoid_EULER.NUMBER,Area.Size.Shape_Organoid_EQUIVALENT.DIAMETER,Area.Size.Shape_Organoid_SURFACE.AREA,Colocalization_Organoid_AGP.ER_MEDIAN.CORRELATION.COEFF,...,Texture_Organoid_DNA_Sum.Variance_256.3,Texture_Organoid_ER_Contrast_256.3,Texture_Organoid_ER_Information.Measure.of.Correlation.2_256.3,Texture_Organoid_ER_Sum.Average_256.3,Texture_Organoid_ER_Variance_256.3,Texture_Organoid_Mito_Contrast_256.3,Texture_Organoid_Mito_Correlation_256.3,Texture_Organoid_Mito_Entropy_256.3,Texture_Organoid_Mito_Sum.Average_256.3,Texture_Organoid_Mito_Variance_256.3
0,Binimetinib,MEK1/2 inhibitor,Small Molecule,Kinase Inhibitor,2.0,1.158796,-0.484642,0.076143,-0.787673,0.454419,...,-0.515496,-0.379363,-0.270385,-0.473104,-0.292845,-0.105459,0.194937,-0.415425,-0.434973,-0.25278
1,Cabozantinib,receptor tyrosine kinase inhibitor,Small Molecule,Kinase Inhibitor,6.0,-0.52106,-0.306223,-0.028757,-0.169901,0.407492,...,-0.490876,0.017371,-0.419601,-0.394175,-0.177493,0.060618,0.199733,-0.598814,-0.264256,-0.031876
2,Copanlisib,PI3K inhibitor,Small Molecule,Kinase Inhibitor,3.0,0.625863,-0.484642,-0.246301,-0.922188,0.05675,...,-0.797712,-0.748342,-0.432698,-0.734517,-0.633432,-0.345864,-0.324022,-0.460562,-0.565164,-0.573768
3,DMSO,Control,Control,Control,5.5,-0.315895,-0.425169,0.196636,-0.256043,0.48007,...,-0.17509,-0.335327,0.100941,-0.075388,-0.237172,-0.433695,0.377637,-0.035983,-0.227,-0.452151
4,Digoxin,Na+/K+ pump inhibitor,Small Molecule,Cardiac Glycosides,3.5,1.664419,-0.484642,-0.42292,-0.809067,0.041986,...,-0.737107,-0.623579,-0.430571,-0.713981,-0.595866,-0.480576,-0.169313,-0.605395,-0.495751,-0.575939
