This notebook merges the aggregated single-cell (sc) and organoid profiles at three levels: parent organoid, well, and consensus.

In [1]:
import argparse
import pathlib

import pandas as pd
from pycytominer import aggregate

# Detect whether this code is running under IPython/Jupyter.
# `get_ipython` is not a name in a plain Python process, so looking it up
# raises NameError there; that is the signal used to pick script mode.
try:
    cfg = get_ipython().config
except NameError:
    in_notebook = False
else:
    in_notebook = True

In [2]:
# Choose the patient to process: a fixed ID when run interactively in a
# notebook, otherwise the required --patient command-line argument.
if in_notebook:
    patient = "NF0014"
else:
    argparser = argparse.ArgumentParser()
    argparser.add_argument(
        "--patient",
        type=str,
        required=True,
        help="Patient ID to process, e.g. 'P01'",
    )
    args = argparser.parse_args()
    patient = args.patient

### Merge the sc and organoid profiles after aggregation
1. The single-cell parent organoid aggregated profile is merged with the fs organoid profile
2. The well level profiles are merged together
3. The consensus profiles are merged together


In [3]:
# Group the input and output paths by the type of aggregation being merged.
#
# Every profile for a patient is read from and written to the same directory,
# so the directory is built once and each file name is joined onto it.
# Input paths are resolved with strict=True so that a missing file raises
# FileNotFoundError here, before any data is read. Output paths may not exist
# yet, so they are resolved without strict.
profiles_dir = pathlib.Path(f"../../data/{patient}/image_based_profiles")

#######################################################################
# 1. The single-cell parent organoid aggregated profile is merged with the fs organoid profile
organoid_fs_path = (profiles_dir / "3.organoid_fs_profiles.parquet").resolve(
    strict=True
)
sc_agg_well_parent_organoid_path = (
    profiles_dir / "4.sc_agg_well_parent_organoid_level_profiles.parquet"
).resolve(strict=True)

# output merged path
organoid_agg_well_parent_organoid_path = (
    profiles_dir / "5.sc-organoid-sc_well_agg_parent_organoid_level_profiles.parquet"
).resolve()

########################################################################

# 2. The well level profiles are merged together
sc_agg_well_path = (profiles_dir / "4.sc_agg_well_level_profiles.parquet").resolve(
    strict=True
)

organoid_agg_well_path = (
    profiles_dir / "4.organoid_agg_well_level_profiles.parquet"
).resolve(strict=True)

# output merged path
organoid_agg_well_merge_path = (
    profiles_dir / "5.sc-organoid-sc_well_agg_merge_profiles.parquet"
).resolve()

###################################################################################

# 3. The consensus profiles are merged together

sc_consensus_path = (profiles_dir / "4.sc_consensus_profiles.parquet").resolve(
    strict=True
)

organoid_consensus_path = (
    profiles_dir / "4.organoid_consensus_profiles.parquet"
).resolve(strict=True)

# output merged path
organoid_consensus_merge_path = (
    profiles_dir / "5.sc-organoid_consensus_profiles.parquet"
).resolve()

###############################################################################

In [4]:
# Organoid-level merge: pair each single-cell aggregate row with the organoid
# row in the same Well whose object_id equals the row's parent_organoid.
organoid_fs = pd.read_parquet(organoid_fs_path)
sc_agg_well_parent_organoid = pd.read_parquet(sc_agg_well_parent_organoid_path)

# Inner join (the pandas default): rows with no match on the other side are dropped.
sc_agg_well_parent_organoid_merge = pd.merge(
    sc_agg_well_parent_organoid,
    organoid_fs,
    how="inner",
    left_on=["Well", "parent_organoid"],
    right_on=["Well", "object_id"],
)

# Save the merged profile without the DataFrame index, then preview it.
sc_agg_well_parent_organoid_merge.to_parquet(
    organoid_agg_well_parent_organoid_path,
    index=False,
)
sc_agg_well_parent_organoid_merge.head()

Unnamed: 0,Well,parent_organoid,Area.Size.Shape_Nuclei_EXTENT,Colocalization_Nuclei_AGP.BF_MIN.CORRELATION.COEFF,Colocalization_Nuclei_AGP.BF_MIN.OVERLAP.COEFF,Colocalization_Nuclei_AGP.BF_MEDIAN.K2,Colocalization_Nuclei_AGP.BF_MIN.MANDERS.COEFF.COSTES.M1,Colocalization_Nuclei_AGP.ER_MEAN.CORRELATION.COEFF,Colocalization_Nuclei_AGP.ER_MEDIAN.OVERLAP.COEFF,Colocalization_Nuclei_AGP.ER_MEAN.K1,...,Texture_Organoid_BF_Variance_256.1,Texture_Organoid_DNA_Contrast_256.1,Texture_Organoid_DNA_Variance_256.1,Texture_Organoid_ER_Contrast_256.1,Texture_Organoid_ER_Information.Measure.of.Correlation.1_256.1,Texture_Organoid_ER_Sum.Average_256.1,Texture_Organoid_ER_Variance_256.1,Texture_Organoid_Mito_Contrast_256.1,Texture_Organoid_Mito_Sum.Average_256.1,Texture_Organoid_Mito_Variance_256.1
0,C10,20,0.03253,0.47466,-0.225378,0.439943,0.217904,0.333916,-0.121818,-0.465793,...,-2.14483,-0.791422,-1.250711,-0.624226,-2.11996,-1.37905,-0.951777,-0.543091,-1.49387,-0.814296
1,C10,44,0.260439,0.272242,-0.224774,0.400522,0.87118,-0.443557,-0.127671,-0.691352,...,-1.262567,-0.947948,-0.76161,-0.677544,-1.477256,-0.965806,-0.764784,-0.740854,-0.42339,0.345606
2,C11,24,1.069454,-1.096034,-0.224948,0.167213,-1.325093,1.615419,-0.128931,-0.526353,...,-1.49294,-1.238702,-1.289316,-0.57108,-2.782956,-1.053659,-0.78533,-0.505358,-0.79139,-0.147683
3,C11,30,0.756402,-1.176695,-0.225016,0.353732,-1.325093,1.460497,-0.129286,-0.604075,...,-1.045857,-1.218586,-1.249619,-0.598743,-2.739134,-1.073709,-0.834201,-0.34198,-0.647428,-0.001812
4,C2,6,-0.944134,-0.111287,-0.213888,1.727864,-1.291833,2.149684,-0.130097,-0.828387,...,-1.392898,-0.576882,-0.537603,-0.684635,-1.928213,-1.240097,-0.975188,-0.877364,-1.217033,-0.870099


In [5]:
# Well-level merge: join the single-cell and organoid well aggregates on Well.
sc_agg_well = pd.read_parquet(sc_agg_well_path)
organoid_agg_well = pd.read_parquet(organoid_agg_well_path)

# Inner join (the pandas default): only wells present in both tables are kept.
sc_agg_well_merge = pd.merge(
    sc_agg_well,
    organoid_agg_well,
    how="inner",
    on=["Well"],
)

# Save the merged profile without the DataFrame index, then preview it.
sc_agg_well_merge.to_parquet(organoid_agg_well_merge_path, index=False)
sc_agg_well_merge.head()

Unnamed: 0,Well,Area.Size.Shape_Nuclei_EXTENT,Colocalization_Nuclei_AGP.BF_MIN.CORRELATION.COEFF,Colocalization_Nuclei_AGP.BF_MIN.OVERLAP.COEFF,Colocalization_Nuclei_AGP.BF_MEDIAN.K2,Colocalization_Nuclei_AGP.BF_MIN.MANDERS.COEFF.COSTES.M1,Colocalization_Nuclei_AGP.ER_MEAN.CORRELATION.COEFF,Colocalization_Nuclei_AGP.ER_MEDIAN.OVERLAP.COEFF,Colocalization_Nuclei_AGP.ER_MEAN.K1,Colocalization_Nuclei_AGP.ER_MIN.K2,...,Texture_Organoid_BF_Variance_256.1,Texture_Organoid_DNA_Contrast_256.1,Texture_Organoid_DNA_Variance_256.1,Texture_Organoid_ER_Contrast_256.1,Texture_Organoid_ER_Information.Measure.of.Correlation.1_256.1,Texture_Organoid_ER_Sum.Average_256.1,Texture_Organoid_ER_Variance_256.1,Texture_Organoid_Mito_Contrast_256.1,Texture_Organoid_Mito_Sum.Average_256.1,Texture_Organoid_Mito_Variance_256.1
0,C10,0.24955,0.169837,-0.224742,0.651267,0.083261,-0.439232,-0.127535,-0.64798,-0.321009,...,-1.703699,-0.869685,-1.006161,-0.650885,-1.798608,-1.172428,-0.85828,-0.641972,-0.95863,-0.234345
1,C11,0.912928,-1.136364,-0.225,0.251055,-1.325093,1.537958,-0.129109,-0.589686,-1.383946,...,-1.269399,-1.228644,-1.269467,-0.584912,-2.761045,-1.063684,-0.809766,-0.423669,-0.719409,-0.074747
2,C2,-0.944134,-0.111287,-0.213888,1.727864,-1.291833,2.149684,-0.130097,-0.828387,-0.813906,...,-1.392898,-0.576882,-0.537603,-0.684635,-1.928213,-1.240097,-0.975188,-0.877364,-1.217033,-0.870099
3,C3,-0.094436,-0.262012,-0.224826,-0.205275,-0.339518,1.19206,-0.129253,-0.588332,-0.915991,...,-0.498791,-0.433155,-1.183203,-0.287713,0.07769,-0.670042,-0.886138,1.74025,0.388875,-0.328403
4,C4,0.440495,-0.035523,-0.22466,-0.238421,0.871217,-0.094054,-0.107299,-0.095976,0.129004,...,0.925085,1.720782,1.271465,1.513276,1.033728,0.756794,0.302486,-0.537314,-0.741116,-1.032983


In [6]:
# Consensus-level merge: join the single-cell and organoid consensus profiles
# on the treatment column.
sc_consensus = pd.read_parquet(sc_consensus_path)
organoid_consensus = pd.read_parquet(organoid_consensus_path)

# Inner join (the pandas default): only treatments present in both tables are kept.
sc_consensus_merge = pd.merge(
    sc_consensus,
    organoid_consensus,
    how="inner",
    on=["treatment"],
)

# Save the merged profile without the DataFrame index, then preview it.
sc_consensus_merge.to_parquet(organoid_consensus_merge_path, index=False)
sc_consensus_merge.head()

Unnamed: 0,treatment,Area.Size.Shape_Nuclei_EXTENT,Colocalization_Nuclei_AGP.BF_MIN.CORRELATION.COEFF,Colocalization_Nuclei_AGP.BF_MIN.OVERLAP.COEFF,Colocalization_Nuclei_AGP.BF_MEDIAN.K2,Colocalization_Nuclei_AGP.BF_MIN.MANDERS.COEFF.COSTES.M1,Colocalization_Nuclei_AGP.ER_MEAN.CORRELATION.COEFF,Colocalization_Nuclei_AGP.ER_MEDIAN.OVERLAP.COEFF,Colocalization_Nuclei_AGP.ER_MEAN.K1,Colocalization_Nuclei_AGP.ER_MIN.K2,...,Texture_Organoid_BF_Variance_256.1,Texture_Organoid_DNA_Contrast_256.1,Texture_Organoid_DNA_Variance_256.1,Texture_Organoid_ER_Contrast_256.1,Texture_Organoid_ER_Information.Measure.of.Correlation.1_256.1,Texture_Organoid_ER_Sum.Average_256.1,Texture_Organoid_ER_Variance_256.1,Texture_Organoid_Mito_Contrast_256.1,Texture_Organoid_Mito_Sum.Average_256.1,Texture_Organoid_Mito_Variance_256.1
0,Binimetinib,0.275096,-0.154379,-0.224587,0.608598,0.053906,0.500932,-0.122673,-0.699251,0.042775,...,-0.818273,-0.909273,-1.12619,-0.680504,-0.513943,-1.088284,-0.907953,-0.633646,-0.722304,-0.340528
1,Cabozantinib,0.32918,-0.142624,-0.224661,-0.031776,-0.331916,0.237225,-0.122674,-0.612458,0.282566,...,-0.639507,-0.928523,-1.284898,-0.442474,-1.366897,-0.685942,-0.674219,-0.581858,-0.466476,-0.136768
2,Copanlisib,0.38281,-0.022008,-0.224689,0.885835,-0.027362,0.831204,-0.124853,-0.613907,-0.322652,...,-1.00613,-0.665874,-0.766402,-0.401909,-1.356574,-0.961208,-0.762546,0.440297,-0.650608,-0.305117
3,DMSO,0.358735,0.015825,-0.224567,0.107505,0.871217,-0.002969,-0.115328,-0.460623,0.096228,...,-0.002123,-0.358536,0.032497,-0.394669,-0.188289,-0.294784,-0.358874,-0.234232,-0.19462,-0.409733
4,Digoxin,0.456005,-0.002706,-0.226011,-0.410811,-0.195338,1.339546,-0.129317,-0.380297,-1.365735,...,-0.595122,-0.983785,-1.291815,-0.658006,-2.141181,-1.133485,-0.956781,-0.622903,-1.17674,-0.994933
