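This notebook annotates the combined single-cell and organoid image-based profiles for one patient.
Each profile is matched to its well in the platemap so that the sample metadata (treatment, dose, and unit) and the corresponding drug information are retained alongside the features.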


In [1]:
import argparse
import pathlib

import pandas as pd

# Locate the repository root by walking up from the current working directory.
# `.git` is a directory in a regular clone but a plain file ("gitfile") in linked
# worktrees and submodules, so test for existence rather than `is_dir()`.
cwd = pathlib.Path.cwd()

root_dir = next(
    (
        candidate
        for candidate in (cwd, *cwd.parents)
        if (candidate / ".git").exists()
    ),
    None,
)

# Check if a Git root directory was found
if root_dir is None:
    raise FileNotFoundError("No Git root directory found.")

# `get_ipython` is looked up as a global; a NameError means it is not defined,
# which this notebook treats as "running as a plain Python script".
try:
    cfg = get_ipython().config
    in_notebook = True
except NameError:
    in_notebook = False

In [2]:
# Choose the patient to process: a fixed default when run interactively,
# otherwise the value of the required --patient command-line argument.
if in_notebook:
    patient = "SARCO361"
else:
    argparser = argparse.ArgumentParser()
    argparser.add_argument(
        "--patient", type=str, required=True, help="Patient ID to process, e.g. 'P01'"
    )
    args = argparser.parse_args()
    patient = args.patient

In [3]:
def annotate_profiles(
    profile_df: pd.DataFrame,
    platemap_df: pd.DataFrame,
    patient: str,
    drug_information_df: "pd.DataFrame | None" = None,
) -> pd.DataFrame:
    """
    Annotate profiles with platemap metadata and drug information.

    The well of each profile is taken as the part of ``image_set`` before the
    first "-". Profiles are then left-joined to the platemap on that well and
    to the drug information table on the treatment name.

    Parameters
    ----------
    profile_df : pd.DataFrame
        Profile DataFrame containing an ``image_set`` column.
        Could be either single-cell or organoid profiles.
        This DataFrame is not modified.
    platemap_df : pd.DataFrame
        Platemap DataFrame containing ``well_position``, ``treatment``,
        ``dose``, and ``unit`` columns.
    patient : str
        Patient ID to annotate the profiles with.
    drug_information_df : pd.DataFrame, optional
        Drug information table with a ``Treatment`` column to join on.
        When omitted, it is read from
        ``<root_dir>/4.processing_image_based_profiles/data/drugs/drug_information.csv``,
        where ``root_dir`` is the module-level repository root.

    Returns
    -------
    pd.DataFrame
        A new DataFrame with ``patient`` as the first column, followed by the
        first original column, then ``unit``, ``dose``, ``treatment``, the
        remaining original columns (with ``Well`` placed at index 2 of the
        original layout), and finally the drug information columns other
        than ``Treatment``.
    """
    if drug_information_df is None:
        drug_information_df = pd.read_csv(
            pathlib.Path(
                f"{root_dir}/4.processing_image_based_profiles/data/drugs/drug_information.csv"
            )
        )

    # `assign` returns a new frame, so the caller's DataFrame is left untouched.
    annotated_df = profile_df.assign(
        Well=profile_df["image_set"].str.split("-").str[0]
    )
    annotated_df.insert(2, "Well", annotated_df.pop("Well"))

    # Left joins keep every profile row even if its well or treatment has no match.
    annotated_df = pd.merge(
        annotated_df,
        platemap_df[["well_position", "treatment", "dose", "unit"]],
        left_on="Well",
        right_on="well_position",
        how="left",
    ).drop(columns=["well_position"])
    annotated_df = annotated_df.merge(
        drug_information_df, how="left", left_on="treatment", right_on="Treatment"
    ).drop(columns=["Treatment"])

    # Each column is inserted at index 1, so the resulting order is the reverse
    # of the iteration order: unit, dose, treatment.
    for col in ["treatment", "dose", "unit"]:
        annotated_df.insert(1, col, annotated_df.pop(col))
    annotated_df.insert(0, "patient", patient)
    return annotated_df

In [4]:
# Directories for this patient's data, built once and reused below.
patient_data_dir = pathlib.Path(f"{root_dir}/data/{patient}")
combined_profiles_dir = patient_data_dir / "image_based_profiles/1.combined_profiles"
annotated_profiles_dir = patient_data_dir / "image_based_profiles/2.annotated_profiles"

# Inputs: strict resolution raises FileNotFoundError if a file is missing.
sc_merged_path = (combined_profiles_dir / "sc.parquet").resolve(strict=True)
organoid_merged_path = (combined_profiles_dir / "organoid.parquet").resolve(
    strict=True
)
platemap_path = (patient_data_dir / "platemap/platemap.csv").resolve(strict=True)

# Outputs: resolved non-strictly because they may not exist yet.
sc_annotated_output_path = (annotated_profiles_dir / "sc_anno.parquet").resolve()
organoid_annotated_output_path = (
    annotated_profiles_dir / "organoid_anno.parquet"
).resolve()

# Create the output directory up front (both outputs share the same parent).
organoid_annotated_output_path.parent.mkdir(parents=True, exist_ok=True)

In [5]:
# Load the combined single-cell and organoid profiles, in that order.
sc_merged, organoid_merged = (
    pd.read_parquet(profile_path)
    for profile_path in (sc_merged_path, organoid_merged_path)
)
# Load the platemap and preview its first rows.
platemap = pd.read_csv(platemap_path)
platemap.head(n=5)

Unnamed: 0,WellRow,WellCol,well_position,treatment,dose,unit
0,C,2,C2,Staurosporine,10,nM
1,D,2,D2,Digoxin,1,uM
2,E,2,E2,Digoxin,1,uM
3,F,2,F2,Onalespib,1,uM
4,G,2,G2,Staurosporine,10,nM


In [6]:
# Annotate both profile levels with the same platemap and patient ID.
sc_merged = annotate_profiles(
    profile_df=sc_merged, platemap_df=platemap, patient=patient
)
organoid_merged = annotate_profiles(
    profile_df=organoid_merged, platemap_df=platemap, patient=patient
)

In [7]:
# Preview the first rows of the annotated single-cell profiles.
sc_merged.head(n=5)

Unnamed: 0,patient,object_id,unit,dose,treatment,image_set,Well,parent_organoid,Colocalization_Nuclei_AGP.BF_MEAN.CORRELATION.COEFF,Colocalization_Nuclei_AGP.BF_MEDIAN.CORRELATION.COEFF,...,Area.Size.Shape_Cytoplasm_MAX.Y,Area.Size.Shape_Cytoplasm_MIN.Z,Area.Size.Shape_Cytoplasm_MAX.Z,Area.Size.Shape_Cytoplasm_EXTENT,Area.Size.Shape_Cytoplasm_EULER.NUMBER,Area.Size.Shape_Cytoplasm_EQUIVALENT.DIAMETER,Area.Size.Shape_Cytoplasm_SURFACE.AREA,Target,Class,Therapeutic Categories
0,SARCO361,63,uM,1,Nilotinib,F6-4,F6,-1,0.139756,0.139756,...,974.0,0.0,17.0,0.373167,-11.0,63.646595,1293.385132,tyrosine kinase inhibitor,Small Molecule,Investigational
1,SARCO361,127,uM,1,Nilotinib,F6-4,F6,12,0.128704,0.128704,...,510.0,0.0,17.0,0.404097,-20.0,72.924805,1622.490723,tyrosine kinase inhibitor,Small Molecule,Investigational
2,SARCO361,191,uM,1,Nilotinib,F6-4,F6,-1,-0.016275,-0.016275,...,1065.0,0.0,17.0,0.272115,-14.0,54.799179,1358.22522,tyrosine kinase inhibitor,Small Molecule,Investigational
3,SARCO361,19,uM,1,Everolimus,C5-4,C5,45,0.168109,0.168109,...,829.0,0.0,30.0,0.156531,-8.0,66.073349,3067.721436,mTOR inhibitor,Small Molecule,Kinase Inhibitor
4,SARCO361,39,uM,1,Everolimus,C5-4,C5,45,-0.117636,-0.117636,...,888.0,3.0,59.0,0.114815,-53.0,66.238449,5632.735352,mTOR inhibitor,Small Molecule,Kinase Inhibitor


In [8]:
# Preview the first rows of the annotated organoid profiles.
organoid_merged.head(n=5)

Unnamed: 0,patient,object_id,unit,dose,treatment,image_set,Well,single_cell_count,Colocalization_Organoid_AGP.BF_MEAN.CORRELATION.COEFF,Colocalization_Organoid_AGP.BF_MEDIAN.CORRELATION.COEFF,...,Area.Size.Shape_Organoid_MAX.Y,Area.Size.Shape_Organoid_MIN.Z,Area.Size.Shape_Organoid_MAX.Z,Area.Size.Shape_Organoid_EXTENT,Area.Size.Shape_Organoid_EULER.NUMBER,Area.Size.Shape_Organoid_EQUIVALENT.DIAMETER,Area.Size.Shape_Organoid_SURFACE.AREA,Target,Class,Therapeutic Categories
0,SARCO361,3,uM,1,Selumetinib,G10-7,G10,,-0.051867,-0.051867,...,949.0,0.0,4.0,0.066092,7.0,5.600968,20.176451,MEK1/2 inhibitor,Small Molecule,Kinase Inhibitor
1,SARCO361,12,uM,1,Nilotinib,F6-4,F6,1.0,-0.064466,-0.064466,...,1099.0,2.0,16.0,5.8e-05,77.0,10.818614,233.427032,tyrosine kinase inhibitor,Small Molecule,Investigational
2,SARCO361,3,uM,10,Binimetinib,C8-7,C8,,-0.009716,-0.009716,...,701.0,0.0,4.0,0.004887,56.0,9.350074,135.764496,MEK1/2 inhibitor,Small Molecule,Kinase Inhibitor
3,SARCO361,45,uM,1,Everolimus,C5-4,C5,12.0,-0.073847,-0.073847,...,1505.0,0.0,59.0,0.178001,4.0,271.688354,31089.646484,mTOR inhibitor,Small Molecule,Kinase Inhibitor
4,SARCO361,5,uM,10,Trametinib,F10-3,F10,,0.055337,0.055337,...,1539.0,15.0,21.0,0.701991,1.0,78.334473,365.284912,MEK1/2 inhibitor,Small Molecule,Kinase Inhibitor


In [9]:
# Write each annotated profile table to its parquet file without the index.
for annotated_profiles, output_path in (
    (sc_merged, sc_annotated_output_path),
    (organoid_merged, organoid_annotated_output_path),
):
    annotated_profiles.to_parquet(output_path, index=False)