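This notebook annotates the combined single-cell and organoid image-based profiles.
Each profile row is matched to its well in the platemap so that the sample metadata (treatment, dose, and unit) is retained, and drug information is then joined on the treatment name.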


In [1]:
import argparse
import os
import pathlib
import sys

import pandas as pd
from arg_parsing_utils import parse_args
from notebook_init_utils import bandicoot_check, init_notebook

# `root_dir` is used below to locate repository files; `in_notebook` selects
# between hard-coded defaults and command-line arguments in the next cell.
root_dir, in_notebook = init_notebook()

# Base directory that holds the per-patient `data/` tree.
# NOTE(review): presumably `bandicoot_check` returns the bandicoot mount when it
# is available and falls back to `root_dir` otherwise — confirm in
# notebook_init_utils.
profile_base_dir = bandicoot_check(
    pathlib.Path(os.path.expanduser("~/mnt/bandicoot/NF1_organoid_data")).resolve(),
    root_dir,
)

In [2]:
if in_notebook:
    # interactive session: fall back to a fixed example patient
    patient = "NF0037_T1-Z-0.1"
    image_based_profiles_subparent_name = "image_based_profiles"

else:
    # scripted run: both settings come from the command line
    args = parse_args()
    patient = args["patient"]
    image_based_profiles_subparent_name = args["image_based_profiles_subparent_name"]

In [3]:
def annotate_profiles(
    profile_df: pd.DataFrame,
    platemap_df: pd.DataFrame,
    patient: str,
    drug_information_df: "pd.DataFrame | None" = None,
) -> pd.DataFrame:
    """
    Annotate profiles with platemap and drug metadata.

    The well of each row is taken from the text before the first "-" in
    ``image_set`` and used to look up treatment, dose, and unit in the
    platemap. Drug information is then joined on the treatment name.
    The input ``profile_df`` is not modified; a new DataFrame is returned.

    Parameters
    ----------
    profile_df : pd.DataFrame
        Profile DataFrame containing an ``image_set`` column.
        Could be either single-cell or organoid profiles.
    platemap_df : pd.DataFrame
        Platemap DataFrame containing ``well_position``, ``treatment``,
        ``dose``, and ``unit`` columns.
    patient : str
        Patient ID written to every row of the returned profiles.
    drug_information_df : pd.DataFrame, optional
        Drug metadata with a ``Treatment`` column to join on. When omitted,
        it is read from ``drug_information.csv`` under the notebook-level
        ``root_dir``.

    Returns
    -------
    pd.DataFrame
        Annotated copy of the profiles. Column order is: ``patient``, the
        original first column, ``unit``, ``dose``, ``treatment``, the original
        second column, ``Well``, the remaining original columns, and finally
        the drug information columns (without ``Treatment``).
    """
    if drug_information_df is None:
        drug_information_df = pd.read_csv(
            pathlib.Path(
                f"{root_dir}/4.processing_image_based_profiles/data/drugs/drug_information.csv"
            )
        )

    # `assign` returns a new frame, so the caller's DataFrame is never mutated
    annotated_df = profile_df.assign(
        Well=profile_df["image_set"].str.split("-").str[0]
    )
    annotated_df.insert(2, "Well", annotated_df.pop("Well"))

    # left joins keep every profile row, even when no metadata matches
    annotated_df = annotated_df.merge(
        platemap_df[["well_position", "treatment", "dose", "unit"]],
        left_on="Well",
        right_on="well_position",
        how="left",
    ).drop(columns=["well_position"])
    annotated_df = annotated_df.merge(
        drug_information_df, how="left", left_on="treatment", right_on="Treatment"
    ).drop(columns=["Treatment"])

    # each column is moved to position 1 in turn, so the final order after the
    # first column is unit, dose, treatment
    for col in ["treatment", "dose", "unit"]:
        annotated_df.insert(1, col, annotated_df.pop(col))
    annotated_df.insert(0, "patient", patient)
    return annotated_df

## Set input and output paths

In [4]:
# input profiles: `strict=True` makes `resolve` raise FileNotFoundError here
# if a combined profile is missing, instead of failing later at read time
sc_merged_path = pathlib.Path(
    f"{profile_base_dir}/data/{patient}/{image_based_profiles_subparent_name}/1.combined_profiles/sc.parquet"
).resolve(strict=True)
organoid_merged_path = pathlib.Path(
    f"{profile_base_dir}/data/{patient}/{image_based_profiles_subparent_name}/1.combined_profiles/organoid.parquet"
).resolve(strict=True)

# platemap for this patient; it must also already exist
platemap_path = pathlib.Path(
    f"{profile_base_dir}/data/{patient}/platemap/platemap.csv"
).resolve(strict=True)

# output paths: resolved without `strict` because these files are only
# written by the final cell of the notebook
sc_annotated_output_path = pathlib.Path(
    f"{profile_base_dir}/data/{patient}/{image_based_profiles_subparent_name}/2.annotated_profiles/sc_anno.parquet"
).resolve()
organoid_annotated_output_path = pathlib.Path(
    f"{profile_base_dir}/data/{patient}/{image_based_profiles_subparent_name}/2.annotated_profiles/organoid_anno.parquet"
).resolve()

# both outputs live in the same `2.annotated_profiles` directory, so creating
# the parent of one of them is sufficient
organoid_annotated_output_path.parent.mkdir(parents=True, exist_ok=True)

FileNotFoundError: [Errno 2] No such file or directory: '/home/lippincm/mnt/bandicoot/NF1_organoid_data/data/NF0037_T1-Z-0.1/image_based_profiles/1.combined_profiles/sc.parquet'

In [5]:
# read data
sc_merged = pd.read_parquet(sc_merged_path)
organoid_merged = pd.read_parquet(organoid_merged_path)
# read platemap
platemap = pd.read_csv(platemap_path)
platemap.head()

Unnamed: 0,WellRow,WellCol,well_position,treatment,dose,unit
0,B,2,B2,ARV-825,1,uM
1,C,2,C2,STAURO 10,10,nM
2,D,2,D2,Digoxin 1,1,uM
3,E,2,E2,Digoxin 1,1,uM
4,F,2,F2,Onalespib 1,1,uM


In [6]:
# annotate the single-cell and organoid profiles with the same platemap/patient
sc_merged, organoid_merged = (
    annotate_profiles(merged_profiles, platemap, patient)
    for merged_profiles in (sc_merged, organoid_merged)
)

In [7]:
# Keep the combined identifier as `patient_tumor` and expose its two parts as
# separate `patient` and `tumor` columns.
sc_merged = sc_merged.rename(columns={"patient": "patient_tumor"})
organoid_merged = organoid_merged.rename(columns={"patient": "patient_tumor"})

# `n=1` splits on the first underscore only, so exactly two columns are
# produced even when the tumor label itself contains an underscore; without it
# the two-column assignment raises on such identifiers.
sc_merged[["patient", "tumor"]] = sc_merged["patient_tumor"].str.split(
    "_", n=1, expand=True
)
organoid_merged[["patient", "tumor"]] = organoid_merged["patient_tumor"].str.split(
    "_", n=1, expand=True
)

In [8]:
# columns that describe the sample rather than a measured feature
metadata_features_list = [
    "patient_tumor",
    "patient",
    "tumor",
    "object_id",
    "unit",
    "dose",
    "Well",
    "treatment",
    "image_set",
    "parent_organoid",
    "single_cell_count",
    "Target",
    "Class",
    "Therapeutic_Categories",
]
# build the "Metadata_" prefix mapping once and apply it to both profile
# tables; names that are absent from a table are left out of its rename
metadata_rename_map = {
    feature: f"Metadata_{feature}" for feature in metadata_features_list
}
sc_merged = sc_merged.rename(columns=metadata_rename_map)
organoid_merged = organoid_merged.rename(columns=metadata_rename_map)

In [9]:
# preview the annotated single-cell profiles
sc_merged.head()

Unnamed: 0,Metadata_patient_tumor,Metadata_object_id,Metadata_unit,Metadata_dose,Metadata_treatment,Metadata_image_set,Metadata_Well,Metadata_parent_organoid,Area.Size.Shape_Nuclei_VOLUME,Area.Size.Shape_Nuclei_CENTER.X,...,Texture_Cytoplasm_Mito_Inverse.Difference.Moment_256.3,Texture_Cytoplasm_Mito_Sum.Average_256.3,Texture_Cytoplasm_Mito_Sum.Entropy_256.3,Texture_Cytoplasm_Mito_Sum.Variance_256.3,Texture_Cytoplasm_Mito_Variance_256.3,Metadata_Target,Metadata_Class,Metadata_Therapeutic_Categories,Metadata_patient,Metadata_tumor
0,NF0037_T1-Z-1,36.0,%,1,DMSO 1%,F4-2,F4,71.0,11253.0,1188.314453,...,0.99975,0.044481,0.004512,8.536573,3.043701,,,,NF0037,T1-Z-1
1,NF0037_T1-Z-1,58.0,%,1,DMSO 1%,F4-2,F4,71.0,24819.0,872.092896,...,0.998241,0.316644,0.029361,56.377149,16.500579,,,,NF0037,T1-Z-1
2,NF0037_T1-Z-1,150.0,%,1,DMSO 1%,F4-2,F4,71.0,62608.0,1005.007263,...,0.99946,0.092457,0.009423,16.917697,5.618664,,,,NF0037,T1-Z-1
3,NF0037_T1-Z-1,238.0,%,1,DMSO 1%,F4-2,F4,71.0,19825.0,1242.052368,...,0.999305,0.144222,0.012196,30.912232,9.516243,,,,NF0037,T1-Z-1
4,NF0037_T1-Z-1,,%,1,DMSO 1%,F4-3,F4,,,,...,,,,,,,,,NF0037,T1-Z-1


In [10]:
# preview the annotated organoid profiles
organoid_merged.head()

Unnamed: 0,Metadata_patient_tumor,Metadata_object_id,Metadata_unit,Metadata_dose,Metadata_treatment,Metadata_image_set,Metadata_Well,Metadata_single_cell_count,Area.Size.Shape_Organoid_VOLUME,Area.Size.Shape_Organoid_CENTER.X,...,Texture_Organoid_Mito_Inverse.Difference.Moment_256.3,Texture_Organoid_Mito_Sum.Average_256.3,Texture_Organoid_Mito_Sum.Entropy_256.3,Texture_Organoid_Mito_Sum.Variance_256.3,Texture_Organoid_Mito_Variance_256.3,Metadata_Target,Metadata_Class,Metadata_Therapeutic_Categories,Metadata_patient,Metadata_tumor
0,NF0037_T1-Z-1,1,%,1,DMSO 1%,F4-2,F4,,122.0,1353.795044,...,0.999998,0.000377,4.5e-05,0.076721,0.035127,,,,NF0037,T1-Z-1
1,NF0037_T1-Z-1,2,%,1,DMSO 1%,F4-2,F4,,889059.0,820.129333,...,0.987619,0.859732,0.137404,64.010783,24.967784,,,,NF0037,T1-Z-1
2,NF0037_T1-Z-1,24,%,1,DMSO 1%,F4-2,F4,,1231610.0,948.151978,...,0.982872,1.338529,0.179836,111.44363,43.500075,,,,NF0037,T1-Z-1
3,NF0037_T1-Z-1,27,%,1,DMSO 1%,F4-2,F4,,2130325.0,915.766785,...,0.970397,2.094138,0.283222,155.788455,61.056217,,,,NF0037,T1-Z-1
4,NF0037_T1-Z-1,29,%,1,DMSO 1%,F4-2,F4,,1663760.0,883.124084,...,0.976884,1.62718,0.229967,121.230278,47.403461,,,,NF0037,T1-Z-1


In [11]:
# save annotated profiles
sc_merged.to_parquet(sc_annotated_output_path, index=False)
organoid_merged.to_parquet(organoid_annotated_output_path, index=False)