This notebook performs profile annotation.
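The platemap is merged onto the single-cell and organoid profiles by well, so that each profile retains its sample metadata (treatment, dose, and unit) along with the patient ID.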


In [1]:
import argparse
import pathlib

import pandas as pd

# get_ipython is only defined when running under IPython/Jupyter;
# a NameError therefore means this file is being run as a plain script.
try:
    cfg = get_ipython().config
except NameError:
    in_notebook = False
else:
    in_notebook = True

In [2]:
if in_notebook:
    # interactive run: use a fixed patient
    patient = "NF0014"
else:
    # script run: the patient ID is a required command-line argument
    argparser = argparse.ArgumentParser()
    argparser.add_argument(
        "--patient",
        type=str,
        required=True,
        help="Patient ID to process, e.g. 'P01'",
    )
    args = argparser.parse_args()
    patient = args.patient

In [3]:
def annotate_profiles(
    profile_df: pd.DataFrame, platemap_df: pd.DataFrame, patient: str
) -> pd.DataFrame:
    """
    Annotate profiles with treatment, dose, and unit information from the platemap.

    The well is taken from the text before the first "-" in ``image_set``
    (e.g. "C4-2" -> "C4") and matched against ``well_position`` in the platemap.
    The caller's ``profile_df`` is left unmodified, and calling the function on
    an already annotated frame re-annotates it instead of failing.

    Parameters
    ----------
    profile_df : pd.DataFrame
        Profile DataFrame containing an ``image_set`` column.
        Could be either single-cell or organoid profiles.
    platemap_df : pd.DataFrame
        Platemap DataFrame containing the columns ``well_position``,
        ``treatment``, ``dose``, and ``unit``.
    patient : str
        Patient ID to annotate the profiles with.

    Returns
    -------
    pd.DataFrame
        New DataFrame whose leading columns are ``patient``, the original first
        column, ``unit``, ``dose``, ``treatment``, followed by the remaining
        profile columns with ``Well`` inserted as the third profile column.
        Rows whose well is not present in the platemap keep missing values
        in ``treatment``, ``dose``, and ``unit`` (left join).

    Raises
    ------
    KeyError
        If ``image_set`` is missing from ``profile_df`` or any required
        platemap column is missing from ``platemap_df``.
    """
    annotation_cols = ["treatment", "dose", "unit"]

    # drop() returns a new frame, so the caller's DataFrame is never touched.
    # Removing stale annotation columns first keeps the function idempotent:
    # without this, the merge below would emit treatment_x/treatment_y and the
    # later pop("treatment") would raise.
    annotated_df = profile_df.drop(
        columns=["patient", "Well", *annotation_cols], errors="ignore"
    )

    # well ID is everything before the first "-" of the image set name
    annotated_df.insert(2, "Well", annotated_df["image_set"].str.split("-").str[0])

    # left join keeps every profile row, even if its well is not in the platemap
    annotated_df = pd.merge(
        annotated_df,
        platemap_df[["well_position", *annotation_cols]],
        left_on="Well",
        right_on="well_position",
        how="left",
    ).drop(columns=["well_position"])

    # each insert at position 1 pushes the previous one right, so the final
    # order after the first column is: unit, dose, treatment
    for col in annotation_cols:
        annotated_df.insert(1, col, annotated_df.pop(col))
    annotated_df.insert(0, "patient", patient)
    return annotated_df

In [4]:
# pathing
# every input and output lives under the selected patient's data directory
patient_data_dir = pathlib.Path(f"../../data/{patient}")
profiles_dir = patient_data_dir / "image_based_profiles"

# input profiles: strict=True raises FileNotFoundError if a file is missing
sc_merged_path = (profiles_dir / "0.sc_merged_profiles.parquet").resolve(strict=True)
organoid_merged_path = (profiles_dir / "0.organoid_merged_profiles.parquet").resolve(
    strict=True
)

# use the platemap of the patient being processed; this path was previously
# hard-coded to NF0014, so other patients were annotated with NF0014's platemap
platemap_path = (patient_data_dir / "platemap" / "platemap.csv").resolve(strict=True)

# output path (not strict: the files are created when the profiles are saved)
sc_annotated_output_path = (
    profiles_dir / "1.sc_annotated_profiles.parquet"
).resolve()
organoid_annotated_output_path = (
    profiles_dir / "1.organoid_annotated_profiles.parquet"
).resolve()

In [5]:
# load the merged single-cell and organoid profiles from parquet
sc_merged, organoid_merged = map(
    pd.read_parquet, (sc_merged_path, organoid_merged_path)
)
# load the platemap from CSV
platemap = pd.read_csv(platemap_path)

In [6]:
# attach the platemap metadata and patient ID to both profile tables
sc_merged, organoid_merged = (
    annotate_profiles(merged_df, platemap, patient)
    for merged_df in (sc_merged, organoid_merged)
)

In [7]:
# preview the first rows of the annotated single-cell profiles
sc_merged.head()

Unnamed: 0,patient,object_id,unit,dose,treatment,image_set,Well,parent_organoid,Area.Size.Shape_Nuclei_VOLUME,Area.Size.Shape_Nuclei_CENTER.X,...,Texture_Cytoplasm_Mito_Difference.Entropy_256.1,Texture_Cytoplasm_Mito_Difference.Variance_256.1,Texture_Cytoplasm_Mito_Entropy_256.1,Texture_Cytoplasm_Mito_Information.Measure.of.Correlation.1_256.1,Texture_Cytoplasm_Mito_Information.Measure.of.Correlation.2_256.1,Texture_Cytoplasm_Mito_Inverse.Difference.Moment_256.1,Texture_Cytoplasm_Mito_Sum.Average_256.1,Texture_Cytoplasm_Mito_Sum.Entropy_256.1,Texture_Cytoplasm_Mito_Sum.Variance_256.1,Texture_Cytoplasm_Mito_Variance_256.1
0,NF0014,15,%,1,DMSO,C4-2,C4,32,99661.0,473.778268,...,0.051024,0.003861,0.070853,-0.59996,0.242368,0.996241,0.699428,0.063534,121.523028,32.474614
1,NF0014,26,%,1,DMSO,C4-2,C4,32,156362.0,715.339418,...,0.061713,0.003853,0.091123,-0.59905,0.273598,0.995213,0.716101,0.078503,99.806008,26.505418
2,NF0014,37,%,1,DMSO,C4-2,C4,32,84453.0,503.486353,...,0.030409,0.003874,0.042813,-0.607538,0.191419,0.998169,0.093143,0.037626,3.992738,1.070541
3,NF0014,43,%,1,DMSO,C4-2,C4,32,131041.0,693.588457,...,0.051189,0.00386,0.071099,-0.587976,0.239543,0.996087,0.859732,0.06178,175.059103,46.919265
4,NF0014,51,%,1,DMSO,C4-2,C4,32,69045.0,399.909088,...,0.043201,0.003866,0.061412,-0.619343,0.231458,0.996829,0.5343,0.0539,83.157453,22.02088


In [8]:
# preview the first rows of the annotated organoid profiles
organoid_merged.head()

Unnamed: 0,patient,object_id,unit,dose,treatment,image_set,Well,single_cell_count,Area.Size.Shape_Organoid_VOLUME,Area.Size.Shape_Organoid_CENTER.X,...,Texture_Organoid_Mito_Difference.Entropy_256.1,Texture_Organoid_Mito_Difference.Variance_256.1,Texture_Organoid_Mito_Entropy_256.1,Texture_Organoid_Mito_Information.Measure.of.Correlation.1_256.1,Texture_Organoid_Mito_Information.Measure.of.Correlation.2_256.1,Texture_Organoid_Mito_Inverse.Difference.Moment_256.1,Texture_Organoid_Mito_Sum.Average_256.1,Texture_Organoid_Mito_Sum.Entropy_256.1,Texture_Organoid_Mito_Sum.Variance_256.1,Texture_Organoid_Mito_Variance_256.1
0,NF0014,32,%,1,DMSO,C4-2,C4,29,20908636.0,669.720104,...,1.33351,0.002369,2.76259,-0.507738,0.920417,0.831104,8.297708,2.122714,198.636846,50.32471


In [9]:
# write both annotated tables to parquet without the DataFrame index
for annotated_df, output_path in (
    (sc_merged, sc_annotated_output_path),
    (organoid_merged, organoid_annotated_output_path),
):
    annotated_df.to_parquet(output_path, index=False)