This notebook performs profile annotation.
The platemap is mapped back to the profile to retain the sample metadata.


In [1]:
import argparse
import pathlib

import pandas as pd

# `get_ipython` is only defined when IPython provides it; in a plain Python
# interpreter the name lookup raises NameError, which marks a script run.
try:
    cfg = get_ipython().config
except NameError:
    in_notebook = False
else:
    in_notebook = True

In [2]:
if in_notebook:
    # Interactive session: use a fixed patient ID.
    patient = "NF0014"
else:
    # Script execution: the patient ID is a required command-line argument.
    argparser = argparse.ArgumentParser()
    argparser.add_argument(
        "--patient",
        required=True,
        type=str,
        help="Patient ID to process, e.g. 'P01'",
    )
    args = argparser.parse_args()
    patient = args.patient

In [3]:
def annotate_profiles(
    profile_df: pd.DataFrame, platemap_df: pd.DataFrame, patient: str
) -> pd.DataFrame:
    """
    Annotate profiles with patient, treatment, dose, and unit information.

    The well is parsed from ``image_set`` (the text before the first "-") and
    used as the key to look up the matching row of the platemap.

    Parameters
    ----------
    profile_df : pd.DataFrame
        Profile DataFrame containing an ``image_set`` column.
        Could be either single-cell or organoid profiles.
        The input frame is not modified.
    platemap_df : pd.DataFrame
        Platemap DataFrame containing ``well_position``, ``treatment``,
        ``dose``, and ``unit`` columns.
    patient : str
        Patient ID to annotate the profiles with.

    Returns
    -------
    pd.DataFrame
        New annotated DataFrame. Column order is: ``patient``, the first
        profile column, ``unit``, ``dose``, ``treatment``, the second profile
        column, ``Well``, then the remaining profile columns. Rows whose well
        is not present in the platemap get missing values for ``treatment``,
        ``dose``, and ``unit`` (left join).

    Notes
    -----
    Any pre-existing ``patient``, ``Well``, ``treatment``, ``dose``, or
    ``unit`` columns in ``profile_df`` are replaced, so calling the function
    on an already annotated frame returns the same annotation instead of
    failing on duplicated column names.
    """
    annotation_cols = ["treatment", "dose", "unit"]

    # `drop` returns a new DataFrame, so the caller's frame is never altered.
    # Stale annotation columns are removed up front; otherwise the merge would
    # suffix them (`treatment_x`/`treatment_y`) and the reordering would fail.
    annotated = profile_df.drop(
        columns=["patient", "Well", *annotation_cols], errors="ignore"
    )

    # The well is the part of the image set name before the first "-".
    well = profile_df["image_set"].str.split("-").str[0]
    annotated.insert(2, "Well", well)

    # Left join keeps every profile row, even when its well is not in the platemap.
    annotated = annotated.merge(
        platemap_df[["well_position", *annotation_cols]],
        left_on="Well",
        right_on="well_position",
        how="left",
    ).drop(columns=["well_position"])

    # Move the metadata next to the first column; inserting each at position 1
    # in turn leaves them ordered as unit, dose, treatment.
    for col in annotation_cols:
        annotated.insert(1, col, annotated.pop(col))
    annotated.insert(0, "patient", patient)
    return annotated

In [4]:
# pathing
# Every input and output lives under the selected patient's data directory.
patient_data_dir = pathlib.Path(f"../../data/{patient}")
profile_dir = patient_data_dir / "image_based_profiles"

# input paths; strict=True raises FileNotFoundError if a file is missing
sc_merged_path = (profile_dir / "0.sc_merged_profiles.parquet").resolve(strict=True)
organoid_merged_path = (profile_dir / "0.organoid_merged_profiles.parquet").resolve(
    strict=True
)

# The platemap is read from the directory of the patient being processed
# (previously hard-coded to "NF0014"), so other patients are not annotated
# with NF0014's plate layout.
platemap_path = (patient_data_dir / "platemap" / "platemap.csv").resolve(strict=True)

# output paths; not strict because these files are created when saving
sc_annotated_output_path = (
    profile_dir / "1.sc_annotated_profiles.parquet"
).resolve()
organoid_annotated_output_path = (
    profile_dir / "1.organoid_annotated_profiles.parquet"
).resolve()

In [5]:
# Load the merged single-cell and organoid profiles.
sc_merged = pd.read_parquet(path=sc_merged_path)
organoid_merged = pd.read_parquet(path=organoid_merged_path)
# Load the platemap used for annotation.
platemap = pd.read_csv(filepath_or_buffer=platemap_path)

In [6]:
# Annotate both profile tables with the same platemap and patient ID.
sc_merged = annotate_profiles(
    profile_df=sc_merged,
    platemap_df=platemap,
    patient=patient,
)
organoid_merged = annotate_profiles(
    profile_df=organoid_merged,
    platemap_df=platemap,
    patient=patient,
)

In [7]:
# Preview the first rows of the single-cell profiles.
sc_merged.head()

Unnamed: 0,patient,object_id,unit,dose,treatment,image_set,Well,parent_organoid,Area.Size.Shape_Nuclei_VOLUME,Area.Size.Shape_Nuclei_CENTER.X,...,Texture_Cytoplasm_Mito_Difference.Entropy_256.1,Texture_Cytoplasm_Mito_Difference.Variance_256.1,Texture_Cytoplasm_Mito_Entropy_256.1,Texture_Cytoplasm_Mito_Information.Measure.of.Correlation.1_256.1,Texture_Cytoplasm_Mito_Information.Measure.of.Correlation.2_256.1,Texture_Cytoplasm_Mito_Inverse.Difference.Moment_256.1,Texture_Cytoplasm_Mito_Sum.Average_256.1,Texture_Cytoplasm_Mito_Sum.Entropy_256.1,Texture_Cytoplasm_Mito_Sum.Variance_256.1,Texture_Cytoplasm_Mito_Variance_256.1
0,NF0014,70,uM,10,Mirdametinib,G8-1,G8,40,43811.0,728.377691,...,0.051589,0.003864,0.072747,-0.592117,0.243435,0.996572,0.73999,0.063095,161.781247,42.669714
1,NF0014,122,uM,10,Mirdametinib,G8-1,G8,40,59089.0,694.362267,...,0.057172,0.00386,0.082322,-0.608133,0.263337,0.996108,0.806632,0.071535,165.755624,43.49038
2,NF0014,162,uM,10,Mirdametinib,G8-1,G8,40,62055.0,801.064249,...,0.044009,0.003868,0.062735,-0.60307,0.229471,0.997135,0.617856,0.05445,133.682412,35.208834
3,NF0014,165,uM,10,Mirdametinib,G8-1,G8,40,54844.0,767.61994,...,0.02155,0.00388,0.029536,-0.586036,0.155376,0.998651,0.310195,0.025525,70.092415,18.834159
4,NF0014,182,uM,10,Mirdametinib,G8-1,G8,40,37713.0,722.191844,...,0.033084,0.003874,0.04585,-0.6077,0.197999,0.997895,0.484859,0.040119,107.155814,28.216124


In [8]:
# Preview the first rows of the organoid profiles.
organoid_merged.head()

Unnamed: 0,patient,object_id,unit,dose,treatment,image_set,Well,single_cell_count,Area.Size.Shape_Organoid_VOLUME,Area.Size.Shape_Organoid_CENTER.X,...,Texture_Organoid_Mito_Difference.Entropy_256.1,Texture_Organoid_Mito_Difference.Variance_256.1,Texture_Organoid_Mito_Entropy_256.1,Texture_Organoid_Mito_Information.Measure.of.Correlation.1_256.1,Texture_Organoid_Mito_Information.Measure.of.Correlation.2_256.1,Texture_Organoid_Mito_Inverse.Difference.Moment_256.1,Texture_Organoid_Mito_Sum.Average_256.1,Texture_Organoid_Mito_Sum.Entropy_256.1,Texture_Organoid_Mito_Sum.Variance_256.1,Texture_Organoid_Mito_Variance_256.1
0,NF0014,40,uM,10,Mirdametinib,G8-1,G8,8,6416256.0,769.689629,...,0.545338,0.003446,0.976656,-0.609637,0.758225,0.948133,5.216739,0.823947,483.70571,121.888125
1,NF0014,24,uM,1,Fimepinostat,D5-1,D5,5,3727848.0,904.163181,...,1.310975,0.002675,1.979332,-0.295282,0.647958,0.850992,4.07191,1.607417,262.768396,81.736101
2,NF0014,85,uM,1,Mirdametinib,F8-1,F8,9,19925581.0,609.403238,...,1.711179,0.001925,3.50347,-0.448653,0.930773,0.769392,9.46117,2.711064,168.775477,43.661196
3,NF0014,18,nM,10,STAURO,G11-1,G11,1,2721536.0,985.260263,...,0.455617,0.003506,0.780322,-0.633195,0.717287,0.950478,6.900637,0.678164,866.667507,219.817098
4,NF0014,24,uM,1,Binimetinib,G7-1,G7,6,6343511.0,925.204579,...,0.838981,0.003178,1.516453,-0.58463,0.844911,0.908857,14.740146,1.266238,2093.731127,527.137452


In [9]:
# Write both profile tables to parquet without storing the row index.
sc_merged.to_parquet(path=sc_annotated_output_path, index=False)
organoid_merged.to_parquet(path=organoid_annotated_output_path, index=False)