# Annotate merged single cells with metadata from platemap file

## Import libraries

In [1]:
import pathlib
import sys

import pandas as pd
from pycytominer import annotate
from pycytominer.cyto_utils import output

## Set paths and variables

In [2]:
# load in platemap file as a pandas dataframe
platemap_path = pathlib.Path("../../data/").resolve()

# directory where parquet files are located
data_dir = pathlib.Path("../data/converted_data")

# directory where the annotated parquet files are saved to
output_dir = pathlib.Path("../data/annotated_data")
output_dir.mkdir(exist_ok=True)

In [3]:
# dictionary with each run for the cell type
dict_of_inputs = {
    "run_20230920ChromaLiveTL_24hr4ch_MaxIP": {
        "source_path": pathlib.Path(
            f"{data_dir}/20230920ChromaLiveTL_24hr4ch_MaxIP.parquet"
        ).resolve(strict=True),
        "platemap_path": pathlib.Path(f"{platemap_path}/platemap_24h.csv").resolve(
            strict=True
        ),
    },
    "run_20231017ChromaLive_6hr_4ch_MaxIP": {
        "source_path": pathlib.Path(
            f"{data_dir}/20231017ChromaLive_6hr_4ch_MaxIP.parquet"
        ).resolve(strict=True),
        "platemap_path": pathlib.Path(f"{platemap_path}/platemap_6hr_4ch.csv").resolve(
            strict=True
        ),
    },
    "run_20231017ChromaLive_endpoint_w_AnnexinV_2ch_MaxIP": {
        "source_path": pathlib.Path(
            f"{data_dir}/20231017ChromaLive_endpoint_w_AnnexinV_2ch_MaxIP.parquet"
        ).resolve(strict=True),
        "platemap_path": pathlib.Path(
            f"{platemap_path}/platemap_AnnexinV_2ch.csv"
        ).resolve(strict=True),
    },
}

## Annotate merged single cells

In [4]:
single_cell_df = pd.read_parquet(
    f"{data_dir}/20230920ChromaLiveTL_24hr4ch_MaxIP.parquet"
)
platemap_df = pd.read_csv(f"{platemap_path}/platemap_24h.csv")

In [5]:
print(single_cell_df.shape)
single_cell_df.head()

(50357, 2108)


Unnamed: 0,Metadata_ImageNumber,Image_Metadata_FOV,Metadata_number_of_singlecells,Image_Metadata_Time,Image_Metadata_Well,Metadata_Cells_Number_Object_Number,Metadata_Cytoplasm_Parent_Cells,Metadata_Cytoplasm_Parent_Nuclei,Metadata_Nuclei_Number_Object_Number,Cytoplasm_AreaShape_Area,...,Nuclei_TrackObjects_DistanceTraveled_50,Nuclei_TrackObjects_FinalAge_50,Nuclei_TrackObjects_IntegratedDistance_50,Nuclei_TrackObjects_Label_50,Nuclei_TrackObjects_Lifetime_50,Nuclei_TrackObjects_Linearity_50,Nuclei_TrackObjects_ParentImageNumber_50,Nuclei_TrackObjects_ParentObjectNumber_50,Nuclei_TrackObjects_TrajectoryX_50,Nuclei_TrackObjects_TrajectoryY_50
0,1,1,4674,1,C-05,1,1,3,3,3186.0,...,0.0,,0.0,3,1,1.0,0,0,0,0
1,1,1,4674,1,C-05,2,2,4,4,2715.0,...,0.0,,0.0,4,1,1.0,0,0,0,0
2,1,1,4674,1,C-05,7,7,9,9,1974.0,...,0.0,,0.0,9,1,1.0,0,0,0,0
3,1,1,4674,1,C-05,8,8,10,10,1235.0,...,0.0,,0.0,10,1,1.0,0,0,0,0
4,1,1,4674,1,C-05,9,9,11,11,1261.0,...,0.0,,0.0,11,1,1.0,0,0,0,0


In [7]:
for data_run, info in dict_of_inputs.items():
    # load in converted parquet file as df to use in annotate function
    single_cell_df = pd.read_parquet(info["source_path"])
    platemap_df = pd.read_csv(info["platemap_path"])
    output_file = str(pathlib.Path(f"{output_dir}/{data_run}_sc.parquet"))
    print(f"Adding annotations to merged single cells for {data_run}!")

    # add metadata from platemap file to extracted single cell features
    annotated_df = annotate(
        profiles=single_cell_df,
        platemap=platemap_df,
        join_on=["Metadata_well", "Image_Metadata_Well"],
    )

    # move metadata well and single cell count to the front of the df (for easy visualization in python)
    well_column = annotated_df.pop("Metadata_Well")
    singlecell_column = annotated_df.pop("Metadata_number_of_singlecells")
    # insert the column as the second index column in the dataframe
    annotated_df.insert(1, "Metadata_Well", well_column)
    annotated_df.insert(2, "Metadata_number_of_singlecells", singlecell_column)

    # save annotated df as parquet file
    output(
        df=annotated_df,
        output_filename=output_file,
        output_type="parquet",
    )
    print(f"Annotations have been added to {data_run} and saved!")
    # check last annotated df to see if it has been annotated correctly
    print(annotated_df.shape)
    annotated_df.head()

Adding annotations to merged single cells for run_20230920ChromaLiveTL_24hr4ch_MaxIP!
Annotations have been added to run_20230920ChromaLiveTL_24hr4ch_MaxIP and saved!
(74694, 2112)
Adding annotations to merged single cells for run_20231017ChromaLive_6hr_4ch_MaxIP!
Annotations have been added to run_20231017ChromaLive_6hr_4ch_MaxIP and saved!
(197389, 2112)
Adding annotations to merged single cells for run_20231017ChromaLive_endpoint_w_AnnexinV_2ch_MaxIP!
Annotations have been added to run_20231017ChromaLive_endpoint_w_AnnexinV_2ch_MaxIP and saved!
(12879, 1201)
