In [1]:
import pathlib

import pandas as pd
from pycytominer import normalize
from pycytominer.cyto_utils import output

In [2]:
# set path to data

combined_data_path = pathlib.Path(
    "../data/CP_scDINO_features/combined_CP_scDINO_data.parquet"
).resolve(strict=True)

# set output path
normalized_data_output_path = pathlib.Path(
    "../data/CP_scDINO_features/combined_CP_scDINO_norm.parquet"
).resolve()

# load data
combined_data = pd.read_parquet(combined_data_path)
print(combined_data.shape)
combined_data.head()

(188065, 3873)


Unnamed: 0,Metadata_plate,Metadata_Well,Metadata_number_of_singlecells,Metadata_compound,Metadata_dose,Metadata_control,Metadata_ImageNumber,Metadata_FOV,Metadata_Time,Metadata_Cells_Number_Object_Number,...,channel_DNA_cls_feature_90_scDINO,channel_DNA_cls_feature_91_scDINO,channel_DNA_cls_feature_92_scDINO,channel_DNA_cls_feature_93_scDINO,channel_DNA_cls_feature_94_scDINO,channel_DNA_cls_feature_95_scDINO,channel_DNA_cls_feature_96_scDINO,channel_DNA_cls_feature_97_scDINO,channel_DNA_cls_feature_98_scDINO,channel_DNA_cls_feature_99_scDINO
0,1,C-09,168,Staurosporine,39.06,positive,1,2,0.0,7,...,0.010555,-0.029674,-0.048203,0.030452,-0.091131,0.030641,0.043315,0.036352,0.043984,0.022231
1,1,C-09,168,Staurosporine,39.06,positive,1,2,0.0,9,...,-0.007216,0.010287,-0.03158,0.037308,-0.075982,0.011744,-0.021006,0.074796,0.023915,-0.014053
2,1,C-09,168,Staurosporine,39.06,positive,1,2,0.0,10,...,-0.0208,-0.00798,-0.021566,0.042379,-0.095577,-0.016842,0.021705,0.038181,0.037327,-0.001478
3,1,C-09,168,Staurosporine,39.06,positive,1,2,0.0,11,...,-0.006244,-0.063925,-0.032741,0.046174,-0.123933,0.014068,-0.016183,0.002253,0.053429,-0.004423
4,1,C-09,168,Staurosporine,39.06,positive,1,2,0.0,12,...,-0.028743,-0.025415,-0.011485,0.019648,-0.130011,-0.010214,0.030637,-0.035683,-0.000255,-0.01357


In [3]:
# Get columns that contain "Metadata"
metadata_features = combined_data.columns[
    combined_data.columns.str.contains("Metadata")
].tolist()

# get the feature columns
feature_columns = combined_data.columns.difference(metadata_features).to_list()

In [4]:
# Normalize the single cell data per time point

# get the unique time points
time_points = combined_data.Metadata_Time.unique()

output_dict_of_normalized_dfs = {}

# normalize annotated data
normalized_df = normalize(
    # df with annotated raw merged single cell features
    profiles=combined_data,
    features=feature_columns,
    meta_features=metadata_features,
    # specify samples used as normalization reference (negative control)
    samples="Metadata_compound == 'Staurosporine' and Metadata_dose == '0.0' and Metadata_Time == '0.0'",
    # normalization method used
    method="standardize",
)

output(
    normalized_df,
    output_filename=normalized_data_output_path,
    output_type="parquet",
)
print("Single cells have been normalized!")
# check to see if the features have been normalized
print(normalized_df.shape)
normalized_df.head()

Single cells have been normalized!
(188065, 3873)


Unnamed: 0,Metadata_plate,Metadata_Well,Metadata_number_of_singlecells,Metadata_compound,Metadata_dose,Metadata_control,Metadata_ImageNumber,Metadata_FOV,Metadata_Time,Metadata_Cells_Number_Object_Number,...,channel_DNA_cls_feature_91_scDINO,channel_DNA_cls_feature_92_scDINO,channel_DNA_cls_feature_93_scDINO,channel_DNA_cls_feature_94_scDINO,channel_DNA_cls_feature_95_scDINO,channel_DNA_cls_feature_96_scDINO,channel_DNA_cls_feature_97_scDINO,channel_DNA_cls_feature_98_scDINO,channel_DNA_cls_feature_99_scDINO,channel_DNA_cls_feature_9_scDINO
0,1,C-09,168,Staurosporine,39.06,positive,1,2,0.0,7,...,-0.996898,-0.838599,-0.085856,0.080005,0.918828,1.373562,0.980376,-0.12352,2.401852,1.516202
1,1,C-09,168,Staurosporine,39.06,positive,1,2,0.0,9,...,0.612554,-0.134966,0.358887,0.486711,0.074517,-1.604183,2.407552,-0.835988,-0.264486,0.153676
2,1,C-09,168,Staurosporine,39.06,positive,1,2,0.0,10,...,-0.12317,0.28892,0.687862,-0.039333,-1.202663,0.373103,1.048282,-0.359857,0.659583,0.537619
3,1,C-09,168,Staurosporine,39.06,positive,1,2,0.0,11,...,-2.376413,-0.184088,0.934017,-0.800594,0.178355,-1.380925,-0.285477,0.211796,0.443178,1.129714
4,1,C-09,168,Staurosporine,39.06,positive,1,2,0.0,12,...,-0.825394,0.715618,-0.786748,-0.963768,-0.90653,0.786611,-1.693796,-1.694061,-0.22899,0.648714
