# Aggregate feature-selected profiles

## Import libraries

In [1]:
import pathlib

import pandas as pd
from pycytominer import aggregate

## Set paths and variables

In [2]:
# Input: the feature-selected profiles. Note that despite the `_dir` suffix,
# both variables below hold paths to parquet *files*.
# strict=True makes a missing input fail here instead of at read time.
input_profile_dir = pathlib.Path(
    "../data/CP_scDINO_features/combined_CP_scDINO_norm_fs.parquet"
)
input_profile_dir = input_profile_dir.resolve(strict=True)

# Output: where the aggregated profiles are written (need not exist yet).
output_profile_dir = pathlib.Path(
    "../data/CP_scDINO_features/combined_CP_scDINO_norm_fs_aggregated.parquet"
)
output_profile_dir = output_profile_dir.resolve()

# Load the profiles and preview the first rows.
fs_df = pd.read_parquet(input_profile_dir)
fs_df.head()

Unnamed: 0,Metadata_plate,Metadata_Well,Metadata_number_of_singlecells,Metadata_compound,Metadata_dose,Metadata_control,Metadata_ImageNumber,Metadata_FOV,Metadata_Time,Metadata_Cells_Number_Object_Number,...,channel_DNA_cls_feature_91_scDINO,channel_DNA_cls_feature_92_scDINO,channel_DNA_cls_feature_93_scDINO,channel_DNA_cls_feature_94_scDINO,channel_DNA_cls_feature_95_scDINO,channel_DNA_cls_feature_96_scDINO,channel_DNA_cls_feature_97_scDINO,channel_DNA_cls_feature_98_scDINO,channel_DNA_cls_feature_99_scDINO,channel_DNA_cls_feature_9_scDINO
0,1,C-09,168,Staurosporine,39.06,positive,1,2,0.0,7,...,-0.996898,-0.838599,-0.085856,0.080005,0.918828,1.373562,0.980376,-0.12352,2.401852,1.516202
1,1,C-09,168,Staurosporine,39.06,positive,1,2,0.0,9,...,0.612554,-0.134966,0.358887,0.486711,0.074517,-1.604183,2.407552,-0.835988,-0.264486,0.153676
2,1,C-09,168,Staurosporine,39.06,positive,1,2,0.0,10,...,-0.12317,0.28892,0.687862,-0.039333,-1.202663,0.373103,1.048282,-0.359857,0.659583,0.537619
3,1,C-09,168,Staurosporine,39.06,positive,1,2,0.0,11,...,-2.376413,-0.184088,0.934017,-0.800594,0.178355,-1.380925,-0.285477,0.211796,0.443178,1.129714
4,1,C-09,168,Staurosporine,39.06,positive,1,2,0.0,12,...,-0.825394,0.715618,-0.786748,-0.963768,-0.90653,0.786611,-1.693796,-1.694061,-0.22899,0.648714


## Perform aggregation

In [3]:
# Split the columns by name: anything containing "Metadata" is treated as
# metadata, everything else as a feature.
is_metadata_col = fs_df.columns.str.contains("Metadata")
metadata_cols = fs_df.columns[is_metadata_col].to_list()

# The single-cell count is aggregated together with the features so that it is
# carried over into the aggregated profiles.
feature_cols = ["Metadata_number_of_singlecells"] + fs_df.columns[
    ~is_metadata_col
].to_list()

# Grouping keys: used both for the aggregation and for re-attaching metadata,
# so they are defined once to keep the two steps in sync.
# NOTE(review): Metadata_plate is not part of the strata. If the input ever
# holds several plates that share well names, profiles would be pooled across
# plates and the merge below would emit one row per plate -- confirm that the
# input is single-plate.
strata_cols = ["Metadata_Well", "Metadata_Time", "Metadata_dose"]

# Metadata to re-attach after aggregation (must include the strata columns).
selected_metadata_cols = [
    "Metadata_Well",
    "Metadata_plate",
    "Metadata_compound",
    "Metadata_dose",
    "Metadata_control",
    "Metadata_Time",
]

# Median of every feature within each strata group.
aggregated_df = aggregate(
    fs_df,
    features=feature_cols,
    strata=strata_cols,
    operation="median",
)

# De-duplicate the metadata *before* merging: joining against the single-cell
# level frame would first create one (very wide) row per cell and only then
# collapse them again. The resulting rows and their order are the same.
metadata_df = fs_df[selected_metadata_cols].drop_duplicates(ignore_index=True)
aggregated_df = pd.merge(
    aggregated_df,
    metadata_df,
    how="left",
    on=strata_cols,
)

# Rearrange the columns such that the metadata columns come first, keeping the
# relative order within the metadata and within the feature columns.
metadata_first = [
    col for col in aggregated_df.columns if col.startswith("Metadata_")
]
features_last = [
    col for col in aggregated_df.columns if not col.startswith("Metadata_")
]
aggregated_df = aggregated_df[metadata_first + features_last]

print(aggregated_df.shape)
aggregated_df.to_parquet(output_profile_dir)
aggregated_df.head()

(390, 2343)


Unnamed: 0,Metadata_Well,Metadata_Time,Metadata_dose,Metadata_number_of_singlecells,Metadata_plate,Metadata_compound,Metadata_control,Cells_AreaShape_Compactness_CP,Cells_AreaShape_Eccentricity_CP,Cells_AreaShape_Extent_CP,...,channel_DNA_cls_feature_91_scDINO,channel_DNA_cls_feature_92_scDINO,channel_DNA_cls_feature_93_scDINO,channel_DNA_cls_feature_94_scDINO,channel_DNA_cls_feature_95_scDINO,channel_DNA_cls_feature_96_scDINO,channel_DNA_cls_feature_97_scDINO,channel_DNA_cls_feature_98_scDINO,channel_DNA_cls_feature_99_scDINO,channel_DNA_cls_feature_9_scDINO
0,C-02,0.0,0.0,178.0,1,Staurosporine,negative,-0.225866,0.362134,0.195628,...,0.123994,-0.169231,-0.000171,-0.000714,0.219109,0.060974,0.073445,-0.099431,-0.202662,0.069599
1,C-02,1.0,0.0,181.0,1,Staurosporine,negative,-0.101367,0.212559,0.160123,...,-0.110603,0.076735,0.057752,0.06512,-0.253076,0.088509,-0.046158,0.118233,-0.090497,-0.116096
2,C-02,10.0,0.0,168.0,1,Staurosporine,negative,0.018758,0.438709,0.017725,...,-0.077459,-0.07605,0.179852,0.079523,-0.53458,0.115206,0.033339,0.17914,-0.031637,-0.142134
3,C-02,11.0,0.0,174.0,1,Staurosporine,negative,-0.041979,0.460182,-0.009282,...,-0.121237,-0.075781,0.19784,0.229394,-0.400539,0.074929,-0.064636,0.225648,-0.063553,-0.139911
4,C-02,12.0,0.0,175.0,1,Staurosporine,negative,0.045544,0.423336,0.05703,...,0.035974,-0.044032,0.49004,0.287544,-0.584386,0.145022,0.071476,0.225753,-0.012832,-0.152288
