# Aggregate feature-selected profiles

## Import libraries

In [1]:
import pathlib

import pandas as pd
from pycytominer import aggregate

## Set paths and variables

In [2]:
# Input parquet holding the feature-selected profiles.
# NOTE: despite the "_dir" suffix, both path variables below point at files.
# strict=True makes resolve() raise immediately if the input does not exist.
fs_profile_path = pathlib.Path(
    "../data/CP_scDINO_features/combined_CP_scDINO_norm_fs.parquet"
)
input_profile_dir = fs_profile_path.resolve(strict=True)

# Destination parquet for the aggregated profiles; resolved non-strictly
# because the file is only created later in the notebook.
aggregated_profile_path = pathlib.Path(
    "../data/CP_scDINO_features/combined_CP_scDINO_norm_fs_aggregated.parquet"
)
output_profile_dir = aggregated_profile_path.resolve()

# Load the feature-selected profiles into memory.
fs_df = pd.read_parquet(input_profile_dir)

## Perform aggregation

In [3]:
# Split the columns once: any column whose name contains "Metadata" is treated
# as metadata, everything else is treated as a feature to aggregate.
is_metadata_col = fs_df.columns.str.contains("Metadata")
metadata_cols = fs_df.columns[is_metadata_col]
feature_cols = fs_df.columns[~is_metadata_col].to_list()

# Collapse the profiles to one median profile per (well, time, dose) group.
# NOTE(review): the rows displayed below order Metadata_Time as
# 0.0, 1.0, 10.0, 11.0, ... which looks lexicographic — confirm whether the
# column is stored as strings and whether that is intended.
grouping_cols = ["Metadata_Well", "Metadata_Time", "Metadata_dose"]
aggregated_df = aggregate(
    population_df=fs_df,
    strata=grouping_cols,
    features=feature_cols,
    operation="median",
)

# Report the result size, persist it, then preview the first rows.
print(aggregated_df.shape)
aggregated_df.to_parquet(output_profile_dir)
aggregated_df.head()

(390, 2341)


Unnamed: 0,Metadata_Well,Metadata_Time,Metadata_dose,Cells_AreaShape_BoundingBoxArea_CP,Cells_AreaShape_Compactness_CP,Cells_AreaShape_Extent_CP,Cells_AreaShape_FormFactor_CP,Cells_AreaShape_MinorAxisLength_CP,Cells_AreaShape_Solidity_CP,Cells_AreaShape_Zernike_1_1_CP,...,channel_DNA_cls_feature_91_scDINO,channel_DNA_cls_feature_92_scDINO,channel_DNA_cls_feature_93_scDINO,channel_DNA_cls_feature_94_scDINO,channel_DNA_cls_feature_95_scDINO,channel_DNA_cls_feature_96_scDINO,channel_DNA_cls_feature_97_scDINO,channel_DNA_cls_feature_98_scDINO,channel_DNA_cls_feature_99_scDINO,channel_DNA_cls_feature_9_scDINO
0,C-02,0.0,0.0,-0.29176,-0.312209,0.311346,0.138225,-0.253509,0.408095,-0.049747,...,0.208938,-0.07363,0.008897,-0.055376,0.131249,-0.053869,0.12186,0.006765,-0.151495,0.121353
1,C-02,1.0,0.0,-0.028198,-0.394414,0.313888,0.26044,0.312064,0.460261,-0.253127,...,-0.080875,0.047516,-0.007985,0.057872,-0.307675,0.226858,-0.097667,0.154435,0.009346,-0.100267
2,C-02,10.0,0.0,0.182481,-0.209134,0.13744,-0.00637,0.309906,0.357322,-0.434222,...,-0.104633,0.049896,0.15582,0.073132,-0.514341,0.12568,0.01213,0.206564,-0.001114,-0.069553
3,C-02,11.0,0.0,0.141672,-0.285466,0.255858,0.099816,0.399589,0.385191,-0.438585,...,-0.087787,-0.101442,0.275884,0.209213,-0.459635,0.113541,-0.091702,0.251352,0.081938,-0.135812
4,C-02,12.0,0.0,0.152554,-0.308668,0.294745,0.133101,0.450444,0.414189,-0.436021,...,0.009973,-0.036503,0.473306,0.311591,-0.641266,0.173728,0.024222,0.272495,0.057495,-0.097702
