In [1]:
import pathlib

import numpy as np
import pandas as pd
import umap

In [2]:
# Resolve every file location this notebook reads from or writes to.

# Input: the annotated CLS feature table. strict=True makes resolution fail
# right here if the file does not exist, instead of later at read time.
annotated_features_file = pathlib.Path(
    "../../1.scDINO_run/outputdir/apoptosis_timelapse/CLS_features/CLS_features_annotated.parquet"
)
data_path = annotated_features_file.resolve(strict=True)

# Output: the same table with UMAP coordinates attached (non-strict resolve,
# since the file may not exist yet).
umap_features_file = pathlib.Path(
    "../../1.scDINO_run/outputdir/apoptosis_timelapse/CLS_features/CLS_features_annotated_umap.csv"
)
output_path = umap_features_file.resolve()

# Output: a second copy of that CSV placed in the shiny app directory.
shiny_umap_features_file = pathlib.Path(
    "../temporal_shiny_app/CLS_features_annotated_umap.csv"
)
shiny_output_path = shiny_umap_features_file.resolve()

In [3]:
# read the annotated feature table from the resolved input path
cls_df = pd.read_parquet(path=data_path)
# preview the first rows to check the column layout
cls_df.head(n=5)

Unnamed: 0,Metadata_image_path,Metadata_Well,Metadata_FOV,Metadata_Time,Metadata_ImageNumber,Metadata_Nuclei_Number_Object_Number,Metadata_compound,Metadata_dose,Metadata_control,channel488-1_cls_feature_0,...,channel_DNA_cls_feature_90,channel_DNA_cls_feature_91,channel_DNA_cls_feature_92,channel_DNA_cls_feature_93,channel_DNA_cls_feature_94,channel_DNA_cls_feature_95,channel_DNA_cls_feature_96,channel_DNA_cls_feature_97,channel_DNA_cls_feature_98,channel_DNA_cls_feature_99
0,../../data/processed_images/crops/C-02/image_n...,C-02,1,10,10,10,Staurosporine,0.0,negative,-0.013328,...,-0.026818,-0.025145,0.022802,0.04424,-0.048204,0.003988,0.005564,0.033881,0.082196,0.009121
1,../../data/processed_images/crops/C-02/image_n...,C-02,1,10,10,100,Staurosporine,0.0,negative,0.00363,...,0.024008,0.002716,-0.05946,0.032852,-0.057521,0.031904,0.0175,0.051653,0.02443,-0.034758
2,../../data/processed_images/crops/C-02/image_n...,C-02,1,10,10,101,Staurosporine,0.0,negative,0.013816,...,-0.00775,0.024941,0.0273,0.034876,-0.127687,-0.014743,0.033227,0.008936,0.031277,-0.031652
3,../../data/processed_images/crops/C-02/image_n...,C-02,1,10,10,102,Staurosporine,0.0,negative,0.026384,...,-0.041821,-0.0283,-0.034476,0.032469,-0.054544,0.02106,0.028284,-0.006381,0.101471,-0.018005
4,../../data/processed_images/crops/C-02/image_n...,C-02,1,10,10,103,Staurosporine,0.0,negative,-0.002657,...,-0.029687,-0.017399,-0.01186,0.045444,-0.077282,0.00077,0.076381,-0.007281,0.015706,-0.011349


In [4]:
# split the table column-wise: every column whose name contains "Metadata"
# is treated as an annotation, everything else as a feature
metadata_columns = cls_df.columns[cls_df.columns.str.contains("Metadata")]
metadata_df = cls_df[metadata_columns]
feature_df = cls_df.drop(columns=metadata_columns)

# report both shapes; the row counts should match
print(f"metadata_df shape: {metadata_df.shape}")
print(f"feature_df shape: {feature_df.shape}")

metadata_df shape: (240048, 9)
feature_df shape: (240048, 1536)


In [5]:
# define the UMAP model
# random_state is fixed so the embedding is reproducible; as the warning emitted
# by this cell states, setting it overrides n_jobs to 1 (no parallelism).
umap_model = umap.UMAP(
    n_components=2, random_state=0, n_neighbors=30, min_dist=0.1, metric="euclidean"
)

# fit the UMAP model on the feature columns and embed every row into 2 dimensions
umap_embedding = umap_model.fit_transform(feature_df)

# Build the coordinate frame on the metadata index. pd.concat(axis=1) aligns
# rows by index label, so a fresh 0..n-1 index would misalign (and NaN-pad)
# the result whenever cls_df was loaded with a non-default index.
umap_coordinates_df = pd.DataFrame(
    umap_embedding, columns=["UMAP1", "UMAP2"], index=metadata_df.index
)

# add the metadata back
umap_embedding_df = pd.concat([metadata_df, umap_coordinates_df], axis=1)
umap_embedding_df.head()

  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


Unnamed: 0,Metadata_image_path,Metadata_Well,Metadata_FOV,Metadata_Time,Metadata_ImageNumber,Metadata_Nuclei_Number_Object_Number,Metadata_compound,Metadata_dose,Metadata_control,UMAP1,UMAP2
0,../../data/processed_images/crops/C-02/image_n...,C-02,1,10,10,10,Staurosporine,0.0,negative,0.3361,2.145964
1,../../data/processed_images/crops/C-02/image_n...,C-02,1,10,10,100,Staurosporine,0.0,negative,0.330319,5.186306
2,../../data/processed_images/crops/C-02/image_n...,C-02,1,10,10,101,Staurosporine,0.0,negative,-1.384449,2.012407
3,../../data/processed_images/crops/C-02/image_n...,C-02,1,10,10,102,Staurosporine,0.0,negative,1.602488,3.677288
4,../../data/processed_images/crops/C-02/image_n...,C-02,1,10,10,103,Staurosporine,0.0,negative,0.403058,3.174097


In [6]:
# save the UMAP embeddings as CSV, once next to the input features and once
# for the shiny app
# The row index is written as well (pandas' to_csv default), so the file
# layout is unchanged from previous runs.
for csv_path in (output_path, shiny_output_path):
    # make sure the destination directory exists so the write cannot fail on a
    # fresh checkout
    csv_path.parent.mkdir(parents=True, exist_ok=True)
    umap_embedding_df.to_csv(csv_path)