In [1]:
import numpy as np

In [2]:
def transform_labels(arr: np.ndarray) -> np.ndarray:
    """
    Transform labels of the form
    'batch1/FUSHeterozygous/Untreated/rep1/ANXA11'
    into
    'ANXA11_FUSHeterozygous_Untreated_batch1_rep1'.

    Parameters
    ----------
    arr : np.ndarray
        Iterable of label strings, each consisting of exactly five
        '/'-separated fields in the order batch/cellline/condition/rep/marker.

    Returns
    -------
    np.ndarray
        String array with one entry per input label, rewritten as
        marker_cellline_condition_batch_rep. An empty input yields an
        empty string array.

    Raises
    ------
    ValueError
        If a label does not split into exactly five fields.
    """
    new_arr = []
    for s in arr:
        parts = s.split("/")
        # Fail with the offending label in the message instead of the
        # generic tuple-unpacking error.
        if len(parts) != 5:
            raise ValueError(
                f"Expected 5 '/'-separated fields "
                f"(batch/cellline/condition/rep/marker), got {len(parts)} in label {s!r}"
            )
        batch, cellline, condition, rep, marker = parts
        new_arr.append(f"{marker}_{cellline}_{condition}_{batch}_{rep}")
    # dtype=str keeps the result a string array even when there are no labels
    # (np.array([]) on its own would default to float64).
    return np.array(new_arr, dtype=str)

In [3]:
# Root folder containing one sub-folder of embeddings per batch.
# The trailing slash matters: per-batch paths below are built by plain string concatenation.
cyto_emb_folder_path = "/home/projects/hornsteinlab/Collaboration/MOmaps/outputs/models_outputs_cytoself_qsplit9/embeddings/NIH_cytoself/vqindhist1/"
batches = [f'batch{i}' for i in range(1, 4)]

# For each batch: load the raw labels, reformat them with transform_labels,
# and save the result alongside the source file in the same batch folder.
for batch in batches:
    batch_dir = f"{cyto_emb_folder_path}{batch}"
    labels = np.load(f"{batch_dir}/vqindhist1_labels_all.npy")
    batch_labels = transform_labels(labels)
    batch_labels_path = f"{batch_dir}/testset_labels.npy"
    np.save(batch_labels_path, batch_labels)
    print(f"Saved {len(batch_labels)} labels to {batch_labels_path}")

Saved 15268 labels to /home/projects/hornsteinlab/Collaboration/MOmaps/outputs/models_outputs_cytoself_qsplit9/embeddings/NIH_cytoself/vqindhist1/batch1/testset_labels.npy
Saved 11218 labels to /home/projects/hornsteinlab/Collaboration/MOmaps/outputs/models_outputs_cytoself_qsplit9/embeddings/NIH_cytoself/vqindhist1/batch2/testset_labels.npy
Saved 10906 labels to /home/projects/hornsteinlab/Collaboration/MOmaps/outputs/models_outputs_cytoself_qsplit9/embeddings/NIH_cytoself/vqindhist1/batch3/testset_labels.npy
