In [None]:
def find_closest_cell_types(predicted_embeddings, map_dict):
    """Pick, for every predicted embedding, the best-scoring key of ``map_dict``.

    Each predicted embedding is scored against every reference embedding via
    ``calculate_distance(..., method="cosine")``; the key whose score is the
    largest wins. On ties the key that comes first in ``map_dict`` is kept.

    NOTE(review): the score is treated as a similarity (higher is better), yet
    the helper is named ``calculate_distance``. If it returns a true distance,
    this selects the *farthest* cell type -- confirm against the helper.

    Args:
        predicted_embeddings: Iterable of embeddings to classify.
        map_dict: Mapping of cell type -> reference embedding.

    Returns:
        A list holding one cell type per predicted embedding, in input order.

    Raises:
        ValueError: If ``map_dict`` is empty while at least one predicted
            embedding is supplied (``max`` over an empty mapping).
    """

    def best_match(query):
        # Score the query against every reference, then take the top scorer.
        scores = {
            label: calculate_distance(query, reference, method="cosine")
            for label, reference in map_dict.items()
        }
        return max(scores, key=scores.get)

    return [best_match(query) for query in predicted_embeddings]


In [None]:
def create_map_dict(true_labels, embeddings):
    """Build a ``{label: embedding}`` lookup from row-aligned labels and embeddings.

    ``true_labels[i]`` is taken to be the label of ``embeddings[i]``.

    * ``pd.Series`` input: every label is mapped to the first embedding that
      appears alongside it, in order of first appearance. Elements are never
      hashed, so a Series of lists or arrays is accepted.
    * list / tuple / 2-D array input: rows are de-duplicated with
      ``np.unique(axis=0)`` and each distinct row is stored under the label of
      its first occurrence. If one label owns several distinct rows, the row
      that sorts last is the one that remains in the dictionary.

    Args:
        true_labels: Sequence of hashable labels, one per embedding row.
        embeddings: ``pd.Series`` of embeddings, a 2-D ``np.ndarray``, or a
            list/tuple of equally sized rows.

    Returns:
        dict mapping label -> embedding. An empty dict is returned (and a
        message printed) when the converted embeddings are not 2-D.
    """
    if isinstance(embeddings, pd.Series):
        # Pair row by row instead of de-duplicating labels and embeddings
        # independently: independent de-duplication shifts the pairing as soon
        # as two labels share an embedding, and hashing fails outright for
        # list/array elements.
        label_map_dict = {}
        for label, embedding in zip(true_labels, embeddings.tolist()):
            # Keep the first embedding seen for each label.
            label_map_dict.setdefault(label, embedding)
        return label_map_dict

    if isinstance(embeddings, (list, tuple)):
        # Convert a sequence of rows to a 2-D numpy array
        embeddings = np.array(embeddings)

    if embeddings.ndim != 2:
        # Fallback for other structures
        print("Unsupported structure for embeddings.")
        return {}

    # return_index gives the first row index of every distinct embedding, so
    # each label is read from the same row as its embedding.
    _, unique_indices = np.unique(embeddings, axis=0, return_index=True)
    unique_labels = np.array(true_labels)[unique_indices]
    unique_embeddings = embeddings[unique_indices]

    return dict(zip(unique_labels, unique_embeddings))


In [None]:

# Build one label -> embedding lookup per embedding column of `subject_data_full`
# by calling `create_map_dict`. The `Cell_*` maps use `Cell_Type` as labels; the
# remaining maps use `Gene_Marker`.
# NOTE(review): the Out/Aut/DR digits in the variable names presumably flag which
# of the outlier / autoencoder / dimension-reduction steps produced the column --
# confirm against the code that creates these columns.
# NOTE(review): the column keys do not follow one naming pattern (e.g.
# "Cell_Type_Embeddings_Autoencoder" vs "Gene_Markers_Autoencoder") -- verify
# that every key exists in `subject_data_full`.

# Map: Outlier
Cell_Subject_Out_1_Aut_0_DR_0_Map   = create_map_dict(Cell_Type, subject_data_full["Cell_Type_Embeddings_Outlier"]) 
Subject_Out_1_Aut_0_DR_0_Map   = create_map_dict(Gene_Marker, subject_data_full["Gene_Marker_Embeddings_Outlier"])  
# Map: Autoencoder
Cell_Subject_Out_1_Aut_1_DR_0_Map   = create_map_dict(Cell_Type, subject_data_full["Cell_Type_Embeddings_Autoencoder"]) 
Subject_Out_1_Aut_1_DR_0_Map   = create_map_dict(Gene_Marker, subject_data_full["Gene_Markers_Autoencoder"])  
# Map: Dimension Reduction
Cell_Subject_Out_1_Aut_1_DR_1_Map   = create_map_dict(Cell_Type, subject_data_full["Cell_Types_Dimension"]) 
Subject_Out_1_Aut_1_DR_1_Map   = create_map_dict(Gene_Marker, subject_data_full["Gene_Markers_Dimension"])  
