In [None]:
def get_expression_of_ranked_genes(adata, selected_ranked_key, cluster_label, n_genes=10, report = False):
    """Return per-cell expression of the top-ranked genes for one cluster.

    The gene names are read from
    ``adata.uns[selected_ranked_key]['names'][cluster_label]`` and truncated
    to the first ``n_genes`` entries (presumably the output of a gene-ranking
    step such as scanpy's ``rank_genes_groups`` -- confirm against the caller).

    Parameters
    ----------
    adata
        AnnData-like object exposing ``uns``, ``raw``, ``obs_names`` and
        ``[:, genes].X`` indexing.
    selected_ranked_key
        Key in ``adata.uns`` holding the ranking result.
    cluster_label
        Cluster/group whose ranked genes are looked up.
    n_genes
        Maximum number of top genes to extract.
    report
        When true, print which matrix (raw or preprocessed) was used.

    Returns
    -------
    (df_expression, Expression_Raw)
        ``df_expression`` is a DataFrame with one row per entry of
        ``adata.obs_names`` and one column per selected gene;
        ``Expression_Raw`` is ``True`` when the values came from
        ``adata.raw`` and ``False`` when they came from ``adata`` itself.
    """
    top_genes = adata.uns[selected_ranked_key]['names'][cluster_label][:n_genes]

    # Test for presence explicitly instead of relying on the truthiness of
    # whatever object ``adata.raw`` happens to be.
    if adata.raw is not None:
        expression_levels = adata.raw[:, top_genes].X
        source_message = "Raw Data Extracted"
        Expression_Raw = True
    else:
        expression_levels = adata[:, top_genes].X
        source_message = "Preprocessed Data Extracted"
        Expression_Raw = False

    if report:
        print(source_message)

    # ``.X`` may be a sparse matrix or an already-dense array; only sparse
    # containers provide ``toarray`` and need densifying before framing.
    if hasattr(expression_levels, "toarray"):
        expression_levels = expression_levels.toarray()

    df_expression = pd.DataFrame(expression_levels, columns=top_genes, index=adata.obs_names)

    return df_expression, Expression_Raw

In [None]:
def get_full_profile(adata, selected_ranked_key, cluster_label, n_genes=10):
    """Collect the ranking statistics of the top genes of one cluster.

    Reads the ``'names'``, ``'pvals'``, ``'pvals_adj'`` and ``'scores'``
    entries of ``adata.uns[selected_ranked_key]`` for ``cluster_label`` and
    keeps the first ``n_genes`` values of each.

    Returns a DataFrame indexed by gene name with the columns
    ``p_values``, ``p_values_adj`` and ``scores``.
    """
    ranking = adata.uns[selected_ranked_key]

    def _leading(field):
        # First ``n_genes`` entries of one ranking field for this cluster.
        return ranking[field][cluster_label][:n_genes]

    gene_names = _leading('names')
    statistics = {
        "p_values": _leading('pvals'),
        "p_values_adj": _leading('pvals_adj'),
        "scores": _leading('scores'),
    }

    return pd.DataFrame(statistics, index=gene_names)

In [None]:
def create_Cluster_Dicts(target_gene_number,
                         current_cluster_barcode,
                         selected_cluster_key,
                         selected_ranked_key,
                         report = False
                         ):
    """Summarise the top-ranked genes of one cluster as two dicts and a DataFrame.

    NOTE: this function reads the module-level ``adata`` object; it is not
    passed in as a parameter.

    Parameters
    ----------
    target_gene_number
        Maximum number of top-ranked genes to include.
    current_cluster_barcode
        Label of the cluster to summarise; matched against
        ``adata.obs[selected_cluster_key]`` and used to look up the ranking.
    selected_cluster_key
        Column of ``adata.obs`` holding the cluster assignment of each cell.
    selected_ranked_key
        Key in ``adata.uns`` holding the gene-ranking result.
    report
        Forwarded to ``get_expression_of_ranked_genes``.

    Returns
    -------
    (Cell_type_Gene_Gene_expression_Dict, Full_Profile_Dict, Cell_Gene_DF)
        * ``{cluster: {gene: [mean_expression]}}``
        * ``{cluster: {gene: [mean_expression, p_value, p_value_adj, score,
          expression_is_raw]}}``
        * DataFrame with columns ``Cluster, Gene, Expression, P Value,
          P Value(Adj), Score`` and one row per gene.
    """
    # Step A: barcodes of the cells assigned to the requested cluster.
    # The column label is looked up in its string form, as the original
    # f-string lookup did.
    in_cluster = adata.obs[str(selected_cluster_key)] == current_cluster_barcode
    cluster_cell_barcodes = adata.obs.loc[in_cluster].index.values

    # Step B.1: expression of the top-ranked genes for every cell.
    current_gene_expressions, Expression_Raw = get_expression_of_ranked_genes(
        adata=adata,
        selected_ranked_key=selected_ranked_key,
        cluster_label=current_cluster_barcode,
        n_genes=target_gene_number,
        report=report,
    )
    # Step B.2: ranking statistics (p-values, adjusted p-values, scores).
    profile_table = get_full_profile(
        adata=adata,
        selected_ranked_key=selected_ranked_key,
        cluster_label=current_cluster_barcode,
        n_genes=target_gene_number,
    )

    # Step C: per-gene mean expression over the cells of this cluster only.
    # Computed directly rather than by appending a 'mean' row to the slice,
    # which mutated the slice and would collide with a barcode named "mean".
    cluster_expression = current_gene_expressions.loc[cluster_cell_barcodes]
    genes = cluster_expression.columns.to_list()
    means_of_gene_expressions = cluster_expression.mean().tolist()

    # Step D: the statistics are in ranking order, i.e. aligned with `genes`.
    p_values = profile_table["p_values"].tolist()
    p_values_adj = profile_table["p_values_adj"].tolist()
    scores = profile_table["scores"].tolist()

    # Size the constant columns by the number of genes actually returned:
    # the ranking may hold fewer than `target_gene_number` entries, and a
    # length mismatch would make the DataFrame constructor raise.
    gene_count = len(genes)
    cluster_labels = [current_cluster_barcode] * gene_count
    raw_status = [Expression_Raw] * gene_count

    Cell_Gene_DF = pd.DataFrame({
        "Cluster": cluster_labels,
        "Gene": genes,
        "Expression": means_of_gene_expressions,
        "P Value": p_values,
        "P Value(Adj)": p_values_adj,
        "Score": scores,
    })

    # Step E: per-gene lookup tables.
    gene_and_expressions = {}
    gene_profiles = {}
    for gene, expression, p_value, p_value_adj, score, status in zip(
        genes, means_of_gene_expressions, p_values, p_values_adj, scores, raw_status
    ):
        gene_and_expressions[gene] = [expression]
        gene_profiles[gene] = [expression, p_value, p_value_adj, score, status]

    # Step F: nest the per-gene tables under the cluster label.
    Cell_type_Gene_Gene_expression_Dict = {current_cluster_barcode: gene_and_expressions}
    Full_Profile_Dict = {current_cluster_barcode: gene_profiles}

    return Cell_type_Gene_Gene_expression_Dict, Full_Profile_Dict, Cell_Gene_DF

