In [354]:
import os
import sys
import numpy as np
import pandas as pd
sys.path.append('/cellar/users/aklie/opt/gene_program_evaluation')
sys.path.append('/cellar/users/aklie/opt/gene_program_evaluation/src/plotting/dashapp/example_app')
import mudata

from data_processing import load_data

In [355]:
# Insert geneset enrichment into mudata
def insert_enrichment(mdata, df, library="GSEA", prog_key="prog",
                      geneset_index="Term", program_index="program_name",
                      varmap_name_prefix="gsea_varmap"):
    """Store a long-format enrichment table inside ``mdata[prog_key]``.

    Every column of ``df`` other than ``geneset_index`` and ``program_index``
    is pivoted into a (programs x gene sets) array and written to
    ``mdata[prog_key].varm["<column>_<library>"]``; spaces in that key are
    replaced by ``_`` and ``%`` by ``percent``. Rows follow
    ``mdata[prog_key].var.index`` and columns follow the order in which gene
    sets first appear in ``df``. That gene-set order is saved in
    ``mdata[prog_key].uns["<varmap_name_prefix>_<library>"]`` so the arrays
    can be decoded later. (program, gene set) pairs absent from ``df`` are NaN.

    Numeric columns yield a numeric array (float64 as soon as any pair is
    missing); all other columns yield an object array.

    Parameters
    ----------
    mdata : mapping-like container indexed by ``prog_key`` whose value exposes
        ``var``, ``varm`` and ``uns``.
    df : pandas.DataFrame with one row per (gene set, program) pair.
    library : suffix appended to every ``varm`` key and to the ``uns`` key.
    prog_key : key of the modality to write into.
    geneset_index, program_index : names of the columns of ``df`` that hold
        the gene-set and program identifiers.
    varmap_name_prefix : prefix of the ``uns`` key holding the gene-set order.

    Raises
    ------
    ValueError
        If ``df`` refers to programs that are not in
        ``mdata[prog_key].var.index``, or (raised by ``DataFrame.pivot``) if a
        (gene set, program) pair occurs more than once.
    """
    programs = mdata[prog_key].var.index
    # Computed up front so the gene-set order is available even when `df`
    # carries no value columns and the loop below never runs.
    genesets = pd.Index(df[geneset_index].unique())

    # Fail early with a readable message instead of producing an array whose
    # first dimension no longer matches the number of programs.
    unknown_programs = pd.Index(df[program_index].unique()).difference(programs, sort=False)
    if len(unknown_programs) > 0:
        raise ValueError(
            f"{len(unknown_programs)} value(s) in column '{program_index}' are not in "
            f"mdata['{prog_key}'].var.index, e.g. {unknown_programs[:5].tolist()}"
        )

    # Create a mudata key to column name mapping dictionary
    mudata_keys_dict = {}
    for col in df.columns:
        if col not in [geneset_index, program_index]:
            key = f"{col}_{library}"
            key = key.replace(' ', '_').replace('%', 'percent')
            mudata_keys_dict[key] = col

    print(mudata_keys_dict)

    # Insert the values from the dataframe into the array for each key
    for key, colname in mudata_keys_dict.items():

        # Pivot the dataframe for gene sets and programs
        pivot_df = df.pivot(index=geneset_index, columns=program_index, values=colname)

        # Align to the full gene-set x program grid. Reindexing keeps numeric
        # columns numeric, so the array dtype no longer depends on whether the
        # first program happens to have any enrichment rows.
        all_progs_df = pivot_df.reindex(index=genesets, columns=programs)

        # Add the (programs x gene sets) array into the MuData object
        mdata[prog_key].varm[key] = all_progs_df.T.to_numpy()

    # Add the varmap to the mudata object
    varmap_name = f"{varmap_name_prefix}_{library}"
    mdata[prog_key].uns[varmap_name] = genesets


# Inverse of insert_enrichment: read the values back out of the mudata object and assemble them into a long-format dataframe
def extract_enrichment(
    mdata, library="GSEA", prog_key="prog", geneset_index="Term", program_index="program_name",
    varmap_name_prefix="gsea_varmap"
):
    """Rebuild a long-format enrichment table from ``mdata[prog_key]``.

    Every ``varm`` entry whose key ends with ``"_<library>"`` is read as a
    (programs x gene sets) array and becomes one column of the result, named
    after the key with that suffix removed. Program names come from
    ``mdata[prog_key].var.index`` and gene-set names from
    ``mdata[prog_key].uns["<varmap_name_prefix>_<library>"]``.

    Parameters
    ----------
    mdata : mapping-like container indexed by ``prog_key`` whose value exposes
        ``var``, ``varm`` and ``uns``.
    library : suffix identifying the ``varm`` keys and the ``uns`` key to read.
    prog_key : key of the modality to read from.
    geneset_index, program_index : names given to the gene-set and program
        identifier columns of the returned dataframe.
    varmap_name_prefix : prefix of the ``uns`` key holding the gene-set order.

    Returns
    -------
    pandas.DataFrame
        One row per (gene set, program) pair, program-major, with every row
        containing a NaN dropped. The index is not reset after dropping.

    Raises
    ------
    ValueError
        If a selected ``varm`` array is not shaped (n_programs, n_genesets).
    """
    # Create a column name to mudata key mapping dictionary. Match on the
    # suffix only: a substring test would also pick up keys of other libraries
    # that merely contain the library name, and str.replace would strip every
    # occurrence of the suffix instead of just the trailing one.
    suffix = f"_{library}"
    mudata_keys_dict = {}
    for key in mdata[prog_key].varm.keys():
        if key.endswith(suffix):
            colname = key[:-len(suffix)]
            mudata_keys_dict[colname] = key
    print(mudata_keys_dict)

    programs = mdata[prog_key].var.index.tolist()
    genesets = mdata[prog_key].uns[f"{varmap_name_prefix}_{library}"].tolist()
    n_programs, n_genesets = len(programs), len(genesets)

    # Identifier columns in the order produced by flattening a
    # (programs x gene sets) array row by row: all gene sets of the first
    # program, then all gene sets of the second program, and so on.
    df = pd.DataFrame({
        geneset_index: np.tile(np.asarray(genesets, dtype=object), n_programs),
        program_index: np.repeat(np.asarray(programs, dtype=object), n_genesets),
    })

    # For each key in the dictionary, extract the values from the mudata object and put them into the dataframe
    for colname, key in mudata_keys_dict.items():
        print(colname, key)
        values = mdata[prog_key].varm[key]
        if values.shape != (n_programs, n_genesets):
            # A transposed or stale array would otherwise be mislabelled silently.
            raise ValueError(
                f"varm['{key}'] has shape {values.shape}, expected "
                f"({n_programs}, {n_genesets}) (programs x gene sets)"
            )

        # Add the values to the dataframe
        df[colname] = values.flatten()

    # Drop any rows with NaN values
    df = df.dropna()

    return df

In [356]:
# Input files: the Excel workbook handed to load_data and the .h5mu file
# handed to mudata.read_h5mu in the cells below.
path_excel = (
    "/cellar/users/aklie/opt/gene_program_evaluation/src/plotting/dashapp/"
    "example_data/cNMF_evaluation_output.xlsx"
)
path_mdata = (
    "/cellar/users/aklie/opt/gene_program_evaluation/src/plotting/dashapp/"
    "example_data/cNMF_evaluation_dashapp_data.h5mu"
)

In [357]:
# Destination of the annotated MuData file written further down.
path_out = (
    "/cellar/users/aklie/opt/gene_program_evaluation/src/plotting/dashapp/"
    "example_data/example_data.h5mu"
)

In [358]:
# Unpack the four tables returned by load_data for the Excel workbook.
(
    explained_variance,
    enrichment_gsea,
    enrichment_motif,
    enrichment_gwas,
) = load_data(path_excel)

In [359]:
# Read the MuData container from disk; the bare name on the last line displays its summary.
mdata = mudata.read_h5mu(path_mdata)
mdata



In [360]:
# Prefix every cNMF program name with "K60_" so the names match the
# "ProgramID" values used by the merge and enrichment cells below. Names that
# already carry the prefix are left untouched, so re-running this cell does not
# produce "K60_K60_..." names.
program_prefix = "K60_"
mdata.mod["cNMF"].var_names = pd.Index(
    [x if x.startswith(program_prefix) else program_prefix + x for x in mdata.mod["cNMF"].var_names]
)

# Add variance explained

In [361]:
# Attach the explained-variance table to the program annotations: match var's
# index against the "ProgramID" column, then index the result by that column.
cnmf_var = mdata.mod["cNMF"].var
cnmf_var = cnmf_var.merge(explained_variance, left_index=True, right_on="ProgramID")
mdata.mod["cNMF"].var = cnmf_var.set_index("ProgramID")
mdata.mod["cNMF"].var.head()

Unnamed: 0_level_0,VarianceExplained
ProgramID,Unnamed: 1_level_1
K60_1,0.000643
K60_2,0.000708
K60_3,0.000297
K60_4,0.000783
K60_5,0.000116


# Add GSEA

In [362]:
# Write the GSEA table into the "cNMF" modality (one varm entry per column,
# gene-set order under uns["gsea_varmap_GSEA"]).
insert_enrichment(
    mdata=mdata,
    df=enrichment_gsea,
    library="GSEA",
    prog_key="cNMF",
    geneset_index="ID",
    program_index="ProgramID",
    varmap_name_prefix="gsea_varmap",
)

{'Description_GSEA': 'Description', 'pvalue_GSEA': 'pvalue', 'p.adjust_GSEA': 'p.adjust', 'qvalue_GSEA': 'qvalue'}


In [363]:
# Read the GSEA values back out of the "cNMF" modality as a long-format table.
inversed_df = extract_enrichment(
    mdata=mdata,
    library="GSEA",
    prog_key="cNMF",
    geneset_index="ID",
    program_index="ProgramID",
    varmap_name_prefix="gsea_varmap",
)

{'Description': 'Description_GSEA', 'pvalue': 'pvalue_GSEA', 'p.adjust': 'p.adjust_GSEA', 'qvalue': 'qvalue_GSEA'}
Description Description_GSEA
pvalue pvalue_GSEA
p.adjust p.adjust_GSEA
qvalue qvalue_GSEA


In [364]:
# Locate gene set GO:0000018 in the stored gene-set order and program K60_3 in
# var_names, then show the matching row of the source table.
gsea_terms = mdata["cNMF"].uns["gsea_varmap_GSEA"].tolist()
program_names = mdata["cNMF"].var_names.tolist()
geneset_index = gsea_terms.index("GO:0000018")
prog_index = program_names.index("K60_3")

is_term = enrichment_gsea["ID"] == "GO:0000018"
is_program = enrichment_gsea["ProgramID"] == "K60_3"
enrichment_gsea[is_term & is_program]

Unnamed: 0,ProgramID,ID,Description,pvalue,p.adjust,qvalue
5119,K60_3,GO:0000018,regulation of DNA recombination,2.03701e-15,6.587377e-14,5.380346e-14


In [365]:
# Read the same (program, gene set) entry straight from mdata.varm
stored_qvalue = mdata["cNMF"].varm["qvalue_GSEA"][prog_index, geneset_index]
stored_description = mdata["cNMF"].varm["Description_GSEA"][prog_index, geneset_index]
stored_qvalue, stored_description

(5.38034585993897e-14, 'regulation of DNA recombination')

In [366]:
# Find values of GO:0000018 / K60_3 in the dataframe rebuilt by extract_enrichment
roundtrip_term = inversed_df["ID"] == "GO:0000018"
roundtrip_program = inversed_df["ProgramID"] == "K60_3"
inversed_df[roundtrip_term & roundtrip_program]

Unnamed: 0,ID,ProgramID,Description,pvalue,p.adjust,qvalue
11676,GO:0000018,K60_3,regulation of DNA recombination,2.03701e-15,6.587377e-14,5.380346e-14


In [367]:
# Put the source table and the rebuilt table into the same row order so they
# can be compared column by column.
sort_keys = ["ID", "ProgramID"]
sorted_df = enrichment_gsea.sort_values(by=sort_keys)
sorted_inversed_df = inversed_df.sort_values(by=sort_keys)

In [368]:
# Round-trip check: once both frames are sorted by (ID, ProgramID), every
# column must be identical. ProgramID and p.adjust are included as well; the
# failing column name is used as the assertion message.
for column in ["ProgramID", "ID", "Description", "pvalue", "p.adjust", "qvalue"]:
    assert sorted_df[column].tolist() == sorted_inversed_df[column].tolist(), column

# Add motif enrichment analysis

In [369]:
# Build a single identifier per (EPType, TFMotif) pair; it is used as the
# gene-set column when the motif table is inserted below.
ep_type = enrichment_motif["EPType"]
tf_motif = enrichment_motif["TFMotif"]
enrichment_motif["EPType-TFMotif"] = ep_type + "-" + tf_motif

In [370]:
# Write the motif table into the "cNMF" modality, keyed by the combined
# "EPType-TFMotif" identifier.
insert_enrichment(
    mdata=mdata,
    df=enrichment_motif,
    library="Motif",
    prog_key="cNMF",
    geneset_index="EPType-TFMotif",
    program_index="ProgramID",
    varmap_name_prefix="motif_varmap",
)

{'EPType_Motif': 'EPType', 'TFMotif_Motif': 'TFMotif', 'PValue_Motif': 'PValue', 'FDR_Motif': 'FDR', 'Enrichment_Motif': 'Enrichment'}


In [371]:
# Read the motif values back out of the "cNMF" modality as a long-format table.
inversed_df = extract_enrichment(
    mdata=mdata,
    library="Motif",
    prog_key="cNMF",
    geneset_index="EPType-TFMotif",
    program_index="ProgramID",
    varmap_name_prefix="motif_varmap",
)

{'EPType': 'EPType_Motif', 'TFMotif': 'TFMotif_Motif', 'PValue': 'PValue_Motif', 'FDR': 'FDR_Motif', 'Enrichment': 'Enrichment_Motif'}
EPType EPType_Motif
TFMotif TFMotif_Motif
PValue PValue_Motif
FDR FDR_Motif
Enrichment Enrichment_Motif


# Add GWAS trait enrichment analysis

In [373]:
# Write the GWAS trait table into the "cNMF" modality, keyed by "Term".
insert_enrichment(
    mdata=mdata,
    df=enrichment_gwas,
    library="GWAS",
    prog_key="cNMF",
    geneset_index="Term",
    program_index="ProgramID",
    varmap_name_prefix="gwas_varmap",
)

{'program_name_GWAS': 'program_name', 'Gene_set_GWAS': 'Gene_set', 'P-value_GWAS': 'P-value', 'Adjusted_P-value_GWAS': 'Adjusted P-value', 'Odds_Ratio_GWAS': 'Odds Ratio', 'Combined_Score_GWAS': 'Combined Score', 'Genes_GWAS': 'Genes', 'overlap_numerator_GWAS': 'overlap_numerator', 'overlap_denominator_GWAS': 'overlap_denominator'}


In [374]:
# Read the GWAS values back out of the "cNMF" modality as a long-format table.
inversed_df = extract_enrichment(
    mdata=mdata,
    library="GWAS",
    prog_key="cNMF",
    geneset_index="Term",
    program_index="ProgramID",
    varmap_name_prefix="gwas_varmap",
)

{'program_name': 'program_name_GWAS', 'Gene_set': 'Gene_set_GWAS', 'P-value': 'P-value_GWAS', 'Adjusted_P-value': 'Adjusted_P-value_GWAS', 'Odds_Ratio': 'Odds_Ratio_GWAS', 'Combined_Score': 'Combined_Score_GWAS', 'Genes': 'Genes_GWAS', 'overlap_numerator': 'overlap_numerator_GWAS', 'overlap_denominator': 'overlap_denominator_GWAS'}
program_name program_name_GWAS
Gene_set Gene_set_GWAS
P-value P-value_GWAS
Adjusted_P-value Adjusted_P-value_GWAS
Odds_Ratio Odds_Ratio_GWAS
Combined_Score Combined_Score_GWAS
Genes Genes_GWAS
overlap_numerator overlap_numerator_GWAS
overlap_denominator overlap_denominator_GWAS


# Add more modalities

In [385]:
# For every k below, add a "cNMF_{k}" modality that is a copy of "cNMF"
# restricted to its first k var_names.
num_progs = [
    3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    17, 19, 21, 23, 25, 27, 29,
    30, 35, 40, 45, 50, 55, 60,
]
cnmf_full = mdata.mod["cNMF"]
for k in num_progs:
    truncated = cnmf_full[:, :k].copy()
    mdata.mod[f"cNMF_{k}"] = truncated
mdata

# Write

In [376]:
# Inspect the stored GSEA descriptions as inserted (an object array with NaN for missing pairs).
mdata["cNMF"].varm["Description_GSEA"]

array([['intrinsic apoptotic signaling pathway in response to endoplasmic reticulum stress',
        'response to endoplasmic reticulum stress',
        'neutral amino acid transmembrane transporter activity', ...,
        nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       ...,
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan]], dtype=object)

In [377]:
# We need to convert each value in varm from an array of objects to an array of strings
# (NaN entries become the literal string 'nan').
# NOTE(review): presumably required so write_h5mu can serialise the arrays — confirm.
# Every "cNMF*" modality is converted, not only "cNMF" and "cNMF_30": the
# truncated copies created above carry the same object arrays.
for mod_name, mod in mdata.mod.items():
    if not mod_name.startswith("cNMF"):
        continue
    # list(...) takes a snapshot of the keys so entries can be reassigned while looping
    for key in list(mod.varm.keys()):
        # If type is object
        if mod.varm[key].dtype == "O":
            mod.varm[key] = mod.varm[key].astype(str)

In [378]:
# Inspect the same array again after the object-to-string conversion.
mdata["cNMF"].varm["Description_GSEA"]

array([['intrinsic apoptotic signaling pathway in response to endoplasmic reticulum stress',
        'response to endoplasmic reticulum stress',
        'neutral amino acid transmembrane transporter activity', ...,
        'nan', 'nan', 'nan'],
       ['nan', 'nan', 'nan', ..., 'nan', 'nan', 'nan'],
       ['nan', 'nan', 'nan', ..., 'nan', 'nan', 'nan'],
       ...,
       ['nan', 'nan', 'nan', ..., 'nan', 'nan', 'nan'],
       ['nan', 'nan', 'nan', ..., 'nan', 'nan', 'nan'],
       ['nan', 'nan', 'nan', ..., 'nan', 'nan', 'nan']], dtype='<U193')

In [379]:
# Inspect the stored gene-set order for GSEA (still a pandas Index of objects here).
mdata["cNMF"].uns["gsea_varmap_GSEA"]

Index(['GO:0070059', 'GO:0034976', 'GO:0015175', 'GO:0036499', 'GO:0006986',
       'GO:0006984', 'GO:0034620', 'GO:0036003', 'GO:0015179', 'GO:0015171',
       ...
       'GO:0061900', 'GO:0032461', 'GO:0061333', 'GO:0044232', 'GO:0016461',
       'GO:0005640', 'GO:0044233', 'GO:0097440', 'GO:0003382', 'GO:1904754'],
      dtype='object', length=4461)

In [380]:
# We need to convert each value in uns from an array of objects to an array of strings,
# using string_array = np.array([str(element) for element in object_array]).
# Every "cNMF*" modality is converted, not only "cNMF" and "cNMF_30": the
# truncated copies created above carry the same uns entries.
converted_keys = []
for mod_name, mod in mdata.mod.items():
    if not mod_name.startswith("cNMF"):
        continue
    # list(...) takes a snapshot of the keys so entries can be reassigned while looping
    for key in list(mod.uns.keys()):
        # If type is object; uns entries without a dtype attribute are skipped
        if getattr(mod.uns[key], "dtype", None) == "O":
            if key not in converted_keys:
                converted_keys.append(key)
            mod.uns[key] = np.array([str(element) for element in mod.uns[key]])

# Report each converted key once, whatever the number of modalities
for key in converted_keys:
    print(key)

var_names
gsea_varmap_GSEA
motif_varmap_Motif
gwas_varmap_GWAS


In [381]:
# Inspect the gene-set order again after the conversion to a string array.
mdata["cNMF"].uns["gsea_varmap_GSEA"]

array(['GO:0070059', 'GO:0034976', 'GO:0015175', ..., 'GO:0097440',
       'GO:0003382', 'GO:1904754'], dtype='<U10')

In [382]:
# Write the annotated MuData object to path_out.
mdata.write_h5mu(path_out)



# Downsample version

In [383]:
# Keep at most the first 200 observations of every (batch, sample)
# combination, visiting batches in the outer loop and samples in the inner one.
cnmf_obs = mdata['cNMF'].obs
sel_idx = []
for batch in cnmf_obs['batch'].unique():
    in_batch = cnmf_obs['batch'] == batch
    for samp in cnmf_obs['sample'].unique():
        in_sample = cnmf_obs['sample'] == samp
        mdata_obs_ = cnmf_obs.loc[in_batch & in_sample]
        first_rows = mdata_obs_.iloc[:200]
        sel_idx.extend(first_rows.index.tolist())

# Note: this replaces `mdata` with the downsampled copy.
mdata = mdata[sel_idx].copy()
mdata



In [386]:
# Write the downsampled object to path_out with "_small" inserted before the ".h5mu" extension.
mdata.write_h5mu(path_out.replace(".h5mu", "_small.h5mu"))



# Display

In [388]:
# Render the MuData object with mudata's HTML display style for this cell only.
# NOTE(review): display_html_expand = 0b000 presumably keeps every section collapsed — confirm against the mudata docs.
with mudata.set_options(display_style = "html", display_html_expand = 0b000):
    display(mdata)

0,1,2
rna:barcodes,object,ANKEF1:GAAGGGACATCATTCACGCCT:AAACCCAAGAAGTCAT-scRNAseq_2kG_11AMDox_1...
rna:n_genes,int64,"3609,1904,4473,3306,3570,3256,3225,2933,3045,3827,..."
rna:n_counts,float32,"12474.00,5743.00,21423.00,13837.00,13278.00,11613...."
rna:sample,category,"11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,..."
rna:batch,category,"11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11..."
cNMF:sample,category,"11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,..."
cNMF:batch,category,"11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11..."
cNMF_30:sample,category,"11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,..."
cNMF_30:batch,category,"11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11..."
cNMF_3:sample,category,"11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,..."

0,1,2,3
cNMF,bool,numpy.ndarray,
rna,bool,numpy.ndarray,
cNMF_30,bool,numpy.ndarray,
cNMF_3,bool,numpy.ndarray,
cNMF_4,bool,numpy.ndarray,
cNMF_5,bool,numpy.ndarray,
cNMF_6,bool,numpy.ndarray,
cNMF_7,bool,numpy.ndarray,
cNMF_8,bool,numpy.ndarray,
cNMF_9,bool,numpy.ndarray,

0,1,2
barcodes,object,ANKEF1:GAAGGGACATCATTCACGCCT:AAACCCAAGAAGTCAT-scRNAseq_2kG_11AMDox_1...
n_genes,int64,"3609,1904,4473,3306,3570,3256,3225,2933,3045,3827,..."
n_counts,float32,"12474.00,5743.00,21423.00,13837.00,13278.00,11613...."
sample,category,"11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,..."
batch,category,"11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11..."

0,1,2
sample,category,"11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,..."
batch,category,"11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11..."

0,1,2,3
var_names,numpy.ndarray,17472 elements,"FAM87B:ENSG00000177757,FAM41C:ENSG00000230368,SAMD..."
gsea_varmap_GSEA,numpy.ndarray,4461 elements,"GO:0070059,GO:0034976,GO:0015175,GO:0036499,GO:000..."
motif_varmap_Motif,numpy.ndarray,1267 elements,"Promoter-AHR,Promoter-AIRE,Promoter-ALX1,Promoter-..."
gwas_varmap_GWAS,numpy.ndarray,630 elements,"EFO_0000275,EFO_0000280,EFO_0000284,EFO_0000305,EF..."

0,1,2
sample,category,"11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,..."
batch,category,"11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11..."

0,1,2,3
var_names,numpy.ndarray,17472 elements,"FAM87B:ENSG00000177757,FAM41C:ENSG00000230368,SAMD..."
gsea_varmap_GSEA,numpy.ndarray,4461 elements,"GO:0070059,GO:0034976,GO:0015175,GO:0036499,GO:000..."
motif_varmap_Motif,numpy.ndarray,1267 elements,"Promoter-AHR,Promoter-AIRE,Promoter-ALX1,Promoter-..."
gwas_varmap_GWAS,numpy.ndarray,630 elements,"EFO_0000275,EFO_0000280,EFO_0000284,EFO_0000305,EF..."

0,1,2
sample,category,"11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,..."
batch,category,"11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11..."

0,1,2,3
var_names,numpy.ndarray,17472 elements,"FAM87B:ENSG00000177757,FAM41C:ENSG00000230368,SAMD..."
gsea_varmap_GSEA,numpy.ndarray,4461 elements,"GO:0070059,GO:0034976,GO:0015175,GO:0036499,GO:000..."
motif_varmap_Motif,numpy.ndarray,1267 elements,"Promoter-AHR,Promoter-AIRE,Promoter-ALX1,Promoter-..."
gwas_varmap_GWAS,numpy.ndarray,630 elements,"EFO_0000275,EFO_0000280,EFO_0000284,EFO_0000305,EF..."

0,1,2
sample,category,"11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,..."
batch,category,"11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11..."

0,1,2,3
var_names,numpy.ndarray,17472 elements,"FAM87B:ENSG00000177757,FAM41C:ENSG00000230368,SAMD..."
gsea_varmap_GSEA,numpy.ndarray,4461 elements,"GO:0070059,GO:0034976,GO:0015175,GO:0036499,GO:000..."
motif_varmap_Motif,numpy.ndarray,1267 elements,"Promoter-AHR,Promoter-AIRE,Promoter-ALX1,Promoter-..."
gwas_varmap_GWAS,numpy.ndarray,630 elements,"EFO_0000275,EFO_0000280,EFO_0000284,EFO_0000305,EF..."

0,1,2
sample,category,"11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,..."
batch,category,"11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11..."

0,1,2,3
var_names,numpy.ndarray,17472 elements,"FAM87B:ENSG00000177757,FAM41C:ENSG00000230368,SAMD..."
gsea_varmap_GSEA,numpy.ndarray,4461 elements,"GO:0070059,GO:0034976,GO:0015175,GO:0036499,GO:000..."
motif_varmap_Motif,numpy.ndarray,1267 elements,"Promoter-AHR,Promoter-AIRE,Promoter-ALX1,Promoter-..."
gwas_varmap_GWAS,numpy.ndarray,630 elements,"EFO_0000275,EFO_0000280,EFO_0000284,EFO_0000305,EF..."

0,1,2
sample,category,"11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,..."
batch,category,"11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11..."

0,1,2,3
var_names,numpy.ndarray,17472 elements,"FAM87B:ENSG00000177757,FAM41C:ENSG00000230368,SAMD..."
gsea_varmap_GSEA,numpy.ndarray,4461 elements,"GO:0070059,GO:0034976,GO:0015175,GO:0036499,GO:000..."
motif_varmap_Motif,numpy.ndarray,1267 elements,"Promoter-AHR,Promoter-AIRE,Promoter-ALX1,Promoter-..."
gwas_varmap_GWAS,numpy.ndarray,630 elements,"EFO_0000275,EFO_0000280,EFO_0000284,EFO_0000305,EF..."

0,1,2
sample,category,"11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,..."
batch,category,"11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11..."

0,1,2,3
var_names,numpy.ndarray,17472 elements,"FAM87B:ENSG00000177757,FAM41C:ENSG00000230368,SAMD..."
gsea_varmap_GSEA,numpy.ndarray,4461 elements,"GO:0070059,GO:0034976,GO:0015175,GO:0036499,GO:000..."
motif_varmap_Motif,numpy.ndarray,1267 elements,"Promoter-AHR,Promoter-AIRE,Promoter-ALX1,Promoter-..."
gwas_varmap_GWAS,numpy.ndarray,630 elements,"EFO_0000275,EFO_0000280,EFO_0000284,EFO_0000305,EF..."

0,1,2
sample,category,"11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,..."
batch,category,"11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11..."

0,1,2,3
var_names,numpy.ndarray,17472 elements,"FAM87B:ENSG00000177757,FAM41C:ENSG00000230368,SAMD..."
gsea_varmap_GSEA,numpy.ndarray,4461 elements,"GO:0070059,GO:0034976,GO:0015175,GO:0036499,GO:000..."
motif_varmap_Motif,numpy.ndarray,1267 elements,"Promoter-AHR,Promoter-AIRE,Promoter-ALX1,Promoter-..."
gwas_varmap_GWAS,numpy.ndarray,630 elements,"EFO_0000275,EFO_0000280,EFO_0000284,EFO_0000305,EF..."

0,1,2
sample,category,"11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,..."
batch,category,"11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11..."

0,1,2,3
var_names,numpy.ndarray,17472 elements,"FAM87B:ENSG00000177757,FAM41C:ENSG00000230368,SAMD..."
gsea_varmap_GSEA,numpy.ndarray,4461 elements,"GO:0070059,GO:0034976,GO:0015175,GO:0036499,GO:000..."
motif_varmap_Motif,numpy.ndarray,1267 elements,"Promoter-AHR,Promoter-AIRE,Promoter-ALX1,Promoter-..."
gwas_varmap_GWAS,numpy.ndarray,630 elements,"EFO_0000275,EFO_0000280,EFO_0000284,EFO_0000305,EF..."

0,1,2
sample,category,"11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,..."
batch,category,"11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11..."

0,1,2,3
var_names,numpy.ndarray,17472 elements,"FAM87B:ENSG00000177757,FAM41C:ENSG00000230368,SAMD..."
gsea_varmap_GSEA,numpy.ndarray,4461 elements,"GO:0070059,GO:0034976,GO:0015175,GO:0036499,GO:000..."
motif_varmap_Motif,numpy.ndarray,1267 elements,"Promoter-AHR,Promoter-AIRE,Promoter-ALX1,Promoter-..."
gwas_varmap_GWAS,numpy.ndarray,630 elements,"EFO_0000275,EFO_0000280,EFO_0000284,EFO_0000305,EF..."

0,1,2
sample,category,"11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,..."
batch,category,"11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11..."

0,1,2,3
var_names,numpy.ndarray,17472 elements,"FAM87B:ENSG00000177757,FAM41C:ENSG00000230368,SAMD..."
gsea_varmap_GSEA,numpy.ndarray,4461 elements,"GO:0070059,GO:0034976,GO:0015175,GO:0036499,GO:000..."
motif_varmap_Motif,numpy.ndarray,1267 elements,"Promoter-AHR,Promoter-AIRE,Promoter-ALX1,Promoter-..."
gwas_varmap_GWAS,numpy.ndarray,630 elements,"EFO_0000275,EFO_0000280,EFO_0000284,EFO_0000305,EF..."

0,1,2
sample,category,"11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,..."
batch,category,"11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11..."

0,1,2,3
var_names,numpy.ndarray,17472 elements,"FAM87B:ENSG00000177757,FAM41C:ENSG00000230368,SAMD..."
gsea_varmap_GSEA,numpy.ndarray,4461 elements,"GO:0070059,GO:0034976,GO:0015175,GO:0036499,GO:000..."
motif_varmap_Motif,numpy.ndarray,1267 elements,"Promoter-AHR,Promoter-AIRE,Promoter-ALX1,Promoter-..."
gwas_varmap_GWAS,numpy.ndarray,630 elements,"EFO_0000275,EFO_0000280,EFO_0000284,EFO_0000305,EF..."

0,1,2
sample,category,"11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,..."
batch,category,"11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11..."

0,1,2,3
var_names,numpy.ndarray,17472 elements,"FAM87B:ENSG00000177757,FAM41C:ENSG00000230368,SAMD..."
gsea_varmap_GSEA,numpy.ndarray,4461 elements,"GO:0070059,GO:0034976,GO:0015175,GO:0036499,GO:000..."
motif_varmap_Motif,numpy.ndarray,1267 elements,"Promoter-AHR,Promoter-AIRE,Promoter-ALX1,Promoter-..."
gwas_varmap_GWAS,numpy.ndarray,630 elements,"EFO_0000275,EFO_0000280,EFO_0000284,EFO_0000305,EF..."

0,1,2
sample,category,"11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,..."
batch,category,"11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11..."

0,1,2,3
var_names,numpy.ndarray,17472 elements,"FAM87B:ENSG00000177757,FAM41C:ENSG00000230368,SAMD..."
gsea_varmap_GSEA,numpy.ndarray,4461 elements,"GO:0070059,GO:0034976,GO:0015175,GO:0036499,GO:000..."
motif_varmap_Motif,numpy.ndarray,1267 elements,"Promoter-AHR,Promoter-AIRE,Promoter-ALX1,Promoter-..."
gwas_varmap_GWAS,numpy.ndarray,630 elements,"EFO_0000275,EFO_0000280,EFO_0000284,EFO_0000305,EF..."

0,1,2
sample,category,"11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,..."
batch,category,"11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11..."

0,1,2,3
var_names,numpy.ndarray,17472 elements,"FAM87B:ENSG00000177757,FAM41C:ENSG00000230368,SAMD..."
gsea_varmap_GSEA,numpy.ndarray,4461 elements,"GO:0070059,GO:0034976,GO:0015175,GO:0036499,GO:000..."
motif_varmap_Motif,numpy.ndarray,1267 elements,"Promoter-AHR,Promoter-AIRE,Promoter-ALX1,Promoter-..."
gwas_varmap_GWAS,numpy.ndarray,630 elements,"EFO_0000275,EFO_0000280,EFO_0000284,EFO_0000305,EF..."

0,1,2
sample,category,"11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,..."
batch,category,"11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11..."

0,1,2,3
var_names,numpy.ndarray,17472 elements,"FAM87B:ENSG00000177757,FAM41C:ENSG00000230368,SAMD..."
gsea_varmap_GSEA,numpy.ndarray,4461 elements,"GO:0070059,GO:0034976,GO:0015175,GO:0036499,GO:000..."
motif_varmap_Motif,numpy.ndarray,1267 elements,"Promoter-AHR,Promoter-AIRE,Promoter-ALX1,Promoter-..."
gwas_varmap_GWAS,numpy.ndarray,630 elements,"EFO_0000275,EFO_0000280,EFO_0000284,EFO_0000305,EF..."

0,1,2
sample,category,"11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,..."
batch,category,"11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11..."

0,1,2,3
var_names,numpy.ndarray,17472 elements,"FAM87B:ENSG00000177757,FAM41C:ENSG00000230368,SAMD..."
gsea_varmap_GSEA,numpy.ndarray,4461 elements,"GO:0070059,GO:0034976,GO:0015175,GO:0036499,GO:000..."
motif_varmap_Motif,numpy.ndarray,1267 elements,"Promoter-AHR,Promoter-AIRE,Promoter-ALX1,Promoter-..."
gwas_varmap_GWAS,numpy.ndarray,630 elements,"EFO_0000275,EFO_0000280,EFO_0000284,EFO_0000305,EF..."

0,1,2
sample,category,"11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,..."
batch,category,"11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11..."

0,1,2,3
var_names,numpy.ndarray,17472 elements,"FAM87B:ENSG00000177757,FAM41C:ENSG00000230368,SAMD..."
gsea_varmap_GSEA,numpy.ndarray,4461 elements,"GO:0070059,GO:0034976,GO:0015175,GO:0036499,GO:000..."
motif_varmap_Motif,numpy.ndarray,1267 elements,"Promoter-AHR,Promoter-AIRE,Promoter-ALX1,Promoter-..."
gwas_varmap_GWAS,numpy.ndarray,630 elements,"EFO_0000275,EFO_0000280,EFO_0000284,EFO_0000305,EF..."

0,1,2
sample,category,"11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,..."
batch,category,"11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11..."

0,1,2,3
var_names,numpy.ndarray,17472 elements,"FAM87B:ENSG00000177757,FAM41C:ENSG00000230368,SAMD..."
gsea_varmap_GSEA,numpy.ndarray,4461 elements,"GO:0070059,GO:0034976,GO:0015175,GO:0036499,GO:000..."
motif_varmap_Motif,numpy.ndarray,1267 elements,"Promoter-AHR,Promoter-AIRE,Promoter-ALX1,Promoter-..."
gwas_varmap_GWAS,numpy.ndarray,630 elements,"EFO_0000275,EFO_0000280,EFO_0000284,EFO_0000305,EF..."

0,1,2
sample,category,"11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,..."
batch,category,"11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11..."

0,1,2,3
var_names,numpy.ndarray,17472 elements,"FAM87B:ENSG00000177757,FAM41C:ENSG00000230368,SAMD..."
gsea_varmap_GSEA,numpy.ndarray,4461 elements,"GO:0070059,GO:0034976,GO:0015175,GO:0036499,GO:000..."
motif_varmap_Motif,numpy.ndarray,1267 elements,"Promoter-AHR,Promoter-AIRE,Promoter-ALX1,Promoter-..."
gwas_varmap_GWAS,numpy.ndarray,630 elements,"EFO_0000275,EFO_0000280,EFO_0000284,EFO_0000305,EF..."

0,1,2
sample,category,"11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,..."
batch,category,"11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11..."

0,1,2,3
var_names,numpy.ndarray,17472 elements,"FAM87B:ENSG00000177757,FAM41C:ENSG00000230368,SAMD..."
gsea_varmap_GSEA,numpy.ndarray,4461 elements,"GO:0070059,GO:0034976,GO:0015175,GO:0036499,GO:000..."
motif_varmap_Motif,numpy.ndarray,1267 elements,"Promoter-AHR,Promoter-AIRE,Promoter-ALX1,Promoter-..."
gwas_varmap_GWAS,numpy.ndarray,630 elements,"EFO_0000275,EFO_0000280,EFO_0000284,EFO_0000305,EF..."

0,1,2
sample,category,"11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,..."
batch,category,"11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11..."

0,1,2,3
var_names,numpy.ndarray,17472 elements,"FAM87B:ENSG00000177757,FAM41C:ENSG00000230368,SAMD..."
gsea_varmap_GSEA,numpy.ndarray,4461 elements,"GO:0070059,GO:0034976,GO:0015175,GO:0036499,GO:000..."
motif_varmap_Motif,numpy.ndarray,1267 elements,"Promoter-AHR,Promoter-AIRE,Promoter-ALX1,Promoter-..."
gwas_varmap_GWAS,numpy.ndarray,630 elements,"EFO_0000275,EFO_0000280,EFO_0000284,EFO_0000305,EF..."

0,1,2
sample,category,"11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,..."
batch,category,"11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11..."

0,1,2,3
var_names,numpy.ndarray,17472 elements,"FAM87B:ENSG00000177757,FAM41C:ENSG00000230368,SAMD..."
gsea_varmap_GSEA,numpy.ndarray,4461 elements,"GO:0070059,GO:0034976,GO:0015175,GO:0036499,GO:000..."
motif_varmap_Motif,numpy.ndarray,1267 elements,"Promoter-AHR,Promoter-AIRE,Promoter-ALX1,Promoter-..."
gwas_varmap_GWAS,numpy.ndarray,630 elements,"EFO_0000275,EFO_0000280,EFO_0000284,EFO_0000305,EF..."

0,1,2
sample,category,"11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,..."
batch,category,"11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11..."

0,1,2,3
var_names,numpy.ndarray,17472 elements,"FAM87B:ENSG00000177757,FAM41C:ENSG00000230368,SAMD..."
gsea_varmap_GSEA,numpy.ndarray,4461 elements,"GO:0070059,GO:0034976,GO:0015175,GO:0036499,GO:000..."
motif_varmap_Motif,numpy.ndarray,1267 elements,"Promoter-AHR,Promoter-AIRE,Promoter-ALX1,Promoter-..."
gwas_varmap_GWAS,numpy.ndarray,630 elements,"EFO_0000275,EFO_0000280,EFO_0000284,EFO_0000305,EF..."

0,1,2
sample,category,"11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,..."
batch,category,"11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11..."

0,1,2,3
var_names,numpy.ndarray,17472 elements,"FAM87B:ENSG00000177757,FAM41C:ENSG00000230368,SAMD..."
gsea_varmap_GSEA,numpy.ndarray,4461 elements,"GO:0070059,GO:0034976,GO:0015175,GO:0036499,GO:000..."
motif_varmap_Motif,numpy.ndarray,1267 elements,"Promoter-AHR,Promoter-AIRE,Promoter-ALX1,Promoter-..."
gwas_varmap_GWAS,numpy.ndarray,630 elements,"EFO_0000275,EFO_0000280,EFO_0000284,EFO_0000305,EF..."

0,1,2
sample,category,"11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,..."
batch,category,"11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11..."

0,1,2,3
var_names,numpy.ndarray,17472 elements,"FAM87B:ENSG00000177757,FAM41C:ENSG00000230368,SAMD..."
gsea_varmap_GSEA,numpy.ndarray,4461 elements,"GO:0070059,GO:0034976,GO:0015175,GO:0036499,GO:000..."
motif_varmap_Motif,numpy.ndarray,1267 elements,"Promoter-AHR,Promoter-AIRE,Promoter-ALX1,Promoter-..."
gwas_varmap_GWAS,numpy.ndarray,630 elements,"EFO_0000275,EFO_0000280,EFO_0000284,EFO_0000305,EF..."

0,1,2
sample,category,"11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,..."
batch,category,"11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11..."

0,1,2,3
var_names,numpy.ndarray,17472 elements,"FAM87B:ENSG00000177757,FAM41C:ENSG00000230368,SAMD..."
gsea_varmap_GSEA,numpy.ndarray,4461 elements,"GO:0070059,GO:0034976,GO:0015175,GO:0036499,GO:000..."
motif_varmap_Motif,numpy.ndarray,1267 elements,"Promoter-AHR,Promoter-AIRE,Promoter-ALX1,Promoter-..."
gwas_varmap_GWAS,numpy.ndarray,630 elements,"EFO_0000275,EFO_0000280,EFO_0000284,EFO_0000305,EF..."

0,1,2
sample,category,"11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,11AMDox_1,..."
batch,category,"11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11AMDox,11..."

0,1,2,3
var_names,numpy.ndarray,17472 elements,"FAM87B:ENSG00000177757,FAM41C:ENSG00000230368,SAMD..."
gsea_varmap_GSEA,numpy.ndarray,4461 elements,"GO:0070059,GO:0034976,GO:0015175,GO:0036499,GO:000..."
motif_varmap_Motif,numpy.ndarray,1267 elements,"Promoter-AHR,Promoter-AIRE,Promoter-ALX1,Promoter-..."
gwas_varmap_GWAS,numpy.ndarray,630 elements,"EFO_0000275,EFO_0000280,EFO_0000284,EFO_0000305,EF..."


In [390]:
# Show the summary of the smallest truncated modality (first 3 programs).
mdata["cNMF_3"]

AnnData object with n_obs × n_vars = 4000 × 3
    obs: 'sample', 'batch'
    var: 'VarianceExplained'
    uns: 'var_names', 'gsea_varmap_GSEA', 'motif_varmap_Motif', 'gwas_varmap_GWAS'
    varm: 'loadings', 'loadings_zscore', 'Description_GSEA', 'pvalue_GSEA', 'p.adjust_GSEA', 'qvalue_GSEA', 'EPType_Motif', 'TFMotif_Motif', 'PValue_Motif', 'FDR_Motif', 'Enrichment_Motif', 'program_name_GWAS', 'Gene_set_GWAS', 'P-value_GWAS', 'Adjusted_P-value_GWAS', 'Odds_Ratio_GWAS', 'Combined_Score_GWAS', 'Genes_GWAS', 'overlap_numerator_GWAS', 'overlap_denominator_GWAS'

---