In [None]:
# Enable IPython's autoreload extension so that edits to imported modules
# are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd

In [None]:
# Gene-by-sample score table, read from a tab-separated file; the first
# column of the file becomes the row index.
sc_ge_sa = pd.read_csv(
    "data/nmf_k9_w.tsv",
    sep="\t",
    index_col=0,
)

# Display the loaded table.
sc_ge_sa

In [None]:
# Gene sets, read from a header-less, tab-separated .gmt file. The first
# field of each row becomes the index; column 1 is then discarded
# (presumably the GMT description field -- TODO confirm), leaving the
# remaining columns as the set members.
gm = (
    pd.read_csv(
        "data/h.all.v7.1.symbols.gmt",
        sep="\t",
        header=None,
        index_col=0,
    )
    .drop(columns=1)
)

# Display the gene-set table.
gm

In [None]:
import gsea

## Single-sample GSEA

In [None]:
# Run single-sample GSEA over the whole gene-by-sample table. The result is
# also written to the .tsv path given by `pa` as a gene-set-by-sample table.
en_se_sa = gsea.run_single_sample_gsea(
    # Gene-by-sample score; DataFrame
    sc_ge_sa,
    # Gene sets; set-to-genes dict or DataFrame
    gm,
    # Normalization method; one of "-0-", "0-1", "1234", "log"
    no="-0-",
    # Minimum gene set size; int
    mi=5,
    # Maximum gene set size; int
    ma=500,
    # Weight used for the "ks" and "auc" methods; float
    we=1.0,
    # Enrichment method; one of "ks", "auc", "js"
    me="ks",
    # .tsv file path to write the gene-set-by-sample output; str
    pa="single_sample_gsea.tsv",
)

# Display the returned result.
en_se_sa

## Prerank GSEA

In [None]:
# Pre-ranked GSEA works on a single vector of gene scores; here that is the
# first column of the gene-by-sample table.
ge_sc = sc_ge_sa.iloc[:, 0]

nu_se_st = gsea.run_prerank_gsea(
    ge_sc,  # Gene scores; Series
    gm,  # Gene sets; set-to-genes dict or DataFrame
    mi=5,  # Minimum gene set size; int
    ma=500,  # Maximum gene set size; int
    we=1.0,  # Weight used for "ks" and "auc"; float
    me="ks",  # Enrichment method; "ks", "auc", "js"
    se=1729,  # Random seed; int
    # No sample labels are passed to this call, so there is nothing for a
    # "label" permutation to shuffle; "gene_set" is the only permutation type
    # listed for this function.
    pe="gene_set",  # Permutation type; "gene_set"
    n_pe=1000,  # Number of permutations; int
    n_pl=25,  # Number of extreme gene sets to plot; int
    ad=None,  # Additional gene sets to plot; list of str
    pa="~/Downloads/",  # directory path to write the gene-set-by-statistic and plots; str
)

# Display the returned result.
nu_se_st

## GSEA

In [None]:
# Two-class sample labels as floats: three samples labelled 0 followed by
# three labelled 1, indexed "Sample 0" .. "Sample 5".
# NOTE(review): presumably these index names must match the column names of
# sc_ge_sa -- confirm against the data file.
sa_la = pd.Series(
    [0, 0, 0, 1, 1, 1],
    index=["Sample {}".format(ie) for ie in range(6)],
    dtype=float,
)

# Run GSEA on the gene-by-sample table against the sample labels. The
# gene-set-by-statistic output and plots are written under the `pa` directory.
nu_se_st = gsea.run_gsea(
    # Gene-by-sample score; DataFrame
    sc_ge_sa,
    # Sample label; Series
    sa_la,
    # Gene sets; set-to-genes dict or DataFrame
    gm,
    # Normalization method; one of "-0-", "0-1", "1234", "log"
    no="-0-",
    # Ranking method; one of "ic", "si", "co", "tt", "di", "ra", "lo"
    ra="si",
    # Minimum gene set size; int
    mi=5,
    # Maximum gene set size; int
    ma=500,
    # Weight used for the "ks" and "auc" methods; float
    we=1.0,
    # Enrichment method; one of "ks", "auc", "js"
    me="ks",
    # Random seed; int
    se=1729,
    # Permutation type; one of "gene_set", "label"
    pe="label",
    # Number of permutations; int
    n_pe=1000,
    # Number of extreme gene sets to plot; int
    n_pl=25,
    # Additional gene sets to plot; list of str
    ad=None,
    # Directory path to write the gene-set-by-statistic and plots; str
    pa="~/Downloads/",
)

# Display the returned result.
nu_se_st