***Setting up the notebook***

In [None]:
import os
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import upsetplot
import panel as pn

import GSForge as gsf

import holoviews as hv
# Select Bokeh as the HoloViews plotting backend.
hv.extension("bokeh")
# Load the Panel extension with its default settings.
pn.extension()

***Declare paths used***

In [None]:
# OS-independent path management.
from os import fspath, environ
from pathlib import Path

In [None]:
# Root directory of the demo data. The GSFORGE_DEMO_DATA environment variable
# overrides the default location in the user's home directory.
OSF_PATH = Path(environ.get("GSFORGE_DEMO_DATA", default="~/GSForge_demo_data/osfstorage")).expanduser()

# Input file and collection folders, all located beneath OSF_PATH.
HYDRO_NORMED_GEM_PATH = OSF_PATH.joinpath("AnnotatedGEMs", "oryza_sativa_hydro_normed_raw.nc")
LIT_DGE_GSC_PATH = OSF_PATH.joinpath("Collections", "literature", "DGE")
LIT_TF_PATH = OSF_PATH.joinpath("Collections", "literature", "TF")
BORUTA_GSC_PATH = OSF_PATH.joinpath("Collections", "boruta")

# Fail early, and say which path is missing, instead of raising a bare
# AssertionError when the demo data has not been downloaded.
assert HYDRO_NORMED_GEM_PATH.exists(), (
    f"AnnotatedGEM file not found: {HYDRO_NORMED_GEM_PATH}. "
    "Set the GSFORGE_DEMO_DATA environment variable to the directory that holds the demo data."
)

***Load an AnnotatedGEM***

In [None]:
# Construct the AnnotatedGEM from the file at HYDRO_NORMED_GEM_PATH.
agem = gsf.AnnotatedGEM(HYDRO_NORMED_GEM_PATH)
# Bare expression so the notebook renders the object as the cell output.
agem

In [None]:
# Display the count array names reported by the AnnotatedGEM.
agem.count_array_names

In [None]:
# Disabled: `lit_dge_coll` is only created in the "Load GeneSetCollections" section below.
# upsetplot.from_contents(lit_dge_coll.as_dict())

***Load GeneSetCollections***

In [None]:
# %%time
# Only the literature DGE collection is loaded here; the remaining
# collections and the combined collection below are left disabled.
lit_dge_coll = gsf.GeneSetCollection.from_folder(
    gem=agem,
    target_dir=LIT_DGE_GSC_PATH,
    name="Literature DGE",
)
# lit_tf_coll = gsf.GeneSetCollection.from_folder(gem=agem, target_dir=LIT_TF_PATH, name="Literature TF")
# boruta_gsc = gsf.GeneSetCollection.from_folder(gem=agem, target_dir=BORUTA_GSC_PATH, name="Boruta Results")
# tf_geneset = gsf.GeneSet.from_GeneSets(*list(lit_tf_coll.gene_sets.values()), name='transcription factors')
# combined_gsc = gsf.GeneSetCollection(gem=agem, gene_sets={**boruta_gsc.gene_sets, 
#                                                           **lit_dge_coll.gene_sets,
#                                                           'transcription factors': tf_geneset})

In [None]:
# Materialise the result of the collection's union() as a plain list.
gene_list = [*lit_dge_coll.union()]
# Preview the first five entries on one line, separated by spaces.
print(" ".join(str(gene) for gene in gene_list[:5]))

In [None]:
# Number of entries in gene_list.
len(gene_list)

In [None]:
import gseapy as gp

In [None]:
# Run an Enrichr query for gene_list through gseapy against two KEGG libraries.
# NOTE(review): the expression data loaded earlier comes from a file named
# "oryza_sativa_hydro_normed_raw.nc", while organism is 'Human' here --
# confirm that this organism/library pairing is intended for these gene IDs.
# NOTE(review): presumably results are written beneath `outdir` -- verify
# against the installed gseapy version, whose keyword set may differ.
enr = gp.enrichr(gene_list=gene_list,
                 gene_sets=['KEGG_2016','KEGG_2013'],
                 organism='Human', # set this to the organism you actually want, e.g. Yeast
                 description='test_name',
                 outdir='test/enrichr_kegg',
                 # no_plot=True,
                 cutoff=0.5 # test dataset; use a lower value in the range (0, 1)
                )