***Setting up the notebook***

In [None]:
import os
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import upsetplot

import GSForge as gsf

import holoviews as hv
# Activate the bokeh plotting backend for HoloViews output in this notebook.
hv.extension("bokeh")

***Declare paths used***

In [None]:
# OS-independent path management.
from os import fspath, environ
from pathlib import Path

In [None]:
# Root of the demo data: read from the GSFORGE_DEMO_DATA environment variable when it is
# set, otherwise fall back to a folder under the user's home directory ("~" is expanded).
OSF_PATH = Path(environ.get("GSFORGE_DEMO_DATA", default="~/GSForge_demo_data/osfstorage")).expanduser()

# Inputs used below, all resolved relative to OSF_PATH.
HYDRO_GEM_PATH = OSF_PATH.joinpath("AnnotatedGEMs", "oryza_sativa_hydro_raw.nc")
LIT_DGE_GSC_PATH = OSF_PATH.joinpath("Collections", "literature", "DGE")
LIT_TF_PATH = OSF_PATH.joinpath("Collections", "literature", "TF")
BORUTA_GSC_PATH = OSF_PATH.joinpath("Collections", "boruta")

# Fail early, and say what is missing and how to fix it, instead of raising a bare
# AssertionError with no context.
assert HYDRO_GEM_PATH.exists(), (
    f"Demo GEM file not found at '{HYDRO_GEM_PATH}'. "
    "Set the GSFORGE_DEMO_DATA environment variable to the folder that contains 'AnnotatedGEMs'."
)

***Load an AnnotatedGEM***

In [None]:
# Build the AnnotatedGEM from the file at HYDRO_GEM_PATH.
agem = gsf.AnnotatedGEM(HYDRO_GEM_PATH)
# Bare last expression so the notebook renders the object's representation.
agem

***Load GeneSetCollections***

In [None]:
%%time
lit_dge_coll = gsf.GeneSetCollection.from_folder(gem=agem, target_dir=LIT_DGE_GSC_PATH, name="Literature DGE")
lit_tf_coll = gsf.GeneSetCollection.from_folder(gem=agem, target_dir=LIT_TF_PATH, name="Literature TF")
boruta_gsc = gsf.GeneSetCollection.from_folder(gem=agem, target_dir=BORUTA_GSC_PATH, name="Boruta Results")
tf_geneset = gsf.GeneSet.from_GeneSets(*list(lit_tf_coll.gene_sets.values()), name='transcription factors')
combined_gsc = gsf.GeneSetCollection(gem=agem, gene_sets={**boruta_gsc.gene_sets, 
                                                          **lit_dge_coll.gene_sets,
                                                          'transcription factors': tf_geneset})

In [None]:
# Render the literature DGE collection's notebook representation.
lit_dge_coll

In [None]:
# Render the combined collection's notebook representation.
combined_gsc

In [None]:
def log_shifted_counts(counts):
    """Return the natural log of ``counts + 0.25``; the shift keeps zero counts finite."""
    return np.log(counts + 0.25)


# Grouped covariance plot over the 'treatment' variable (x: CONTROL, y: HEAT),
# using the HEAT_UP and HEAT_DOWN gene sets of the combined collection.
covariance_plot = gsf.plots.gem.GroupedGeneCovariance(
    combined_gsc,
    group_variable='treatment',
    x_group_label='CONTROL',
    y_group_label='HEAT',
    selected_gene_sets=['HEAT_UP', 'HEAT_DOWN'],
    count_transform=log_shifted_counts,
)

# Apply display options, then leave the result as the cell's last expression to render it.
covariance_control_vs_heat = covariance_plot.opts(size=1.5, width=300, height=300)
covariance_control_vs_heat

In [None]:
# Build two overlap heat maps for the combined collection: one with the plot's
# default mode and one with mode='percent'.
overlap_heatmap = gsf.plots.collections.WithinCollectionOverlapHeatMap(combined_gsc)
percent_overlap_heatmap = gsf.plots.collections.WithinCollectionOverlapHeatMap(combined_gsc, mode='percent')

# NOTE(review): the layout display below is commented out, so this cell only assigns
# the two plot objects and renders nothing. Confirm whether that is intended.
# (overlap_heatmap + percent_overlap_heatmap).opts(hv.opts.HeatMap(width=550, height=500))

## Selected Genes vs Expression

In [None]:
# List the names of the gene sets available in the combined collection.
list(combined_gsc.gene_sets)

In [None]:
def log2_positive_counts(ds):
    """Return log2 of ``ds`` restricted to its strictly positive entries.

    NOTE(review): presumably ``ds`` is an xarray object whose ``.where`` masks the
    remaining entries with NaN — confirm against the plot's implementation.
    """
    return np.log2(ds.where(ds > 0))


# Scatter of genes vs. counts coloured by 'treatment', limited to the genes selected
# from both listed gene sets (gene_set_mode='intersection').
gsf.plots.gem.GeneVsCountsScatter(
    combined_gsc,
    selected_gene_sets=['Boruta_treatment', 'RECOV_DROUGHT_DOWN'],
    gene_set_mode='intersection',
    hue='treatment',
    count_transform=log2_positive_counts,
)