# UMAP Panel and Views

This notebook demonstrates the use of the UMAP panel interface.

***Setting up the notebook***

In [None]:
import os
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import upsetplot
import itertools

import GSForge as gsf

import holoviews as hv
# Load the Bokeh plotting extension for HoloViews.
hv.extension("bokeh")

***Declare paths used***

In [None]:
# OS-independent path management.
from os import fspath, environ
from pathlib import Path

In [None]:
# Root of the demo data. Set the GSFORGE_DEMO_DATA environment variable to
# override the default location under the user's home directory.
OSF_PATH = Path(environ.get("GSFORGE_DEMO_DATA", default="~/GSForge_demo_data/osfstorage")).expanduser()

# Input file and gene set collection folders, all relative to the data root.
HYDRO_NORMED_GEM_PATH = OSF_PATH.joinpath("AnnotatedGEMs", "oryza_sativa_hydro_normed.nc")
LIT_DGE_GSC_PATH = OSF_PATH.joinpath("Collections", "literature", "DGE")
LIT_TF_PATH = OSF_PATH.joinpath("Collections", "literature", "TF")
BORUTA_GSC_PATH = OSF_PATH.joinpath("Collections", "boruta")

# Fail early, naming the missing path, instead of erroring later in the
# loading cells with a less direct message.
assert HYDRO_NORMED_GEM_PATH.exists(), f"AnnotatedGEM file not found: {HYDRO_NORMED_GEM_PATH}"
for collection_dir in (LIT_DGE_GSC_PATH, LIT_TF_PATH, BORUTA_GSC_PATH):
    assert collection_dir.is_dir(), f"Gene set collection folder not found: {collection_dir}"

***Load an AnnotatedGEM***

In [None]:
# Construct the AnnotatedGEM from the netCDF (.nc) file path declared above.
agem = gsf.AnnotatedGEM(HYDRO_NORMED_GEM_PATH)
# A bare name as the last expression displays the object in the cell output.
agem

In [None]:
# Show the count array names of `agem`; the figure-saving loop iterates over these.
agem.count_array_names

***Load GeneSetCollections***

In [None]:
%%time
lit_dge_coll = gsf.GeneSetCollection.from_folder(gem=agem, target_dir=LIT_DGE_GSC_PATH, name="Literature DGE")
lit_tf_coll = gsf.GeneSetCollection.from_folder(gem=agem, target_dir=LIT_TF_PATH, name="Literature TF")
boruta_gsc = gsf.GeneSetCollection.from_folder(gem=agem, target_dir=BORUTA_GSC_PATH, name="Boruta Results")
tf_geneset = gsf.GeneSet.from_GeneSets(*list(lit_tf_coll.gene_sets.values()), name='transcription factors')
combined_gsc = gsf.GeneSetCollection(gem=agem, gene_sets={**boruta_gsc.gene_sets, 
                                                          **lit_dge_coll.gene_sets,
                                                          'transcription factors': tf_geneset})

In [None]:
# Display the merged collection built in the previous cell.
combined_gsc

## Create and Explore the Panel

Setting the random state to a fixed value is important for reproducibility. There is no need to search for an especially 'unique' random state -- different values should simply produce rotations of the same basic layout.

In [None]:
# Create the UMAP interface over the combined gene set collection.
# `random_state` is fixed so that the layout is reproducible between runs.
# NOTE(review): n_neighbors=421 is a magic number -- presumably chosen relative
# to the number of samples; confirm and document the rationale.
umap_panel = gsf.panels.UMAP_Interface(combined_gsc, random_state=400, n_neighbors=421)
# Uncomment to launch the interactive application.
# umap_panel.panel()

## Generate and Save Views

I often explore settings using the application above, then write a script to quickly generate the plots I am interested in.

In [None]:
# Map a short label (used in output file names) to the list of gene set keys
# passed as `selected_gene_sets` when rendering that figure.
sets_of_interest = {
    'all_genes': [None],
    'all_selections': list(combined_gsc.gene_sets),
    # Every gene set except the Boruta genotype selection.
    'all_sans_genotype': [
        set_name
        for set_name in combined_gsc.gene_sets
        if set_name != 'Boruta_genotype'
    ],
    'lit_dge': list(lit_dge_coll.gene_sets),
    'lit_tf': ['transcription factors'],
    'boruta': list(boruta_gsc.gene_sets),
    'boruta_treatment': ['Boruta_treatment'],
    'treatment_sel': ['Boruta_treatment', *lit_dge_coll.gene_sets],
}

We are also interested in viewing the points colored by treatment and by genotype.

In [None]:
# Values passed as `hue=` to `umap_panel.view`; one figure is produced per hue.
hues = ['treatment', 'genotype']

In [None]:
# Make sure the output directory exists before any figure is written.
os.makedirs('figures', exist_ok=True)

# Render and save one static figure for every combination of gene selection,
# hue, and count array.
for (key, gene_selection), hue, count_var in itertools.product(sets_of_interest.items(), hues, agem.count_array_names):
    # Pass the count array to the view so the figure content matches the count
    # variable in its file name. Previously `count_var` only affected the name,
    # so the same plot was saved once per count array.
    # NOTE(review): assumes `view` accepts `count_variable`, GSForge's interface
    # parameter for selecting the count array -- confirm against the installed version.
    plot = umap_panel.view(
        gene_set_mode='union',
        selected_gene_sets=gene_selection,
        hue=hue,
        count_variable=count_var,
    ).opts(legend_position='bottom_left', width=500, height=500)
    hv.save(plot, f'figures/UMAP_{key}_of_{count_var}_hue_{hue}.png', dpi=300, toolbar=None)

In [None]:
# %load_ext watermark
# %watermark -v -p numpy,scipy,pandas,bokeh,holoviews,datashader,jupyterlab