# FUNC-E Demo Notebook

Load the required dependencies

In [1]:
import sys
import pandas as pd
import numpy as np

# Add the directory two levels above the working directory to the module
# search path so the `func_e` package imported below can be resolved from
# there (presumably the root of a source checkout -- confirm if the notebook
# is moved or func_e is installed as a package).
sys.path.append("../../")
from func_e.FUNC_E import FUNC_E
import func_e.vocabs.all as vocabs

In [2]:
# Load the autoreload extension and set it to mode 2 so edits to imported
# modules are picked up without restarting the kernel; render matplotlib
# figures inline in the notebook.
%load_ext autoreload
%autoreload 2
%matplotlib inline

## FUNC-E Setup

Create a FUNC-E object and set the parameters for the functional enrichment analysis.

In [3]:
# Instantiate the analysis object; every later cell configures or queries
# this single instance.
fe = FUNC_E()
fe.setVerbosity(1)

# Settings for the enrichment step (presumably `ecut` is the significance
# cutoff applied to enrichment results -- confirm against the FUNC-E docs).
fe.setEnrichmentSettings(dict(ecut=0.01))

# Settings for the clustering step.
fe.setClusteringSettings(dict(
    similarity_term_overlap=3,
    percent_similarity=0.50,
    initial_group_membership=3,
    multiple_linkage_threshold=0.50,
    final_group_membership=3,
    similarity_threshold=0.5,
))


## Load Terms

Load the most current terms (this takes a few moments). The demo does not need the KEGG terms, but they are included here to demonstrate usage.

In [None]:
# Request the GO, KEGG, and IPR vocabularies from the `vocabs` module; the
# result is kept in `terms` for the cells that follow.
terms = vocabs.getTerms(['GO', 'KEGG', 'IPR'])

View 10 random rows of the terms dataframe to see how it is structured.

In [None]:
# Preview 10 sampled rows. A fixed `random_state` makes the displayed rows
# identical on every Restart & Run All, so the rendered output is reproducible.
# (Assumes `terms` is a pandas DataFrame, as the surrounding text describes.)
terms.sample(10, random_state=0)

In [None]:
# Hand the loaded vocabulary terms to the FUNC-E object.
fe.setTerms(terms)

## Load the Terms to Features Mapping

This step loads the term IDs and the genes they are associated with into a dataframe.

In [None]:
# Gene-to-GO associations: a tab-delimited file read without a header row,
# then labelled with the two column names used by the rest of the notebook.
go_map_file = 'oryza_sativa.MSU_v7_0.genes2GO.txt'
genes2GO = pd.read_csv(go_map_file, sep="\t", header=None)
genes2GO.columns = ['Feature', 'Term']
genes2GO.head()

In [None]:
# Gene-to-IPR associations: a tab-delimited file read without a header row,
# then labelled with the two column names used by the rest of the notebook.
ipr_map_file = 'oryza_sativa.MSU_v7_0.genes2IPR.txt'
genes2IPR = pd.read_csv(ipr_map_file, sep="\t", header=None)
genes2IPR.columns = ['Feature', 'Term']
genes2IPR.head()

In [None]:
# Stack the GO and IPR mappings into one table and index it by 'Feature',
# keeping 'Feature' available as a regular column as well (drop=False).
terms2features = pd.concat([genes2GO, genes2IPR]).set_index('Feature', drop=False)
terms2features.head()

In [None]:
# Hand the combined feature-to-term mapping to the FUNC-E object.
fe.setTerms2Features(terms2features)

## Load the query file
The query file contains the set of genes and the "module" or groups they belong to. Functional enrichment will be performed for each module.

In [None]:
# Read the query file through the FUNC-E object, then preview the first rows
# of `fe.query`.
fe.importQueryFile('demo_query.txt')
fe.query.head()

## Load the background file
The background file is the list of all possible genes.

In [None]:
# Read the background gene list through the FUNC-E object.
fe.importBackgroundFile('oryza_sativa.MSU_v7_0.genes.txt')

# Preview 5 sampled rows. A fixed `random_state` makes the displayed rows
# identical on every Restart & Run All, so the rendered output is reproducible.
# (Assumes `fe.background` is a pandas object -- confirm.)
fe.background.sample(5, random_state=0)

## Ready Check
Before we proceed let's make sure the object has all the data needed.

In [None]:
# Ask the object whether it is ready to run; any return value is shown as
# the cell output.
fe.isReady()

## Perform functional enrichment

Run the functional enrichment using the input files and the parameter settings provided at the top of this notebook.

In [None]:
# Run the analysis on the configured object. FUNC_E is already imported in
# the notebook's import cell, so it is not re-imported here.
fe.run()

Explore the enrichment data frame

In [None]:
# Preview the enrichment table, ordered by module and then by Fisher's p-value.
fe.enrichment.sort_values(['Module', 'Fishers p-value']).head()

Explore the clusters for each module

In [None]:
# Preview the clusters table, ordered by module, cluster index, and EASE score.
fe.clusters.sort_values(['Module','Cluster Index', 'EASE Score']).head()

Explore the terms per cluster

In [None]:
# Preview the cluster terms with the smallest Fisher's p-values.
# NOTE(review): this sorts by p-value alone, so rows from different modules
# and clusters may be interleaved in the preview -- confirm that is intended.
fe.cluster_terms.sort_values('Fishers p-value').head()

Save the output to tab-separated (TSV) files

In [None]:
# Write each result table to a tab-separated file after sorting it.
# `index=False` is the documented boolean form for leaving the DataFrame index
# out of the file (the previous `index=None` only worked because None is falsy).
fe.enrichment.sort_values(['Module', 'Fishers p-value']).to_csv(
    'FUNC-E.enriched_terms.tsv', sep="\t", index=False)

fe.clusters.sort_values(['Module', 'Cluster Index', 'EASE Score']).to_csv(
    'FUNC-E.clusters.tsv', sep="\t", index=False)

fe.cluster_terms.sort_values(['Module', 'Cluster Index', 'Fishers p-value']).to_csv(
    'FUNC-E.cluster_terms.tsv', sep="\t", index=False)