## Stage 2: Functional Enrichment Analysis
- __Author__ : Cancer Molecular Dynamics Laboratory
- __Version__: 1.0
* IPA Plots
* GOEA Plot

## 1. Ingenuity Pathway Analysis (IPA)

In [1]:
import pandas as pd
from numpy import log10
# Load the IPA upstream-regulator table from the xlsx export. skiprows=1 skips
# the first row of the sheet, so the column headers are read from the second row.
# pandas auto-names the header-less column 'Unnamed: 3'; it holds the
# Activated/Inhibited call and is given an explicit name here.
ipa_path = "../data/Proteomica_Humana_CO/IPA.xlsx"
df = pd.read_excel(ipa_path, skiprows=1).rename(
    columns={'Unnamed: 3': 'Regulation Type'}
)

# Significance on a -log10 scale, computed on the whole column at once.
# NOTE(review): the new column is labelled "adj" but is derived from the
# 'p-value of overlap' column as-is -- confirm whether that value is adjusted.
df['-log10 adj p-value'] = -log10(df['p-value of overlap'])

df.head(3)

Unnamed: 0,Upstream Regulator,Expr Fold Change,Molecule Type,Regulation Type,Activation z-score,p-value of overlap,Target Molecules in Dataset,-log10 adj p-value
0,MYC,,transcription regulator,Activated,2.604,1.3e-45,"ACLY,ADD1,ALB,ALCAM,ALDOA,APOC3,APP,ARG1,ASS1,...",44.886057
1,dexamethasone,,chemical drug,Inhibited,-3.053,1.12e-28,"ACLY,ACOT7,ADIPOQ,AEBP1,AGT,AKR1C1/AKR1C2,ALB,...",27.950782
2,lipopolysaccharide,,chemical drug,Activated,2.076,2.44e-25,"ACLY,ACO1,ADA,ADIPOQ,AGT,ALB,ALCAM,ALDH2,ALDH3...",24.61261


In [2]:
import altair as alt


def _regulator_bars(regulators, axis_side):
    """Build one horizontal bar panel for a subset of the IPA table.

    Parameters
    ----------
    regulators : pandas.DataFrame
        Rows of `df` to draw. Must contain the columns 'Upstream Regulator',
        '-log10 adj p-value', 'Regulation Type', 'p-value of overlap' and
        'Activation z-score'.
    axis_side : str
        Side of the panel on which the regulator labels are drawn
        ('left' or 'right').

    Returns
    -------
    alt.Chart
        A 150x400 bar chart, one bar per regulator, coloured by
        'Regulation Type'.
    """
    return alt.Chart(regulators).mark_bar().encode(
        x=alt.X('-log10 adj p-value:Q'),
        # sort='-x' orders the regulators by descending bar length.
        y=alt.Y(
            'Upstream Regulator:N',
            sort='-x',
            axis=alt.Axis(title=None, labelAngle=0, orient=axis_side),
        ),
        color='Regulation Type:N',
        tooltip=['p-value of overlap', 'Activation z-score'],
    ).properties(
        width=150,
        height=400,
    )


# NOTE(review): head(30) keeps the first 30 matching rows in table order; these
# are the 30 most significant regulators only if the IPA export is already
# sorted by p-value -- confirm.

# Activated regulators: labels on the right-hand side of their panel.
source1 = df.loc[df['Regulation Type'] == 'Activated'].head(30)
bars1 = _regulator_bars(source1, axis_side='right')

# Inhibited regulators: labels on the left-hand side of their panel.
source2 = df.loc[df['Regulation Type'] == 'Inhibited'].head(30)
bars2 = _regulator_bars(source2, axis_side='left')

# Inhibited panel on the left, Activated panel on the right.
results = bars2 | bars1

# Two-colour categorical palette; '#E2A499' is a light red.
results.configure_range(
    category=alt.RangeScheme(['#E2A499', 'lightblue'])
).properties(
    title='Ingenuity Pathways Analysis (IPA)'
)


  for col_name, dtype in df.dtypes.iteritems():


## 2. Gene Ontology Enrichment Analysis

The UP and DOWN files were obtained from g:Profiler (https://biit.cs.ut.ee/gprofiler/gost) using the differentially expressed genes from the human protein differential expression analysis performed in the previous notebook, `02_hsproteomics`.

In [3]:
# Give the g:Profiler significance column the same name that the IPA table
# uses ('-log10 adj p-value').
significance_rename = {'negative_log10_of_adjusted_p_value': '-log10 adj p-value'}

# DOWN export
df2_down = pd.read_csv(
    '../data/Proteomica_Humana_CO/DOWNhuman_gProfiler_hsapiens_12-12-2023_17-11-29__intersections.csv'
).rename(columns=significance_rename)

# UP export
df2_up = pd.read_csv(
    '../data/Proteomica_Humana_CO/UPhuman_gProfiler_hsapiens_12-12-2023_17-08-02__intersections.csv'
).rename(columns=significance_rename)

In [4]:
import altair as alt

# `choose` indexes into `process`; 1 selects 'GO:BP'.
choose = 1
process = ['GO:MF', 'GO:BP', 'GO:CC', 'KEGG', 'REAC', 'WP', 'TF', 'MIRNA','HPA', 'CORUM', 'HP']

# Keep only the terms that belong to the selected annotation source.
source_bp_up = df2_up.loc[df2_up['source'] == process[choose]]
source_bp_down = df2_down.loc[df2_down['source'] == process[choose]]


def _enrichment_bars(terms, bar_color, axis_side):
    """Build one horizontal bar panel from the first 30 rows of `terms`.

    Parameters
    ----------
    terms : pandas.DataFrame
        Enrichment rows to draw. Must contain the columns 'term_name',
        '-log10 adj p-value' and 'term_size'.
    bar_color : str
        Fill colour applied to every bar.
    axis_side : str
        Side of the panel on which the term labels are drawn
        ('left' or 'right').

    Returns
    -------
    alt.Chart
        A 150x400 bar chart, one bar per term.
    """
    # NOTE(review): the slice keeps the first 30 rows in table order; these are
    # the most significant terms only if the export is sorted -- confirm.
    return alt.Chart(terms[:30]).mark_bar(color=bar_color).encode(
        # Each channel uses its matching wrapper class: alt.X for x, alt.Y for y.
        x=alt.X('-log10 adj p-value:Q'),
        # sort='-x' orders the terms by descending bar length.
        y=alt.Y(
            'term_name:N',
            sort='-x',
            axis=alt.Axis(title=None, labelAngle=0, orient=axis_side),
        ),
        tooltip=['term_size'],
    ).properties(
        width=150,
        height=400,
    )


# DOWN terms in light blue with labels on the left; UP terms in light red
# ('#E2A499') with labels on the right.
bars_down = _enrichment_bars(source_bp_down, bar_color='lightblue', axis_side='left')
bars_up = _enrichment_bars(source_bp_up, bar_color='#E2A499', axis_side='right')

# DOWN panel on the left, UP panel on the right.
resultsORA = bars_down | bars_up

resultsORA.properties(
    title='Functional Enrichment Analysis in {0}'.format(process[choose])
)

  for col_name, dtype in df.dtypes.iteritems():
