# Run a Gene Set Enrichment Analysis using Reactome_2016

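This notebook examines the genes that are significant in at least one cancer. An enrichment analysis (Enrichr, run through gseapy against the Reactome_2016 gene sets) is used to identify pathways that contain multiple significant genes.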

In [1]:
import pandas as pd
import numpy as np
import gseapy as gp
import re

import cptac
import cptac.utils as u
import plot_utils as p

# Step 1: Run GSEA for significant genes in at least 1 cancer

First read sig_pval_heatmap.csv into a df. This csv file contains only genes with a significant p-value in at least one cancer. Then run GSEA using the list of unique genes from the df.

In [2]:
# Load the table of genes with a significant p-value in at least one cancer.
df = pd.read_csv('../../Make_Tables/csv/sig_pval_heatmap.csv')
# Strip the trailing '_<number>' suffix that was added to isoforms so that they
# collapse back to the plain gene name. The pattern is anchored to the end of the
# string and accepts multi-digit suffixes: the unanchored r'_\d' would turn
# 'GENE_12' into 'GENE2' and could also cut '_<digit>' out of the middle of a value.
# NOTE(review): assumes the isoform suffix is always at the end of the name - confirm.
df = df.replace(to_replace = r'_\d+$', value = '', regex = True)

In [3]:
# Unique gene names with a sig pval in >= 1 cancer. Missing values are dropped
# because NaN is not a gene symbol and should not be submitted to Enrichr.
prot_list = df['Proteomics'].dropna().unique().tolist()
# Reports go to an 'Enrichr' folder relative to the working directory. The previous
# '/Enrichr' pointed at the filesystem root, which is usually not writable and is
# not portable between machines.
prot_enr = gp.enrichr(gene_list = prot_list, description='Tumor_partition', gene_sets='Reactome_2016',
                       outdir='Enrichr')

In [4]:
# Preview the first five rows of the enrichment results table.
prot_enr.res2d.iloc[:5]

Unnamed: 0,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes,Gene_set
0,Gene Expression Homo sapiens R-HSA-74160,440/1631,8.018168000000001e-54,1.22678e-50,0,0,2.029896,248.170761,TDRKH;RPL4;ATF2;MDC1;NUP107;POP4;HNRNPU;EHMT1;...,Reactome_2016
1,Processing of Capped Intron-Containing Pre-mRN...,116/193,9.309803e-53,7.121998999999999e-50,0,0,4.52247,541.818784,NUP107;NUP188;EIF4A3;HNRNPU;EFTUD2;SNRPD2;SNRP...,Reactome_2016
2,mRNA Splicing - Major Pathway Homo sapiens R-H...,85/134,1.386665e-41,7.071989000000001e-39,0,0,4.772975,449.037119,EIF4A3;HNRNPU;YBX1;PRPF19;USP39;ELAVL1;EFTUD2;...,Reactome_2016
3,mRNA Splicing Homo sapiens R-HSA-72172,85/144,4.3142079999999996e-38,1.650185e-35,0,0,4.441518,382.131886,EIF4A3;HNRNPU;YBX1;PRPF19;USP39;ELAVL1;EFTUD2;...,Reactome_2016
4,Major pathway of rRNA processing in the nucleo...,79/166,6.626108e-27,2.0275889999999998e-24,0,0,3.580915,215.853192,LTV1;RPL4;DDX47;RPP30;WDR3;RPLP0;PWP2;RPL10A;R...,Reactome_2016


# Run GSEA for significant genes in multiple cancers

In [7]:
# Load the table of genes that are significant in multiple cancers.
mult_df = pd.read_csv('../../Make_Tables/csv/mult_sig_pval_heatmap.csv')
# Strip the trailing '_<number>' isoform suffix. Anchored and multi-digit so that
# 'GENE_12' becomes 'GENE' (not 'GENE2') and nothing inside a value is removed.
# NOTE(review): assumes the isoform suffix is always at the end of the name - confirm.
mult_df = mult_df.replace(to_replace = r'_\d+$', value = '', regex = True)

# list of genes with a sig pval in multiple cancers (NaN dropped: it is not a gene symbol)
mult_sig_list = mult_df['Proteomics'].dropna().unique().tolist()
# A description distinct from the other analyses in this notebook: with the same
# gene set, description and outdir the report files would presumably share one
# name and overwrite each other (verify against the installed gseapy version).
# outdir is relative so the notebook does not try to write to the filesystem root.
enr2 = gp.enrichr(gene_list = mult_sig_list, description='Tumor_partition_mult_sig', gene_sets='Reactome_2016',
                       outdir='Enrichr')

In [8]:
# Show up to the first 40 rows of the multi-cancer enrichment results.
enr2.res2d.iloc[:40]

Unnamed: 0,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes,Gene_set
0,Gene Expression Homo sapiens R-HSA-74160,78/1631,1.086596e-16,1.662491e-13,0,0,2.748472,101.029209,DDX47;WDR3;WDR4;HNRNPU;NAT10;ZC3H8;ADAR;PPP2R2...,Reactome_2016
1,"Cell Cycle, Mitotic Homo sapiens R-HSA-69278",39/462,3.268842e-16,2.500664e-13,0,0,4.85147,172.988519,TOP2A;AHCTF1;PCNA;MCM7;DCTN1;NCAPG;BUB1B;PPP2R...,Reactome_2016
2,Processing of Capped Intron-Containing Pre-mRN...,25/193,5.415573e-15,2.761942e-12,0,0,7.444464,244.546913,DDX5;DHX9;HNRNPU;USP39;ELAVL1;EFTUD2;NUP85;POL...,Reactome_2016
3,Cell Cycle Homo sapiens R-HSA-1640170,40/566,5.17085e-14,1.97785e-11,0,0,4.061573,124.256343,TOP2A;AHCTF1;PCNA;MCM7;DCTN1;NCAPG;BUB1B;PPP2R...,Reactome_2016
4,rRNA modification in the nucleus Homo sapiens ...,14/58,9.127898e-13,2.793137e-10,0,0,13.872374,384.573712,UTP6;DDX47;IMP3;WDR3;HEATR1;NAT10;WDR75;IMP4;P...,Reactome_2016
5,DNA strand elongation Homo sapiens R-HSA-69190,11/32,3.51941e-12,8.974496e-10,0,0,19.755747,521.012939,GINS2;RFC3;PCNA;RFC4;MCM7;RFC2;GINS4;MCM3;MCM4...,Reactome_2016
6,Transport of Mature Transcript to Cytoplasm Ho...,14/74,3.192499e-11,6.977891e-09,0,0,10.872942,262.773259,RANBP2;NCBP1;NUP210;NUP133;CPSF1;NUP155;CPSF3;...,Reactome_2016
7,Unwinding of DNA Homo sapiens R-HSA-176974,7/12,3.341307e-10,6.390249e-08,0,0,33.524904,731.496276,GINS2;MCM7;GINS4;MCM3;MCM4;MCM5;MCM2,Reactome_2016
8,Transport of Mature mRNA Derived from an Intro...,10/37,5.131583e-10,8.723692e-08,0,0,15.532774,332.252822,RANBP2;CPSF1;NCBP1;NUP210;NUP133;NUP155;NUP85;...,Reactome_2016
9,Transport of Mature mRNAs Derived from Intronl...,10/38,6.857537e-10,1.049203e-07,0,0,15.124017,319.124359,RANBP2;CPSF1;NCBP1;NUP210;NUP133;NUP155;NUP85;...,Reactome_2016


# Opposite Effects

In [9]:
# Load the genes listed in pos_neg_df.csv (the "opposite effects" set).
pn_mult_df = pd.read_csv('../../Make_Tables/csv/pos_neg_df.csv')
# Strip the trailing '_<number>' isoform suffix. Anchored and multi-digit so that
# 'GENE_12' becomes 'GENE' (not 'GENE2') and nothing inside a value is removed.
# NOTE(review): assumes the isoform suffix is always at the end of the name - confirm.
pn_mult_df = pn_mult_df.replace(to_replace = r'_\d+$', value = '', regex = True)

# Unique gene names; NaN is dropped because it is not a gene symbol.
pn_mult_sig_list = pn_mult_df['Proteomics'].dropna().unique().tolist()
print(len(pn_mult_sig_list))
# A description distinct from the other analyses in this notebook: with the same
# gene set, description and outdir the report files would presumably share one
# name and overwrite each other (verify against the installed gseapy version).
# outdir is relative so the notebook does not try to write to the filesystem root.
enr3 = gp.enrichr(gene_list = pn_mult_sig_list, description='Tumor_partition_pos_neg', gene_sets='Reactome_2016',
                       outdir='Enrichr')

128


In [10]:
# Preview the first five rows of the opposite-effects enrichment results.
enr3.res2d.iloc[:5]

Unnamed: 0,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes,Gene_set
0,Gene Expression Homo sapiens R-HSA-74160,47/1631,1.3553459999999999e-19,2.07368e-16,0,0,4.502606,195.615977,RNMT;DHX9;DDX47;HEATR1;NAT10;ZC3H8;ADAR;PPP2R2...,Reactome_2016
1,rRNA modification in the nucleus Homo sapiens ...,13/58,4.002845e-17,3.062177e-14,0,0,35.021552,1322.306668,UTP6;DDX47;IMP3;HEATR1;NAT10;WDR75;IMP4;PWP2;W...,Reactome_2016
2,rRNA processing Homo sapiens R-HSA-72312,16/180,3.669368e-14,1.871378e-11,0,0,13.888889,429.669053,UTP6;IMP3;DDX47;HEATR1;WDR75;NAT10;IMP4;PWP2;W...,Reactome_2016
3,Major pathway of rRNA processing in the nucleo...,15/166,1.892817e-13,7.240024e-11,0,0,14.118976,413.623026,UTP6;IMP3;DDX47;HEATR1;WDR75;IMP4;PWP2;WDR12;U...,Reactome_2016
4,"Cell Cycle, Mitotic Homo sapiens R-HSA-69278",18/462,9.248352e-10,2.829996e-07,0,0,6.087662,126.631933,TOP2A;AHCTF1;NUP210;NUP155;NUP133;RFC2;DCTN1;B...,Reactome_2016
