# GSEA by cancer 

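This notebook takes the FDR-significant proteomic trans effects of EGFR for each cancer type and runs an Enrichr enrichment analysis on them (via gseapy). Most cancer types are tested against the Reactome 2016 library; BRCA is tested against NCI-Nature 2016 and GBM against KEGG 2016. The result for each cancer type is a data frame with the top pathways that have many significant trans effects. (Main gene is EGFR.)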

In [1]:

import pandas as pd
import numpy as np
import re 
import matplotlib.pyplot as plt
from scipy import stats
import gseapy as gp
from gseapy.plot import barplot, dotplot
import cptac
import plot_utils as p

  import pandas.util.testing as tm


In [2]:
# Read the pan-cancer EGFR Pearson results and drop the 'Unnamed: 0' column
# (presumably an index that was written out when the CSV was saved).
df_FDR_append = pd.read_csv(
    "../Step3.2_combining_pearson_dfs/csv_files/pancan_EGFR_pearson_sig_all_prot_append_FDR.csv"
).drop(columns=['Unnamed: 0'])


In [3]:
def create_prot_list(df):
    """Return the column names of ``df`` that should be compared against EGFR.

    Duplicated column labels are collapsed to their first occurrence, and the
    four mutation/sample metadata columns are removed from the result.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns include 'EGFR_Mutation', 'EGFR_Location',
        'EGFR_Mutation_Status' and 'Sample_Status'. The frame is not modified.

    Returns
    -------
    list
        Remaining column labels, in their original order.

    Raises
    ------
    ValueError
        If any of the four metadata columns is absent.
    """
    first_occurrences = df.columns[~df.columns.duplicated()]
    column_names = first_occurrences.values.tolist()
    metadata_columns = (
        'EGFR_Mutation',
        'EGFR_Location',
        'EGFR_Mutation_Status',
        'Sample_Status',
    )
    for metadata_column in metadata_columns:
        column_names.remove(metadata_column)
    return column_names

In [4]:
# Load the CPTAC data: instantiate one cptac dataset object per cancer type.
# Each object is queried in its own section below via join_omics_to_mutations().
brain = cptac.Gbm()
kidney = cptac.Ccrcc()
Ovar = cptac.Ovarian()
colon = cptac.Colon()
brca = cptac.Brca()
luad = cptac.Luad()
Hnscc = cptac.Hnscc()
Lscc = cptac.Lscc()

                                        



                                            



                                          



                                         



# Enrichment Analysis

# Brca

In [5]:
# Join BRCA proteomics with the EGFR mutation columns, then keep tumor samples only.
df1 = brca.join_omics_to_mutations(
    omics_df_name="proteomics",
    mutations_genes="EGFR",
)
df1_tumor = df1.loc[df1['Sample_Status'] == "Tumor"]

# Drop column level 1 (NOTE(review): assumes the joined frame has MultiIndex columns).
df1_tumor.columns = df1_tumor.columns.droplevel(level=1)
prot_genes_list = create_prot_list(df1_tumor)




In [6]:
# Correlate EGFR proteomics against every protein column with the plot_utils helper.
# NOTE(review): return_all=False presumably keeps only the comparisons that pass
# the fdr_bh correction - confirm in plot_utils.
df2 = p.wrap_pearson_corr(
    df1_tumor,
    "EGFR_proteomics",
    comparison_columns=prot_genes_list,
    correction_method='fdr_bh',
    return_all=False,
).set_index('Comparison')
# Transpose so each comparison becomes a column.
df1_transposed = df2.T

In [7]:


# Collect the comparison names and strip the "_proteomics" suffix to get gene symbols.
# The column list gets its own name: rebinding `brca` here would replace the
# cptac.Brca() dataset object and make the BRCA join cell fail when re-run.
brca_prot = df1_transposed.columns.values.tolist()
brca_genes = [re.sub("_proteomics", "", gene) for gene in brca_prot]
len(brca_genes)

2669

In [13]:
# Run Enrichr on the BRCA gene list against the NCI-Nature_2016 library
# and preview the head of the result table.
brca_enr = gp.enrichr(
    gene_list=brca_genes,
    description='Tumor_partition',
    gene_sets='NCI-Nature_2016',
    outdir='test/enrichr_kegg',
)
brca_enr.res2d.head(20)

Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes
0,NCI-Nature_2016,Urokinase-type plasminogen activator (uPA) and...,25/42,3.110276e-12,6.500478e-10,0,0,4.460383,118.183685,ITGB1;ITGAM;LRP1;SRC;ITGB3;ITGB2;SERPINE1;PLG;...
1,NCI-Nature_2016,PDGFR-beta signaling pathway Homo sapiens c901...,47/128,2.028208e-11,2.119477e-09,0,0,2.751499,67.745429,USP6NL;DOCK4;TAGLN;LRP1;SRC;ARPC1B;ITGB3;BRK1;...
2,NCI-Nature_2016,Beta3 integrin cell surface interactions Homo ...,23/43,4.493506e-10,3.130476e-08,0,0,4.008121,86.267656,FGB;FGA;LAMA4;ITGB3;SPHK1;ITGA2B;FGG;TNC;FN1;P...
3,NCI-Nature_2016,ErbB1 downstream signaling Homo sapiens 30d605...,37/105,9.913735e-09,5.179927e-07,0,0,2.640547,48.663545,USP6NL;RALA;SRC;ARPC1B;PEBP1;BRK1;PPP2R2A;IQGA...
4,NCI-Nature_2016,Signaling events mediated by focal adhesion ki...,25/58,2.441868e-08,1.020701e-06,0,0,3.229932,56.613988,ITGB1;ROCK2;SRC;ASAP1;ETS1;ARHGAP35;RAP1B;CCND...
5,NCI-Nature_2016,HIF-1-alpha transcription factor network Homo ...,27/66,2.6282e-08,9.154896e-07,0,0,3.0655,53.506398,TFRC;SERPINE1;ITGB2;SLC2A1;ENO1;ETS1;NDRG1;HK2...
6,NCI-Nature_2016,amb2 Integrin signaling Homo sapiens 5d4f90b6-...,19/40,1.783866e-07,5.326115e-06,0,0,3.559386,55.310404,ITGAM;YES1;LRP1;SRC;ITGB2;MST1;PLAUR;PLG;THY1;...
7,NCI-Nature_2016,Beta1 integrin cell surface interactions Homo ...,25/66,4.985324e-07,1.302416e-05,0,0,2.838425,41.190087,ITGB1;LAMA5;LAMA4;TNC;F13A1;LAMC2;LAMC1;NID1;V...
8,NCI-Nature_2016,Beta2 integrin cell surface interactions Homo ...,15/29,8.895837e-07,2.065811e-05,0,0,3.875919,54.001288,FGB;SPON2;FGA;ITGAM;F10;ITGB2;FGG;PLAUR;THY1;F...
9,NCI-Nature_2016,mTOR signaling pathway Homo sapiens 559dd850-6...,25/69,1.316363e-06,2.751199e-05,0,0,2.715016,36.763044,RRN3;IRS1;PLD1;YY1;IKBKB;RPTOR;NRAS;YWHAQ;MLST...


In [18]:
brca_df = brca_enr.res2d
# Full (untruncated) name of the top-ranked term, selected by column label
# instead of a positional column index.
# NOTE(review): despite its name, `innate_immune` holds the term string of row 0,
# not a gene list; the name is kept so any later use still resolves.
innate_immune = brca_df['Term'].iloc[0]
innate_immune

'Urokinase-type plasminogen activator (uPA) and uPAR-mediated signaling Homo sapiens 503076a2-6196-11e5-8ac5-06603eb7f303'

# GBM

In [5]:
# Join GBM proteomics with the EGFR mutation columns, then keep tumor samples only.
df1 = brain.join_omics_to_mutations(
    omics_df_name="proteomics",
    mutations_genes="EGFR",
)
df1_tumor = df1.loc[df1['Sample_Status'] == "Tumor"]

# Candidate columns to correlate against EGFR (metadata columns removed).
prot_genes_list = create_prot_list(df1_tumor)





In [6]:
# Correlate EGFR proteomics against every protein column with the plot_utils helper.
# NOTE(review): return_all=False presumably keeps only the comparisons that pass
# the fdr_bh correction - confirm in plot_utils.
df2 = p.wrap_pearson_corr(
    df1_tumor,
    "EGFR_proteomics",
    comparison_columns=prot_genes_list,
    correction_method='fdr_bh',
    return_all=False,
).set_index('Comparison')
# Transpose so each comparison becomes a column.
df1_transposed = df2.T

In [7]:


# Comparison names carry a "_proteomics" suffix; strip it to get gene symbols.
gbm_prot = df1_transposed.columns.values.tolist()
gbm_genes = [re.sub("_proteomics", "", name) for name in gbm_prot]
len(gbm_genes)

1590

In [8]:
# Run Enrichr on the GBM gene list against the KEGG_2016 library
# and preview the first ten rows of the result table.
gbm_enr = gp.enrichr(
    gene_list=gbm_genes,
    description='Tumor_partition',
    gene_sets='KEGG_2016',
    outdir='test/enrichr_kegg',
)
gbm_enr.res2d.head(10)

Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes
0,KEGG_2016,Complement and coagulation cascades Homo sapie...,51/79,8.181877e-37,2.39729e-34,0,0,8.120373,674.752022,SERPINA1;ITGAM;PROS1;SERPINE1;ITGB2;F13A1;C8B;...
1,KEGG_2016,Platelet activation Homo sapiens hsa04611,42/122,7.105065000000001e-17,1.040892e-14,0,0,4.330343,161.015757,ITGB1;ROCK1;ITGB3;GNAI3;PIK3CD;ITPR3;RASGRP2;A...
2,KEGG_2016,Regulation of actin cytoskeleton Homo sapiens ...,57/214,1.60787e-16,1.570353e-14,0,0,3.350379,121.841398,ITGB1;NCKAP1;ITGAM;ITGB5;ARPC1B;ITGB3;ITGB2;PI...
3,KEGG_2016,Osteoclast differentiation Homo sapiens hsa04380,40/132,5.150369e-14,3.772645e-12,0,0,3.811702,116.627113,CSF1R;SPI1;NCF2;ITGB3;NCF4;FHL2;PIK3CD;TREM2;P...
4,KEGG_2016,Staphylococcus aureus infection Homo sapiens h...,25/56,1.337151e-13,7.835705e-12,0,0,5.615454,166.45926,C1QB;CFD;C1QA;ITGAM;C1S;C1R;ITGB2;CFI;C5AR1;FP...
5,KEGG_2016,Fc gamma R-mediated phagocytosis Homo sapiens ...,32/93,3.479421e-13,1.699117e-11,0,0,4.328126,124.159829,ARPC1B;WAS;PIK3CD;PLD1;PIK3CG;FCGR3A;SCIN;INPP...
6,KEGG_2016,Bacterial invasion of epithelial cells Homo sa...,29/78,4.584749e-13,1.919045e-11,0,0,4.676665,132.868127,ITGB1;ARHGEF26;SHC1;ARPC1B;PXN;WAS;ILK;PIK3CD;...
7,KEGG_2016,Lysosome Homo sapiens hsa04142,36/123,2.953353e-12,1.081666e-10,0,0,3.681546,97.737984,HEXB;CTSZ;HEXA;GBA;AP4E1;TCIRG1;GNS;CTSS;AP4M1...
8,KEGG_2016,Focal adhesion Homo sapiens hsa04510,46/202,5.10718e-11,1.662671e-09,0,0,2.864437,67.880832,ITGB1;FLT1;ITGB5;ROCK1;SHC1;ITGB3;PXN;PDGFA;IL...
9,KEGG_2016,Leukocyte transendothelial migration Homo sapi...,32/118,4.156481e-10,1.217849e-08,0,0,3.41115,73.684877,ITGB1;ITGAM;ROCK1;NCF2;NCF4;PXN;ITGB2;GNAI3;PI...


In [12]:
gbm_df = gbm_enr.res2d
# Genes overlapping the top-ranked term (row 0), selected through the 'Genes'
# column label rather than a positional column index; Enrichr joins them with ';'.
comp_coag = gbm_df['Genes'].iloc[0].split(';')
comp_coag

['SERPINA1',
 'ITGAM',
 'PROS1',
 'SERPINE1',
 'ITGB2',
 'F13A1',
 'C8B',
 'C8A',
 'SERPINA5',
 'C4A',
 'THBD',
 'C8G',
 'PLAU',
 'C3AR1',
 'VSIG4',
 'FGB',
 'FGA',
 'SERPINB2',
 'FGG',
 'SERPINF2',
 'PLAUR',
 'F2',
 'F3',
 'F7',
 'PROCR',
 'F9',
 'SERPING1',
 'CFB',
 'C1QB',
 'CFD',
 'C1QA',
 'CPB2',
 'C1S',
 'C1R',
 'SERPINC1',
 'CFI',
 'C5AR1',
 'PLG',
 'KNG1',
 'C2',
 'C3',
 'C6',
 'C7',
 'C9',
 'A2M',
 'F10',
 'SERPIND1',
 'F11',
 'F13B',
 'KLKB1',
 'C1QC']

# Colon

In [6]:
# Get the colon data frame with proteomics and EGFR mutation data,
# then filter for tumor samples only.
df1 = colon.join_omics_to_mutations(
    omics_df_name="proteomics",
    mutations_genes="EGFR",
)
df1_tumor = df1.loc[df1['Sample_Status'] == "Tumor"]
# Candidate columns to correlate against EGFR (metadata columns removed).
prot_genes_list = create_prot_list(df1_tumor)



In [7]:
# Correlate EGFR proteomics against every protein column with the plot_utils helper.
# NOTE(review): return_all=False presumably keeps only the comparisons that pass
# the fdr_bh correction - confirm in plot_utils.
df2 = p.wrap_pearson_corr(
    df1_tumor,
    "EGFR_proteomics",
    comparison_columns=prot_genes_list,
    correction_method='fdr_bh',
    return_all=False,
)


Get the column names (the significant trans genes) and strip the `_proteomics` suffix from each name.

In [8]:
# Index the correlation results by comparison name and transpose so each
# comparison becomes a column. df2 is not rebound, so this cell can be re-run:
# a second set_index on an already-indexed df2 would raise KeyError.
df1_transposed = df2.set_index('Comparison').T

# The column list gets its own name: rebinding `colon` here would replace the
# cptac.Colon() dataset object and make the colon join cell fail when re-run.
colon_prot = df1_transposed.columns.values.tolist()
colon_genes = [re.sub("_proteomics", "", gene) for gene in colon_prot]


Run enrichment analysis 

In [11]:

# Run Enrichr on the colon gene list against the Reactome_2016 library.
# The description is cancer-specific: several sections of this notebook query
# Reactome_2016 into the same outdir, and gseapy builds its report file names
# from the gene set and the description, so a shared description lets the
# runs overwrite each other's report files.
tumor_enr = gp.enrichr(
    gene_list=colon_genes,
    description='Colon_tumor',
    gene_sets='Reactome_2016',
    outdir='test/enrichr_kegg',
)

In [12]:
# Preview the first five rows of the colon enrichment result table.
tumor_enr.res2d.head(5)

Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes
0,Reactome_2016,Mitochondrial translation Homo sapiens R-HSA-5...,30/90,2.3414370000000003e-23,3.5823989999999997e-20,0,0,10.875476,566.706822,GFM2;MRPL19;MRPS34;MRPL39;MRPL17;MTIF2;MRPL37;...
1,Reactome_2016,Mitochondrial translation initiation Homo sapi...,27/84,1.131038e-20,8.652439e-18,0,0,10.487066,481.655903,MRPL19;MRPS34;MRPL39;MRPL17;MTIF2;MRPL37;MTIF3...
2,Reactome_2016,Mitochondrial translation elongation Homo sapi...,27/84,1.131038e-20,5.768293e-18,0,0,10.487066,481.655903,MRPL19;MRPS34;MRPL39;MRPL17;MRPL37;MRPL15;MRPL...
3,Reactome_2016,Mitochondrial translation termination Homo sap...,26/84,1.74051e-19,6.657452e-17,0,0,10.098656,436.210828,GFM2;MRPL19;MRPS34;MRPL39;MRPL17;MRPL37;MRPL15...
4,Reactome_2016,mRNA Splicing - Major Pathway Homo sapiens R-H...,25/134,3.480804e-13,1.065126e-10,0,0,6.08702,174.614343,SF3B6;SRSF1;DDX23;HNRNPR;U2AF1;USP39;SART1;CST...


In [78]:
enrich_df = tumor_enr.res2d
# Genes overlapping the top-ranked term (row 0), selected through the 'Genes'
# column label rather than a positional column index; Enrichr joins them with ';'.
translation = enrich_df['Genes'].iloc[0].split(';')

# Full (untruncated) name of the second-ranked term.
enrich_df['Term'].iloc[1]

'Mitochondrial translation initiation Homo sapiens R-HSA-5368286'

# Luad

In [5]:
# Join LUAD proteomics with the EGFR mutation columns, then keep tumor samples only.
df1 = luad.join_omics_to_mutations(
    omics_df_name="proteomics",
    mutations_genes="EGFR",
)
df1_tumor = df1.loc[df1['Sample_Status'] == "Tumor"]

# Drop column level 1 (NOTE(review): assumes the joined frame has MultiIndex columns).
df1_tumor.columns = df1_tumor.columns.droplevel(level=1)
prot_genes_list = create_prot_list(df1_tumor)




In [6]:
# Correlate EGFR proteomics against every protein column with the plot_utils helper.
# prot_genes_list is already built by the preceding join cell, so it is not
# recomputed here (matches the Brca, GBM and Colon sections).
# NOTE(review): return_all=False presumably keeps only the comparisons that pass
# the fdr_bh correction - confirm in plot_utils.
df2 = p.wrap_pearson_corr(
    df1_tumor,
    "EGFR_proteomics",
    comparison_columns=prot_genes_list,
    correction_method='fdr_bh',
    return_all=False,
)


In [7]:
# Index the correlation results by comparison name and transpose so each
# comparison becomes a column. df2 is not rebound, so this cell can be re-run:
# a second set_index on an already-indexed df2 would raise KeyError.
df1_transposed = df2.set_index('Comparison').T


In [8]:

# Comparison names carry a "_proteomics" suffix; strip it to get gene symbols.
Luad = df1_transposed.columns.values.tolist()
Luad_genes = [re.sub("_proteomics", "", name) for name in Luad]
len(Luad_genes)

1198

In [9]:
# Run Enrichr on the LUAD gene list against the Reactome_2016 library.
# The description is cancer-specific: several sections of this notebook query
# Reactome_2016 into the same outdir, and gseapy builds its report file names
# from the gene set and the description, so a shared description lets the
# runs overwrite each other's report files.
tumor_enr = gp.enrichr(
    gene_list=Luad_genes,
    description='Luad_tumor',
    gene_sets='Reactome_2016',
    outdir='test/enrichr_kegg',
)
tumor_enr.res2d.head(5)

Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes
0,Reactome_2016,Immune System Homo sapiens R-HSA-168256,158/1547,1.186818e-11,1.815831e-08,0,0,1.705061,42.894496,ATF1;CYFIP2;APP;AHCYL1;NCF1;WIPF1;PROS1;WIPF2;...
1,Reactome_2016,Innate Immune System Homo sapiens R-HSA-168249,93/807,8.043351e-10,6.153164e-07,0,0,1.9239,40.288409,ATF1;CYFIP2;APP;AHCYL1;CDKN1B;WIPF1;WIPF2;PROS...
2,Reactome_2016,RHO GTPases Activate WASPs and WAVEs Homo sapi...,14/36,7.762482e-09,3.958866e-06,0,0,6.492302,121.237012,CYFIP2;WIPF1;WIPF2;WAS;BRK1;ARPC4;ARPC5;PTK2;A...
3,Reactome_2016,Hemostasis Homo sapiens R-HSA-109582,66/552,6.232478e-08,2.383923e-05,0,0,1.99608,33.116784,APP;DOCK5;DGKB;PROS1;F13A1;PIK3CD;ARRB2;CLU;AK...
4,Reactome_2016,Membrane Trafficking Homo sapiens R-HSA-199991,54/420,9.540055e-08,2.919257e-05,0,0,2.146435,34.697504,APP;GCC1;USE1;GOLGA5;KIF13B;KIFAP3;VPS36;CTSC;...


In [16]:
enrich_df = tumor_enr.res2d
# Genes overlapping the second-ranked term (row 1), selected through the 'Genes'
# column label rather than a positional column index; Enrichr joins them with ';'.
immune_genes = enrich_df['Genes'].iloc[1].split(';')
immune_genes

['ATF1',
 'CYFIP2',
 'APP',
 'AHCYL1',
 'CDKN1B',
 'WIPF1',
 'WIPF2',
 'PROS1',
 'ARAF',
 'PIK3CD',
 'ICAM3',
 'BRK1',
 'CD3G',
 'ARRB2',
 'DDX41',
 'CLU',
 'FGF2',
 'IFIH1',
 'LGALS3',
 'PPP3CB',
 'PSTPIP1',
 'C8G',
 'PPP2R5E',
 'CASP2',
 'MAP3K8',
 'LBP',
 'JAK3',
 'HRAS',
 'MAP2K3',
 'KSR1',
 'VWF',
 'TMEM173',
 'SPTB',
 'DUSP6',
 'APBB1IP',
 'FGR',
 'CNKSR1',
 'CDC34',
 'MAPKAPK3',
 'LCK',
 'RASA1',
 'KIT',
 'BTK',
 'TAX1BP1',
 'IRF7',
 'ELMO1',
 'PIK3C3',
 'BPI',
 'PRKCQ',
 'TLN1',
 'PSMD10',
 'CFD',
 'CAMK2D',
 'CFH',
 'WAS',
 'RASGRP2',
 'RASAL3',
 'EGFR',
 'FOXO1',
 'RASGRP4',
 'C3',
 'C5',
 'PLCG2',
 'CD59',
 'CSK',
 'FADD',
 'RPS27A',
 'MAP2K7',
 'WASF2',
 'SPTBN1',
 'MAP2K6',
 'SPTBN2',
 'ANGPT1',
 'LIMK1',
 'ARPC4',
 'ISG15',
 'ARPC5',
 'PTK2',
 'NFKB2',
 'MAPK13',
 'GZMM',
 'MYO1C',
 'ARPC2',
 'ARPC3',
 'CD209',
 'TNIP2',
 'CAMK4',
 'ABI1',
 'UBE2N',
 'CTNNB1',
 'GRB2',
 'UBE2K',
 'FGFR1']

In [39]:
# `hemostasis_genes` was not defined anywhere in this notebook, so this cell
# raised NameError on a fresh kernel. Rebuild it from the enrichment results
# currently held in `tumor_enr`.
# NOTE(review): assumes the missing definition was the Hemostasis row of the
# Luad Reactome results - confirm.
luad_terms = tumor_enr.res2d
hemostasis_genes = luad_terms.loc[
    luad_terms['Term'].str.startswith('Hemostasis '), 'Genes'
].iloc[0].split(';')

# Re-attach the "_proteomics" suffix so the names match the Comparison column.
explor_genes = [gene + "_proteomics" for gene in hemostasis_genes]

df_FDR_explor = df_FDR_append[df_FDR_append.Comparison.isin(explor_genes)]


# Lscc

In [10]:
# Join LSCC proteomics with the EGFR mutation columns, then keep tumor samples only.
df1 = Lscc.join_omics_to_mutations(
    omics_df_name="proteomics",
    mutations_genes="EGFR",
)
df1_tumor = df1.loc[df1['Sample_Status'] == "Tumor"]

# Drop column level 1 (NOTE(review): assumes the joined frame has MultiIndex columns).
df1_tumor.columns = df1_tumor.columns.droplevel(level=1)
prot_genes_list = create_prot_list(df1_tumor)




In [11]:
# Correlate EGFR proteomics against every protein column with the plot_utils helper.
# prot_genes_list is already built by the preceding join cell, so it is not
# recomputed here (matches the Brca, GBM and Colon sections).
# NOTE(review): return_all=False presumably keeps only the comparisons that pass
# the fdr_bh correction - confirm in plot_utils.
df2 = p.wrap_pearson_corr(
    df1_tumor,
    "EGFR_proteomics",
    comparison_columns=prot_genes_list,
    correction_method='fdr_bh',
    return_all=False,
)


In [12]:
# Index the correlation results by comparison name and transpose so each
# comparison becomes a column. df2 is not rebound, so this cell can be re-run:
# a second set_index on an already-indexed df2 would raise KeyError.
df1_transposed = df2.set_index('Comparison').T


In [13]:
# Collect the comparison names and strip the "_proteomics" suffix to get gene symbols.
# The column list gets its own name: rebinding `Lscc` here would replace the
# cptac.Lscc() dataset object and make the LSCC join cell fail when re-run.
Lscc_prot = df1_transposed.columns.values.tolist()
Lscc_genes = [re.sub("_proteomics", "", gene) for gene in Lscc_prot]
len(Lscc_genes)

474

In [14]:
# Run Enrichr on the LSCC gene list against the Reactome_2016 library.
# The description is cancer-specific: several sections of this notebook query
# Reactome_2016 into the same outdir, and gseapy builds its report file names
# from the gene set and the description, so a shared description lets the
# runs overwrite each other's report files.
Lscc_enr = gp.enrichr(
    gene_list=Lscc_genes,
    description='Lscc_tumor',
    gene_sets='Reactome_2016',
    outdir='test/enrichr_kegg',
)
Lscc_enr.res2d.head(5)

Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes
0,Reactome_2016,Extracellular matrix organization Homo sapiens...,28/283,1.969085e-10,3.0127e-07,0,0,4.174681,93.296939,DDR1;COL17A1;COL14A1;LAMC3;ITGB4;TNC;LOXL4;LAM...
1,Reactome_2016,Non-integrin membrane-ECM interactions Homo sa...,10/42,3.792171e-08,2.901011e-05,0,0,10.046213,171.667088,DDR1;LAMB3;ITGB4;LAMC3;ITGA2;TNC;SDC1;LAMC2;IT...
2,Reactome_2016,Apoptotic cleavage of cell adhesion proteins ...,6/11,7.17181e-08,3.657623e-05,0,0,23.01496,378.608118,DSP;CDH1;CTNNB1;PKP1;DSG2;DSG3
3,Reactome_2016,Laminin interactions Homo sapiens R-HSA-3000157,7/23,7.08913e-07,0.0002711592,0,0,12.84168,181.832199,LAMB3;ITGB4;LAMC3;ITGA2;LAMC2;ITGAV;ITGA6
4,Reactome_2016,Type I hemidesmosome assembly Homo sapiens R-H...,5/9,8.527709e-07,0.0002609479,0,0,23.441163,327.584973,COL17A1;LAMB3;ITGB4;LAMC2;ITGA6


In [None]:
# Use the LSCC results: the original read `tumor_enr` (the Luad run) and split
# the undefined `hemostasis_genes` instead of the value it had just selected.
enrich_df = Lscc_enr.res2d
# Select the extracellular-matrix term by name rather than by row position.
# NOTE(review): assumes "Extracellular matrix organization" is the intended
# term for `matrix_genes` - confirm.
matrix_genes = enrich_df.loc[
    enrich_df['Term'].str.startswith('Extracellular matrix organization'), 'Genes'
].iloc[0].split(';')
matrix_genes

# Kidney

In [46]:
# Join kidney (ccRCC) proteomics with the EGFR mutation columns, then keep tumor samples only.
df1 = kidney.join_omics_to_mutations(
    omics_df_name="proteomics",
    mutations_genes="EGFR",
)
df1_tumor = df1.loc[df1['Sample_Status'] == "Tumor"]

# Drop column level 1 (NOTE(review): assumes the joined frame has MultiIndex columns).
df1_tumor.columns = df1_tumor.columns.droplevel(level=1)
prot_genes_list = create_prot_list(df1_tumor)



In [47]:
# Correlate EGFR proteomics against every protein column with the plot_utils helper.
# prot_genes_list is already built by the preceding join cell, so it is not
# recomputed here (matches the Brca, GBM and Colon sections).
# NOTE(review): return_all=False presumably keeps only the comparisons that pass
# the fdr_bh correction - confirm in plot_utils.
df2 = p.wrap_pearson_corr(
    df1_tumor,
    "EGFR_proteomics",
    comparison_columns=prot_genes_list,
    correction_method='fdr_bh',
    return_all=False,
)


In [49]:
# Index the correlation results by comparison name and transpose so each
# comparison becomes a column. df2 is not rebound, so this cell can be re-run:
# a second set_index on an already-indexed df2 would raise KeyError.
df1_transposed = df2.set_index('Comparison').T

In [50]:

# Collect the comparison names and strip the "_proteomics" suffix to get gene symbols.
# The column list gets its own name: rebinding `kidney` here would replace the
# cptac.Ccrcc() dataset object and make the kidney join cell fail when re-run.
kidney_prot = df1_transposed.columns.values.tolist()
kidney_genes = [re.sub("_proteomics", "", gene) for gene in kidney_prot]
len(kidney_genes)

1073

In [53]:
# Run Enrichr on the kidney gene list against the Reactome_2016 library.
# The description is cancer-specific: several sections of this notebook query
# Reactome_2016 into the same outdir, and gseapy builds its report file names
# from the gene set and the description, so a shared description lets the
# runs overwrite each other's report files.
Kidney_enr = gp.enrichr(
    gene_list=kidney_genes,
    description='Kidney_tumor',
    gene_sets='Reactome_2016',
    outdir='test/enrichr_kegg',
)
Kidney_enr.res2d.head(5)

Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes
0,Reactome_2016,Metabolism Homo sapiens R-HSA-1430728,235/1908,8.779935e-36,1.34333e-32,0,0,2.295724,185.312245,SLC23A1;GDE1;ENO1;ENO2;NAMPT;FDXR;PDK4;PIP4K2A...
1,Reactome_2016,Respiratory electron transport Homo sapiens R-...,39/88,1.723308e-26,1.318331e-23,0,0,8.260612,490.043992,NDUFA13;NDUFB7;NDUFB6;NDUFB10;UQCRB;NDUFA12;ND...
2,Reactome_2016,"Respiratory electron transport, ATP synthesis ...",40/109,2.1912500000000003e-23,1.1175369999999999e-20,0,0,6.840121,356.883197,ATP5S;NDUFA13;NDUFB7;NDUFB10;UQCRB;NDUFB6;NDUF...
3,Reactome_2016,The citric acid (TCA) cycle and respiratory el...,45/153,1.316761e-21,5.036611e-19,0,0,5.482156,263.577172,ATP5S;NDUFA13;NDUFB7;NDUFB10;UQCRB;NDUFB6;NDUF...
4,Reactome_2016,Complex I biogenesis Homo sapiens R-HSA-6799198,26/49,1.234848e-20,3.778635e-18,0,0,9.890256,453.3768,NDUFA13;NDUFB7;NDUFB6;NDUFB10;NDUFA12;NDUFB5;N...


In [56]:
Kidney_enrich_df = Kidney_enr.res2d

# Genes overlapping the fourth-ranked term (row 3), selected through the 'Genes'
# column label rather than a positional column index; Enrichr joins them with ';'.
TCA_genes = Kidney_enrich_df['Genes'].iloc[3].split(';')
# End on the list itself so the cell displays it (the saved output shows it).
TCA_genes


['ATP5S',
 'NDUFA13',
 'NDUFB7',
 'NDUFB10',
 'UQCRB',
 'NDUFB6',
 'NDUFA12',
 'NDUFB5',
 'NDUFA10',
 'NDUFB3',
 'NDUFB1',
 'UQCR11',
 'UQCR10',
 'COX5B',
 'LDHA',
 'PDK4',
 'UQCRFS1',
 'CYC1',
 'NDUFV2',
 'SLC16A3',
 'NDUFV1',
 'PDK1',
 'COX8A',
 'NDUFA9',
 'NDUFA8',
 'NDUFA7',
 'SLC16A1',
 'NDUFA6',
 'NDUFA4',
 'NDUFA3',
 'NDUFA2',
 'NDUFC2',
 'NDUFC1',
 'COX6B1',
 'COX7A2L',
 'NDUFS8',
 'NDUFS7',
 'UQCRQ',
 'NDUFS5',
 'NDUFS4',
 'UQCRC1',
 'NDUFS3',
 'NDUFS2',
 'NDUFS1',
 'UQCRC2']

# Ovarian

In [11]:

# Join ovarian proteomics with the EGFR mutation columns, then keep tumor samples only.
df1 = Ovar.join_omics_to_mutations(
    omics_df_name="proteomics",
    mutations_genes="EGFR",
)
df1_tumor = df1.loc[df1['Sample_Status'] == "Tumor"]

# Drop column level 1 (NOTE(review): assumes the joined frame has MultiIndex columns).
df1_tumor.columns = df1_tumor.columns.droplevel(level=1)
prot_genes_list = create_prot_list(df1_tumor)



In [12]:
# Correlate EGFR proteomics against every protein column with the plot_utils helper.
# prot_genes_list is already built by the preceding join cell, so it is not
# recomputed here (matches the Brca, GBM and Colon sections).
# NOTE(review): return_all=False presumably keeps only the comparisons that pass
# the fdr_bh correction - confirm in plot_utils.
df2 = p.wrap_pearson_corr(
    df1_tumor,
    "EGFR_proteomics",
    comparison_columns=prot_genes_list,
    correction_method='fdr_bh',
    return_all=False,
)


In [12]:
# Index the correlation results by comparison name and transpose so each
# comparison becomes a column. df2 is not rebound, so this cell can be re-run:
# a second set_index on an already-indexed df2 would raise KeyError.
df1_transposed = df2.set_index('Comparison').T

In [13]:

# Collect the comparison names and strip the "_proteomics" suffix to get gene symbols.
# The column list gets its own name: rebinding `Ovar` here would replace the
# cptac.Ovarian() dataset object and make the ovarian join cell fail when re-run.
Ovar_prot = df1_transposed.columns.values.tolist()
Ovar_genes = [re.sub("_proteomics", "", gene) for gene in Ovar_prot]
len(Ovar_genes)

613

In [15]:
# Run Enrichr on the ovarian gene list against the Reactome_2016 library.
# The description is cancer-specific: several sections of this notebook query
# Reactome_2016 into the same outdir, and gseapy builds its report file names
# from the gene set and the description, so a shared description lets the
# runs overwrite each other's report files.
# NOTE(review): the first five rows of the saved output below are identical to
# the Colon result table - re-run this section on a fresh kernel to confirm
# the output is not stale.
Ovar_enr = gp.enrichr(
    gene_list=Ovar_genes,
    description='Ovarian_tumor',
    gene_sets='Reactome_2016',
    outdir='test/enrichr_kegg',
)
Ovar_enr.res2d.head(10)


Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes
0,Reactome_2016,Mitochondrial translation Homo sapiens R-HSA-5...,30/90,2.3414370000000003e-23,3.5823989999999997e-20,0,0,10.875476,566.706822,GFM2;MRPL19;MRPS34;MRPL39;MRPL17;MTIF2;MRPL37;...
1,Reactome_2016,Mitochondrial translation initiation Homo sapi...,27/84,1.131038e-20,8.652439e-18,0,0,10.487066,481.655903,MRPL19;MRPS34;MRPL39;MRPL17;MTIF2;MRPL37;MTIF3...
2,Reactome_2016,Mitochondrial translation elongation Homo sapi...,27/84,1.131038e-20,5.768293e-18,0,0,10.487066,481.655903,MRPL19;MRPS34;MRPL39;MRPL17;MRPL37;MRPL15;MRPL...
3,Reactome_2016,Mitochondrial translation termination Homo sap...,26/84,1.74051e-19,6.657452e-17,0,0,10.098656,436.210828,GFM2;MRPL19;MRPS34;MRPL39;MRPL17;MRPL37;MRPL15...
4,Reactome_2016,mRNA Splicing - Major Pathway Homo sapiens R-H...,25/134,3.480804e-13,1.065126e-10,0,0,6.08702,174.614343,SF3B6;SRSF1;DDX23;HNRNPR;U2AF1;USP39;SART1;CST...
5,Reactome_2016,Processing of Capped Intron-Containing Pre-mRN...,29/193,1.415549e-12,3.609649e-10,0,0,4.902417,133.755101,SF3B6;SRSF1;DDX23;HNRNPR;U2AF1;USP39;SART1;CST...
6,Reactome_2016,mRNA Splicing Homo sapiens R-HSA-72172,25/144,1.884289e-12,4.118517e-10,0,0,5.66431,152.922051,SF3B6;SRSF1;DDX23;HNRNPR;U2AF1;USP39;SART1;CST...
7,Reactome_2016,Organelle biogenesis and maintenance Homo sapi...,37/326,7.394122e-12,1.414126e-09,0,0,3.702999,94.909119,DYNC1I2;GFM2;MRPL19;MRPS34;MRPL17;MRPL39;MTIF2...
8,Reactome_2016,Mitochondrial protein import Homo sapiens R-HS...,14/54,5.837246e-10,9.923319e-08,0,0,8.458703,179.845499,DNAJC19;TIMM8A;TOMM40;IDH3G;TIMM13;TIMM10;TOMM...
9,Reactome_2016,Metabolism of proteins Homo sapiens R-HSA-392499,66/1074,5.604597e-08,8.575033e-06,0,0,2.004976,33.477272,PIGU;SPON1;GSK3A;CXCL8;TOMM22;EEF1B2;RPS18;TIM...


# Hnscc

In [25]:

# Join HNSCC proteomics with the EGFR mutation columns, then keep tumor samples only.
df1 = Hnscc.join_omics_to_mutations(
    omics_df_name="proteomics",
    mutations_genes="EGFR",
)
df1_tumor = df1.loc[df1['Sample_Status'] == "Tumor"]

# Unlike most other sections, no column level is dropped here
# (the droplevel(1) step was left disabled for this dataset).
prot_genes_list = create_prot_list(df1_tumor)



In [26]:
# Correlate EGFR proteomics against every protein column with the plot_utils helper.
# prot_genes_list is already built by the preceding join cell, so it is not
# recomputed here (matches the Brca, GBM and Colon sections).
# NOTE(review): return_all=False presumably keeps only the comparisons that pass
# the fdr_bh correction - confirm in plot_utils.
df2 = p.wrap_pearson_corr(
    df1_tumor,
    "EGFR_proteomics",
    comparison_columns=prot_genes_list,
    correction_method='fdr_bh',
    return_all=False,
)


In [27]:
# Index the correlation results by comparison name and transpose so each
# comparison becomes a column. df2 is not rebound, so this cell can be re-run:
# a second set_index on an already-indexed df2 would raise KeyError.
df1_transposed = df2.set_index('Comparison').T

In [28]:
# Collect the comparison names and strip the "_proteomics" suffix to get gene symbols.
# The column list gets its own name: rebinding `Hnscc` here would replace the
# cptac.Hnscc() dataset object and make the HNSCC join cell fail when re-run.
Hnscc_prot = df1_transposed.columns.values.tolist()
Hnscc_genes = [re.sub("_proteomics", "", gene) for gene in Hnscc_prot]
len(Hnscc_genes)

1074

In [29]:
# Run Enrichr on the HNSCC gene list against the Reactome_2016 library.
# The description is cancer-specific: several sections of this notebook query
# Reactome_2016 into the same outdir, and gseapy builds its report file names
# from the gene set and the description, so a shared description lets the
# runs overwrite each other's report files.
Hnscc_enr = gp.enrichr(
    gene_list=Hnscc_genes,
    description='Hnscc_tumor',
    gene_sets='Reactome_2016',
    outdir='test/enrichr_kegg',
)
Hnscc_enr.res2d.head(10)


Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes
0,Reactome_2016,Extracellular matrix organization Homo sapiens...,59/283,1.1803569999999998e-19,1.805946e-16,0,0,3.88232,169.204302,DDR1;ITGB1;SPARC;ITGB5;SERPINE1;COL12A1;ICAM3;...
1,Reactome_2016,Hemostasis Homo sapiens R-HSA-109582,81/552,1.188696e-16,9.093528e-14,0,0,2.732572,100.199337,ITGB1;SPARC;DOCK8;SERPINE1;PLEK;ATP2A3;ARRB1;A...
2,Reactome_2016,Immune System Homo sapiens R-HSA-168256,158/1547,1.170688e-15,5.97051e-13,0,0,1.901921,65.390299,CYFIP2;NCKAP1;NUP107;NCF1;WIPF1;NCF4;ARAF;INPP...
3,Reactome_2016,Cell surface interactions at the vascular wall...,30/101,5.646039e-15,2.15961e-12,0,0,5.531279,181.46923,ITGB1;F11R;ITGAL;SPN;THBD;INPP5D;BSG;ITGAV;SLC...
4,Reactome_2016,Adaptive Immune System Homo sapiens R-HSA-1280218,92/762,1.973048e-13,6.037528e-11,0,0,2.248322,65.77248,ITGB1;GSK3B;CDKN1B;NCF1;ITGB5;SH3KBP1;CLTC;NCF...
5,Reactome_2016,Signaling by Rho GTPases Homo sapiens R-HSA-19...,55/367,4.616114e-12,1.177109e-09,0,0,2.790759,72.842907,CYFIP2;ARHGAP9;ITGB1;NCKAP1;NUP107;CDKN1B;RTKN...
6,Reactome_2016,Metabolism Homo sapiens R-HSA-1430728,170/1908,1.454692e-11,3.179542e-09,0,0,1.659191,41.402846,RPL4;ALDH1L1;RPL30;NUP107;RPL3;INPPL1;MT1X;ENO...
7,Reactome_2016,"Platelet activation, signaling and aggregation...",42/253,6.10447e-11,1.16748e-08,0,0,3.091395,72.707799,SPARC;SERPINE1;PLEK;ITPR1;ITPR3;ARRB1;ARRB2;RA...
8,Reactome_2016,Collagen formation Homo sapiens R-HSA-1474290,23/85,6.925813e-11,1.177388e-08,0,0,5.038887,117.875595,COL17A1;COL28A1;COL15A1;CRTAP;LAMB3;COL11A1;CO...
9,Reactome_2016,VEGFA-VEGFR2 Pathway Homo sapiens R-HSA-4420097,48/320,1.039101e-10,1.589824e-08,0,0,2.793296,64.210881,CYFIP2;NCKAP1;NCF1;NCF4;ARAF;ITPR1;HSPB1;ITPR3...
