# GSEA by cancer 

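This notebook takes the FDR-significant proteomic trans effects of EGFR in each cancer type and runs a gene set enrichment analysis (Enrichr, via `gseapy`) against the Reactome 2016 library. For each cancer type, the result is a data frame of the top pathways, i.e. those that contain many proteins with significant trans effects. (Main gene is EGFR.)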

In [3]:

import pandas as pd
import numpy as np
import re 
import matplotlib.pyplot as plt
from scipy import stats
import gseapy as gp
from gseapy.plot import barplot, dotplot
import cptac
import plot_utils as p

  import pandas.util.testing as tm


In [4]:
# Load the combined EGFR correlation table and discard its 'Unnamed: 0' column
# (presumably the row index written out when the CSV was saved — confirm).
df_FDR_append = pd.read_csv(
    "../Step3.2_combining_pearson_dfs/csv_files/pancan_EGFR_pearson_sig_all_prot_append_FDR.csv"
).drop(columns=['Unnamed: 0'])


In [5]:
def create_prot_list(df):
    """Return the column labels of ``df`` minus the EGFR/sample metadata columns.

    Duplicated column labels are collapsed to their first occurrence. The four
    metadata labels removed below must all be present; a missing one raises
    ``ValueError`` (from ``list.remove``).

    Parameters
    ----------
    df : pandas.DataFrame
        Joined table whose columns include the four metadata labels.

    Returns
    -------
    list
        Remaining column labels, in their original order.
    """
    first_occurrence = ~df.columns.duplicated()
    column_names = df.columns[first_occurrence].values.tolist()
    metadata_columns = (
        'EGFR_Mutation',
        'EGFR_Location',
        'EGFR_Mutation_Status',
        'Sample_Status',
    )
    for meta_col in metadata_columns:
        column_names.remove(meta_col)
    return column_names

In [48]:
# Instantiate one cptac dataset object per cancer type; each per-cancer section
# below calls join_omics_to_mutations on its handle.
# NOTE(review): several later cells rebind some of these names (e.g. `brca`,
# `colon`) to plain lists, so this cell must be re-run before re-running a section.
brain = cptac.Gbm()
kidney = cptac.Ccrcc()
Ovar = cptac.Ovarian()
colon = cptac.Colon()
brca = cptac.Brca()
luad = cptac.Luad()
Hnscc = cptac.Hnscc()
Lscc = cptac.Lscc()

                                        



                                            



                                          



                                         



# Enrichment Analysis

# Brca

In [49]:
# Proteomics joined with EGFR mutation data, restricted to tumor samples.
df1 = brca.join_omics_to_mutations(
    omics_df_name="proteomics",
    mutations_genes="EGFR",
)
df1_tumor = df1.loc[df1['Sample_Status'] == "Tumor"]

# Drop the second level of the column index before listing the columns that
# will be compared against EGFR.
df1_tumor.columns = df1_tumor.columns.droplevel(1)
prot_genes_list = create_prot_list(df1_tumor)




In [50]:
# Run plot_utils' wrap_pearson_corr for EGFR_proteomics against every listed
# column (correction_method='fdr_bh'; return_all=False presumably keeps only the
# significant comparisons — confirm against plot_utils), indexed by 'Comparison'.
df2 = p.wrap_pearson_corr(
    df1_tumor,
    "EGFR_proteomics",
    comparison_columns=prot_genes_list,
    correction_method='fdr_bh',
    return_all=False,
).set_index('Comparison')
# Transposing turns each 'Comparison' label into a column label.
df1_transposed = df2.T

In [53]:


# The column labels of the transposed table are the compared proteins.
# They get their own name instead of rebinding `brca`, which holds the
# cptac.Brca() dataset; overwriting it made the Brca join cell fail on re-run.
brca_sig_cols = df1_transposed.columns.values.tolist()
# Remove the "_proteomics" tag so the names can be passed to gp.enrichr as gene symbols.
brca_genes = [re.sub("_proteomics", "", col) for col in brca_sig_cols]
len(brca_genes)

2669

In [64]:
# Enrichr query of the Brca gene list against the Reactome_2016 library.
# NOTE(review): every section passes the same outdir/description, so any report
# files written there may overwrite one another — confirm.
brca_enr = gp.enrichr(
    gene_list=brca_genes,
    description='Tumor_partition',
    gene_sets='Reactome_2016',
    outdir='test/enrichr_kegg',
)
brca_enr.res2d.head(10)

Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes
0,Reactome_2016,Metabolism Homo sapiens R-HSA-1430728,390/1908,8.129875e-20,1.243871e-16,0,0,1.531679,67.326706,RPL4;CDA;RPL5;RPL3;ENO1;RPL8;RPL9;GLS;RPL7;IL4...
1,Reactome_2016,Extracellular matrix organization Homo sapiens...,89/283,1.750819e-15,1.339377e-12,0,0,2.356595,80.074025,ITGB1;COLGALT2;SPARC;ITGAM;ITGB4;ELN;ITGB3;SER...
2,Reactome_2016,Signaling by Rho GTPases Homo sapiens R-HSA-19...,105/367,6.940927e-15,3.539873e-12,0,0,2.143901,69.894036,RTKN;NCF1;WIPF1;NCF2;WIPF2;WIPF3;NCF4;ACTB;ARH...
3,Reactome_2016,Nonsense-Mediated Decay (NMD) Homo sapiens R-H...,45/106,1.350158e-13,5.164353e-11,0,0,3.181179,94.269093,RPL4;RPL5;RPL3;RPL10;PPP2R2A;SMG9;CASC3;RPL8;R...
4,Reactome_2016,Nonsense Mediated Decay (NMD) enhanced by the ...,45/106,1.350158e-13,4.131483e-11,0,0,3.181179,94.269093,RPL4;RPL5;RPL3;RPL10;PPP2R2A;SMG9;CASC3;RPL8;R...
5,Reactome_2016,SRP-dependent cotranslational protein targetin...,45/107,2.031066e-13,5.179218e-11,0,0,3.151448,92.101214,RPL4;RPL5;RPL3;RPN2;RPL10;RPN1;SRP54;SRP14;RPL...
6,Reactome_2016,Hemostasis Homo sapiens R-HSA-109582,136/552,3.162701e-13,6.912761e-11,0,0,1.846211,53.137967,APP;GUCY1B3;SPARC;SERPINE2;PROS1;SERPINE1;ITGA...
7,Reactome_2016,Translation Homo sapiens R-HSA-72766,55/151,5.948716e-13,1.137692e-10,0,0,2.7294,76.833782,N6AMT1;RPL4;RPL5;RPL3;SRP54;SRP14;RPL8;RPL10A;...
8,Reactome_2016,Eukaryotic Translation Elongation Homo sapiens...,39/89,1.682164e-12,2.859678e-10,0,0,3.283644,89.022667,RPL4;RPL5;RPL3;RPL10;RPL10A;RPL8;RPL9;RPL7;EEF...
9,Reactome_2016,RHO GTPase Effectors Homo sapiens R-HSA-195258,76/255,4.114883e-12,6.29577e-10,0,0,2.23334,58.550157,ITGB1;RTKN;NCF1;WIPF1;NCF2;WIPF2;ARPC1B;WIPF3;...


In [71]:
# Keep the full Brca result table and show the untruncated entry at row 2,
# column 1 (the 'Term' column in the table displayed above).
brca_df = brca_enr.res2d
brca_df.iloc[2, 1]

'Signaling by Rho GTPases Homo sapiens R-HSA-194315'

# GBM

In [58]:
# Proteomics joined with EGFR mutation data, restricted to tumor samples.
df1 = brain.join_omics_to_mutations(
    omics_df_name="proteomics",
    mutations_genes="EGFR",
)
df1_tumor = df1.loc[df1['Sample_Status'] == "Tumor"]

# Columns to compare against EGFR (metadata columns excluded).
prot_genes_list = create_prot_list(df1_tumor)





In [59]:
# Run plot_utils' wrap_pearson_corr for EGFR_proteomics against every listed
# column (correction_method='fdr_bh'; return_all=False presumably keeps only the
# significant comparisons — confirm against plot_utils), indexed by 'Comparison'.
df2 = p.wrap_pearson_corr(
    df1_tumor,
    "EGFR_proteomics",
    comparison_columns=prot_genes_list,
    correction_method='fdr_bh',
    return_all=False,
).set_index('Comparison')
# Transposing turns each 'Comparison' label into a column label.
df1_transposed = df2.T

In [60]:


# Column labels of the transposed table are the compared proteins.
gbm_prot = df1_transposed.columns.values.tolist()
# Remove the "_proteomics" tag so the names can be passed to gp.enrichr as gene symbols.
gbm_genes = [re.sub("_proteomics", "", label) for label in gbm_prot]
len(gbm_genes)

1590

In [62]:
# Enrichr query of the GBM gene list against the Reactome_2016 library.
# NOTE(review): every section passes the same outdir/description, so any report
# files written there may overwrite one another — confirm.
gbm_enr = gp.enrichr(
    gene_list=gbm_genes,
    description='Tumor_partition',
    gene_sets='Reactome_2016',
    outdir='test/enrichr_kegg',
)
gbm_enr.res2d.head(10)

Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes
0,Reactome_2016,Hemostasis Homo sapiens R-HSA-109582,119/552,2.723097e-24,4.166339e-21,0,0,2.711694,147.13728,DGKG;CD84;DGKB;DGKA;PROS1;SERPINE1;F13A1;DOCK1...
1,Reactome_2016,Innate Immune System Homo sapiens R-HSA-168249,150/807,2.0757650000000002e-23,1.587961e-20,0,0,2.338033,122.113411,NCKAP1;AHCYL1;WIPF1;PROS1;WIPF3;ICAM2;PYCARD;C...
2,Reactome_2016,"Platelet activation, signaling and aggregation...",71/253,1.1474189999999999e-21,5.851834999999999e-19,0,0,3.529967,170.203636,DGKG;SERPINA3;ECM1;SERPINA1;DAGLA;DGKB;DGKA;PR...
3,Reactome_2016,Extracellular matrix organization Homo sapiens...,75/283,3.5061279999999996e-21,1.341094e-18,0,0,3.333556,157.009717,DDR1;ITGB1;COL18A1;PTPRS;ITGAM;ITGB5;COL14A1;N...
4,Reactome_2016,Immune System Homo sapiens R-HSA-168256,225/1547,5.0822659999999997e-20,1.555173e-17,0,0,1.829469,81.275892,NCKAP1;CNTFR;AHCYL1;WIPF1;WIPF3;ICAM2;IFI30;C4...
5,Reactome_2016,Platelet degranulation Homo sapiens R-HSA-114608,39/105,4.5391620000000007e-17,1.157486e-14,0,0,4.672058,175.81515,CFD;SERPINA3;ITIH4;ECM1;SERPINA1;PROS1;ITGB3;S...
6,Reactome_2016,Response to elevated platelet cytosolic Ca2+ H...,39/110,2.878713e-16,6.292044e-14,0,0,4.459691,159.585673,CFD;SERPINA3;ITIH4;ECM1;SERPINA1;PROS1;ITGB3;S...
7,Reactome_2016,Formation of Fibrin Clot (Clotting Cascade) Ho...,21/39,1.098807e-13,2.101468e-11,0,0,6.773101,202.105148,FGB;FGA;F10;SERPIND1;SERPINC1;PROS1;FGG;F11;F1...
8,Reactome_2016,Signaling by Rho GTPases Homo sapiens R-HSA-19...,70/367,4.701829e-12,7.99311e-10,0,0,2.399191,62.578269,ARHGAP9;YWHAE;ITGB1;NCKAP1;WIPF1;NCF2;YWHAB;AR...
9,Reactome_2016,Fcgamma receptor (FCGR) dependent phagocytosis...,35/120,6.598641e-12,1.009592e-09,0,0,3.668763,94.449214,NCKAP1;AHCYL1;WIPF1;ARPC1B;WIPF3;WAS;ITPR3;PLD...


# Colon

In [72]:
# Build the colon table of proteomics joined with EGFR mutation data,
# then keep only the tumor samples.
df1 = colon.join_omics_to_mutations(
    omics_df_name="proteomics",
    mutations_genes="EGFR",
)
df1_tumor = df1.loc[df1['Sample_Status'] == "Tumor"]
# Columns to compare against EGFR (metadata columns excluded).
prot_genes_list = create_prot_list(df1_tumor)



In [73]:
# Run plot_utils' wrap_pearson_corr for EGFR_proteomics against every listed
# column (correction_method='fdr_bh'; return_all=False presumably keeps only the
# significant comparisons — confirm against plot_utils).
df2 = p.wrap_pearson_corr(
    df1_tumor,
    "EGFR_proteomics",
    comparison_columns=prot_genes_list,
    correction_method='fdr_bh',
    return_all=False,
)


Get the column names (the significant trans genes) and remove the `_proteomics` suffix from each name.

In [74]:
# Index by the compared column once. The guard keeps this cell re-runnable:
# a second set_index('Comparison') would raise KeyError because the column
# has already been moved into the index.
if df2.index.name != 'Comparison':
    df2 = df2.set_index('Comparison')
# Transposing turns each 'Comparison' label into a column label.
df1_transposed = df2.T

# Use a dedicated name rather than rebinding `colon`, which holds the
# cptac.Colon() dataset that the colon join cell calls.
colon_sig_cols = df1_transposed.columns.values.tolist()
# Remove the "_proteomics" tag so the names can be passed to gp.enrichr as gene symbols.
colon_genes = [re.sub("_proteomics", "", col) for col in colon_sig_cols]


Run enrichment analysis 

In [75]:

# Enrichr query of the colon gene list against the Reactome_2016 library.
# NOTE(review): every section passes the same outdir/description, so any report
# files written there may overwrite one another — confirm.
tumor_enr = gp.enrichr(
    gene_list=colon_genes,
    description='Tumor_partition',
    gene_sets='Reactome_2016',
    outdir='test/enrichr_kegg',
)

In [76]:
# Show the first five rows of the enrichment result table held in tumor_enr.
tumor_enr.res2d.head(5)

Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes
0,Reactome_2016,Mitochondrial translation Homo sapiens R-HSA-5...,30/90,2.3414370000000003e-23,3.5823989999999997e-20,0,0,10.875476,566.706822,GFM2;MRPL19;MRPS34;MRPL39;MRPL17;MTIF2;MRPL37;...
1,Reactome_2016,Mitochondrial translation initiation Homo sapi...,27/84,1.131038e-20,8.652439e-18,0,0,10.487066,481.655903,MRPL19;MRPS34;MRPL39;MRPL17;MTIF2;MRPL37;MTIF3...
2,Reactome_2016,Mitochondrial translation elongation Homo sapi...,27/84,1.131038e-20,5.768293e-18,0,0,10.487066,481.655903,MRPL19;MRPS34;MRPL39;MRPL17;MRPL37;MRPL15;MRPL...
3,Reactome_2016,Mitochondrial translation termination Homo sap...,26/84,1.74051e-19,6.657452e-17,0,0,10.098656,436.210828,GFM2;MRPL19;MRPS34;MRPL39;MRPL17;MRPL37;MRPL15...
4,Reactome_2016,mRNA Splicing - Major Pathway Homo sapiens R-H...,25/134,3.480804e-13,1.065126e-10,0,0,6.08702,174.614343,SF3B6;SRSF1;DDX23;HNRNPR;U2AF1;USP39;SART1;CST...


In [78]:
enrich_df = tumor_enr.res2d
# Row 0, column 9 (the 'Genes' column in the table displayed above):
# a ';'-separated gene string, split here into a list.
translation = enrich_df.iloc[0, 9].split(';')

# Row 1, column 1: the untruncated 'Term' entry of the second row.
enrich_df.iloc[1, 1]

'Mitochondrial translation initiation Homo sapiens R-HSA-5368286'

# Luad

In [28]:
# Proteomics joined with EGFR mutation data, restricted to tumor samples.
df1 = luad.join_omics_to_mutations(
    omics_df_name="proteomics",
    mutations_genes="EGFR",
)
df1_tumor = df1.loc[df1['Sample_Status'] == "Tumor"]

# Drop the second level of the column index before listing the columns that
# will be compared against EGFR.
df1_tumor.columns = df1_tumor.columns.droplevel(1)
prot_genes_list = create_prot_list(df1_tumor)




In [29]:
# Rebuild the comparison column list (same call as in the join cell), then run
# plot_utils' wrap_pearson_corr for EGFR_proteomics against each of them.
prot_genes_list = create_prot_list(df1_tumor)
df2 = p.wrap_pearson_corr(
    df1_tumor,
    "EGFR_proteomics",
    comparison_columns=prot_genes_list,
    correction_method='fdr_bh',
    return_all=False,
)


In [30]:
# Index by the compared column once. The guard keeps this cell re-runnable:
# a second set_index('Comparison') would raise KeyError because the column
# has already been moved into the index.
if df2.index.name != 'Comparison':
    df2 = df2.set_index('Comparison')
# Transposing turns each 'Comparison' label into a column label.
df1_transposed = df2.T


In [32]:

# Column labels of the transposed table are the compared proteins.
# NOTE(review): `Luad` differs from the dataset handle `luad` only by case.
Luad = df1_transposed.columns.values.tolist()
# Remove the "_proteomics" tag so the names can be passed to gp.enrichr as gene symbols.
Luad_genes = [re.sub("_proteomics", "", label) for label in Luad]
len(Luad_genes)

1198

In [40]:
# Enrichr query of the Luad gene list against the Reactome_2016 library.
# NOTE(review): `tumor_enr` is also the name used for the colon results, and
# every section shares the same outdir/description — confirm nothing is
# overwritten unintentionally.
tumor_enr = gp.enrichr(
    gene_list=Luad_genes,
    description='Tumor_partition',
    gene_sets='Reactome_2016',
    outdir='test/enrichr_kegg',
)
tumor_enr.res2d.head(5)

Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes
0,Reactome_2016,Immune System Homo sapiens R-HSA-168256,158/1547,1.186818e-11,1.815831e-08,0,0,1.705061,42.894496,ATF1;CYFIP2;APP;AHCYL1;NCF1;WIPF1;PROS1;WIPF2;...
1,Reactome_2016,Innate Immune System Homo sapiens R-HSA-168249,93/807,8.043351e-10,6.153164e-07,0,0,1.9239,40.288409,ATF1;CYFIP2;APP;AHCYL1;CDKN1B;WIPF1;WIPF2;PROS...
2,Reactome_2016,RHO GTPases Activate WASPs and WAVEs Homo sapi...,14/36,7.762482e-09,3.958866e-06,0,0,6.492302,121.237012,CYFIP2;WIPF1;WIPF2;WAS;BRK1;ARPC4;ARPC5;PTK2;A...
3,Reactome_2016,Hemostasis Homo sapiens R-HSA-109582,66/552,6.232478e-08,2.383923e-05,0,0,1.99608,33.116784,APP;DOCK5;DGKB;PROS1;F13A1;PIK3CD;ARRB2;CLU;AK...
4,Reactome_2016,Membrane Trafficking Homo sapiens R-HSA-199991,54/420,9.540055e-08,2.919257e-05,0,0,2.146435,34.697504,APP;GCC1;USE1;GOLGA5;KIF13B;KIFAP3;VPS36;CTSC;...


In [36]:
enrich_df = tumor_enr.res2d
# Row 3, column 9 (the 'Genes' column; row 3 is the Hemostasis term in the
# table displayed above): a ';'-separated gene string, split into a list.
hemostasis_genes = enrich_df.iloc[3, 9].split(';')


In [39]:
# Append "_proteomics" to each gene symbol (presumably to match the labels
# stored in the Comparison column of the combined table — confirm).
explor_genes = [symbol + "_proteomics" for symbol in hemostasis_genes]

# Rows of the combined table whose Comparison value is one of those labels.
df_FDR_explor = df_FDR_append[df_FDR_append['Comparison'].isin(explor_genes)]


# Lscc

In [41]:
# Proteomics joined with EGFR mutation data, restricted to tumor samples.
df1 = Lscc.join_omics_to_mutations(
    omics_df_name="proteomics",
    mutations_genes="EGFR",
)
df1_tumor = df1.loc[df1['Sample_Status'] == "Tumor"]

# Drop the second level of the column index before listing the columns that
# will be compared against EGFR.
df1_tumor.columns = df1_tumor.columns.droplevel(1)
prot_genes_list = create_prot_list(df1_tumor)




In [42]:
# Rebuild the comparison column list (same call as in the join cell), then run
# plot_utils' wrap_pearson_corr for EGFR_proteomics against each of them.
prot_genes_list = create_prot_list(df1_tumor)
df2 = p.wrap_pearson_corr(
    df1_tumor,
    "EGFR_proteomics",
    comparison_columns=prot_genes_list,
    correction_method='fdr_bh',
    return_all=False,
)


In [43]:
# Index by the compared column once. The guard keeps this cell re-runnable:
# a second set_index('Comparison') would raise KeyError because the column
# has already been moved into the index.
if df2.index.name != 'Comparison':
    df2 = df2.set_index('Comparison')
# Transposing turns each 'Comparison' label into a column label.
df1_transposed = df2.T


In [44]:
# The column labels of the transposed table are the compared proteins.
# They get their own name instead of rebinding `Lscc`, which holds the
# cptac.Lscc() dataset; overwriting it made the Lscc join cell fail on re-run.
Lscc_sig_cols = df1_transposed.columns.values.tolist()
# Remove the "_proteomics" tag so the names can be passed to gp.enrichr as gene symbols.
Lscc_genes = [re.sub("_proteomics", "", col) for col in Lscc_sig_cols]
len(Lscc_genes)

474

In [45]:
# Enrichr query of the Lscc gene list against the Reactome_2016 library.
# NOTE(review): every section passes the same outdir/description, so any report
# files written there may overwrite one another — confirm.
Lscc_enr = gp.enrichr(
    gene_list=Lscc_genes,
    description='Tumor_partition',
    gene_sets='Reactome_2016',
    outdir='test/enrichr_kegg',
)
Lscc_enr.res2d.head(5)

Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes
0,Reactome_2016,Extracellular matrix organization Homo sapiens...,28/283,1.969085e-10,3.0127e-07,0,0,4.174681,93.296939,DDR1;COL17A1;COL14A1;LAMC3;ITGB4;TNC;LOXL4;LAM...
1,Reactome_2016,Non-integrin membrane-ECM interactions Homo sa...,10/42,3.792171e-08,2.901011e-05,0,0,10.046213,171.667088,DDR1;LAMB3;ITGB4;LAMC3;ITGA2;TNC;SDC1;LAMC2;IT...
2,Reactome_2016,Apoptotic cleavage of cell adhesion proteins ...,6/11,7.17181e-08,3.657623e-05,0,0,23.01496,378.608118,DSP;CDH1;CTNNB1;PKP1;DSG2;DSG3
3,Reactome_2016,Laminin interactions Homo sapiens R-HSA-3000157,7/23,7.08913e-07,0.0002711592,0,0,12.84168,181.832199,LAMB3;ITGB4;LAMC3;ITGA2;LAMC2;ITGAV;ITGA6
4,Reactome_2016,Type I hemidesmosome assembly Homo sapiens R-H...,5/9,8.527709e-07,0.0002609479,0,0,23.441163,327.584973,COL17A1;LAMB3;ITGB4;LAMC2;ITGA6


# Kidney

In [46]:
# Proteomics joined with EGFR mutation data, restricted to tumor samples.
df1 = kidney.join_omics_to_mutations(
    omics_df_name="proteomics",
    mutations_genes="EGFR",
)
df1_tumor = df1.loc[df1['Sample_Status'] == "Tumor"]

# Drop the second level of the column index before listing the columns that
# will be compared against EGFR.
df1_tumor.columns = df1_tumor.columns.droplevel(1)
prot_genes_list = create_prot_list(df1_tumor)



In [47]:
# Rebuild the comparison column list (same call as in the join cell), then run
# plot_utils' wrap_pearson_corr for EGFR_proteomics against each of them.
prot_genes_list = create_prot_list(df1_tumor)
df2 = p.wrap_pearson_corr(
    df1_tumor,
    "EGFR_proteomics",
    comparison_columns=prot_genes_list,
    correction_method='fdr_bh',
    return_all=False,
)


In [49]:
# Index by the compared column once. The guard keeps this cell re-runnable:
# a second set_index('Comparison') would raise KeyError because the column
# has already been moved into the index.
if df2.index.name != 'Comparison':
    df2 = df2.set_index('Comparison')
# Transposing turns each 'Comparison' label into a column label.
df1_transposed = df2.T

In [50]:

# The column labels of the transposed table are the compared proteins.
# They get their own name instead of rebinding `kidney`, which holds the
# cptac.Ccrcc() dataset; overwriting it made the Kidney join cell fail on re-run.
kidney_sig_cols = df1_transposed.columns.values.tolist()
# Remove the "_proteomics" tag so the names can be passed to gp.enrichr as gene symbols.
kidney_genes = [re.sub("_proteomics", "", col) for col in kidney_sig_cols]
len(kidney_genes)

1073

In [53]:
# Enrichr query of the kidney gene list against the Reactome_2016 library.
# NOTE(review): every section passes the same outdir/description, so any report
# files written there may overwrite one another — confirm.
Kidney_enr = gp.enrichr(
    gene_list=kidney_genes,
    description='Tumor_partition',
    gene_sets='Reactome_2016',
    outdir='test/enrichr_kegg',
)
Kidney_enr.res2d.head(5)

Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes
0,Reactome_2016,Metabolism Homo sapiens R-HSA-1430728,235/1908,8.779935e-36,1.34333e-32,0,0,2.295724,185.312245,SLC23A1;GDE1;ENO1;ENO2;NAMPT;FDXR;PDK4;PIP4K2A...
1,Reactome_2016,Respiratory electron transport Homo sapiens R-...,39/88,1.723308e-26,1.318331e-23,0,0,8.260612,490.043992,NDUFA13;NDUFB7;NDUFB6;NDUFB10;UQCRB;NDUFA12;ND...
2,Reactome_2016,"Respiratory electron transport, ATP synthesis ...",40/109,2.1912500000000003e-23,1.1175369999999999e-20,0,0,6.840121,356.883197,ATP5S;NDUFA13;NDUFB7;NDUFB10;UQCRB;NDUFB6;NDUF...
3,Reactome_2016,The citric acid (TCA) cycle and respiratory el...,45/153,1.316761e-21,5.036611e-19,0,0,5.482156,263.577172,ATP5S;NDUFA13;NDUFB7;NDUFB10;UQCRB;NDUFB6;NDUF...
4,Reactome_2016,Complex I biogenesis Homo sapiens R-HSA-6799198,26/49,1.234848e-20,3.778635e-18,0,0,9.890256,453.3768,NDUFA13;NDUFB7;NDUFB6;NDUFB10;NDUFA12;NDUFB5;N...


In [56]:
Kidney_enrich_df = Kidney_enr.res2d

# Row 3, column 9 (the 'Genes' column; row 3 is the TCA-cycle term in the table
# displayed above): a ';'-separated gene string, split into a list.
TCA_genes = Kidney_enrich_df.iloc[3, 9].split(';')
# Display the list: the saved output of this cell shows it, but the cell had no
# final expression, so a fresh run would not have reproduced that output.
TCA_genes


['ATP5S',
 'NDUFA13',
 'NDUFB7',
 'NDUFB10',
 'UQCRB',
 'NDUFB6',
 'NDUFA12',
 'NDUFB5',
 'NDUFA10',
 'NDUFB3',
 'NDUFB1',
 'UQCR11',
 'UQCR10',
 'COX5B',
 'LDHA',
 'PDK4',
 'UQCRFS1',
 'CYC1',
 'NDUFV2',
 'SLC16A3',
 'NDUFV1',
 'PDK1',
 'COX8A',
 'NDUFA9',
 'NDUFA8',
 'NDUFA7',
 'SLC16A1',
 'NDUFA6',
 'NDUFA4',
 'NDUFA3',
 'NDUFA2',
 'NDUFC2',
 'NDUFC1',
 'COX6B1',
 'COX7A2L',
 'NDUFS8',
 'NDUFS7',
 'UQCRQ',
 'NDUFS5',
 'NDUFS4',
 'UQCRC1',
 'NDUFS3',
 'NDUFS2',
 'NDUFS1',
 'UQCRC2']

# Ovarian

In [11]:

# Proteomics joined with EGFR mutation data, restricted to tumor samples.
df1 = Ovar.join_omics_to_mutations(
    omics_df_name="proteomics",
    mutations_genes="EGFR",
)
df1_tumor = df1.loc[df1['Sample_Status'] == "Tumor"]

# Drop the second level of the column index before listing the columns that
# will be compared against EGFR.
df1_tumor.columns = df1_tumor.columns.droplevel(1)
prot_genes_list = create_prot_list(df1_tumor)



In [12]:
# Rebuild the comparison column list (same call as in the join cell), then run
# plot_utils' wrap_pearson_corr for EGFR_proteomics against each of them.
prot_genes_list = create_prot_list(df1_tumor)
df2 = p.wrap_pearson_corr(
    df1_tumor,
    "EGFR_proteomics",
    comparison_columns=prot_genes_list,
    correction_method='fdr_bh',
    return_all=False,
)


In [12]:
# Index by the compared column once. The guard keeps this cell re-runnable:
# a second set_index('Comparison') would raise KeyError because the column
# has already been moved into the index.
if df2.index.name != 'Comparison':
    df2 = df2.set_index('Comparison')
# Transposing turns each 'Comparison' label into a column label.
df1_transposed = df2.T

In [13]:

# The column labels of the transposed table are the compared proteins.
# They get their own name instead of rebinding `Ovar`, which holds the
# cptac.Ovarian() dataset; overwriting it made the Ovarian join cell fail on re-run.
Ovar_sig_cols = df1_transposed.columns.values.tolist()
# Remove the "_proteomics" tag so the names can be passed to gp.enrichr as gene symbols.
Ovar_genes = [re.sub("_proteomics", "", col) for col in Ovar_sig_cols]
len(Ovar_genes)

613

In [15]:
# Enrichr query of the ovarian gene list against the Reactome_2016 library.
# NOTE(review): the output saved under this cell has the same first five rows as
# the one saved in the Colon section, so it may be stale — re-run and confirm.
# Every section also shares the same outdir/description.
Ovar_enr = gp.enrichr(
    gene_list=Ovar_genes,
    description='Tumor_partition',
    gene_sets='Reactome_2016',
    outdir='test/enrichr_kegg',
)
Ovar_enr.res2d.head(10)


Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes
0,Reactome_2016,Mitochondrial translation Homo sapiens R-HSA-5...,30/90,2.3414370000000003e-23,3.5823989999999997e-20,0,0,10.875476,566.706822,GFM2;MRPL19;MRPS34;MRPL39;MRPL17;MTIF2;MRPL37;...
1,Reactome_2016,Mitochondrial translation initiation Homo sapi...,27/84,1.131038e-20,8.652439e-18,0,0,10.487066,481.655903,MRPL19;MRPS34;MRPL39;MRPL17;MTIF2;MRPL37;MTIF3...
2,Reactome_2016,Mitochondrial translation elongation Homo sapi...,27/84,1.131038e-20,5.768293e-18,0,0,10.487066,481.655903,MRPL19;MRPS34;MRPL39;MRPL17;MRPL37;MRPL15;MRPL...
3,Reactome_2016,Mitochondrial translation termination Homo sap...,26/84,1.74051e-19,6.657452e-17,0,0,10.098656,436.210828,GFM2;MRPL19;MRPS34;MRPL39;MRPL17;MRPL37;MRPL15...
4,Reactome_2016,mRNA Splicing - Major Pathway Homo sapiens R-H...,25/134,3.480804e-13,1.065126e-10,0,0,6.08702,174.614343,SF3B6;SRSF1;DDX23;HNRNPR;U2AF1;USP39;SART1;CST...
5,Reactome_2016,Processing of Capped Intron-Containing Pre-mRN...,29/193,1.415549e-12,3.609649e-10,0,0,4.902417,133.755101,SF3B6;SRSF1;DDX23;HNRNPR;U2AF1;USP39;SART1;CST...
6,Reactome_2016,mRNA Splicing Homo sapiens R-HSA-72172,25/144,1.884289e-12,4.118517e-10,0,0,5.66431,152.922051,SF3B6;SRSF1;DDX23;HNRNPR;U2AF1;USP39;SART1;CST...
7,Reactome_2016,Organelle biogenesis and maintenance Homo sapi...,37/326,7.394122e-12,1.414126e-09,0,0,3.702999,94.909119,DYNC1I2;GFM2;MRPL19;MRPS34;MRPL17;MRPL39;MTIF2...
8,Reactome_2016,Mitochondrial protein import Homo sapiens R-HS...,14/54,5.837246e-10,9.923319e-08,0,0,8.458703,179.845499,DNAJC19;TIMM8A;TOMM40;IDH3G;TIMM13;TIMM10;TOMM...
9,Reactome_2016,Metabolism of proteins Homo sapiens R-HSA-392499,66/1074,5.604597e-08,8.575033e-06,0,0,2.004976,33.477272,PIGU;SPON1;GSK3A;CXCL8;TOMM22;EEF1B2;RPS18;TIM...


# Hnscc

In [25]:

# Proteomics joined with EGFR mutation data, restricted to tumor samples.
df1 = Hnscc.join_omics_to_mutations(
    omics_df_name="proteomics",
    mutations_genes="EGFR",
)
df1_tumor = df1.loc[df1['Sample_Status'] == "Tumor"]

# The columns.droplevel(1) step used in some other sections is disabled here —
# presumably because this table's columns have a single level; confirm.
prot_genes_list = create_prot_list(df1_tumor)



In [26]:
# Rebuild the comparison column list (same call as in the join cell), then run
# plot_utils' wrap_pearson_corr for EGFR_proteomics against each of them.
prot_genes_list = create_prot_list(df1_tumor)
df2 = p.wrap_pearson_corr(
    df1_tumor,
    "EGFR_proteomics",
    comparison_columns=prot_genes_list,
    correction_method='fdr_bh',
    return_all=False,
)


In [27]:
# Index by the compared column once. The guard keeps this cell re-runnable:
# a second set_index('Comparison') would raise KeyError because the column
# has already been moved into the index.
if df2.index.name != 'Comparison':
    df2 = df2.set_index('Comparison')
# Transposing turns each 'Comparison' label into a column label.
df1_transposed = df2.T

In [28]:
# The column labels of the transposed table are the compared proteins.
# They get their own name instead of rebinding `Hnscc`, which holds the
# cptac.Hnscc() dataset; overwriting it made the Hnscc join cell fail on re-run.
Hnscc_sig_cols = df1_transposed.columns.values.tolist()
# Remove the "_proteomics" tag so the names can be passed to gp.enrichr as gene symbols.
Hnscc_genes = [re.sub("_proteomics", "", col) for col in Hnscc_sig_cols]
len(Hnscc_genes)

1074

In [29]:
# Enrichr query of the Hnscc gene list against the Reactome_2016 library.
# NOTE(review): every section passes the same outdir/description, so any report
# files written there may overwrite one another — confirm.
Hnscc_enr = gp.enrichr(
    gene_list=Hnscc_genes,
    description='Tumor_partition',
    gene_sets='Reactome_2016',
    outdir='test/enrichr_kegg',
)
Hnscc_enr.res2d.head(10)


Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes
0,Reactome_2016,Extracellular matrix organization Homo sapiens...,59/283,1.1803569999999998e-19,1.805946e-16,0,0,3.88232,169.204302,DDR1;ITGB1;SPARC;ITGB5;SERPINE1;COL12A1;ICAM3;...
1,Reactome_2016,Hemostasis Homo sapiens R-HSA-109582,81/552,1.188696e-16,9.093528e-14,0,0,2.732572,100.199337,ITGB1;SPARC;DOCK8;SERPINE1;PLEK;ATP2A3;ARRB1;A...
2,Reactome_2016,Immune System Homo sapiens R-HSA-168256,158/1547,1.170688e-15,5.97051e-13,0,0,1.901921,65.390299,CYFIP2;NCKAP1;NUP107;NCF1;WIPF1;NCF4;ARAF;INPP...
3,Reactome_2016,Cell surface interactions at the vascular wall...,30/101,5.646039e-15,2.15961e-12,0,0,5.531279,181.46923,ITGB1;F11R;ITGAL;SPN;THBD;INPP5D;BSG;ITGAV;SLC...
4,Reactome_2016,Adaptive Immune System Homo sapiens R-HSA-1280218,92/762,1.973048e-13,6.037528e-11,0,0,2.248322,65.77248,ITGB1;GSK3B;CDKN1B;NCF1;ITGB5;SH3KBP1;CLTC;NCF...
5,Reactome_2016,Signaling by Rho GTPases Homo sapiens R-HSA-19...,55/367,4.616114e-12,1.177109e-09,0,0,2.790759,72.842907,CYFIP2;ARHGAP9;ITGB1;NCKAP1;NUP107;CDKN1B;RTKN...
6,Reactome_2016,Metabolism Homo sapiens R-HSA-1430728,170/1908,1.454692e-11,3.179542e-09,0,0,1.659191,41.402846,RPL4;ALDH1L1;RPL30;NUP107;RPL3;INPPL1;MT1X;ENO...
7,Reactome_2016,"Platelet activation, signaling and aggregation...",42/253,6.10447e-11,1.16748e-08,0,0,3.091395,72.707799,SPARC;SERPINE1;PLEK;ITPR1;ITPR3;ARRB1;ARRB2;RA...
8,Reactome_2016,Collagen formation Homo sapiens R-HSA-1474290,23/85,6.925813e-11,1.177388e-08,0,0,5.038887,117.875595,COL17A1;COL28A1;COL15A1;CRTAP;LAMB3;COL11A1;CO...
9,Reactome_2016,VEGFA-VEGFR2 Pathway Homo sapiens R-HSA-4420097,48/320,1.039101e-10,1.589824e-08,0,0,2.793296,64.210881,CYFIP2;NCKAP1;NCF1;NCF4;ARAF;ITPR1;HSPB1;ITPR3...
