In [1]:

import pandas as pd
import numpy as np
import re 
import matplotlib.pyplot as plt
from scipy import stats
import gseapy as gp
from gseapy.plot import barplot, dotplot

In [2]:

# Read the GBM CSV, discard its 'Unnamed: 0' column (presumably a row index
# written out when the file was saved -- confirm), and key rows by 'Comparison'.
df = (
    pd.read_csv('GBM_EGFR_Not_interacting_pearson_sig.csv')
    .drop(columns=['Unnamed: 0'])
    .set_index('Comparison')
)
# Transposing moves the 'Comparison' labels onto the column axis; keep them as a list.
df1_transposed = df.transpose()
GBM_genes_prot = df1_transposed.columns.tolist()

In [3]:
# Remove every "_proteomics" occurrence from each label, leaving the bare gene symbol.
GBM_genes = [re.sub("_proteomics", "", label) for label in GBM_genes_prot]

GBM_genes

['PHLDA1',
 'SOCS2',
 'PLA2G15',
 'DAB2',
 'GLA',
 'CTSC',
 'CKB',
 'DCBLD2',
 'ARNT2',
 'SCPEP1',
 'ROBO2',
 'PHLDA3',
 'BLOC1S2',
 'ATP7A',
 'MEOX2',
 'CD180',
 'FAM129B',
 'SEC23IP',
 'TOR4A',
 'XRN1',
 'PCDH17',
 'LRP4',
 'ARHGAP4',
 'GPRIN3',
 'PPP1R18',
 'NEO1',
 'EPHB3',
 'KYNU',
 'LRRC75A',
 'CLIP2',
 'STAB1',
 'ARHGAP45',
 'HSD17B11',
 'SVIL',
 'DOCK10',
 'MITF',
 'SH3BP1',
 'GPHN',
 'DPP6',
 'ENG',
 'ST14',
 'ARHGAP27',
 'OSTF1',
 'APBB1IP',
 'HAAO',
 'CTSZ',
 'THEMIS2',
 'NFIA',
 'NIPSNAP2',
 'LANCL2',
 'RNF130',
 'ARHGAP15',
 'KCND3',
 'FCGRT',
 'ABCA1',
 'ZYG11B',
 'ARMT1',
 'DOCK11',
 'GGA2',
 'GBA',
 'PROCR',
 'UAP1',
 'CTNND2',
 'GMIP',
 'ARHGAP24',
 'CD4',
 'NCKAP1L',
 'ARHGAP30',
 'PI4K2A',
 'SCRN1',
 'BLOC1S1',
 'LDLRAP1',
 'NPAS3',
 'FYB1',
 'ADAP2',
 'FAM19A5',
 'DOCK8',
 'MAN2A1',
 'LCP2',
 'EHBP1L1',
 'DNAJC21',
 'ABI3',
 'RAB3IL1',
 'MLC1',
 'TRAF3IP3',
 'INPP5F',
 'MSR1',
 'TBXAS1',
 'NAIP',
 'OTULINL',
 'FGD3',
 'RIN2',
 'RGS12',
 'GAS2',
 'PHLDB2',
 'TNFAIP8'

In [4]:

# Run gseapy's Enrichr wrapper on the GBM gene list against the 'KEGG_2016' library.
# NOTE(review): every cohort in this notebook passes the same description and
# outdir, so report files on disk may overwrite one another -- confirm.
tumor_enr = gp.enrichr(
    gene_list=GBM_genes,
    gene_sets='KEGG_2016',
    description='Tumor_partition',
    outdir='test/enrichr_kegg',
)

In [5]:
# Preview the first five rows of the GBM enrichment result table.
tumor_enr.res2d.head(n=5)

Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes
0,KEGG_2016,Lysosome Homo sapiens hsa04142,10/123,2.121576e-09,6.216218e-07,0,0,14.263301,284.853896,GGA2;PLA2G15;GAA;HEXB;CTSZ;GBA;MAN2B1;GLA;CTSS...
1,KEGG_2016,Other glycan degradation Homo sapiens hsa00511,3/18,0.000138267,0.02025612,0,0,29.239766,259.834025,HEXB;GBA;MAN2B1
2,KEGG_2016,Galactose metabolism Homo sapiens hsa00052,3/30,0.0006545248,0.06392525,0,0,17.54386,128.624581,B4GALT1;GAA;GLA
3,KEGG_2016,N-Glycan biosynthesis Homo sapiens hsa00510,3/49,0.002745789,0.201129,0,0,10.741139,63.347873,ST6GAL1;B4GALT1;MAN2A1
4,KEGG_2016,Glycosphingolipid biosynthesis - globo series ...,2/14,0.002802403,0.1642208,0,0,25.062657,147.300199,HEXB;GLA


# Kidney

In [6]:

# Read the Kidney CSV, discard its 'Unnamed: 0' column (presumably a saved row
# index -- confirm), and key rows by 'Comparison'.
df = (
    pd.read_csv('Kidney_EGFR_Not_interacting_pearson_sig.csv')
    .drop(columns=['Unnamed: 0'])
    .set_index('Comparison')
)
# After the transpose the 'Comparison' labels are the column names; list them.
df1_transposed = df.transpose()
Kidney_genes_prot = df1_transposed.columns.tolist()


In [7]:
# Remove every "_proteomics" occurrence from each label, leaving the bare gene symbol.
Kidney_genes = [re.sub("_proteomics", "", label) for label in Kidney_genes_prot]
    


In [8]:

# Run gseapy's Enrichr wrapper on the Kidney gene list against the 'KEGG_2016' library.
# NOTE(review): description and outdir match the other cohorts' calls in this
# notebook, so report files on disk may overwrite one another -- confirm.
tumor_enr = gp.enrichr(
    gene_list=Kidney_genes,
    gene_sets='KEGG_2016',
    description='Tumor_partition',
    outdir='test/enrichr_kegg',
)

In [9]:
# Preview the first five rows of the Kidney enrichment result table.
tumor_enr.res2d.head(n=5)

Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes
0,KEGG_2016,Parkinson's disease Homo sapiens hsa05012,6/142,7e-06,0.002161,0,0,12.804097,151.312247,COX7A2L;APAF1;UQCRB;NDUFS4;NDUFA4L2;NDUFS1
1,KEGG_2016,Alzheimer's disease Homo sapiens hsa05010,6/168,1.9e-05,0.002818,0,0,10.822511,117.518878,COX7A2L;APAF1;UQCRB;NDUFS4;NDUFA4L2;NDUFS1
2,KEGG_2016,Huntington's disease Homo sapiens hsa05016,6/193,4.2e-05,0.004098,0,0,9.420631,94.948784,COX7A2L;APAF1;UQCRB;NDUFS4;NDUFA4L2;NDUFS1
3,KEGG_2016,Oxidative phosphorylation Homo sapiens hsa00190,5/133,7.8e-05,0.005702,0,0,11.392117,107.777958,COX7A2L;UQCRB;NDUFS4;NDUFA4L2;NDUFS1
4,KEGG_2016,Non-alcoholic fatty liver disease (NAFLD) Homo...,5/151,0.000142,0.008297,0,0,10.034116,88.927786,COX7A2L;UQCRB;NDUFS4;NDUFA4L2;NDUFS1


# Ovarian

In [10]:

# Read the Ovarian CSV, discard its 'Unnamed: 0' column (presumably a saved row
# index -- confirm), and key rows by 'Comparison'.
df = (
    pd.read_csv('Ovar_EGFR_Not_interacting_pearson_sig.csv')
    .drop(columns=['Unnamed: 0'])
    .set_index('Comparison')
)
# After the transpose the 'Comparison' labels are the column names; list them.
df1_transposed = df.transpose()
Ovar_genes_prot = df1_transposed.columns.tolist()

In [11]:
# Remove every "_proteomics" occurrence from each label, leaving the bare gene symbol.
Ovar_genes = [re.sub("_proteomics", "", label) for label in Ovar_genes_prot]
    

In [12]:

# Run gseapy's Enrichr wrapper on the Ovarian gene list against the 'KEGG_2016' library.
# NOTE(review): description and outdir match the other cohorts' calls in this
# notebook, so report files on disk may overwrite one another -- confirm.
tumor_enr = gp.enrichr(
    gene_list=Ovar_genes,
    gene_sets='KEGG_2016',
    description='Tumor_partition',
    outdir='test/enrichr_kegg',
)



In [13]:
# Preview the first five rows of the Ovarian enrichment result table.
tumor_enr.res2d.head(n=5)

Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes
0,KEGG_2016,One carbon pool by folate Homo sapiens hsa00670,1/20,0.007973,1.0,0,0,125.0,603.956216,ALDH1L1
1,KEGG_2016,Porphyrin and chlorophyll metabolism Homo sapi...,1/42,0.01668,1.0,0,0,59.52381,243.664108,CP
2,KEGG_2016,Insulin secretion Homo sapiens hsa04911,1/85,0.033504,1.0,0,0,29.411765,99.88492,TRPM4


# Colon 

In [14]:

# Read the Colon CSV, discard its 'Unnamed: 0' column (presumably a saved row
# index -- confirm), and key rows by 'Comparison'.
df = (
    pd.read_csv('Colon_EGFR_Not_interacting_pearson_sig.csv')
    .drop(columns=['Unnamed: 0'])
    .set_index('Comparison')
)
# After the transpose the 'Comparison' labels are the column names; list them.
df1_transposed = df.transpose()
Colon_genes_prot = df1_transposed.columns.tolist()

In [15]:
# Remove every "_proteomics" occurrence from each label, leaving the bare gene symbol.
Colon_genes = [re.sub("_proteomics", "", label) for label in Colon_genes_prot]
    

In [16]:

# Run gseapy's Enrichr wrapper on the Colon gene list against the 'KEGG_2016' library.
# NOTE(review): description and outdir match the other cohorts' calls in this
# notebook, so report files on disk may overwrite one another -- confirm.
tumor_enr = gp.enrichr(
    gene_list=Colon_genes,
    gene_sets='KEGG_2016',
    description='Tumor_partition',
    outdir='test/enrichr_kegg',
)



In [17]:
# Preview the first five rows of the Colon enrichment result table.
tumor_enr.res2d.head(n=5)

Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes
0,KEGG_2016,Ribosome biogenesis in eukaryotes Homo sapiens...,2/89,0.001721,0.504226,0,0,32.102729,204.330776,UTP6;GAR1
1,KEGG_2016,RNA transport Homo sapiens hsa03013,2/172,0.006252,0.915907,0,0,16.611296,84.300099,NUP153;RAE1
2,KEGG_2016,Huntington's disease Homo sapiens hsa05016,2/193,0.007811,0.762876,0,0,14.803849,71.831526,NDUFAB1;TFAM
3,KEGG_2016,Other types of O-glycan biosynthesis Homo sapi...,1/31,0.021489,1.0,0,0,46.082949,176.967364,POFUT1
4,KEGG_2016,Oxidative phosphorylation Homo sapiens hsa00190,1/133,0.089208,1.0,0,0,10.741139,25.95898,NDUFAB1


# BRCA (breast cancer)

In [18]:

# Read the Brca CSV, discard its 'Unnamed: 0' column (presumably a saved row
# index -- confirm), and key rows by 'Comparison'.
df = (
    pd.read_csv('Brca_EGFR_Not_interacting_pearson_sig.csv')
    .drop(columns=['Unnamed: 0'])
    .set_index('Comparison')
)
# After the transpose the 'Comparison' labels are the column names; list them.
df1_transposed = df.transpose()
Brca_genes_prot = df1_transposed.columns.tolist()

In [19]:
# Remove every "_proteomics" occurrence from each label, leaving the bare gene symbol.
Brca_genes = [re.sub("_proteomics", "", label) for label in Brca_genes_prot]

In [20]:

# Run gseapy's Enrichr wrapper on the Brca gene list against the 'KEGG_2016' library.
# NOTE(review): description and outdir match the other cohorts' calls in this
# notebook, so report files on disk may overwrite one another -- confirm.
tumor_enr = gp.enrichr(
    gene_list=Brca_genes,
    gene_sets='KEGG_2016',
    description='Tumor_partition',
    outdir='test/enrichr_kegg',
)



In [21]:
# Preview the first five rows of the Brca enrichment result table.
tumor_enr.res2d.head(n=5)

Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes
0,KEGG_2016,"Alanine, aspartate and glutamate metabolism Ho...",5/35,0.000198,0.057962,0,0,9.157509,78.096509,GPT2;GFPT2;CAD;ABAT;GLS
1,KEGG_2016,2-Oxocarboxylic acid metabolism Homo sapiens h...,3/17,0.002174,0.318465,0,0,11.312217,69.358224,AADAT;GPT2;BCAT2
2,KEGG_2016,Metabolic pathways Homo sapiens hsa01100,31/1239,0.006482,0.633036,0,0,1.603858,8.081498,HAAO;COX17;NDUFA4L2;ABAT;ENO1;NT5C2;GLS;NSDHL;...
3,KEGG_2016,Lysine degradation Homo sapiens hsa00310,4/52,0.008737,0.639985,0,0,4.930966,23.373713,AADAT;PLOD3;PLOD1;COLGALT1
4,KEGG_2016,Nicotinate and nicotinamide metabolism Homo sa...,3/29,0.010178,0.596408,0,0,6.6313,30.42152,NAMPT;NT5C2;NUDT12


# LUAD (lung adenocarcinoma)

In [22]:

# Read the Luad CSV, discard its 'Unnamed: 0' column (presumably a saved row
# index -- confirm), and key rows by 'Comparison'.
df = (
    pd.read_csv('Luad_EGFR_Not_interacting_pearson_sig.csv')
    .drop(columns=['Unnamed: 0'])
    .set_index('Comparison')
)
# After the transpose the 'Comparison' labels are the column names; list them.
df1_transposed = df.transpose()
Luad_genes_prot = df1_transposed.columns.tolist()

In [23]:
# Remove every "_proteomics" occurrence from each label, leaving the bare gene symbol.
Luad_genes = [re.sub("_proteomics", "", label) for label in Luad_genes_prot]

In [24]:

# Run gseapy's Enrichr wrapper on the Luad gene list against the 'KEGG_2016' library.
# NOTE(review): description and outdir match the other cohorts' calls in this
# notebook, so report files on disk may overwrite one another -- confirm.
tumor_enr = gp.enrichr(
    gene_list=Luad_genes,
    gene_sets='KEGG_2016',
    description='Tumor_partition',
    outdir='test/enrichr_kegg',
)



In [25]:
# Preview the first five rows of the Luad enrichment result table.
tumor_enr.res2d.head(n=5)

Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes
0,KEGG_2016,Porphyrin and chlorophyll metabolism Homo sapi...,2/42,0.011975,1.0,0,0,12.055455,53.344391,HMOX2;BLVRA
1,KEGG_2016,Mineral absorption Homo sapiens hsa04978,2/51,0.017333,1.0,0,0,9.928022,40.259582,SLC34A2;HMOX2
2,KEGG_2016,mRNA surveillance pathway Homo sapiens hsa03015,2/91,0.050334,1.0,0,0,5.564056,16.631376,ETF1;GSPT1
3,KEGG_2016,Steroid biosynthesis Homo sapiens hsa00100,1/20,0.076139,1.0,0,0,12.658228,32.597374,NSDHL
4,KEGG_2016,Wnt signaling pathway Homo sapiens hsa04310,2/142,0.108414,1.0,0,0,3.565698,7.922263,DAAM1;GPC4


# HNSCC (head and neck squamous cell carcinoma)

In [30]:

# Read the Hnscc CSV, discard its 'Unnamed: 0' column (presumably a saved row
# index -- confirm), and key rows by 'Comparison'.
df = (
    pd.read_csv('Hnscc_EGFR_Not_interacting_pearson_sig.csv')
    .drop(columns=['Unnamed: 0'])
    .set_index('Comparison')
)
# After the transpose the 'Comparison' labels are the column names; list them.
df1_transposed = df.transpose()
Hnscc_genes_prot = df1_transposed.columns.tolist()

In [31]:
# Remove every "_proteomics" occurrence from each label, leaving the bare gene symbol.
Hnscc_genes = [re.sub("_proteomics", "", label) for label in Hnscc_genes_prot]

In [32]:

# Run gseapy's Enrichr wrapper on the Hnscc gene list against the 'KEGG_2016' library.
# NOTE(review): description and outdir match the other cohorts' calls in this
# notebook, so report files on disk may overwrite one another -- confirm.
tumor_enr = gp.enrichr(
    gene_list=Hnscc_genes,
    gene_sets='KEGG_2016',
    description='Tumor_partition',
    outdir='test/enrichr_kegg',
)



In [33]:
# Preview the first five rows of the Hnscc enrichment result table.
tumor_enr.res2d.head(n=5)

Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes
0,KEGG_2016,B cell receptor signaling pathway Homo sapiens...,3/73,0.0041,1.0,0,0,9.339975,51.340292,INPP5D;BTK;NFATC2
1,KEGG_2016,cGMP-PKG signaling pathway Homo sapiens hsa04022,4/167,0.006348,0.929923,0,0,5.443658,27.543161,ITPR1;ATP2B4;ATP2A3;NFATC2
2,KEGG_2016,Pancreatic secretion Homo sapiens hsa04972,3/96,0.008761,0.855629,0,0,7.102273,33.646863,ITPR1;ATP2B4;ATP2A3
3,KEGG_2016,Phosphatidylinositol signaling system Homo sap...,3/98,0.009267,0.67884,0,0,6.957328,32.568983,INPP5D;ITPR1;PIP4K2A
4,KEGG_2016,Platelet activation Homo sapiens hsa04611,3/122,0.016687,0.97788,0,0,5.588674,22.875019,ITPR1;BTK;FERMT3
