In [2]:

import pandas as pd
import numpy as np
import re 
import matplotlib.pyplot as plt
from scipy import stats
import gseapy as gp
from gseapy.plot import barplot, dotplot

# Enrichment Analysis for all proteins Pos/Neg corr

Read in the file from Step 3.3, which contains all of the proteins that have positive or negative correlations depending on tumor type. Then set the `Comparison` column as the index and transpose the data frame so that the columns are gene names.

In [2]:
# Load the Step 3.3 positive/negative-correlation table, key it by the
# 'Comparison' column, then transpose so the former index labels become columns.
df = (
    pd.read_csv("../Step3.3_Pos_Neg_Correlation_patterns/csv_files/pancan_EGFR_all_pos_neg_FDR.csv")
    .set_index('Comparison')
)
df1_transposed = df.transpose()

Get the column names and remove the `_proteomics` suffix from each name to obtain the gene names.

In [3]:

# Column labels carry a "_proteomics" tag; remove it to leave the bare names.
FDR_pos_neg = df1_transposed.columns.tolist()
pos_neg_genes = [re.sub("_proteomics", "", label) for label in FDR_pos_neg]
len(pos_neg_genes)

479

Run enrichment analysis 

In [4]:

# Run Enrichr on the pos/neg gene list against the Reactome_2016 library.
# The report directory is labelled after the library actually queried
# (this call used to write Reactome results under 'test/enrichr_kegg').
tumor_enr = gp.enrichr(gene_list = pos_neg_genes, description='Tumor_partition', gene_sets='Reactome_2016', 
                       outdir='test/enrichr_reactome')

In [8]:
# Display the gene-set library names returned by gseapy.
gp.get_library_name()

['ARCHS4_Cell-lines',
 'ARCHS4_IDG_Coexp',
 'ARCHS4_Kinases_Coexp',
 'ARCHS4_TFs_Coexp',
 'ARCHS4_Tissues',
 'Achilles_fitness_decrease',
 'Achilles_fitness_increase',
 'Aging_Perturbations_from_GEO_down',
 'Aging_Perturbations_from_GEO_up',
 'Allen_Brain_Atlas_down',
 'Allen_Brain_Atlas_up',
 'BioCarta_2013',
 'BioCarta_2015',
 'BioCarta_2016',
 'BioPlanet_2019',
 'BioPlex_2017',
 'CCLE_Proteomics_2020',
 'CORUM',
 'Cancer_Cell_Line_Encyclopedia',
 'ChEA_2013',
 'ChEA_2015',
 'ChEA_2016',
 'Chromosome_Location',
 'Chromosome_Location_hg19',
 'ClinVar_2019',
 'DSigDB',
 'Data_Acquisition_Method_Most_Popular_Genes',
 'DepMap_WG_CRISPR_Screens_Broad_CellLines_2019',
 'DepMap_WG_CRISPR_Screens_Sanger_CellLines_2019',
 'DisGeNET',
 'Disease_Perturbations_from_GEO_down',
 'Disease_Perturbations_from_GEO_up',
 'Disease_Signatures_from_GEO_down_2014',
 'Disease_Signatures_from_GEO_up_2014',
 'DrugMatrix',
 'Drug_Perturbations_from_GEO_2014',
 'Drug_Perturbations_from_GEO_down',
 'Drug_Perturb

In [5]:
# Show up to the first 20 rows of the current enrichment results table.
tumor_enr.res2d.head(20)

Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes
0,Reactome_2016,Hemostasis Homo sapiens R-HSA-109582,43/552,1.158587e-11,1.772638e-08,0,0,3.252549,81.903202,ITGB1;ITIH4;ITGAM;SERPINC1;DGKB;DGKA;PROS1;ITG...
1,Reactome_2016,Formation of Fibrin Clot (Clotting Cascade) Ho...,12/39,6.754022e-11,5.166827e-08,0,0,12.847278,300.861383,FGB;PROCR;PROC;VWF;F10;SERPIND1;SERPINC1;PROS1...
2,Reactome_2016,Immune System Homo sapiens R-HSA-168256,77/1547,5.837734e-10,2.977244e-07,0,0,2.078236,44.186435,ITGB1;LRSAM1;ITGAM;AHCYL1;ITGB5;PROS1;WIPF2;IT...
3,Reactome_2016,Intrinsic Pathway of Fibrin Clot Formation Hom...,9/22,9.072196e-10,3.470115e-07,0,0,17.08104,355.638126,PROC;VWF;F10;SERPIND1;SERPINC1;F2;A2M;KLKB1;KNG1
4,Reactome_2016,Innate Immune System Homo sapiens R-HSA-168249,48/807,6.870636e-09,2.102415e-06,0,0,2.483489,46.679676,ITGAM;AHCYL1;UBA7;PROS1;WIPF2;ITGB3;ICAM3;ITPR...
5,Reactome_2016,"Platelet activation, signaling and aggregation...",24/253,1.115108e-08,2.843526e-06,0,0,3.960821,72.52948,FGB;ITIH4;VWF;PRKCB;DGKB;DGKA;PROS1;ITGB3;SERP...
6,Reactome_2016,Common Pathway of Fibrin Clot Formation Homo s...,8/22,2.431173e-08,5.313849e-06,0,0,15.183147,266.19559,FGB;PROCR;PROC;F10;SERPIND1;SERPINC1;PROS1;F2
7,Reactome_2016,Response to elevated platelet cytosolic Ca2+ H...,15/110,5.755021e-08,1.100648e-05,0,0,5.69368,94.917108,FGB;ITIH4;VWF;PRKCB;PROS1;ITGB3;SERPINE1;A1BG;...
8,Reactome_2016,Regulation of Complement cascade Homo sapiens ...,8/26,1.091723e-07,1.855928e-05,0,0,12.847278,205.946219,C3;C5;C8G;C6;PROS1;C8A;CFB;C2
9,Reactome_2016,Platelet degranulation Homo sapiens R-HSA-114608,14/105,2.119555e-07,3.242919e-05,0,0,5.567154,85.549837,FGB;ITIH4;VWF;PROS1;ITGB3;SERPINE1;A1BG;CLU;KN...


In [21]:
# Pull ';'-separated gene strings for selected rows of the current enrichment
# table. The column is addressed by its name ('Genes') instead of position 9,
# so a change in column layout cannot silently pick a different column.
enrich_df = tumor_enr.res2d

# NOTE(review): in the Reactome_2016 table displayed earlier in this notebook,
# rows 0-2 are Hemostasis, Formation of Fibrin Clot and Immune System, so the
# three names below look carried over from another library's results.
# Names are kept unchanged in case other cells use them -- confirm before reuse.
complement_genes = enrich_df['Genes'].iloc[0]
Endocytosis_genes = enrich_df['Genes'].iloc[1]
Actin_genes = enrich_df['Genes'].iloc[2]

# Rows 5, 7 and 9 are the platelet-related terms in that table; split each
# gene string into a list of symbols.
platlet_genes, platlet_genes2, platlet_genes3 = (
    enrich_df['Genes'].iloc[row].split(';') for row in (5, 7, 9)
)

# Union of the three lists; np.unique returns the sorted, de-duplicated symbols.
platlet_genes_all = platlet_genes + platlet_genes2 + platlet_genes3
allplatlet = np.unique(platlet_genes_all)
allplatlet

array(['A1BG', 'A2M', 'APBB1IP', 'CLU', 'DGKA', 'DGKB', 'F2', 'FGB',
       'GNG2', 'GNG7', 'ITGB3', 'ITIH4', 'ITPR3', 'KNG1', 'PECAM1',
       'PFN1', 'PLA2G4A', 'PRKCB', 'PROS1', 'QSOX1', 'RASGRP2',
       'SERPINE1', 'VCL', 'VWF'], dtype='<U8')

In [8]:

# Reactome_2016 enrichment of the pos/neg gene list; reports are written
# under test/enrichr_reactome.
tumor_enr = gp.enrichr(
    gene_list=pos_neg_genes,
    description='Tumor_partition',
    gene_sets='Reactome_2016',
    outdir='test/enrichr_reactome',
)

In [11]:
# Show the first 10 rows of the current enrichment results table.
tumor_enr.res2d.head(10)

Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes
0,Reactome_2016,Hemostasis Homo sapiens R-HSA-109582,43/552,1.158587e-11,1.772638e-08,0,0,3.252549,81.903202,ITGB1;ITIH4;ITGAM;SERPINC1;DGKB;DGKA;PROS1;ITG...
1,Reactome_2016,Formation of Fibrin Clot (Clotting Cascade) Ho...,12/39,6.754022e-11,5.166827e-08,0,0,12.847278,300.861383,FGB;PROCR;PROC;VWF;F10;SERPIND1;SERPINC1;PROS1...
2,Reactome_2016,Immune System Homo sapiens R-HSA-168256,77/1547,5.837734e-10,2.977244e-07,0,0,2.078236,44.186435,ITGB1;LRSAM1;ITGAM;AHCYL1;ITGB5;PROS1;WIPF2;IT...
3,Reactome_2016,Intrinsic Pathway of Fibrin Clot Formation Hom...,9/22,9.072196e-10,3.470115e-07,0,0,17.08104,355.638126,PROC;VWF;F10;SERPIND1;SERPINC1;F2;A2M;KLKB1;KNG1
4,Reactome_2016,Innate Immune System Homo sapiens R-HSA-168249,48/807,6.870636e-09,2.102415e-06,0,0,2.483489,46.679676,ITGAM;AHCYL1;UBA7;PROS1;WIPF2;ITGB3;ICAM3;ITPR...
5,Reactome_2016,"Platelet activation, signaling and aggregation...",24/253,1.115108e-08,2.843526e-06,0,0,3.960821,72.52948,FGB;ITIH4;VWF;PRKCB;DGKB;DGKA;PROS1;ITGB3;SERP...
6,Reactome_2016,Common Pathway of Fibrin Clot Formation Homo s...,8/22,2.431173e-08,5.313849e-06,0,0,15.183147,266.19559,FGB;PROCR;PROC;F10;SERPIND1;SERPINC1;PROS1;F2
7,Reactome_2016,Response to elevated platelet cytosolic Ca2+ H...,15/110,5.755021e-08,1.100648e-05,0,0,5.69368,94.917108,FGB;ITIH4;VWF;PRKCB;PROS1;ITGB3;SERPINE1;A1BG;...
8,Reactome_2016,Regulation of Complement cascade Homo sapiens ...,8/26,1.091723e-07,1.855928e-05,0,0,12.847278,205.946219,C3;C5;C8G;C6;PROS1;C8A;CFB;C2
9,Reactome_2016,Platelet degranulation Homo sapiens R-HSA-114608,14/105,2.119555e-07,3.242919e-05,0,0,5.567154,85.549837,FGB;ITIH4;VWF;PROS1;ITGB3;SERPINE1;A1BG;CLU;KN...


In [12]:
# Gene strings for two rows of the current enrichment table. In the
# Reactome_2016 table displayed just above, row 0 is Hemostasis and row 2 is
# Immune System. The 'Genes' column is selected by name rather than by
# position 9 so a column-layout change cannot pick the wrong field.
enrich_df = tumor_enr.res2d
hemostasis_genes = enrich_df['Genes'].iloc[0]
immune_system = enrich_df['Genes'].iloc[2]
immune_system

'ITGB1;LRSAM1;ITGAM;AHCYL1;ITGB5;PROS1;WIPF2;ITGB3;NCF4;ICAM3;PTPRJ;IFI30;CLU;C8A;PVR;PSTPIP1;ACTR1A;C8G;CASP10;MRC1;FBXO6;LBP;KPNA3;CTSC;CTSB;FGB;ACTR2;CXADR;VWF;PRKCB;PPP2R5A;IL16;GFRA1;CYBA;TMEM173;APBB1IP;HCK;KIT;RAPGEF2;BPI;CFB;VCL;TLR2;UBA7;ITPR3;BCL10;RASAL3;RASGRP2;MALT1;C2;C3;C5;NRAS;C6;ERBB3;STAT6;FLNB;GBP4;AP1M2;LAIR1;VASP;ARPC4;ARPC5;BAIAP2;LILRB4;NFKB2;ISG20;FCGR2A;ARPC3;NEDD4;CAPZA1;REL;STUB1;FCGR2B;PIK3AP1;PTPN2;LGMN'

# Enrichment Analysis all prot 

Now load the data frame of proteins that show the same correlation pattern across cancers (significant in at least 2). Follow the same steps as above.

In [3]:
# Load the "same correlation in at least 2" table, key it by 'Comparison',
# transpose it, and collect the resulting column labels.
df = (
    pd.read_csv("../Step3.3_Pos_Neg_Correlation_patterns/csv_files/pancan_EGFR_all_FDR_atleast2_same_corr.csv")
    .set_index('Comparison')
)
df1_transposed = df.transpose()
FDR_atleast2 = df1_transposed.columns.tolist()

In [4]:
# Remove the "_proteomics" tag from every label and report how many names result.
atleast2_sig = [re.sub("_proteomics", "", label) for label in FDR_atleast2]
len(atleast2_sig)

731

In [10]:

# KEGG_2016 enrichment of the at-least-2-significant gene list.
tumor_enr = gp.enrichr(
    gene_list=atleast2_sig,
    description='Tumor_partition',
    gene_sets='KEGG_2016',
    outdir='test/enrichr_kegg',
)

In [11]:
# Show the first 10 rows of the current enrichment results table.
tumor_enr.res2d.head(10)

Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes
0,KEGG_2016,T cell receptor signaling pathway Homo sapiens...,18/104,3.843089e-08,1.1e-05,0,0,4.735347,80.853225,JUN;NFATC2;PIK3CD;CD3G;PIK3CG;VAV1;MAPK13;ZAP7...
1,KEGG_2016,B cell receptor signaling pathway Homo sapiens...,15/73,4.867319e-08,7e-06,0,0,5.621873,94.661868,CR2;JUN;SYK;PIK3CD;NFATC2;PIK3CG;VAV1;PPP3CC;I...
2,KEGG_2016,Natural killer cell mediated cytotoxicity Homo...,20/135,1.017982e-07,1e-05,0,0,4.053301,65.259253,SYK;IFNGR1;SH2D1A;NFATC2;PIK3CD;ITGAL;PIK3CG;V...
3,KEGG_2016,Glycolysis / Gluconeogenesis Homo sapiens hsa0...,14/67,1.089237e-07,8e-06,0,0,5.716969,91.657983,GPI;ADH1C;ADH1B;ENO1;HK2;PKM;ALDH1B1;PGK1;ALDO...
4,KEGG_2016,Fc gamma R-mediated phagocytosis Homo sapiens ...,16/93,2.376797e-07,1.4e-05,0,0,4.707059,71.793675,NCF1;SYK;MYO10;SPHK1;WAS;PIK3CD;PLD1;PIK3CG;VA...
5,KEGG_2016,Pathways in cancer Homo sapiens hsa05200,35/397,1.561655e-06,7.6e-05,0,0,2.412071,32.248826,RALA;SPI1;CDKN1B;SLC2A1;PIK3CD;LAMC2;PLD1;PIK3...
6,KEGG_2016,Proteoglycans in cancer Homo sapiens hsa05205,23/203,1.593929e-06,6.7e-05,0,0,3.099877,41.38121,FZD1;DDX5;CAMK2D;HGF;ITGA2;FZD6;WNT5A;RRAS2;PI...
7,KEGG_2016,Arrhythmogenic right ventricular cardiomyopath...,13/74,2.497948e-06,9.1e-05,0,0,4.806448,62.003377,ITGA4;ITGB4;ITGA2;GJA1;CDH2;CTNNA1;ITGB8;CTNNB...
8,KEGG_2016,Metabolic pathways Homo sapiens hsa01100,77/1239,3.34583e-06,0.000109,0,0,1.700325,21.437355,GPI;ACAA2;AKR1B1;ABAT;PYGM;MSMO1;ENO1;PYGL;PFA...
9,KEGG_2016,Carbon metabolism Homo sapiens hsa01200,16/113,3.507929e-06,0.000103,0,0,3.873951,48.658706,GPI;H6PD;ENO1;HK2;ALDH6A1;PKM;PC;PSAT1;PGK1;PH...


In [39]:
# Gene string for row 0 of the current enrichment table (the T cell receptor
# signaling term in the KEGG_2016 table displayed above). The column is
# selected by name rather than by position 9.
enrich_df = tumor_enr.res2d
T_cell_genes = enrich_df['Genes'].iloc[0]
T_cell_genes

'JUN;NFATC2;PIK3CD;CD3G;PIK3CG;VAV1;MAPK13;ZAP70;CD4;PTPRC;PPP3CC;LCK;GRAP2;PRKCQ;GRB2;LCP2;PTPN6;CARD11'

In [40]:
# Gene string for row 2 of the current enrichment table, selected from the
# 'Genes' column by name rather than by position 9.
# NOTE(review): in the KEGG_2016 table displayed above, row 2 is "Natural
# killer cell mediated cytotoxicity" (NK cells), despite the variable name.
Killer_T_cell_genes = enrich_df['Genes'].iloc[2]
Killer_T_cell_genes

'SYK;IFNGR1;SH2D1A;NFATC2;PIK3CD;ITGAL;PIK3CG;VAV1;ICAM1;ZAP70;FCGR3A;PPP3CC;LCK;PLCG2;RAC2;PTK2B;FAS;GRB2;LCP2;PTPN6'

In [9]:

# NCI-Nature_2016 enrichment of the pos/neg gene list.
# NOTE(review): the output directory is labelled "kegg" although the library
# is NCI-Nature_2016, and this section otherwise works with atleast2_sig --
# confirm that both the directory and the gene list are the intended ones.
tumor_enr = gp.enrichr(
    gene_list=pos_neg_genes,
    description='Tumor_partition',
    gene_sets='NCI-Nature_2016',
    outdir='test/enrichr_kegg',
)

In [11]:
# Keep a handle on the current results table and show its first 10 rows.
enrich_df = tumor_enr.res2d
enrich_df.head(10)

Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes
0,NCI-Nature_2016,Beta2 integrin cell surface interactions Homo ...,9/29,1.573383e-08,3e-06,0,0,12.95803,232.822798,C3;FGB;ITGAM;PROC;FCGR2A;F10;PLAUR;ICAM3;KNG1
1,NCI-Nature_2016,Urokinase-type plasminogen activator (uPA) and...,8/42,5.891132e-06,0.000616,0,0,7.953077,95.771447,ITGB1;FGB;ITGAM;ITGB5;ITGA3;ITGB3;SERPINE1;PLAUR
2,NCI-Nature_2016,Syndecan-1-mediated signaling events Homo sapi...,8/46,1.197536e-05,0.000834,0,0,7.261505,82.292164,COL15A1;COL11A1;COL6A2;COL12A1;COL5A2;CASK;SDC...
3,NCI-Nature_2016,Integrins in angiogenesis Homo sapiens 2ddeac8...,9/72,5.370179e-05,0.002806,0,0,5.219207,51.315575,COL15A1;ITGB3;COL6A2;COL11A1;COL12A1;COL5A2;SD...
4,NCI-Nature_2016,Beta1 integrin cell surface interactions Homo ...,8/66,0.0001735009,0.007252,0,0,5.061049,43.825282,ITGB1;FGB;ITGA3;COL6A2;COL11A1;COL5A2;PLAUR;NID1
5,NCI-Nature_2016,Signaling events mediated by VEGFR1 and VEGFR2...,8/68,0.0002141778,0.007461,0,0,4.912195,41.501678,PRKCB;FES;ITGB3;CAV1;NEDD4;MYOF;PTPRJ;VCL
6,NCI-Nature_2016,Integrin family cell surface interactions Homo...,5/26,0.0003347846,0.009996,0,0,8.029549,64.252636,ITGB1;ITGAM;ITGB5;ITGA3;ITGB3
7,NCI-Nature_2016,Beta3 integrin cell surface interactions Homo ...,6/43,0.0005258115,0.013737,0,0,5.826091,43.990296,FGB;ITGB3;PLAUR;PECAM1;SDC1;PVR
8,NCI-Nature_2016,PDGFR-beta signaling pathway Homo sapiens c901...,10/128,0.001033619,0.024003,0,0,3.262004,22.425265,ACTR2;HCK;NRAS;ARPC3;ITGB3;PLA2G4A;PTPRJ;ARPC5...
9,NCI-Nature_2016,PAR1-mediated thrombin signaling events Homo s...,5/43,0.00350969,0.073353,0,0,4.855076,27.441995,VASP;GNG2;PRKCB;ARHGEF1;F2


In [12]:
# Gene string for row 3 of the current enrichment table ("Integrins in
# angiogenesis" in the NCI-Nature_2016 table displayed above), selected from
# the 'Genes' column by name rather than by position 9.
angio_genes = enrich_df['Genes'].iloc[3]

# GBM positive  

In [17]:
# Load the GBM positive-correlation table, key it by 'Comparison', transpose
# it, and display the transposed frame.
df = (
    pd.read_csv("../Step3.3_Pos_Neg_Correlation_patterns/csv_files/pancan_EGFR_Gbm_pos.csv")
    .set_index('Comparison')
)
df1_transposed = df.transpose()
df1_transposed


Comparison,PHLDA1_proteomics,SOCS2_proteomics,CDH4_proteomics,CKB_proteomics,ARNT2_proteomics,ROBO2_proteomics,PHLDA3_proteomics,MEOX2_proteomics,PCDH17_proteomics,LRP4_proteomics,...,GNAI3_proteomics,RIC3_proteomics,PITPNA_proteomics,HDGFL2_proteomics,DNAJC5_proteomics,GAS1_proteomics,PGAP1_proteomics,KCTD3_proteomics,GSTCD_proteomics,APBA2_proteomics
Correlation_Gbm,0.816848,0.56272,0.55918,0.544246,0.542079,0.527128,0.525883,0.545863,0.506045,0.500896,...,0.269345,0.36255,0.268927,0.268911,0.26884,0.294941,0.268767,0.268664,0.268529,0.268245


In [18]:

# Collect the column labels, remove the "_proteomics" tag from each, and
# report how many names result.
all_prot = df1_transposed.columns.tolist()
genes = [re.sub("_proteomics", "", label) for label in all_prot]
len(genes)

573

In [23]:

# KEGG_2016 enrichment of the GBM positive-correlation gene list.
# NOTE(review): library, description and outdir are the same as the earlier
# KEGG_2016 run on atleast2_sig -- check whether the two runs' report files
# overwrite each other on disk.
tumor_enr = gp.enrichr(
    gene_list=genes,
    description='Tumor_partition',
    gene_sets='KEGG_2016',
    outdir='test/enrichr_kegg',
)

In [24]:
# Keep a handle on the current results table and show its first 10 rows.
enrich_df = tumor_enr.res2d
enrich_df.head(10)

Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes
0,KEGG_2016,"Glycine, serine and threonine metabolism Homo ...",13/40,4.532055e-11,1.327892e-08,0,0,11.343805,270.178348,GAMT;GCAT;GCSH;SRR;GATM;CBS;PSAT1;CTH;CHDH;PHG...
1,KEGG_2016,Tight junction Homo sapiens hsa04530,15/139,1.110592e-05,0.001627017,0,0,3.76662,42.969726,MAGI1;YES1;ACTN2;SHROOM2;GNAI3;CASK;MPP5;TJP1;...
2,KEGG_2016,Hippo signaling pathway Homo sapiens hsa04390,15/153,3.494431e-05,0.003412894,0,0,3.421962,35.115337,YWHAE;APC2;BMPR2;YWHAB;MPP5;BMP7;PPP1CC;DLG1;C...
3,KEGG_2016,Proximal tubule bicarbonate reclamation Homo s...,6/23,3.587391e-05,0.002627764,0,0,9.105395,93.198273,GLUD1;ATP1A2;ATP1B2;SLC38A3;SLC4A4;AQP1
4,KEGG_2016,Pathways in cancer Homo sapiens hsa05200,25/397,0.0002154739,0.01262677,0,0,2.197986,18.55687,RB1;CTBP1;GNAI3;PDGFA;ADCY8;RASGRP2;RASGRP1;ED...
5,KEGG_2016,Arginine and proline metabolism Homo sapiens h...,7/50,0.0005239063,0.02558409,0,0,4.886562,36.914055,GAMT;ALDH4A1;GATM;NOS2;CKB;PRODH;ALDH7A1
6,KEGG_2016,Bile secretion Homo sapiens hsa04976,8/71,0.0009414489,0.03940636,0,0,3.932847,27.404431,GNAS;AQP4;ATP1A2;KCNN2;ATP1B2;ADCY8;SLC4A4;AQP1
7,KEGG_2016,Adherens junction Homo sapiens hsa04520,8/74,0.001237653,0.04532902,0,0,3.773407,25.261219,TJP1;YES1;ACTN2;CTNNB1;FYN;CTNNA2;PTPRF;WASF3
8,KEGG_2016,Biosynthesis of amino acids Homo sapiens hsa01230,8/74,0.001237653,0.04029247,0,0,3.773407,25.261219,MAT2A;CBS;PSAT1;CTH;PHGDH;ALDOC;ALDH7A1;PSPH
9,KEGG_2016,Regulation of actin cytoskeleton Homo sapiens ...,15/214,0.001341006,0.03929147,0,0,2.446543,16.182256,APC2;NCKAP1;ACTN2;PDGFA;ARHGAP35;PPP1CC;PIKFYV...


In [21]:
# Gene string for row 4 of the current enrichment table, selected from the
# 'Genes' column by name rather than by position 9.
# NOTE(review): in the KEGG_2016 table displayed above, row 4 is "Pathways in
# cancer", and the output saved under this cell does not match that row's
# genes -- this cell appears to have last run against a different results
# table. Confirm which library and row were intended for the VEGFR list.
VEGFR = enrich_df['Genes'].iloc[4]
VEGFR

'SHC1;STK39;WAS;CD3G;BCL10;CD3E;RASGRP2;CBL;CD3D;RASGRP1;MALT1;IKBKB;CDC42;NRAS;GRAP2;RASSF5;FLNA;MAP3K8;CSK;FYN;IKBKG;PLCG1;HRAS;PAG1;NCK1;MAP4K1;DBNL;PDPK1;PRKCB;PRKCA;GAB2;VAV1;ZAP70;CD4;PTPRC;STIM1;LCK;TRAF6;HLA-DRA;LCP2;PRKCQ;PTPN6;KRAS;GRB2;SOS1;CARD11'