In [1]:
import pandas as pd
from gprofiler import GProfiler
# Shared g:Profiler client used by the enrichment cell below.
# return_dataframe=True: the results of gp.profile are handled as pandas
# DataFrames further down (a column is assigned and they are passed to pd.concat).
gp = GProfiler(return_dataframe=True)

In [2]:
# Load the per-gene delta-correlation table. The cells below rely on the
# columns Gene, Delta_Correlation, FDR and Cancer.
delta_correlation_df = pd.read_csv('delta_correlation_df.csv')
# Bare last expression so the notebook renders the (truncated) frame.
delta_correlation_df

Unnamed: 0,Gene,Delta_Correlation,P_Value,FDR,Cancer
0,A1BG,-0.198013,2.451044e-01,4.045115e-01,HNSCC
1,A2M,-0.118384,4.480278e-01,6.091130e-01,HNSCC
2,A2ML1,-0.023469,2.918125e-01,4.561968e-01,HNSCC
3,AAAS,0.275905,1.051756e-01,2.209072e-01,HNSCC
4,AACS,-0.136836,1.800586e-01,3.266475e-01,HNSCC
...,...,...,...,...,...
50684,ZWINT,1.219024,2.267627e-09,1.049863e-07,Endometrial
50685,ZXDC,-0.346532,2.983295e-01,5.386144e-01,Endometrial
50686,ZYG11B,0.768196,5.463938e-04,5.319699e-03,Endometrial
50687,ZYX,0.253630,2.456049e-01,4.795301e-01,Endometrial


In [3]:
def _ranked_enrichment(ranked_genes, background_genes, cancer):
    """Run one g:Profiler query for a pre-sorted gene list and tag it with its cancer.

    Parameters
    ----------
    ranked_genes : list
        Gene symbols, already sorted by the caller; passed as ``query`` with
        ``ordered=True``.
    background_genes : list
        Every gene measured for this cancer; passed as ``background``.
    cancer : str
        Label written to the ``Cancer`` column of the result.

    Returns
    -------
    pandas.DataFrame
        The frame returned by ``gp.profile`` with an added ``Cancer`` column,
        or an empty frame holding only a ``Cancer`` column when
        ``ranked_genes`` is empty.
    """
    if not ranked_genes:
        # No gene passed the FDR cut-off in this direction. Skip the request
        # rather than sending an empty query.
        # NOTE(review): how gp.profile reacts to an empty query is not visible
        # here; skipping is safe either way.
        return pd.DataFrame({'Cancer': pd.Series(dtype=object)})
    enrichment = gp.profile(organism='hsapiens', query=ranked_genes, no_iea=True,
                            ordered=True, no_evidences=False,
                            background=background_genes)
    # Scalar assignment broadcasts to every row (and is a no-op on zero rows).
    enrichment['Cancer'] = cancer
    return enrichment


def _stack(frames):
    """Concatenate per-cancer result frames, leaving out empty ones.

    If every frame is empty, all of them are concatenated so that whatever
    columns they carry are kept.
    """
    non_empty = [frame for frame in frames if not frame.empty]
    return pd.concat(non_empty or frames)


all_pathways = []
positive_pathways = []
negative_pathways = []
for cancer in pd.unique(delta_correlation_df.Cancer):
    cancer_df = delta_correlation_df[delta_correlation_df.Cancer == cancer]
    # Background is taken BEFORE the significance filter: every gene present
    # for this cancer.
    background_genes = list(pd.unique(cancer_df.Gene))
    cancer_df = cancer_df[cancer_df.FDR < 0.05]

    # Split by sign first, while Delta_Correlation still carries its sign.
    positive_cancer_df = cancer_df[cancer_df.Delta_Correlation > 0]
    negative_cancer_df = cancer_df[cancer_df.Delta_Correlation < 0]

    # .assign builds a new frame; writing into the filtered slice directly
    # (cancer_df['Delta_Correlation'] = ...) triggers SettingWithCopyWarning.
    cancer_df = cancer_df.assign(Delta_Correlation=cancer_df['Delta_Correlation'].abs())

    # Order each list from the largest effect to the smallest:
    # all genes by |delta|, positives descending, negatives ascending.
    cancer_df = cancer_df.sort_values('Delta_Correlation', ascending=False)
    positive_cancer_df = positive_cancer_df.sort_values('Delta_Correlation', ascending=False)
    negative_cancer_df = negative_cancer_df.sort_values('Delta_Correlation')

    all_genes = list(cancer_df.Gene)
    positive_genes = list(positive_cancer_df.Gene)
    negative_genes = list(negative_cancer_df.Gene)

    all_cancer_pathways = _ranked_enrichment(all_genes, background_genes, cancer)
    positive_cancer_pathways = _ranked_enrichment(positive_genes, background_genes, cancer)
    negative_cancer_pathways = _ranked_enrichment(negative_genes, background_genes, cancer)

    all_pathways.append(all_cancer_pathways)
    positive_pathways.append(positive_cancer_pathways)
    negative_pathways.append(negative_cancer_pathways)

all_pathways = _stack(all_pathways)
all_pathways.to_csv('Individual_cancer_pathway_enrichment.csv', index=False)
positive_pathways = _stack(positive_pathways)
positive_pathways.to_csv('Positive_delta_corr_individual_cancer_pathways.csv', index=False)
negative_pathways = _stack(negative_pathways)
# The '.csv' extension was missing on this output only; added to match the
# two sibling files written above.
negative_pathways.to_csv('Negative_delta_corr_individual_cancer_pathways.csv', index=False)

In [4]:
# Allow up to 500 rows to be rendered before pandas truncates a displayed frame.
pd.options.display.max_rows = 500

In [5]:
# Distinct values of the `source` column in the combined (all-genes) results.
all_pathways['source'].unique()

array(['HP', 'CORUM', 'HPA', 'REAC', 'WP', 'MIRNA', 'GO:BP', 'GO:MF',
       'KEGG', 'GO:CC'], dtype=object)

In [6]:
# Rows of the combined results whose `source` is 'GO:BP'.
all_pathways.loc[all_pathways['source'].eq('GO:BP')]

Unnamed: 0,source,native,name,p_value,significant,description,term_size,query_size,intersection_size,effective_domain_size,precision,recall,query,parents,intersections,evidences,Cancer
22,GO:BP,GO:0006261,DNA-templated DNA replication,0.001152,True,"""A DNA replication process that uses parental ...",122,3150,71,11402,0.02254,0.581967,query_1,[GO:0006260],"[MMS22L, RFC5, ZPR1, MCM10, STOML2, TIMELESS, ...","[[IMP, IBA], [IDA, IBA], [ISS], [IMP, IBA], [I...",LSCC
122,GO:BP,GO:0006260,DNA replication,0.037516,True,"""The cellular metabolic process in which a cel...",187,1917,68,11402,0.035472,0.363636,query_1,[GO:0006259],"[MMS22L, RFC5, ZPR1, MCM10, STOML2, TIMELESS, ...","[[IMP, IBA], [IDA, IBA, NAS], [ISS], [IMP, IBA...",LSCC
125,GO:BP,GO:0090329,regulation of DNA-templated DNA replication,0.040067,True,"""Any process that modulates the rate, frequenc...",38,1889,22,11402,0.011646,0.578947,query_1,"[GO:0006261, GO:0006275]","[RFC5, STOML2, TIMELESS, TIPIN, LIG3, GINS2, G...","[[IDA], [IMP], [IBA], [IBA, TAS], [IMP], [IDA]...",LSCC
9,GO:BP,GO:0000727,double-strand break repair via break-induced r...,1.8e-05,True,"""The error-free repair of a double-strand brea...",9,554,9,11081,0.016245,1.0,query_1,[GO:0000724],"[CDC45, MCM2, MCM7, GINS4, GINS2, MCM5, MCM6, ...","[[IBA], [IBA], [IBA], [IBA], [IBA], [IBA], [IB...",LUAD
12,GO:BP,GO:0007059,chromosome segregation,0.000139,True,"""The process in which genetic material, in the...",212,282,28,11081,0.099291,0.132075,query_1,[GO:0009987],"[CENPH, SMC2, TRIP13, ESCO2, MIS18A, KNTC1, KI...","[[IBA], [IDA, IBA, TAS], [IMP, ISS], [IBA], [I...",LUAD
19,GO:BP,GO:0032392,DNA geometric change,0.000398,True,"""The process in which a transformation is indu...",44,182,10,11081,0.054945,0.227273,query_1,[GO:0071103],"[CDC45, ANXA1, MCM2, MCM7, RECQL5, HMGB3, TOP2...","[[IDA], [IBA], [IDA, IBA], [IDA, IBA], [IBA], ...",LUAD
30,GO:BP,GO:0000278,mitotic cell cycle,0.001097,True,"""Progression through the phases of the mitotic...",579,13,9,11081,0.692308,0.015544,query_1,[GO:0007049],"[CDC45, CENPH, SMC2, TRIP13, MELK, WDHD1, AURK...","[[IBA], [IBA], [IDA, IBA, TAS], [IMP], [TAS], ...",LUAD
31,GO:BP,GO:0032508,DNA duplex unwinding,0.001244,True,"""The process in which interchain hydrogen bond...",38,182,9,11081,0.049451,0.236842,query_1,[GO:0032392],"[CDC45, ANXA1, MCM2, MCM7, RECQL5, TOP2A, GINS...","[[IDA], [IBA], [IDA, IBA], [IDA, IBA], [IBA], ...",LUAD
32,GO:BP,GO:0071103,DNA conformation change,0.001471,True,"""A cellular process that results in a change i...",49,182,10,11081,0.054945,0.204082,query_1,[GO:0051276],"[CDC45, ANXA1, MCM2, MCM7, RECQL5, HMGB3, TOP2...","[[IDA], [IBA], [IDA, IBA], [IDA, IBA], [IBA], ...",LUAD
35,GO:BP,GO:0006268,DNA unwinding involved in DNA replication,0.001688,True,"""The process in which interchain hydrogen bond...",20,554,10,11081,0.018051,0.5,query_1,"[GO:0006261, GO:0032508]","[CDC45, MCM2, MCM7, RECQL5, GINS4, GINS2, MCM5...","[[IDA], [IDA, IBA], [IDA, IBA], [IBA], [IDA], ...",LUAD


In [7]:
# Distinct values of the `source` column in the positive-delta results.
positive_pathways['source'].unique()

array(['HP', 'CORUM', 'KEGG', 'HPA', 'REAC', 'WP', 'MIRNA', 'GO:BP',
       'GO:CC', 'GO:MF'], dtype=object)

In [8]:
# Rows of the positive-delta results whose `source` is 'WP'.
positive_pathways.loc[positive_pathways['source'].eq('WP')]

Unnamed: 0,source,native,name,p_value,significant,description,term_size,query_size,intersection_size,effective_domain_size,precision,recall,query,parents,intersections,evidences,Cancer
9,WP,WP:WP2446,Retinoblastoma gene in cancer,0.000111674,True,Retinoblastoma gene in cancer,80,4703,58,11402,0.012333,0.725,query_1,[WP:000000],"[TP53, RFC5, SKP2, SMC2, PRIM1, CDC45, TFDP1, ...","[[WP], [WP], [WP], [WP], [WP], [WP], [WP], [WP...",LSCC
17,WP,WP:WP5213,Amino acid metabolism in triple-negative breas...,0.0004878876,True,Amino acid metabolism in triple-negative breas...,7,508,5,11402,0.009843,0.714286,query_1,[WP:000000],"[SLC2A1, PSPH, SLC7A11, SLC1A5, PSAT1]","[[WP], [WP], [WP], [WP], [WP]]",LSCC
18,WP,WP:WP466,DNA replication,0.0005321233,True,DNA replication,37,3130,25,11402,0.007987,0.675676,query_1,[WP:000000],"[RFC5, MCM10, PRIM1, CDC45, ORC5, POLA1, POLE,...","[[WP], [WP], [WP], [WP], [WP], [WP], [WP], [WP...",LSCC
24,WP,WP:WP4752,Base excision repair,0.00112112,True,Base excision repair,29,5375,26,11402,0.004837,0.896552,query_1,[WP:000000],"[UNG, LIG3, POLE, PARP2, TDG, APEX2, POLE3, LI...","[[WP], [WP], [WP], [WP], [WP], [WP], [WP], [WP...",LSCC
44,WP,WP:WP4290,Metabolic reprogramming in colon cancer,0.005439309,True,Metabolic reprogramming in colon cancer,39,712,11,11402,0.015449,0.282051,query_1,[WP:000000],"[IDH3A, SLC2A1, PGK1, PSPH, GLUD1, GOT2, SLC1A...","[[WP], [WP], [WP], [WP], [WP], [WP], [WP], [WP...",LSCC
49,WP,WP:WP384,Apoptosis modulation by HSP70,0.006615788,True,Apoptosis modulation by HSP70,14,124,4,11402,0.032258,0.285714,query_1,[WP:000000],"[RIPK1, NFKB1, FADD, TNFRSF1A]","[[WP], [WP], [WP], [WP]]",LSCC
61,WP,WP:WP4629,Aerobic glycolysis,0.01310997,True,Aerobic glycolysis,12,5250,12,11402,0.002286,1.0,query_1,[WP:000000],"[SLC2A1, PGK1, TPI1, GAPDH, HK1, LDHA, PFKM, P...","[[WP], [WP], [WP], [WP], [WP], [WP], [WP], [WP...",LSCC
68,WP,WP:WP4016,DNA IR-damage and cellular response via ATR,0.01578617,True,DNA IR-damage and cellular response via ATR,70,647,14,11402,0.021638,0.2,query_1,[WP:000000],"[FANCI, TP53, FANCD2, CHEK2, BRCA1, CDC45, TOP...","[[WP], [WP], [WP], [WP], [WP], [WP], [WP], [WP...",LSCC
76,WP,WP:WP4674,Head and neck squamous cell carcinoma,0.01819447,True,Head and neck squamous cell carcinoma,65,40,4,11402,0.1,0.061538,query_1,[WP:000000],"[FGFR3, TP53, NFKB1, KEAP1]","[[WP], [WP], [WP], [WP]]",LSCC
84,WP,WP:WP5046,NAD metabolism in oncogene-induced senescence ...,0.0226445,True,NAD metabolism in oncogene-induced senescence ...,22,1300,10,11402,0.007692,0.454545,query_1,[WP:000000],"[TP53, SLC2A1, RELA, GOT1, GOT2, ELAVL1, SIRT1...","[[WP], [WP], [WP], [WP], [WP], [WP], [WP], [WP...",LSCC


In [9]:
# Distinct values of the `source` column in the negative-delta results.
negative_pathways['source'].unique()

array(['KEGG', 'GO:CC', 'REAC', 'WP', 'GO:BP', 'HP', 'GO:MF', 'CORUM',
       'HPA'], dtype=object)

In [10]:
# Rows of the negative-delta results whose `source` is 'KEGG'.
negative_pathways.loc[negative_pathways['source'].eq('KEGG')]

Unnamed: 0,source,native,name,p_value,significant,description,term_size,query_size,intersection_size,effective_domain_size,precision,recall,query,parents,intersections,evidences,Cancer
0,KEGG,KEGG:05415,Diabetic cardiomyopathy,0.0,True,Diabetic cardiomyopathy,149,242,31,9747,0.128099,0.208054,query_1,[KEGG:00000],"[MT-ND4, PDHA1, MT-CO1, MT-CO3, NDUFB9, UQCRFS...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG...",HNSCC
10,KEGG,KEGG:00190,Oxidative phosphorylation,0.0,True,Oxidative phosphorylation,83,132,17,9747,0.128788,0.204819,query_1,[KEGG:00000],"[MT-ND4, MT-CO1, MT-CO3, ATP6V1B1, NDUFB9, UQC...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG...",HNSCC
16,KEGG,KEGG:05208,Chemical carcinogenesis - reactive oxygen species,0.0,True,Chemical carcinogenesis - reactive oxygen species,156,132,20,9747,0.151515,0.128205,query_1,[KEGG:00000],"[MT-ND4, MT-CO1, MT-CO3, NDUFB9, UQCRFS1, NDUF...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG...",HNSCC
17,KEGG,KEGG:04714,Thermogenesis,0.0,True,Thermogenesis,143,132,20,9747,0.151515,0.13986,query_1,[KEGG:00000],"[MT-ND4, MT-CO1, MT-CO3, NDUFB9, UQCRFS1, CPT1...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG...",HNSCC
30,KEGG,KEGG:05020,Prion disease,0.0,True,Prion disease,179,132,19,9747,0.143939,0.106145,query_1,[KEGG:00000],"[MT-ND4, MT-CO1, MT-CO3, NDUFB9, UQCRFS1, NDUF...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG...",HNSCC
35,KEGG,KEGG:05014,Amyotrophic lateral sclerosis,0.0,True,Amyotrophic lateral sclerosis,243,132,21,9747,0.159091,0.08642,query_1,[KEGG:00000],"[MT-ND4, MT-CO1, MT-CO3, NDUFB9, PRPH, UQCRFS1...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG...",HNSCC
36,KEGG,KEGG:05012,Parkinson disease,0.0,True,Parkinson disease,168,132,18,9747,0.136364,0.107143,query_1,[KEGG:00000],"[MT-ND4, MT-CO1, MT-CO3, NDUFB9, UQCRFS1, NDUF...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG...",HNSCC
37,KEGG,KEGG:05016,Huntington disease,0.0,True,Huntington disease,192,132,19,9747,0.143939,0.098958,query_1,[KEGG:00000],"[MT-ND4, MT-CO1, MT-CO3, NDUFB9, UQCRFS1, NDUF...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG...",HNSCC
43,KEGG,KEGG:05010,Alzheimer disease,1e-06,True,Alzheimer disease,237,132,21,9747,0.159091,0.088608,query_1,[KEGG:00000],"[MT-ND4, MT-CO1, MT-CO3, NDUFB9, UQCRFS1, NDUF...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG...",HNSCC
74,KEGG,KEGG:05022,Pathways of neurodegeneration - multiple diseases,1.1e-05,True,Pathways of neurodegeneration - multiple diseases,293,132,22,9747,0.166667,0.075085,query_1,[KEGG:00000],"[MT-ND4, MT-CO1, MT-CO3, NDUFB9, PRPH, UQCRFS1...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG...",HNSCC
