In [1]:
import pandas as pd
from gprofiler import GProfiler
# Shared g:Profiler client used by the enrichment cell below.
# return_dataframe=True: later cells treat gp.profile() results as pandas
# DataFrames (column assignment, pd.concat).
gp = GProfiler(return_dataframe=True)

In [2]:
# Load the per-gene delta-correlation table. The enrichment cell below reads its
# Gene, Delta_Correlation, FDR and Cancer columns.
delta_correlation_df = pd.read_csv('delta_correlation_df.csv')
# Bare last expression so the notebook renders the frame as the cell output.
delta_correlation_df

Unnamed: 0,Gene,Delta_Correlation,P_Value,FDR,Cancer
0,A1BG,-0.268533,5.703182e-02,1.320375e-01,CCRCC
1,A1CF,0.192038,1.063340e-04,6.401858e-04,CCRCC
2,A2M,-0.191619,1.277644e-01,2.439276e-01,CCRCC
3,AAAS,0.019654,8.963138e-01,9.409267e-01,CCRCC
4,AACS,-0.169937,6.007042e-02,1.375402e-01,CCRCC
...,...,...,...,...,...
50303,ZXDC,0.073854,6.395306e-01,6.962162e-01,LUAD
50304,ZYG11B,0.734569,1.583743e-10,1.504331e-09,LUAD
50305,ZYX,0.440861,2.253307e-06,9.499585e-06,LUAD
50306,ZZEF1,0.422001,3.731185e-11,3.889196e-10,LUAD


In [3]:
# Per-cancer pathway enrichment of genes with a significant delta correlation.
# For every cancer type three ranked gene lists are sent to g:Profiler as
# ordered queries:
#   all      - significant genes ranked by |delta correlation|, largest first
#   positive - significant genes with delta correlation > 0, largest first
#   negative - significant genes with delta correlation < 0, most negative first
# The unique genes listed for that cancer in delta_correlation_df are passed as
# the background. Results are concatenated across cancers and written to CSV.
FDR_THRESHOLD = 0.05  # genes with FDR below this value enter the queries


def _enrich_ranked_genes(ranked_genes, background_genes, cancer):
    """Run one ordered g:Profiler query and tag the result with its cancer.

    Parameters
    ----------
    ranked_genes : list of str
        Query genes in rank order (the query is sent with ``ordered=True``).
    background_genes : list of str
        Genes passed to g:Profiler as ``background``.
    cancer : str
        Label stored in the ``Cancer`` column of the returned frame.

    Returns
    -------
    pandas.DataFrame or None
        The ``gp.profile`` result with an added ``Cancer`` column, or ``None``
        when ``ranked_genes`` is empty and no request was made.
    """
    if not ranked_genes:
        # No gene passed the filter for this direction, so there is nothing to
        # test; skip the web request instead of sending an empty query.
        return None
    enrichment = gp.profile(organism='hsapiens', query=ranked_genes, no_iea=True,
                            ordered=True, no_evidences=False,
                            background=background_genes)
    # Scalar assignment broadcasts to every row and also works on an empty result.
    enrichment['Cancer'] = cancer
    return enrichment


def _concat_or_empty(frames):
    """Concatenate per-cancer results; return an empty frame if there are none."""
    return pd.concat(frames) if frames else pd.DataFrame()


enrichment_frames = {'all': [], 'positive': [], 'negative': []}
for cancer in pd.unique(delta_correlation_df.Cancer):
    cancer_df = delta_correlation_df[delta_correlation_df.Cancer == cancer]
    background_genes = list(pd.unique(cancer_df.Gene))
    significant_df = cancer_df[cancer_df.FDR < FDR_THRESHOLD]
    delta = significant_df['Delta_Correlation']

    # .assign builds a new frame, so the absolute values never get written into
    # a slice of delta_correlation_df (the original in-place assignment raised
    # SettingWithCopyWarning).
    ranked_genes = {
        'all': significant_df.assign(Delta_Correlation=delta.abs())
                             .sort_values('Delta_Correlation', ascending=False).Gene,
        'positive': significant_df[delta > 0]
                    .sort_values('Delta_Correlation', ascending=False).Gene,
        'negative': significant_df[delta < 0]
                    .sort_values('Delta_Correlation').Gene,
    }

    # Dict order keeps the queries in the order all -> positive -> negative.
    for direction, genes in ranked_genes.items():
        enrichment = _enrich_ranked_genes(list(genes), background_genes, cancer)
        if enrichment is not None:
            enrichment_frames[direction].append(enrichment)

all_pathways = _concat_or_empty(enrichment_frames['all'])
all_pathways.to_csv('Individual_cancer_pathway_enrichment.csv', index=False)
positive_pathways = _concat_or_empty(enrichment_frames['positive'])
positive_pathways.to_csv('Positive_delta_corr_individual_cancer_pathways.csv', index=False)
negative_pathways = _concat_or_empty(enrichment_frames['negative'])
# The '.csv' extension was missing from this file name, unlike the two above.
negative_pathways.to_csv('Negative_delta_corr_individual_cancer_pathways.csv', index=False)

In [4]:
# Let the notebook render up to 500 rows before pandas truncates a frame.
pd.options.display.max_rows = 500

In [12]:
# Distinct values of the `source` column in the combined enrichment results.
all_pathways['source'].unique()

array(['KEGG', 'CORUM', 'HP', 'WP', 'GO:BP', 'GO:MF', 'GO:CC', 'REAC',
       'MIRNA', 'HPA'], dtype=object)

In [13]:
# Rows of the combined enrichment results whose source is GO:BP.
all_pathways.loc[all_pathways['source'].eq('GO:BP')]

Unnamed: 0,source,native,name,p_value,significant,description,term_size,query_size,intersection_size,effective_domain_size,precision,recall,query,parents,intersections,evidences,Cancer
11,GO:BP,GO:0009081,branched-chain amino acid metabolic process,0.037084,True,"""The chemical reactions and pathways involving...",23,3159,19,9916,0.006015,0.826087,query_1,"[GO:0006082, GO:1901564]","[IVD, HIBCH, SDS, DBT, ALDH6A1, ACAD8, ACADSB,...","[[IDA, TAS], [TAS], [IBA], [TAS], [TAS], [TAS]...",CCRCC
6,GO:BP,GO:0006120,"mitochondrial electron transport, NADH to ubiq...",0.000251,True,"""The transfer of electrons from NADH to ubiqui...",42,1589,26,10080,0.016362,0.619048,query_1,"[GO:0019646, GO:0042775]","[NDUFS5, NDUFB1, NDUFB4, NDUFB10, NDUFB9, NDUF...","[[TAS, NAS], [TAS, NAS], [TAS, NAS], [TAS, NAS...",HNSCC
15,GO:BP,GO:0019646,aerobic electron transport chain,0.003612,True,"""A process in which a series of electron carri...",70,1419,32,10080,0.022551,0.457143,query_1,"[GO:0006119, GO:0009060, GO:0022904]","[NDUFS5, NDUFB1, NDUFB4, NDUFB10, NDUFB9, NDUF...","[[TAS, NAS], [TAS, NAS], [TAS, NAS], [TAS, NAS...",HNSCC
18,GO:BP,GO:0042775,mitochondrial ATP synthesis coupled electron t...,0.00465,True,"""The transfer of electrons through a series of...",75,1419,33,10080,0.023256,0.44,query_1,[GO:0042773],"[NDUFS5, NDUFB1, NDUFB4, NDUFB10, NDUFB9, NDUF...","[[TAS, NAS], [TAS, NAS], [TAS, NAS], [TAS, NAS...",HNSCC
19,GO:BP,GO:0042773,ATP synthesis coupled electron transport,0.006284,True,"""The transfer of electrons through a series of...",76,1419,33,10080,0.023256,0.434211,query_1,"[GO:0006119, GO:0022904]","[NDUFS5, NDUFB1, NDUFB4, NDUFB10, NDUFB9, NDUF...","[[TAS, NAS], [TAS, NAS], [TAS, NAS], [TAS, NAS...",HNSCC
26,GO:BP,GO:0022904,respiratory electron transport chain,0.012214,True,"""A process in which a series of electron carri...",85,1419,35,10080,0.024665,0.411765,query_1,"[GO:0022900, GO:0045333]","[NDUFS5, NDUFB1, NDUFB4, NDUFB10, NDUFB9, NDUF...","[[TAS, NAS], [TAS, NAS], [TAS, NAS], [TAS, NAS...",HNSCC
29,GO:BP,GO:0022900,electron transport chain,0.018394,True,"""A process in which a series of electron carri...",91,1419,36,10080,0.02537,0.395604,query_1,[GO:0006091],"[NDUFS5, NDUFB1, NDUFB4, NDUFB10, NDUFB9, NDUF...","[[TAS, NAS], [TAS, NAS], [TAS, NAS], [TAS, NAS...",HNSCC
32,GO:BP,GO:0006119,oxidative phosphorylation,0.022092,True,"""The phosphorylation of ADP to ATP that accomp...",107,1869,49,10080,0.026217,0.457944,query_1,"[GO:0009060, GO:0046034]","[STOML2, NDUFS5, NDUFB1, NDUFB4, NDUFB10, NDUF...","[[IMP], [TAS, NAS], [TAS, NAS], [TAS, NAS], [T...",HNSCC
6,GO:BP,GO:0101024,mitotic nuclear membrane organization,5e-06,True,"""A mitotic cell cycle process which results in...",51,2585,34,10857,0.013153,0.666667,query_1,"[GO:0071763, GO:0140014, GO:1903047]","[PPP2R2A, CCNB2, EMD, CCNB1, NUP205, CDK1, KPN...","[[TAS], [TAS], [TAS], [TAS], [TAS], [TAS], [TA...",LSCC
7,GO:BP,GO:0007084,mitotic nuclear membrane reassembly,5e-06,True,"""The mitotic cell cycle process involving ESCR...",51,2585,34,10857,0.013153,0.666667,query_1,"[GO:0031468, GO:0101024]","[PPP2R2A, CCNB2, EMD, CCNB1, NUP205, CDK1, KPN...","[[TAS], [TAS], [TAS], [TAS], [TAS], [TAS], [TA...",LSCC


In [14]:
# Distinct values of the `source` column in the positive-direction results.
positive_pathways['source'].unique()

array(['WP', 'CORUM', 'KEGG', 'GO:BP', 'HP', 'REAC', 'MIRNA', 'HPA',
       'GO:MF', 'GO:CC'], dtype=object)

In [15]:
# Rows of the positive-direction results whose source is KEGG.
positive_pathways.loc[positive_pathways['source'].eq('KEGG')]

Unnamed: 0,source,native,name,p_value,significant,description,term_size,query_size,intersection_size,effective_domain_size,precision,recall,query,parents,intersections,evidences,Cancer
1,KEGG,KEGG:00280,"Valine, leucine and isoleucine degradation",0.002601,True,"Valine, leucine and isoleucine degradation",44,1747,22,9211,0.012593,0.5,query_1,[KEGG:00000],"[EHHADH, ACAA2, HADHB, BCAT2, ACSF3, ACAD8, AL...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG...",Endometrial
5,KEGG,KEGG:01230,Biosynthesis of amino acids,0.034841,True,Biosynthesis of amino acids,62,451,12,9211,0.026608,0.193548,query_1,[KEGG:00000],"[TALDO1, MTR, PGK1, PKM, GOT1, GLUL, SDS, ASS1...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG...",Endometrial
7,KEGG,KEGG:05222,Small cell lung cancer,0.03302,True,Small cell lung cancer,74,231,9,10080,0.038961,0.121622,query_1,[KEGG:00000],"[RXRB, MYC, BIRC2, TP53, COL4A6, LAMC2, CDK6, ...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG...",HNSCC
10,KEGG,KEGG:03030,DNA replication,9e-06,True,DNA replication,35,2607,24,10857,0.009206,0.685714,query_1,[KEGG:00000],"[RFC4, RFC5, PRIM1, RFC2, RFC3, MCM5, MCM6, SS...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG...",LSCC
63,KEGG,KEGG:04110,Cell cycle,0.001704,True,Cell cycle,99,498,18,10857,0.036145,0.181818,query_1,[KEGG:00000],"[TP53, CHEK2, PRKDC, SKP2, YWHAG, YWHAZ, YWHAQ...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG...",LSCC
88,KEGG,KEGG:03430,Mismatch repair,0.004764,True,Mismatch repair,21,1362,11,10857,0.008076,0.52381,query_1,[KEGG:00000],"[RFC4, RFC5, RFC2, MSH6, RFC3, MSH2, SSBP1, PC...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG...",LSCC
126,KEGG,KEGG:03013,RNA transport,0.011048,True,RNA transport,148,2765,69,10857,0.024955,0.466216,query_1,[KEGG:00000],"[NUP153, XPO5, EIF5, EIF5B, GEMIN2, STRAP, XPO...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG...",LSCC
172,KEGG,KEGG:04668,TNF signaling pathway,0.027329,True,TNF signaling pathway,87,157,8,10857,0.050955,0.091954,query_1,[KEGG:00000],"[MMP3, NFKB1, RIPK1, TNFRSF1A, BAG4, FADD, ICA...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG...",LSCC
192,KEGG,KEGG:05215,Prostate cancer,0.037306,True,Prostate cancer,70,15,3,10857,0.2,0.042857,query_1,[KEGG:00000],"[MMP3, TP53, NFKB1]","[[KEGG], [KEGG], [KEGG]]",LSCC
205,KEGG,KEGG:05418,Fluid shear stress and atherosclerosis,0.041753,True,Fluid shear stress and atherosclerosis,107,171,9,10857,0.052632,0.084112,query_1,[KEGG:00000],"[TP53, NFKB1, TNFRSF1A, KEAP1, SDC1, CALML3, I...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG...",LSCC


In [16]:
# Distinct values of the `source` column in the negative-direction results.
negative_pathways['source'].unique()

array(['GO:CC', 'HP', 'REAC', 'KEGG', 'GO:BP', 'GO:MF', 'WP', 'CORUM',
       'HPA'], dtype=object)

In [17]:
# Rows of the negative-direction results whose source is KEGG.
negative_pathways.loc[negative_pathways['source'].eq('KEGG')]

Unnamed: 0,source,native,name,p_value,significant,description,term_size,query_size,intersection_size,effective_domain_size,precision,recall,query,parents,intersections,evidences,Cancer
6,KEGG,KEGG:01100,Metabolic pathways,3.750499e-23,True,Metabolic pathways,1077,817,233,9916,0.28519,0.216342,query_1,[KEGG:00000],"[AGXT, NDUFC1, CYP17A1, NT5C1A, INPP5E, PAH, C...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG...",CCRCC
14,KEGG,KEGG:01200,Carbon metabolism,8.174637e-18,True,Carbon metabolism,103,860,46,9916,0.053488,0.446602,query_1,[KEGG:00000],"[AGXT, PGAM2, HIBCH, CAT, ALDOB, SUCLG1, GLYCT...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG...",CCRCC
18,KEGG,KEGG:00280,"Valine, leucine and isoleucine degradation",2.7160930000000002e-17,True,"Valine, leucine and isoleucine degradation",46,857,29,9916,0.033839,0.630435,query_1,[KEGG:00000],"[IVD, ACAA1, ABAT, HIBCH, HMGCL, DBT, ALDH6A1,...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG...",CCRCC
29,KEGG,KEGG:00190,Oxidative phosphorylation,1.859558e-15,True,Oxidative phosphorylation,98,764,44,9916,0.057592,0.44898,query_1,[KEGG:00000],"[NDUFC1, NDUFS4, NDUFA10, NDUFS3, NDUFA8, NDUF...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG...",CCRCC
48,KEGG,KEGG:00020,Citrate cycle (TCA cycle),7.6841e-14,True,Citrate cycle (TCA cycle),29,764,20,9916,0.026178,0.689655,query_1,[KEGG:00000],"[PCK2, SUCLG1, PCK1, IDH3B, PDHB, SUCLA2, OGDH...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG...",CCRCC
69,KEGG,KEGG:00640,Propanoate metabolism,2.362685e-11,True,Propanoate metabolism,33,857,20,9916,0.023337,0.606061,query_1,[KEGG:00000],"[LDHAL6A, ABAT, HIBCH, DBT, SUCLG1, ALDH6A1, L...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG...",CCRCC
90,KEGG,KEGG:04932,Non-alcoholic fatty liver disease,3.588244e-09,True,Non-alcoholic fatty liver disease,118,552,32,9916,0.057971,0.271186,query_1,[KEGG:00000],"[NDUFC1, NDUFS4, NDUFA10, NDUFS3, NDUFA8, NDUF...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG...",CCRCC
94,KEGG,KEGG:05415,Diabetic cardiomyopathy,6.938417e-09,True,Diabetic cardiomyopathy,161,764,45,9916,0.058901,0.279503,query_1,[KEGG:00000],"[NDUFC1, NDUFS4, NDUFA10, NDUFS3, NDUFA8, NDUF...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG...",CCRCC
99,KEGG,KEGG:00620,Pyruvate metabolism,1.738878e-08,True,Pyruvate metabolism,41,803,20,9916,0.024907,0.487805,query_1,[KEGG:00000],"[LDHAL6A, PCK2, LDHC, ADH6, LDHD, PCK1, PDHB, ...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG...",CCRCC
100,KEGG,KEGG:04714,Thermogenesis,2.025254e-08,True,Thermogenesis,166,764,48,9916,0.062827,0.289157,query_1,[KEGG:00000],"[NDUFC1, NDUFS4, NDUFA10, NDUFS3, NDUFA8, NDUF...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG...",CCRCC
