In [1]:
import pandas as pd
from gprofiler import GProfiler

In [2]:
# Load the delta-correlation table from CSV. Cells below read its
# Gene, Delta_Correlation and FDR columns.
delta_correlation_df = pd.read_csv('delta_correlation_df.csv')
# Bare last expression so the notebook renders the frame as the cell output.
delta_correlation_df

Unnamed: 0,Gene,Delta_Correlation,P_Value,FDR,Cancer
0,A1BG,-0.388745,0.054892,0.142871,CCRCC
1,A1CF,0.146993,0.001086,0.005807,CCRCC
2,A2M,-0.277035,0.029705,0.089411,CCRCC
3,AAAS,-0.044291,0.708329,0.829294,CCRCC
4,AACS,-0.128218,0.065429,0.162756,CCRCC
...,...,...,...,...,...
58209,ZSWIM8,-0.301055,0.297288,0.635725,PDAC
58210,ZW10,-0.190323,0.365786,0.694767,PDAC
58211,ZYG11B,-0.231357,0.398680,0.720323,PDAC
58212,ZYX,-0.232389,0.246493,0.581496,PDAC


In [3]:
# Background gene set for the enrichment query: every distinct value of the
# Gene column, kept in order of first appearance.
background_genes = delta_correlation_df['Gene'].unique().tolist()

In [4]:
# g:Profiler client reused by the enrichment cell below.
# return_dataframe=True: presumably makes gp.profile() return a pandas
# DataFrame (its result is later written with .to_csv) — confirm against the
# gprofiler-official docs.
gp = GProfiler(return_dataframe=True)

In [5]:
# Select genes whose delta correlation is significant in several rows of the
# table, rank them by the magnitude of their mean delta correlation, and run
# an ordered KEGG enrichment query against the measured-gene background.

# Thresholds used by the filter below.
FDR_THRESHOLD = 0.05       # a row counts as significant when FDR < this value
MIN_SIGNIFICANT_ROWS = 3   # a gene needs at least this many significant rows
# NOTE(review): rows look like one (Gene, Cancer) pair each, so this is
# presumably "significant in >= 3 cancers" — confirm the table has no
# duplicate (Gene, Cancer) rows.

sig_rows = delta_correlation_df[delta_correlation_df.FDR < FDR_THRESHOLD]

# Number of significant rows per gene (single column named 0), and the genes
# that reach the minimum count.
num_genes = pd.DataFrame(sig_rows.groupby('Gene').size())
sig_genes = num_genes[num_genes[0] >= MIN_SIGNIFICANT_ROWS].index

sig_delta_corr = (
    sig_rows[sig_rows.Gene.isin(sig_genes)]
    .groupby('Gene')
    # numeric_only=True: the frame still carries the string column `Cancer`;
    # without it pandas >= 2.0 raises TypeError when averaging that column.
    .mean(numeric_only=True)
    # Rank by magnitude of the mean signed delta, regardless of direction.
    .assign(Delta_Correlation=lambda frame: frame.Delta_Correlation.abs())
    .sort_values('Delta_Correlation', ascending=False)
    .reset_index()
)

# Order matters: the query is submitted with ordered=True, so the list must
# stay sorted from largest to smallest |mean delta correlation|.
genes = list(sig_delta_corr.Gene)
if not genes:
    raise ValueError(
        'No gene passed the FDR / minimum-count filter; nothing to send to g:Profiler.'
    )

pathway_enrichment = gp.profile(
    organism='hsapiens',
    query=genes,
    no_iea=True,
    ordered=True,
    no_evidences=False,
    background=background_genes,
    sources=['KEGG'],
)

# NOTE(review): the file name is misspelled ("combinded"); it is kept as-is
# because other code may read this exact path. Rename in both places together.
pathway_enrichment.to_csv('combinded_kegg_pathway_enrichment_df.csv', index=False)

In [6]:
# Render the enrichment table returned by gp.profile() in the previous cell.
pathway_enrichment

Unnamed: 0,source,native,name,p_value,significant,description,term_size,query_size,intersection_size,effective_domain_size,precision,recall,query,parents,intersections,evidences
0,KEGG,KEGG:03030,DNA replication,2.553518e-08,True,DNA replication,35,1632,21,12487,0.012868,0.6,query_1,[KEGG:00000],"[PCNA, FEN1, RFC1, PRIM1, POLA2, MCM3, MCM7, R...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
1,KEGG,KEGG:01200,Carbon metabolism,2.949204e-06,True,Carbon metabolism,110,3794,64,12487,0.016869,0.581818,query_1,[KEGG:00000],"[TALDO1, PGK1, GCSH, RPIA, ACSS1, MDH2, SHMT2,...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
2,KEGG,KEGG:03430,Mismatch repair,1.382308e-05,True,Mismatch repair,21,1632,14,12487,0.008578,0.666667,query_1,[KEGG:00000],"[PCNA, PMS2, RFC1, RFC5, MSH3, MSH6, SSBP1, LI...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
3,KEGG,KEGG:00240,Pyrimidine metabolism,9.80526e-05,True,Pyrimidine metabolism,51,3535,34,12487,0.009618,0.666667,query_1,[KEGG:00000],"[TYMS, CAD, RRM1, ENTPD5, TK1, UPRT, DTYMK, TK...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
4,KEGG,KEGG:00280,"Valine, leucine and isoleucine degradation",0.0007317368,True,"Valine, leucine and isoleucine degradation",48,3842,30,12487,0.007808,0.625,query_1,[KEGG:00000],"[EHHADH, OXCT1, ACSF3, IL4I1, ALDH3A2, ACAA2, ...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
5,KEGG,KEGG:04110,Cell cycle,0.001030852,True,Cell cycle,103,721,21,12487,0.029126,0.203883,query_1,[KEGG:00000],"[CHEK1, PCNA, TP53, CHEK2, CCNA2, MAD2L1, CDK1...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
6,KEGG,KEGG:01240,Biosynthesis of cofactors,0.00109969,True,Biosynthesis of cofactors,133,3840,74,12487,0.019271,0.556391,query_1,[KEGG:00000],"[FLAD1, CAD, MTHFD1L, NADK2, CPOX, SHMT2, GCLC...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
7,KEGG,KEGG:01230,Biosynthesis of amino acids,0.007994229,True,Biosynthesis of amino acids,72,3764,39,12487,0.010361,0.541667,query_1,[KEGG:00000],"[TALDO1, PGK1, RPIA, SHMT2, CPS1, PGAM1, PSPH,...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
8,KEGG,KEGG:03410,Base excision repair,0.009076788,True,Base excision repair,29,2828,18,12487,0.006365,0.62069,query_1,[KEGG:00000],"[PCNA, POLB, FEN1, UNG, TDG, MPG, LIG1, POLD3,...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
9,KEGG,KEGG:00970,Aminoacyl-tRNA biosynthesis,0.01495672,True,Aminoacyl-tRNA biosynthesis,28,1685,16,12487,0.009496,0.571429,query_1,[KEGG:00000],"[SARS2, DARS2, TARS2, SEPSECS, IARS2, HARS2, Y...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
