In [1]:
import pandas as pd
from gprofiler import GProfiler

In [2]:
# Load the delta-correlation table from a CSV located in the current working directory.
# The rendered output below shows the columns Gene, Delta_Correlation, P_Value, FDR and Cancer.
delta_correlation_df = pd.read_csv('delta_correlation_df.csv')
# Bare last expression: display the frame (pandas truncates long frames when rendering).
delta_correlation_df

Unnamed: 0,Gene,Delta_Correlation,P_Value,FDR,Cancer
0,A1BG,-0.268533,5.703182e-02,1.320375e-01,CCRCC
1,A1CF,0.192038,1.063340e-04,6.401858e-04,CCRCC
2,A2M,-0.191619,1.277644e-01,2.439276e-01,CCRCC
3,AAAS,0.019654,8.963138e-01,9.409267e-01,CCRCC
4,AACS,-0.169937,6.007042e-02,1.375402e-01,CCRCC
...,...,...,...,...,...
50303,ZXDC,0.073854,6.395306e-01,6.962162e-01,LUAD
50304,ZYG11B,0.734569,1.583743e-10,1.504331e-09,LUAD
50305,ZYX,0.440861,2.253307e-06,9.499585e-06,LUAD
50306,ZZEF1,0.422001,3.731185e-11,3.889196e-10,LUAD


In [3]:
# Every distinct gene symbol in the table, in order of first appearance; passed later
# to gp.profile as the `background` argument.
background_genes = list(delta_correlation_df['Gene'].unique())

In [4]:
# g:Profiler client reused by the enrichment query below. return_dataframe=True is
# requested so that results are handed back as a pandas DataFrame (the later cells
# treat `pathway_enrichment` as one).
gp = GProfiler(return_dataframe=True)

In [5]:
# Build a ranked gene list and run a g:Profiler enrichment query on it:
#   1. keep rows whose FDR is below FDR_CUTOFF,
#   2. keep genes that have at least MIN_SIGNIFICANT_ROWS such rows,
#   3. average the numeric columns per gene and rank genes by |mean Delta_Correlation|,
#   4. submit the ranked list to g:Profiler with all genes in the table as background.
FDR_CUTOFF = 0.05          # a row counts as significant when its FDR is below this value
MIN_SIGNIFICANT_ROWS = 3   # minimum number of significant rows a gene needs to be kept

significant_rows = delta_correlation_df[delta_correlation_df.FDR < FDR_CUTOFF]

# Number of significant rows per gene: a one-column frame (column label 0) indexed by Gene.
num_genes = pd.DataFrame(significant_rows.groupby('Gene').size())
sig_genes = num_genes[num_genes[0] >= MIN_SIGNIFICANT_ROWS].index

# numeric_only=True is required: the frame still carries the string column `Cancer`,
# and since pandas 2.0 GroupBy.mean() raises TypeError on non-numeric columns instead
# of silently dropping them. With the flag the result is the same on old and new pandas.
sig_delta_corr = (
    significant_rows[significant_rows.Gene.isin(sig_genes)]
    .groupby('Gene')
    .mean(numeric_only=True)
)

# The absolute value is taken AFTER averaging, so genes are ranked by the magnitude
# of their mean delta correlation (opposite-signed rows partially cancel).
sig_delta_corr['Delta_Correlation'] = sig_delta_corr.Delta_Correlation.abs()
sig_delta_corr = (
    sig_delta_corr
    .sort_values('Delta_Correlation', ascending=False)
    .reset_index()
)

# `genes` is sorted by decreasing |mean Delta_Correlation| and is passed with
# ordered=True; `background_genes` restricts the statistical background to the
# genes present in the input table.
genes = list(sig_delta_corr.Gene)
pathway_enrichment = gp.profile(organism='hsapiens', query=genes, no_iea=True,
                                ordered=True, no_evidences=False,
                                background=background_genes)
#pathway_enrichment.to_csv('combinded_GO_pathway_enrichment_df.csv', index = False)

In [6]:
# Display the enrichment results produced by gp.profile in the previous cell.
pathway_enrichment

Unnamed: 0,source,native,name,p_value,significant,description,term_size,query_size,intersection_size,effective_domain_size,precision,recall,query,parents,intersections,evidences
0,HPA,HPA:0600222,tonsil; squamous epithelial cells[≥Medium],7.849803e-20,True,tonsil; squamous epithelial cells[≥Medium],4331,2457,1178,12415,0.479446,0.271993,query_1,[HPA:0600221],"[PRC1, NECTIN4, NCAPG2, FLAD1, CIP2A, DHX36, S...","[[Enhanced], [Enhanced], [Approved], [Approved..."
1,HPA,HPA:0310431,lymph node; germinal center cells[≥Low],1.837034e-18,True,lymph node; germinal center cells[≥Low],4412,3096,1466,12415,0.473514,0.332276,query_1,[HPA:0310000],"[IGF2BP3, PRC1, NCAPG2, FLAD1, CIP2A, DHX36, S...","[[Enhanced], [Enhanced], [Approved], [Approved..."
2,HPA,HPA:0610832,urinary bladder; urothelial cells[≥Medium],3.859159e-16,True,urinary bladder; urothelial cells[≥Medium],5175,2467,1361,12415,0.551682,0.262995,query_1,[HPA:0610831],"[PRC1, NECTIN4, NCAPG2, FLAD1, CIP2A, DHX36, S...","[[Enhanced], [Enhanced], [Approved], [Approved..."
3,HPA,HPA:0170051,endometrium 2; glandular cells[≥Low],7.165793e-16,True,endometrium 2; glandular cells[≥Low],5764,3317,1983,12415,0.597829,0.344032,query_1,[HPA:0170000],"[PRC1, NECTIN4, NCAPG2, FLAD1, CIP2A, DHX36, S...","[[Enhanced], [Enhanced], [Approved], [Approved..."
4,HPA,HPA:0470682,skin 2; epidermal cells[≥Medium],1.545091e-15,True,skin 2; epidermal cells[≥Medium],4196,2440,1127,12415,0.461885,0.268589,query_1,[HPA:0470681],"[PRC1, NECTIN4, NCAPG2, FLAD1, CIP2A, DHX36, S...","[[Enhanced], [Enhanced], [Approved], [Approved..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,REAC,REAC:R-HSA-453279,Mitotic G1 phase and G1/S transition,4.913412e-02,True,Mitotic G1 phase and G1/S transition,132,548,18,12415,0.032847,0.136364,query_1,[REAC:R-HSA-69278],"[CDKN2A, TYMS, PPP2R2A, CDK1, CCNA2, PCNA, TK1...","[[REAC], [REAC], [REAC], [REAC], [REAC], [REAC..."
396,CORUM,CORUM:353,DNA ligase IV-condensin complex,4.925396e-02,True,DNA ligase IV-condensin complex,3,169,2,12415,0.011834,0.666667,query_1,[CORUM:0000000],"[SMC2, SMC4]","[[CORUM], [CORUM]]"
397,HP,HP:0006721,Acute lymphoblastic leukemia,4.960672e-02,True,A form of acute leukemia characterized by exce...,27,146,5,12415,0.034247,0.185185,query_1,[HP:0002488],"[CDKN2A, TRIP13, TP53, NSUN2, BLM]","[[HP], [HP], [HP], [HP], [HP]]"
398,REAC,REAC:R-HSA-446107,Type I hemidesmosome assembly,4.963203e-02,True,Type I hemidesmosome assembly,11,2990,9,12415,0.003010,0.818182,query_1,[REAC:R-HSA-446728],"[LAMC2, LAMB3, COL17A1, ITGB4, KRT5, ITGA6, DS...","[[REAC], [REAC], [REAC], [REAC], [REAC], [REAC..."


In [8]:
# Distinct values of the `source` column of the enrichment results, in order of first appearance.
pathway_enrichment['source'].unique()

array(['HPA', 'GO:BP', 'REAC', 'MIRNA', 'WP', 'KEGG', 'GO:CC', 'CORUM',
       'GO:MF', 'HP'], dtype=object)