In [1]:
import cptac
import numpy as np
import pandas as pd
import statsmodels.stats.multitest as ssm

In [20]:
df = pd.read_csv('data/regressions.csv')
df

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,gene,cancer,interaction_coeff,condition_coeff,transcript_coeff,intercept,interaction_pval,condition_pval,transcript_pval,intercept_pval
0,0,0,A1BG,ccrcc,-0.399310,0.683426,0.377849,-0.462140,0.030269,0.000267,1.543240e-02,5.284138e-03
1,1,1,A1CF,ccrcc,0.009264,-0.282587,0.073218,-0.563416,0.483928,0.007715,1.334367e-20,3.375975e-10
2,2,2,A2M,ccrcc,-0.001738,1.059763,0.001776,-0.899006,0.102154,0.000248,2.443435e-02,6.862382e-05
3,3,3,A4GALT,ccrcc,-0.069014,1.186499,0.074956,-0.824329,0.787282,0.572700,5.662479e-01,5.581218e-01
4,4,4,AAAS,ccrcc,-0.005048,0.253459,0.008609,-0.148476,0.712627,0.094165,3.084530e-01,2.857311e-01
...,...,...,...,...,...,...,...,...,...,...,...,...
54098,10066,10066,ZXDC,luad,-0.146323,0.889409,0.886971,-3.806726,0.927943,0.870375,4.851543e-01,4.178277e-01
54099,10067,10067,ZYG11B,luad,1.424677,-5.791951,-0.529182,2.529373,0.000585,0.000112,2.167428e-01,8.937205e-02
54100,10068,10068,ZYX,luad,0.714921,-5.450918,-0.123800,2.023549,0.017024,0.000068,7.126818e-01,1.239435e-01
54101,10069,10069,ZZEF1,luad,0.596868,-2.881060,0.511473,-1.305858,0.077011,0.022896,1.157814e-01,3.362924e-01


In [21]:
background_genes = list(pd.unique(df.gene))
len(background_genes)

13457

In [22]:
sig_df = df[df.interaction_pval <= 0.05]
sig_df

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,gene,cancer,interaction_coeff,condition_coeff,transcript_coeff,intercept,interaction_pval,condition_pval,transcript_pval,intercept_pval
0,0,0,A1BG,ccrcc,-0.399310,0.683426,0.377849,-0.462140,0.030269,0.000267,1.543240e-02,0.005284
10,10,10,AAK1,ccrcc,0.053062,-0.455378,0.032984,-0.009980,0.006332,0.000126,1.824412e-02,0.946774
11,11,11,AAMP,ccrcc,0.012601,-0.242299,0.006251,-0.403301,0.016747,0.330397,1.681417e-01,0.042514
14,14,14,AARS2,ccrcc,-0.083850,0.458319,0.152507,-1.035045,0.025599,0.094191,2.026622e-07,0.000002
24,24,24,ABCA3,ccrcc,-0.363050,1.321452,0.382745,-1.437112,0.014050,0.021521,3.249287e-03,0.008090
...,...,...,...,...,...,...,...,...,...,...,...,...
54096,10064,10064,ZWILCH,luad,3.158582,-4.319546,-0.801456,-0.265192,0.000018,0.000002,3.294543e-01,0.808117
54097,10065,10065,ZWINT,luad,0.875887,-2.358641,0.188274,-0.835616,0.007289,0.000877,5.507945e-01,0.001246
54099,10067,10067,ZYG11B,luad,1.424677,-5.791951,-0.529182,2.529373,0.000585,0.000112,2.167428e-01,0.089372
54100,10068,10068,ZYX,luad,0.714921,-5.450918,-0.123800,2.023549,0.017024,0.000068,7.126818e-01,0.123943


In [24]:
sig_genes = pd.DataFrame(sig_df.groupby('gene').size())
sig_genes = sig_genes[sig_genes[0] > 2]
sig_genes = list(sig_genes.index)
rows = []
for gene in sig_genes:
    row = dict()
    row['Gene'] = gene
    gene_df = sig_df[sig_df.gene == gene]
    avg_pval = np.mean(gene_df.interaction_pval)
    avg_interaction = np.mean(gene_df.interaction_coeff)
    abs_interaction = abs(avg_interaction)
    row['avg_pval'] = avg_pval
    row['avg_interaction_coeff'] = avg_interaction
    row['abs_interaction_coeff'] = abs_interaction
    rows.append(row)
sig_genes = pd.DataFrame(rows)
sig_genes

Unnamed: 0,Gene,avg_pval,avg_interaction_coeff,abs_interaction_coeff
0,AAK1,0.005286,0.453549,0.453549
1,AARS2,0.015897,0.719068,0.719068
2,AASDHPPT,0.001367,0.850032,0.850032
3,AASS,0.016457,1.079797,1.079797
4,ABCB10,0.013928,1.149501,1.149501
...,...,...,...,...
1334,ZNF638,0.002508,1.012607,1.012607
1335,ZNF687,0.014917,0.797145,0.797145
1336,ZNFX1,0.005884,0.388419,0.388419
1337,ZNHIT2,0.000316,0.756881,0.756881


In [6]:
sig_genes = sig_genes.sort_values('avg_pval')
pval_gene_list = list(sig_genes.Gene)
sig_genes = sig_genes.sort_values('abs_interaction_coeff', ascending = False)
interaction_gene_list = list(sig_genes.Gene)
positive_interactions = sig_genes[sig_genes.avg_interaction_coeff >= 0].sort_values('avg_interaction_coeff', ascending = False)
positive_interactions_gene_list = list(positive_interactions.Gene)
negative_interactions = sig_genes[sig_genes.avg_interaction_coeff <= 0].sort_values('avg_interaction_coeff')
negative_interactions_gene_list = list(negative_interactions.Gene)

In [7]:
from gprofiler import GProfiler
gp = GProfiler(return_dataframe=True)

In [8]:
pval_results_df = gp.profile(organism='hsapiens', query = pval_gene_list, no_iea=True,
                       ordered=True, no_evidences=False, background= background_genes)

In [9]:
interaction_results_df = gp.profile(organism='hsapiens', query = interaction_gene_list, no_iea=True,
                       ordered=True, no_evidences=False, background= background_genes)

In [10]:
down_reg_results_df = gp.profile(organism='hsapiens', query = negative_interactions_gene_list, no_iea=True,
                       ordered=True, no_evidences=False, background= background_genes)

In [11]:
up_reg_results_df = gp.profile(organism='hsapiens', query = positive_interactions_gene_list, no_iea=True,
                       ordered=True, no_evidences=False, background= background_genes)

In [12]:
pval_results_df = pval_results_df[pval_results_df.term_size >= 5]
pval_results_df = pval_results_df[pval_results_df.term_size <= 500]
pval_results_df = pval_results_df[pval_results_df.source == 'KEGG']
pval_results_df

Unnamed: 0,source,native,name,p_value,significant,description,term_size,query_size,intersection_size,effective_domain_size,precision,recall,query,parents,intersections,evidences
51,KEGG,KEGG:00520,Amino sugar and nucleotide sugar metabolism,0.00017,True,Amino sugar and nucleotide sugar metabolism,44,1309,17,13394,0.012987,0.386364,query_1,[KEGG:00000],"[UGDH, GFPT2, GFPT1, NPL, GMDS, NANP, UAP1, FP...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
52,KEGG,KEGG:01230,Biosynthesis of amino acids,0.000174,True,Biosynthesis of amino acids,73,1191,21,13394,0.017632,0.287671,query_1,[KEGG:00000],"[ALDOA, PYCR1, GOT1, BCAT1, PSPH, GPT2, SDS, S...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
57,KEGG,KEGG:00051,Fructose and mannose metabolism,0.00036,True,Fructose and mannose metabolism,32,1223,13,13394,0.01063,0.40625,query_1,[KEGG:00000],"[ALDOA, AKR1B10, SORD, FBP1, PFKFB2, GMDS, FPG...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
74,KEGG,KEGG:04115,p53 signaling pathway,0.001032,True,p53 signaling pathway,63,936,17,13394,0.018162,0.269841,query_1,[KEGG:00000],"[RRM2, SERPINE1, IGFBP3, CDK1, CASP9, STEAP3, ...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
78,KEGG,KEGG:00270,Cysteine and methionine metabolism,0.001401,True,Cysteine and methionine metabolism,47,1023,14,13394,0.013685,0.297872,query_1,[KEGG:00000],"[GOT1, BCAT1, AHCY, SDS, CTH, DNMT3A, MPST, IL...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
87,KEGG,KEGG:01200,Carbon metabolism,0.002215,True,Carbon metabolism,114,1223,26,13394,0.021259,0.22807,query_1,[KEGG:00000],"[ALDOA, GOT1, PSPH, GCSH, FBP1, GPT2, DLD, SDS...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
115,KEGG,KEGG:00250,"Alanine, aspartate and glutamate metabolism",0.004399,True,"Alanine, aspartate and glutamate metabolism",34,68,4,13394,0.058824,0.117647,query_1,[KEGG:00000],"[PPAT, GFPT2, GFPT1, GOT1]","[[KEGG], [KEGG], [KEGG], [KEGG]]"
164,KEGG,KEGG:04512,ECM-receptor interaction,0.017954,True,ECM-receptor interaction,82,687,14,13394,0.020378,0.170732,query_1,[KEGG:00000],"[ITGAV, THBS2, CD36, ITGA11, ITGA2, COL1A1, CO...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
167,KEGG,KEGG:00030,Pentose phosphate pathway,0.021203,True,Pentose phosphate pathway,29,1191,10,13394,0.008396,0.344828,query_1,[KEGG:00000],"[ALDOA, FBP1, RPE, RBKS, PFKP, G6PD, PGM1, TAL...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."


In [13]:
interaction_results_df = interaction_results_df[interaction_results_df.term_size >= 5]
interaction_results_df = interaction_results_df[interaction_results_df.term_size <= 500]
interaction_results_df = interaction_results_df[interaction_results_df.source == 'KEGG']
interaction_results_df

Unnamed: 0,source,native,name,p_value,significant,description,term_size,query_size,intersection_size,effective_domain_size,precision,recall,query,parents,intersections,evidences
112,KEGG,KEGG:04115,p53 signaling pathway,0.000133,True,p53 signaling pathway,63,999,19,13394,0.019019,0.301587,query_1,[KEGG:00000],"[CDKN2A, CDK1, CCNB1, CHEK1, SERPINE1, GTSE1, ...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
116,KEGG,KEGG:01230,Biosynthesis of amino acids,0.000149,True,Biosynthesis of amino acids,73,1077,20,13394,0.01857,0.273973,query_1,[KEGG:00000],"[RPIA, GPT2, PGK1, CTH, PSPH, TALDO1, PYCR1, R...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
122,KEGG,KEGG:00520,Amino sugar and nucleotide sugar metabolism,0.000193,True,Amino sugar and nucleotide sugar metabolism,44,1321,17,13394,0.012869,0.386364,query_1,[KEGG:00000],"[UGDH, UAP1, GFPT1, UXS1, NANP, GFPT2, GNPNAT1...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
137,KEGG,KEGG:00051,Fructose and mannose metabolism,0.000307,True,Fructose and mannose metabolism,32,1206,13,13394,0.010779,0.40625,query_1,[KEGG:00000],"[PMM2, TKFC, AKR1B10, GMDS, SORD, PFKP, PFKL, ...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
162,KEGG,KEGG:00270,Cysteine and methionine metabolism,0.001865,True,Cysteine and methionine metabolism,47,1049,14,13394,0.013346,0.297872,query_1,[KEGG:00000],"[DNMT3A, AHCYL1, CTH, DNMT1, AHCY, PSAT1, GCLC...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
187,KEGG,KEGG:01200,Carbon metabolism,0.007408,True,Carbon metabolism,114,1234,25,13394,0.020259,0.219298,query_1,[KEGG:00000],"[RPIA, GPT2, PGK1, PSPH, TALDO1, RPE, GCSH, GL...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
193,KEGG,KEGG:04110,Cell cycle,0.008634,True,Cell cycle,113,896,21,13394,0.023438,0.185841,query_1,[KEGG:00000],"[CDKN2A, MCM6, CCNA2, PLK1, CDK1, CCNB1, CHEK1...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
198,KEGG,KEGG:00250,"Alanine, aspartate and glutamate metabolism",0.010255,True,"Alanine, aspartate and glutamate metabolism",34,1049,11,13394,0.010486,0.323529,query_1,[KEGG:00000],"[GPT2, ADSL, PPAT, GFPT1, GFPT2, GLUD1, GLS, A...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
249,KEGG,KEGG:05230,Central carbon metabolism in cancer,0.035433,True,Central carbon metabolism in cancer,66,934,14,13394,0.014989,0.212121,query_1,[KEGG:00000],"[SLC7A5, SLC1A5, MAP2K1, MET, GLS, PGAM1, SLC1...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
274,KEGG,KEGG:00030,Pentose phosphate pathway,0.049612,True,Pentose phosphate pathway,29,1321,10,13394,0.00757,0.344828,query_1,[KEGG:00000],"[RPIA, RBKS, TALDO1, RPE, PFKP, PFKL, G6PD, AL...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."


In [14]:
down_reg_results_df = down_reg_results_df[down_reg_results_df.term_size >= 5]
down_reg_results_df = down_reg_results_df[down_reg_results_df.term_size <= 500]
down_reg_results_df = down_reg_results_df[down_reg_results_df.source == 'KEGG']
down_reg_results_df

Unnamed: 0,source,native,name,p_value,significant,description,term_size,query_size,intersection_size,effective_domain_size,precision,recall,query,parents,intersections,evidences
17,KEGG,KEGG:05412,Arrhythmogenic right ventricular cardiomyopathy,0.007987,True,Arrhythmogenic right ventricular cardiomyopathy,64,43,4,13394,0.093023,0.0625,query_1,[KEGG:00000],"[DMD, ITGA2B, ACTN2, DES]","[[KEGG], [KEGG], [KEGG], [KEGG]]"
18,KEGG,KEGG:04979,Cholesterol metabolism,0.008074,True,Cholesterol metabolism,43,25,3,13394,0.12,0.069767,query_1,[KEGG:00000],"[CD36, CYP27A1, LIPA]","[[KEGG], [KEGG], [KEGG]]"
24,KEGG,KEGG:05416,Viral myocarditis,0.013468,True,Viral myocarditis,55,26,3,13394,0.115385,0.054545,query_1,[KEGG:00000],"[CAV1, PRF1, DMD]","[[KEGG], [KEGG], [KEGG]]"
27,KEGG,KEGG:00010,Glycolysis / Gluconeogenesis,0.016691,True,Glycolysis / Gluconeogenesis,63,110,5,13394,0.045455,0.079365,query_1,[KEGG:00000],"[ALDH3B1, HK3, FBP1, ALDH3A1, PGM1]","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG]]"
28,KEGG,KEGG:05144,Malaria,0.018694,True,Malaria,37,84,4,13394,0.047619,0.108108,query_1,[KEGG:00000],"[CD36, HGF, HBB, PECAM1]","[[KEGG], [KEGG], [KEGG], [KEGG]]"
36,KEGG,KEGG:04145,Phagosome,0.032157,True,Phagosome,138,88,6,13394,0.068182,0.043478,query_1,[KEGG:00000],"[CD36, NCF4, NCF2, MRC1, HLA-DRB5, ATP6V0D1]","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG]]"
40,KEGG,KEGG:00520,Amino sugar and nucleotide sugar metabolism,0.048213,True,Amino sugar and nucleotide sugar metabolism,44,110,4,13394,0.036364,0.090909,query_1,[KEGG:00000],"[HK3, NPL, RENBP, PGM1]","[[KEGG], [KEGG], [KEGG], [KEGG]]"


In [15]:
up_reg_results_df = up_reg_results_df[up_reg_results_df.term_size >= 5]
up_reg_results_df = up_reg_results_df[up_reg_results_df.term_size <= 500]
up_reg_results_df = up_reg_results_df[up_reg_results_df.source == 'KEGG']
up_reg_results_df

Unnamed: 0,source,native,name,p_value,significant,description,term_size,query_size,intersection_size,effective_domain_size,precision,recall,query,parents,intersections,evidences
130,KEGG,KEGG:01230,Biosynthesis of amino acids,5.6e-05,True,Biosynthesis of amino acids,73,1014,20,13394,0.019724,0.273973,query_1,[KEGG:00000],"[RPIA, GPT2, PGK1, CTH, PSPH, TALDO1, PYCR1, R...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
154,KEGG,KEGG:04115,p53 signaling pathway,0.00025,True,p53 signaling pathway,63,942,18,13394,0.019108,0.285714,query_1,[KEGG:00000],"[CDK1, CCNB1, CHEK1, SERPINE1, GTSE1, SERPINB5...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
164,KEGG,KEGG:00270,Cysteine and methionine metabolism,0.000906,True,Cysteine and methionine metabolism,47,1133,15,13394,0.013239,0.319149,query_1,[KEGG:00000],"[DNMT3A, AHCYL1, CTH, DNMT1, AHCY, PSAT1, GCLC...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
197,KEGG,KEGG:00250,"Alanine, aspartate and glutamate metabolism",0.005803,True,"Alanine, aspartate and glutamate metabolism",34,988,11,13394,0.011134,0.323529,query_1,[KEGG:00000],"[GPT2, ADSL, PPAT, GFPT1, GFPT2, GLUD1, GLS, A...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
202,KEGG,KEGG:01200,Carbon metabolism,0.008034,True,Carbon metabolism,114,1014,22,13394,0.021696,0.192982,query_1,[KEGG:00000],"[RPIA, GPT2, PGK1, PSPH, TALDO1, RPE, GCSH, GL...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
205,KEGG,KEGG:00520,Amino sugar and nucleotide sugar metabolism,0.008471,True,Amino sugar and nucleotide sugar metabolism,44,918,12,13394,0.013072,0.272727,query_1,[KEGG:00000],"[UGDH, UAP1, GFPT1, UXS1, NANP, GFPT2, GNPNAT1...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
216,KEGG,KEGG:04110,Cell cycle,0.011806,True,Cell cycle,113,848,20,13394,0.023585,0.176991,query_1,[KEGG:00000],"[MCM6, CCNA2, PLK1, CDK1, CCNB1, CHEK1, MAD2L1...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
219,KEGG,KEGG:00051,Fructose and mannose metabolism,0.01331,True,Fructose and mannose metabolism,32,1014,10,13394,0.009862,0.3125,query_1,[KEGG:00000],"[PMM2, TKFC, AKR1B10, GMDS, SORD, PFKP, PFKL, ...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
235,KEGG,KEGG:05230,Central carbon metabolism in cancer,0.019568,True,Central carbon metabolism in cancer,66,884,14,13394,0.015837,0.212121,query_1,[KEGG:00000],"[SLC7A5, SLC1A5, MAP2K1, MET, GLS, PGAM1, SLC1...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."


Make KEGG Mapper scripts https://www.kegg.jp/kegg/tool/map_pathway2.html Search mode: organism-specific: hsa outside ID: UniProt

In [16]:
for index, row in pval_results_df.iterrows():
    pathway = row.description
    print('\n' + str(pathway))
    old_genes = (row.intersections)
    new_genes = list(gp.convert(query = old_genes, organism = 'hsapiens', target_namespace= 'uniprotswissprot_acc').converted)
    for gene, ID in zip(old_genes, new_genes):
        s = ID
        delta_corr = list(sig_genes[sig_genes.Gene == gene].avg_interaction_coeff)[0]
        if delta_corr > 0:
            s += ' green'
        else:
            s+= ' red'
        print(s)
        
    


Amino sugar and nucleotide sugar metabolism
O60701 green
O94808 green
Q06210 green
Q9BXD5 red
O60547 green
Q8TBE9 green
Q16222 green
O14772 green
Q96EK6 green
Q9UHQ9 green
Q16851 green
P36871 red
O15305 green
Q14376 green
P52790 red
Q8NBZ7 green
P51606 red

Biosynthesis of amino acids
P04075 green
P32322 green
P17174 green
P54687 green
P78330 green
Q8TD30 green
P20132 green
P34897 green
P32929 green
P18669 green
Q96AT9 green
P54886 green
Q9Y617 green
Q01813 green
P00505 green
P00558 green
P04424 green
P37837 green
P14618 green
P17858 green
P49247 green

Fructose and mannose metabolism
P04075 green
O60218 green
Q00796 green
P09467 red
O60825 green
O60547 green
O14772 green
Q01813 green
Q3LXA3 green
O15305 green
P15121 red
P17858 green
P52790 red

p53 signaling pathway
P31350 green
P05121 green
P17936 green
P06493 green
P55211 green
Q658P3 green
Q9NYZ3 green
P31947 green
O96017 green
P24941 green
O14757 green
Q53FA7 green
P36952 green
P99999 green
P14635 green
P60484 green
P07996 green


In [17]:
for index, row in interaction_results_df.iterrows():
    pathway = row.description
    print('\n' + str(pathway))
    old_genes = (row.intersections)
    new_genes = list(gp.convert(query = old_genes, organism = 'hsapiens', target_namespace= 'uniprotswissprot_acc').converted)
    for gene, ID in zip(old_genes, new_genes):
        s = ID
        delta_corr = list(sig_genes[sig_genes.Gene == gene].avg_interaction_coeff)[0]
        if delta_corr > 0:
            s += ' green'
        else:
            s+= ' red'
        print(s)
        
    


p53 signaling pathway
P42771 red
Q8N726 green
P06493 green
P14635 green
O14757 green
P05121 green
Q9NYZ3 green
P36952 green
P31947 green
P31350 green
O96017 green
P24941 green
P17936 green
Q658P3 green
P55211 green
P07996 green
P60484 green
Q53FA7 green
Q13315 green

Biosynthesis of amino acids
P49247 green
Q8TD30 green
P00558 green
P32929 green
P78330 green
P37837 green
P32322 green
Q96AT9 green
Q9Y617 green
P34897 green
P54886 green
P18669 green
Q01813 green
P04424 green
P17858 green
P14618 green
P17174 green
P20132 green
P00505 green
P04075 green

Amino sugar and nucleotide sugar metabolism
O60701 green
Q16222 green
Q06210 green
Q8NBZ7 green
Q8TBE9 green
O94808 green
Q96EK6 green
O15305 green
O60547 green
Q16851 green
O14772 green
Q14376 green
P52790 red
Q9BXD5 red
Q9UHQ9 green
P51606 red
P36871 red

Fructose and mannose metabolism
O15305 green
Q3LXA3 green
O60218 green
O60547 green
Q00796 green
Q01813 green
P17858 green
O14772 green
O60825 green
P04075 green
P52790 red
P09467 red


In [18]:
for index, row in up_reg_results_df.iterrows():
    pathway = row.description
    print('\n' + str(pathway))
    old_genes = (row.intersections)
    new_genes = list(gp.convert(query = old_genes, organism = 'hsapiens', target_namespace= 'uniprotswissprot_acc').converted)
    for gene, ID in zip(old_genes, new_genes):
        s = ID
        delta_corr = list(sig_genes[sig_genes.Gene == gene].avg_interaction_coeff)[0]
        if delta_corr > 0:
            s += ' green'
        else:
            s+= ' red'
        print(s)
        
    


Biosynthesis of amino acids
P49247 green
Q8TD30 green
P00558 green
P32929 green
P78330 green
P37837 green
P32322 green
Q96AT9 green
Q9Y617 green
P34897 green
P54886 green
P18669 green
Q01813 green
P04424 green
P17858 green
P14618 green
P17174 green
P20132 green
P00505 green
P04075 green

p53 signaling pathway
P06493 green
P14635 green
O14757 green
P05121 green
Q9NYZ3 green
P36952 green
P31947 green
P31350 green
O96017 green
P24941 green
P17936 green
Q658P3 green
P55211 green
P07996 green
P60484 green
Q53FA7 green
Q13315 green
P42574 green

Cysteine and methionine metabolism
Q9Y6K1 green
O43865 green
P32929 green
P26358 green
P23526 green
Q9Y617 green
P48506 green
P48507 green
P17174 green
Q96RQ9 green
Q16762 green
P20132 green
P25325 green
P00505 green
P54687 green

Alanine, aspartate and glutamate metabolism
Q8TD30 green
P30566 green
Q06203 green
Q06210 green
O94808 green
P00367 green
O94925 green
P04424 green
P17174 green
Q96RQ9 green
P00505 green

Carbon metabolism
P49247 green
Q8T

In [19]:
for index, row in down_reg_results_df.iterrows():
    pathway = row.description
    print('\n' + str(pathway))
    old_genes = (row.intersections)
    new_genes = list(gp.convert(query = old_genes, organism = 'hsapiens', target_namespace= 'uniprotswissprot_acc').converted)
    for gene, ID in zip(old_genes, new_genes):
        s = ID
        delta_corr = list(sig_genes[sig_genes.Gene == gene].avg_interaction_coeff)[0]
        if delta_corr > 0:
            s += ' green'
        else:
            s+= ' red'
        print(s)
        
    


Arrhythmogenic right ventricular cardiomyopathy
P11532 red
P08514 red
P35609 red
P17661 red

Cholesterol metabolism
P16671 red
Q02318 red
P38571 red

Viral myocarditis
Q03135 red
P14222 red
P11532 red

Glycolysis / Gluconeogenesis
P43353 red
P52790 red
P09467 red
P30838 red
P36871 red

Malaria
P16671 red
P14210 red
P68871 red
P16284 red

Phagosome
P16671 red
Q15080 red
P19878 red
P22897 red
Q30154 red
P61421 red

Amino sugar and nucleotide sugar metabolism
P52790 red
Q9BXD5 red
P51606 red
P36871 red


In [None]:
# look at differences between protein expression levels of tRNA synthase in tumor vs normal
# does it matter which way to do pathway analysis? 
# change cut off to 15 good sample points for each