In [1]:
import cptac
import numpy as np
import pandas as pd
import statsmodels.stats.multitest as ssm

In [2]:
perm_df = pd.read_csv("data/full_10k_permutation_corrected.csv")
perm_df

Unnamed: 0,Cancer,A1BG,A1CF,A2M,A2ML1,A4GALT,AAAS,AACS,AADAC,AADAT,...,ZSWIM9,ZW10,ZWILCH,ZWINT,ZXDA,ZXDC,ZYG11B,ZYX,ZZEF1,ZZZ3
0,ccrcc,0.146442,0.004995,0.095512,,0.532265,0.851988,0.1691592,0.545156,0.156638,...,,0.3098461,0.1010117,0.3231642,,0.104305,0.7044145,0.2069261,0.5672603,0.378694
1,en,0.883318,,0.953553,0.056286,0.543085,0.027277,1.438315e-07,,0.517563,...,0.769929,0.8608802,8.894419e-05,1.330422e-07,,0.491506,0.2125946,0.6985013,0.428312,0.383779
2,luad,0.801031,,0.019206,,,0.191032,5.371135e-35,0.037629,0.029363,...,,2.594465e-09,7.619458999999999e-19,4.915995e-05,,0.641317,7.220021e-10,2.2193e-07,2.915594e-14,4.3e-05
3,hnscc,0.998985,0.862365,0.381438,0.003371964,0.103999,0.046917,0.890075,0.001904,,...,,0.003904316,0.2797887,0.00744844,,0.012482,0.004719306,0.9340559,0.2842001,0.854269
4,lscc,0.183336,,0.133999,2.266058e-21,0.202066,0.014338,0.1012628,0.5223,,...,0.002938,1.262976e-12,2.357531e-49,5.104002e-32,,0.020118,0.01345127,1.587683e-08,6.037113e-06,0.030463


In [3]:
delta_corr_df = pd.read_csv('data/corr_diff.csv')
delta_corr_df

Unnamed: 0,Cancer,A1BG,A1CF,A2M,A2ML1,A4GALT,AAAS,AACS,AADAC,AADAT,...,ZSWIM9,ZW10,ZWILCH,ZWINT,ZXDA,ZXDC,ZYG11B,ZYX,ZZEF1,ZZZ3
0,ccrcc,-0.388745,0.146993,-0.277035,,-0.795957,-0.044291,-0.128218,1.196756,-0.174925,...,,0.269728,0.329727,-0.337301,,0.33747,0.109414,-0.223934,-0.138352,-0.220727
1,endometrial,0.061654,,-0.025535,0.40221,0.399061,0.663703,0.593777,,0.302879,...,0.177842,0.088061,0.759429,1.006282,,-0.267078,0.400272,0.147305,0.367791,0.342423
2,luad,-0.039149,,0.097233,,,0.206094,0.683092,-0.119746,0.630155,...,,0.557832,0.904669,0.645076,,0.087794,0.707991,0.472139,0.418589,0.661323
3,hnscc,-0.002651,0.407295,-0.194186,0.117838,0.555675,0.462864,0.024965,-0.333934,-0.294627,...,1.039965,0.395189,0.119698,1.079145,,0.591841,-0.453269,-0.010672,0.255505,-0.060708
4,lscc,0.196946,,-0.06263,0.649123,0.246831,0.304418,0.150665,-0.068258,,...,0.613891,0.491043,0.64643,0.838981,,0.33235,0.231167,0.315679,0.268239,0.446553


In [4]:
perm_df = perm_df.melt(id_vars='Cancer', var_name = 'Gene', value_name = 'pval')
delta_corr_df = delta_corr_df.melt(id_vars='Cancer', var_name='Gene', value_name='delta_corr')

In [5]:
df = pd.merge(perm_df, delta_corr_df)
df = df.dropna()
df

Unnamed: 0,Cancer,Gene,pval,delta_corr
0,ccrcc,A1BG,0.146442,-0.388745
1,luad,A1BG,0.801031,-0.039149
2,hnscc,A1BG,0.998985,-0.002651
3,lscc,A1BG,0.183336,0.196946
4,ccrcc,A1CF,0.004995,0.146993
...,...,...,...,...
54671,lscc,ZZEF1,0.000006,0.268239
54672,ccrcc,ZZZ3,0.378694,-0.220727
54673,luad,ZZZ3,0.000043,0.661323
54674,hnscc,ZZZ3,0.854269,-0.060708


In [6]:
background_genes = list(pd.unique(df.Gene))

In [7]:
sig_perm_df = df[df.pval <= 0.05]

In [8]:
sig_genes = pd.DataFrame(sig_perm_df.groupby('Gene').size())
sig_genes = sig_genes[sig_genes[0] > 2]
sig_genes = list(sig_genes.index)
avg_pvals = []
avg_corrs = []
abs_avg_corrs = []
for gene in sig_genes:
    gene_df = sig_perm_df[sig_perm_df.Gene==gene]
    avg_pval = np.average(gene_df.pval)
    avg_corr = np.average(gene_df.delta_corr)
    avg_corrs.append(avg_corr)
    avg_pvals.append(avg_pval)
    abs_avg_corrs.append(abs(avg_corr))
sig_genes = pd.DataFrame({'Gene': sig_genes, 'avg_pval': avg_pvals, 'avg_delta_corr': avg_corrs, 'abs_delta_corr': abs_avg_corrs})
sig_genes

Unnamed: 0,Gene,avg_pval,avg_delta_corr,abs_delta_corr
0,AAGAB,1.191913e-05,0.513239,0.513239
1,AAK1,2.187208e-09,0.539893,0.539893
2,AAMP,7.805338e-03,0.422332,0.422332
3,AARS,1.022777e-02,0.443220,0.443220
4,AARS2,1.766085e-05,0.563538,0.563538
...,...,...,...,...
3044,ZPR1,4.507710e-03,0.552338,0.552338
3045,ZSCAN18,9.995611e-03,0.461501,0.461501
3046,ZW10,1.301439e-03,0.481355,0.481355
3047,ZWINT,2.499200e-03,0.854401,0.854401


In [9]:
sig_genes = sig_genes.sort_values('avg_pval')
pval_ranked_gene_list = list(sig_genes.Gene)
sig_genes = sig_genes.sort_values('abs_delta_corr', ascending = False)
delta_corr_ranked_gene_list = list(sig_genes.Gene)
down_reg = sig_genes[sig_genes.avg_delta_corr < 0]
down_reg = down_reg.sort_values('avg_delta_corr')
down_reg_ranked_list = list(down_reg.Gene)

In [10]:
from gprofiler import GProfiler
gp = GProfiler(return_dataframe=True)

In [11]:
pval_results_df = gp.profile(organism='hsapiens', query = pval_ranked_gene_list, no_iea=True,
                       ordered=True, no_evidences=False, background= background_genes)

In [12]:
delta_corr_results_df = gp.profile(organism='hsapiens', query = delta_corr_ranked_gene_list, no_iea=True,
                       ordered=True, no_evidences=False, background= background_genes)

In [13]:
down_reg_results_df = gp.profile(organism='hsapiens', query = down_reg_ranked_list, no_iea=True,
                       ordered=True, no_evidences=False, background= background_genes)

In [14]:
pval_filtered_results = pval_results_df[pval_results_df.term_size <= 500]
pval_filtered_results = pval_filtered_results[pval_filtered_results.term_size >=5]
delta_corr_filtered_results = delta_corr_results_df[delta_corr_results_df.term_size <= 500]
delta_corr_filtered_results = delta_corr_filtered_results[delta_corr_filtered_results.term_size >=5]
down_reg_filtered_results = down_reg_results_df[down_reg_results_df.term_size <= 500]
down_reg_filtered_results = down_reg_filtered_results[down_reg_filtered_results.term_size >=5]

In [15]:
pval_kegg_results = pval_filtered_results[pval_filtered_results.source == 'KEGG']
pval_kegg_results

Unnamed: 0,source,native,name,p_value,significant,description,term_size,query_size,intersection_size,effective_domain_size,precision,recall,query,parents,intersections,evidences
219,KEGG,KEGG:00280,"Valine, leucine and isoleucine degradation",0.000101,True,"Valine, leucine and isoleucine degradation",48,2397,24,12939,0.010013,0.5,query_1,[KEGG:00000],"[ALDH6A1, OXCT1, ACAT1, IL4I1, ALDH7A1, MUT, D...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
232,KEGG,KEGG:01200,Carbon metabolism,0.000552,True,Carbon metabolism,112,2596,45,12939,0.017334,0.401786,query_1,[KEGG:00000],"[ALDH6A1, ALDOC, ACAT1, PKM, RPIA, PFKM, RGN, ...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
240,KEGG,KEGG:00240,Pyrimidine metabolism,0.001212,True,Pyrimidine metabolism,51,2738,27,12939,0.009861,0.529412,query_1,[KEGG:00000],"[CAD, ENTPD1, TK1, NME1, UPP1, TYMS, CMPK2, CA...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
255,KEGG,KEGG:01230,Biosynthesis of amino acids,0.003258,True,Biosynthesis of amino acids,72,2757,32,12939,0.011607,0.444444,query_1,[KEGG:00000],"[ALDOC, PKM, RPIA, PFKM, ASS1, PGAM1, PFKP, EN...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
260,KEGG,KEGG:01240,Biosynthesis of cofactors,0.004217,True,Biosynthesis of cofactors,136,2808,56,12939,0.019943,0.411765,query_1,[KEGG:00000],"[FLAD1, TPK1, GCLC, AK4, CAD, ALPL, PKM, NADK2...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
261,KEGG,KEGG:00270,Cysteine and methionine metabolism,0.004425,True,Cysteine and methionine metabolism,46,1564,17,12939,0.01087,0.369565,query_1,[KEGG:00000],"[GCLC, SMS, MTAP, IL4I1, TST, MPST, DNMT3A, LD...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
265,KEGG,KEGG:00920,Sulfur metabolism,0.005339,True,Sulfur metabolism,10,1618,7,12939,0.004326,0.7,query_1,[KEGG:00000],"[TST, MPST, PAPSS2, SUOX, SELENBP1, ETHE1, IMP...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
307,KEGG,KEGG:00010,Glycolysis / Gluconeogenesis,0.01844,True,Glycolysis / Gluconeogenesis,62,565,11,12939,0.019469,0.177419,query_1,[KEGG:00000],"[ALDOC, PKM, PFKM, ALDH7A1, PGAM1, PFKP, LDHB,...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
316,KEGG,KEGG:03030,DNA replication,0.022233,True,DNA replication,34,605,8,12939,0.013223,0.235294,query_1,[KEGG:00000],"[RFC5, PRIM1, MCM5, MCM6, POLA2, LIG1, POLD2, ...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
317,KEGG,KEGG:00640,Propanoate metabolism,0.022739,True,Propanoate metabolism,34,2725,17,12939,0.006239,0.5,query_1,[KEGG:00000],"[ALDH6A1, ACAT1, LDHB, LDHA, MUT, DBT, ABAT, A...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."


In [16]:
delta_corr_kegg_results = delta_corr_filtered_results[delta_corr_filtered_results.source == 'KEGG']
delta_corr_kegg_results

Unnamed: 0,source,native,name,p_value,significant,description,term_size,query_size,intersection_size,effective_domain_size,precision,recall,query,parents,intersections,evidences
236,KEGG,KEGG:03030,DNA replication,5e-06,True,DNA replication,34,1072,15,12939,0.013993,0.441176,query_1,[KEGG:00000],"[RFC5, PRIM1, MCM7, MCM5, MCM2, MCM4, RFC1, PO...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
239,KEGG,KEGG:03430,Mismatch repair,7e-06,True,Mismatch repair,22,1072,12,12939,0.011194,0.545455,query_1,[KEGG:00000],"[PMS2, RFC5, RFC1, MSH6, LIG1, RFC4, MSH3, RFC...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
258,KEGG,KEGG:04110,Cell cycle,2e-05,True,Cell cycle,111,433,18,12939,0.04157,0.162162,query_1,[KEGG:00000],"[CHEK1, CCNA2, PLK1, TP53, CHEK2, YWHAG, CCNB1...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
303,KEGG,KEGG:00280,"Valine, leucine and isoleucine degradation",0.00034,True,"Valine, leucine and isoleucine degradation",48,2964,26,12939,0.008772,0.541667,query_1,[KEGG:00000],"[OXCT1, IL4I1, ALDH7A1, ALDH3A2, ACAD8, HMGCS1...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
344,KEGG,KEGG:00240,Pyrimidine metabolism,0.00196,True,Pyrimidine metabolism,51,2632,26,12939,0.009878,0.509804,query_1,[KEGG:00000],"[TYMS, TK1, CAD, DTYMK, DCTPP1, UMPS, UCK2, CT...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
361,KEGG,KEGG:01230,Biosynthesis of amino acids,0.003144,True,Biosynthesis of amino acids,72,2890,33,12939,0.011419,0.458333,query_1,[KEGG:00000],"[PGK1, RPIA, CPS1, PGAM1, PSPH, PYCR2, ARG2, P...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
406,KEGG,KEGG:00970,Aminoacyl-tRNA biosynthesis,0.007731,True,Aminoacyl-tRNA biosynthesis,28,805,11,12939,0.013665,0.392857,query_1,[KEGG:00000],"[DARS2, TARS2, SEPSECS, HARS2, YARS2, WARS2, N...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
425,KEGG,KEGG:01240,Biosynthesis of cofactors,0.011269,True,Biosynthesis of cofactors,136,2975,57,12939,0.01916,0.419118,query_1,[KEGG:00000],"[FLAD1, CAD, NADK2, GCLC, HMBS, UMPS, UROD, UG...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
463,KEGG,KEGG:01200,Carbon metabolism,0.019601,True,Carbon metabolism,112,2975,45,12939,0.015126,0.401786,query_1,[KEGG:00000],"[PGK1, RPIA, CPS1, PGAM1, PSPH, GPI, TKFC, PFK...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
494,KEGG,KEGG:03013,RNA transport,0.031887,True,RNA transport,152,735,23,12939,0.031293,0.151316,query_1,[KEGG:00000],"[STRAP, GEMIN2, EIF5B, XPO5, PAIP1, CLNS1A, EI...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."


In [17]:
down_reg_kegg_results = down_reg_filtered_results[down_reg_filtered_results.source == 'KEGG']
down_reg_kegg_results

Unnamed: 0,source,native,name,p_value,significant,description,term_size,query_size,intersection_size,effective_domain_size,precision,recall,query,parents,intersections,evidences
4,KEGG,KEGG:00910,Nitrogen metabolism,0.009448,True,Nitrogen metabolism,14,13,2,12939,0.153846,0.142857,query_1,[KEGG:00000],"[CA4, CA3]","[[KEGG], [KEGG]]"
10,KEGG,KEGG:00190,Oxidative phosphorylation,0.029291,True,Oxidative phosphorylation,116,34,4,12939,0.117647,0.034483,query_1,[KEGG:00000],"[NDUFS4, NDUFS1, ATP6V1H, ATP6V1G1]","[[KEGG], [KEGG], [KEGG], [KEGG]]"
13,KEGG,KEGG:00130,Ubiquinone and other terpenoid-quinone biosynt...,0.034157,True,Ubiquinone and other terpenoid-quinone biosynt...,9,38,2,12939,0.052632,0.222222,query_1,[KEGG:00000],"[COQ5, COQ6]","[[KEGG], [KEGG]]"


Make KEGG Mapper scripts
https://www.kegg.jp/kegg/tool/map_pathway2.html
Search mode: 
    organism-specific: hsa
    outside ID: UniProt

In [18]:
for index, row in pval_kegg_results.iterrows():
    pathway = row.description
    print('\n' + str(pathway))
    old_genes = (row.intersections)
    new_genes = list(gp.convert(query = old_genes, organism = 'hsapiens', target_namespace= 'uniprotswissprot_acc').converted)
    for gene, ID in zip(old_genes, new_genes):
        s = ID
        delta_corr = list(sig_genes[sig_genes.Gene == gene].avg_delta_corr)[0]
        if delta_corr > 0:
            s += ' green'
        else:
            s+= ' red'
        print(s)
        
    


Valine, leucine and isoleucine degradation
Q02252 green
P55809 green
P24752 green
Q96RQ9 green
P49419 green
P22033 green
P11182 green
P26440 green
P80404 green
P16219 green
Q9UKU7 green
Q06278 green
P45954 green
Q6NVY1 green
P35914 green
P42765 green
P11310 green
Q96RQ3 green
P30084 green
P31937 green
P51648 green
Q01581 green
P05165 green
Q99714 green

Carbon metabolism
Q02252 green
P09972 green
P24752 green
P14618 green
P49247 green
P08237 green
Q15493 green
P18669 green
Q01813 green
P10768 green
P13929 green
Q8TD30 green
P60174 green
Q5T6J7 green
P78330 green
P22033 green
P36957 green
P11498 green
P17174 green
P11413 green
P16219 green
P00558 green
Q9P2R7 green
P34896 green
P48735 green
Q6NVY1 green
Q9Y617 green
P04075 green
P07954 green
P17858 green
Q3LXA3 green
P06744 green
P08559 green
P30084 green
O43175 green
P00367 green
P23378 green
P09467 red
P53597 red
P31327 green
P05165 green
P19367 green
P20132 green
O75390 green
P06733 green

Pyrimidine metabolism
P27708 green
P49961 g

In [21]:
cancer_list = [ccrcc,en,luad,hnscc,lscc]
cancer_names = ['ccrcc', 'en', 'luad', 'hnscc', 'lscc']

In [22]:
delta_corr_kegg_results

Unnamed: 0,source,native,name,p_value,significant,description,term_size,query_size,intersection_size,effective_domain_size,precision,recall,query,parents,intersections,evidences
236,KEGG,KEGG:03030,DNA replication,5e-06,True,DNA replication,34,1072,15,12939,0.013993,0.441176,query_1,[KEGG:00000],"[RFC5, PRIM1, MCM7, MCM5, MCM2, MCM4, RFC1, PO...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
239,KEGG,KEGG:03430,Mismatch repair,7e-06,True,Mismatch repair,22,1072,12,12939,0.011194,0.545455,query_1,[KEGG:00000],"[PMS2, RFC5, RFC1, MSH6, LIG1, RFC4, MSH3, RFC...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
258,KEGG,KEGG:04110,Cell cycle,2e-05,True,Cell cycle,111,433,18,12939,0.04157,0.162162,query_1,[KEGG:00000],"[CHEK1, CCNA2, PLK1, TP53, CHEK2, YWHAG, CCNB1...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
303,KEGG,KEGG:00280,"Valine, leucine and isoleucine degradation",0.00034,True,"Valine, leucine and isoleucine degradation",48,2964,26,12939,0.008772,0.541667,query_1,[KEGG:00000],"[OXCT1, IL4I1, ALDH7A1, ALDH3A2, ACAD8, HMGCS1...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
344,KEGG,KEGG:00240,Pyrimidine metabolism,0.00196,True,Pyrimidine metabolism,51,2632,26,12939,0.009878,0.509804,query_1,[KEGG:00000],"[TYMS, TK1, CAD, DTYMK, DCTPP1, UMPS, UCK2, CT...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
361,KEGG,KEGG:01230,Biosynthesis of amino acids,0.003144,True,Biosynthesis of amino acids,72,2890,33,12939,0.011419,0.458333,query_1,[KEGG:00000],"[PGK1, RPIA, CPS1, PGAM1, PSPH, PYCR2, ARG2, P...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
406,KEGG,KEGG:00970,Aminoacyl-tRNA biosynthesis,0.007731,True,Aminoacyl-tRNA biosynthesis,28,805,11,12939,0.013665,0.392857,query_1,[KEGG:00000],"[DARS2, TARS2, SEPSECS, HARS2, YARS2, WARS2, N...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
425,KEGG,KEGG:01240,Biosynthesis of cofactors,0.011269,True,Biosynthesis of cofactors,136,2975,57,12939,0.01916,0.419118,query_1,[KEGG:00000],"[FLAD1, CAD, NADK2, GCLC, HMBS, UMPS, UROD, UG...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
463,KEGG,KEGG:01200,Carbon metabolism,0.019601,True,Carbon metabolism,112,2975,45,12939,0.015126,0.401786,query_1,[KEGG:00000],"[PGK1, RPIA, CPS1, PGAM1, PSPH, GPI, TKFC, PFK...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."
494,KEGG,KEGG:03013,RNA transport,0.031887,True,RNA transport,152,735,23,12939,0.031293,0.151316,query_1,[KEGG:00000],"[STRAP, GEMIN2, EIF5B, XPO5, PAIP1, CLNS1A, EI...","[[KEGG], [KEGG], [KEGG], [KEGG], [KEGG], [KEGG..."


In [23]:

gene_ids = []
txt = ''
for index, row in delta_corr_kegg_results.iterrows():
    pathway = row.description
    
    old_genes = (row.intersections)
    print(old_genes)
    slopes_df = rc.calculate_regression(cancer_list, old_genes, cancer_names)
    print('\n' + str(pathway))
    print(slopes_df)
    new_genes = list(gp.convert(query = old_genes, organism = 'hsapiens', target_namespace= 'uniprotswissprot_acc').converted)
    for gene, ID in zip(old_genes, new_genes):
        s = ID
        gene_df = slopes_df[slopes_df.gene == gene]
        avg_slope = np.mean(gene_df.interaction_coeff)
        if avg_slope > 0:
            s += ' green'
        else:
            s+= ' red'
        txt += s + '\n'
        
    

['RFC5', 'PRIM1', 'MCM7', 'MCM5', 'MCM2', 'MCM4', 'RFC1', 'POLA2', 'LIG1', 'RFC4', 'MCM6', 'RFC3', 'SSBP1', 'POLD2', 'POLD3']






ccrcc
RFC5
PRIM1
MCM7
MCM5
MCM2
MCM4
RFC1
POLA2
LIG1
RFC4
MCM6
RFC3
SSBP1
POLD2
POLD3
en
RFC5
PRIM1
MCM7
MCM5
MCM2
MCM4
RFC1
POLA2
LIG1
RFC4
MCM6
RFC3
SSBP1
POLD2
POLD3
luad
RFC5
PRIM1
MCM7
MCM5
MCM2
MCM4
RFC1
POLA2
LIG1
RFC4
MCM6
RFC3
SSBP1
POLD2
POLD3
hnscc
RFC5
PRIM1
MCM7
MCM5
MCM2
MCM4
RFC1
POLA2
LIG1
RFC4
MCM6
RFC3
SSBP1
POLD2
POLD3
lscc
RFC5
PRIM1
MCM7
MCM5
MCM2
MCM4
RFC1
POLA2
LIG1
RFC4
MCM6
RFC3
SSBP1
POLD2
POLD3

DNA replication
     gene cancer  interaction_coeff  condition_coeff  transcript_coeff  \
0    RFC5  ccrcc          -0.043390         0.446613          0.061585   
1   PRIM1  ccrcc           0.036482         0.411458         -0.001128   
2    MCM7  ccrcc          -0.015694         1.019508          0.045208   
3    MCM5  ccrcc          -0.045800         0.894918          0.090340   
4    MCM2  ccrcc          -0.039351         1.055137          0.088322   
..    ...    ...                ...              ...               ...   
10   MCM6   lscc           2.012947     





ccrcc
PMS2
RFC5
RFC1
MSH6
LIG1
RFC4
MSH3
RFC3
MSH2
SSBP1
POLD2
POLD3
en
PMS2
RFC5
RFC1
MSH6
LIG1
RFC4
MSH3
RFC3
MSH2
SSBP1
POLD2
POLD3
luad
PMS2
RFC5
RFC1
MSH6
LIG1
RFC4
MSH3
RFC3
MSH2
SSBP1
POLD2
POLD3
hnscc
PMS2
RFC5
RFC1
MSH6
LIG1
RFC4
MSH3
RFC3
MSH2
SSBP1
POLD2
POLD3
lscc
PMS2
RFC5
RFC1
MSH6
LIG1
RFC4
MSH3
RFC3
MSH2
SSBP1
POLD2
POLD3

Mismatch repair
     gene cancer  interaction_coeff  condition_coeff  transcript_coeff  \
0    PMS2  ccrcc           0.000794        -0.121777          0.024171   
1    RFC5  ccrcc          -0.043390         0.446613          0.061585   
2    RFC1  ccrcc           0.001244        -0.048388          0.013927   
3    MSH6  ccrcc          -0.061074         0.777337          0.106847   
4    LIG1  ccrcc          -0.129574         0.831057          0.181179   
5    RFC4  ccrcc          -0.068957         0.455625          0.087354   
6    MSH3  ccrcc           0.032595        -0.400533         -0.001073   
7    RFC3  ccrcc          -0.083975         0.32290

['CHEK1', 'CCNA2', 'PLK1', 'TP53', 'CHEK2', 'YWHAG', 'CCNB1', 'BUB1B', 'TFDP1', 'MCM7', 'MAD2L1', 'MCM5', 'MCM2', 'CDKN2A', 'MCM4', 'YWHAE', 'CDK1', 'EP300']






ValueError: Length mismatch: Expected axis has 3 elements, new values have 2 elements

In [None]:
gene_ids = []
for index, row in down_reg_kegg_results.iterrows():
    pathway = row.description
    print('\n' + str(pathway))
    old_genes = (row.intersections)
    new_genes = list(gp.convert(query = old_genes, organism = 'hsapiens', target_namespace= 'uniprotswissprot_acc').converted)
    for gene, ID in zip(old_genes, new_genes):
        s = ID
        delta_corr = list(sig_genes[sig_genes.Gene == gene].avg_delta_corr)[0]
        if delta_corr > 0:
            s += ' green'
        else:
            s+= ' red'
        print(s)
        
    

In [None]:
pval_results_df = gp.profile(organism='hsapiens', query = pval_ranked_gene_list, no_iea=True,
                       ordered=False, no_evidences=False, background= background_genes)
pval_filtered_results = pval_results_df[pval_results_df.term_size <= 500]
pval_filtered_results = pval_filtered_results[pval_filtered_results.term_size >=5]
pval_kegg_results = pval_filtered_results[pval_filtered_results.source == 'KEGG']
for index, row in pval_kegg_results.iterrows():
    pathway = row.description
    print('\n' + str(pathway))
    old_genes = (row.intersections)
    new_genes = list(gp.convert(query = old_genes, organism = 'hsapiens', target_namespace= 'uniprotswissprot_acc').converted)
    for gene, ID in zip(old_genes, new_genes):
        s = ID
        delta_corr = list(sig_genes[sig_genes.Gene == gene].avg_delta_corr)[0]
        if delta_corr > 0:
            s += ' green'
        else:
            s+= ' red'
        print(s)

In [None]:
pval_results_df = gp.profile(organism='hsapiens', query = pval_ranked_gene_list, no_iea=True,
                       ordered=False, no_evidences=False)
pval_filtered_results = pval_results_df[pval_results_df.term_size <= 500]
pval_filtered_results = pval_filtered_results[pval_filtered_results.term_size >=5]
pval_kegg_results = pval_filtered_results[pval_filtered_results.source == 'KEGG']
for index, row in pval_kegg_results.iterrows():
    pathway = row.description
    print('\n' + str(pathway))
    old_genes = (row.intersections)
    new_genes = list(gp.convert(query = old_genes, organism = 'hsapiens', target_namespace= 'uniprotswissprot_acc').converted)
    for gene, ID in zip(old_genes, new_genes):
        s = ID
        delta_corr = list(sig_genes[sig_genes.Gene == gene].avg_delta_corr)[0]
        if delta_corr > 0:
            s += ' green'
        else:
            s+= ' red'
        print(s)