In [13]:
import os
import pickle as pkl
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from functions import *
import cobra
from cobra.io import read_sbml_model,load_json_model
from cobra import Model, Reaction, Metabolite

In [14]:
# Metabolic model loaded from a JSON file; later cells query it via
# model.reactions.get_by_id(...).gene_reaction_rule.
model = load_json_model('../models/LP_WCFS1.json')
# Proteomics table; the helpers below read its 'Accession' and '<cond>-B<i>'
# columns, and a later cell reads 'Protein Name'.
proB = pd.read_excel('../data/Proteomics/Proteomics_B.xlsx')
# Tab-separated UniProt export; used below to map the
# 'Gene Names (ordered locus)' column to the 'Entry' column.
uniprot_info = pd.read_csv('../data/Proteomics/uniprotkb_taxonomy_id_220668.tsv',sep='\t')

# Map expression to reactions
* Central carbon metabolism
* Purine and pyrimidine

In [15]:
# Reaction IDs looked up in `model` for the first reaction set
# (presumably the "Central carbon metabolism" group named in the heading above
# -- confirm against the model).
cc_rxns = ['GLCpts','PGMT','GALU','UDPG4E','MAN6PI','PMANM',
       'PGI','PFK','FBA','TPI','GAPD','PGK','PGM','ENO','PYK',
       'PDH','PTAr','ACKr','PFL','LDH_D','LDH_L','ACCOAC','MACPMT','kaasIII',
       'G6PDH2r','PGL','GND','RPE','RPI','TKT1','TKT2','TALA']

# Reaction IDs looked up in `model` for the second reaction set.
# NOTE(review): despite the `pyr_` prefix, the heading above says
# "Purine and pyrimidine", so this list presumably covers both -- confirm.
pyr_rxns = ['ASPCT','DHORTS','DHORD6','ORPT','OMPDC','PRPPS',
           'GLUPRT','PRAGS','GARFT','PRFGS','PRAIS','AIRC','PRASCS','ADSL2',
           'AICART','IMPC','IMPD','GMPS2']

In [37]:
def locus2id(locus, uniprot_info):
    """Return the 'Entry' value of the first row whose ordered-locus name equals `locus`.

    Parameters
    ----------
    locus : str
        Value to match exactly against the 'Gene Names (ordered locus)' column.
    uniprot_info : pandas.DataFrame
        Table with 'Gene Names (ordered locus)' and 'Entry' columns.

    Returns
    -------
    The 'Entry' value of the first matching row.

    Raises
    ------
    IndexError
        If no row matches. The exception type is the same one the previous
        implementation raised, but the message now names the missing locus.
    """
    entries = uniprot_info.loc[
        uniprot_info['Gene Names (ordered locus)'] == locus, 'Entry'
    ]
    if entries.empty:
        raise IndexError(
            "locus {!r} not found in column 'Gene Names (ordered locus)'".format(locus)
        )
    return entries.iloc[0]
    
    
def get_expr(gene, table, cond, n_replicates=3):
    """Return the mean of the replicate columns of `cond` for one accession.

    Parameters
    ----------
    gene : str
        Value to match exactly against the 'Accession' column.
    table : pandas.DataFrame
        Table with an 'Accession' column and replicate columns named
        '<cond>-B1' ... '<cond>-B<n_replicates>'.
    cond : str
        Condition prefix of the replicate columns (e.g. 'pH5').
    n_replicates : int, optional
        Number of replicate columns to average; defaults to 3, which is the
        value that used to be hard-coded.

    Returns
    -------
    The arithmetic mean over the replicate columns of the first matching row,
    or 0 when `gene` does not occur in 'Accession'.
    """
    # One boolean mask replaces both the membership test on a freshly built
    # list and the reset_index/drop round trip.
    rows = table.loc[table['Accession'] == gene]
    if rows.empty:
        return 0
    first = rows.iloc[0]  # only the first matching row is used
    replicate_values = [first[cond + '-B' + str(i)] for i in range(1, n_replicates + 1)]
    return sum(replicate_values) / n_replicates

def expr_or( gene_list, uniprot_info, expr_table, cond ):
    """Sum the expression of every gene in `gene_list` for condition `cond`.

    Each entry of `gene_list` is whitespace-stripped, mapped through
    `locus2id`, and its expression is fetched with `get_expr`; the values are
    added together (an empty list yields 0).
    """
    total = 0
    for locus in gene_list:
        accession = locus2id(locus.strip(), uniprot_info)
        total += get_expr(accession, expr_table, cond)
    return total

def expr_and( gene_list, uniprot_info, expr_table, cond ):
    """Return the smallest expression among the genes in `gene_list` for `cond`.

    Each entry of `gene_list` is whitespace-stripped, mapped through
    `locus2id`, and its expression is fetched with `get_expr`. `min` raises
    ValueError when `gene_list` is empty.
    """
    levels = []
    for locus in gene_list:
        accession = locus2id(locus.strip(), uniprot_info)
        levels.append(get_expr(accession, expr_table, cond))
    return min(levels)

def get_rxn_expr( gpr, uniprot_info, expr_table, cond):
    """Evaluate a gene-reaction rule as a sum over OR-clauses of the AND-minimum.

    The rule is read as a flat "clause or clause or ..." expression in which
    each clause is "gene and gene and ...". Every clause is scored with
    `expr_and` (minimum over its genes) and the clause scores are added.

    Compared with the previous substring-based implementation:
      * 'and' / 'or' are recognised only as whole whitespace-separated tokens,
        so a gene identifier that merely contains those letters is not split;
      * any number of OR-clauses is summed (previously only the first two);
      * a parenthesised rule with only 'or' no longer hits an unassigned
        variable;
      * an empty rule returns 0 instead of failing inside the lookup.

    Limitation (unchanged): parentheses are ignored, so a nested rule such as
    "a and (b or c)" is evaluated as "(a and b) or c".

    Parameters
    ----------
    gpr : str
        Gene-reaction rule text.
    uniprot_info, expr_table, cond
        Forwarded unchanged to `expr_and`.

    Returns
    -------
    The summed clause scores, or 0 if the rule contains no gene tokens.
    """
    # Parentheses carry no information in the flat reading; turning them into
    # spaces also separates operators written without surrounding blanks.
    tokens = gpr.replace('(', ' ').replace(')', ' ').split()

    clauses = [[]]
    for token in tokens:
        operator = token.lower()
        if operator == 'or':
            clauses.append([])        # start the next OR-clause
        elif operator != 'and':
            clauses[-1].append(token)  # gene identifier

    # Drop empty clauses produced by stray operators (e.g. "a or or b").
    clauses = [clause for clause in clauses if clause]
    if not clauses:
        return 0

    return sum(expr_and(clause, uniprot_info, expr_table, cond) for clause in clauses)

In [22]:
# Build one record per reaction in cc_rxns: the reaction ID plus the
# rule-level expression for each pH condition, then collect them in a frame.
data = []
for rxn_id in cc_rxns:
    gpr = model.reactions.get_by_id(rxn_id).gene_reaction_rule
    row = {'RXN': rxn_id}
    row.update(
        (cond, get_rxn_expr(gpr, uniprot_info, proB, cond))
        for cond in ('pH6.5', 'pH5.5', 'pH5', 'pH4.5')
    )
    data.append(row)

cc_rxn_expr = pd.DataFrame(data)
# Disabled post-processing, kept as written: drop all-zero rows, add log2
# fold changes against pH6.5, and export to CSV.
# idx_keep = cc_rxn_expr.index[ cc_rxn_expr[cc_rxn_expr.columns[1:5]].sum(axis=1)>0 ]
# cc_rxn_expr = (cc_rxn_expr.iloc[idx_keep]).reset_index().drop(['index'],axis=1)

# for acid_cond in ['pH5.5','pH5','pH4.5']:
#     cc_rxn_expr[ 'Log2FC_'+ acid_cond + 'vsRef' ] = list( np.log2( cc_rxn_expr[acid_cond]/cc_rxn_expr['pH6.5'] ) )
# cc_rxn_expr.to_csv('../data/Proteomics/cc_RXN_lfc.csv',index=None)

In [41]:
# Manual check of the GLCpts rule at pH5: score the first two 'or'-separated
# parts of the rule separately with expr_and.
gpr = model.reactions.get_by_id('GLCpts').gene_reaction_rule
gpr_list = gpr.split('or')
expr_1, expr_2 = [
    expr_and(
        gpr_list[k].replace('(', '').replace(')', '').split('and'),
        uniprot_info, proB, 'pH5',
    )
    for k in (0, 1)
]

In [24]:
# Build one record per reaction in pyr_rxns (reaction ID + rule-level
# expression per pH condition), then add log2 fold changes against pH6.5.
#
# NOTE(review): this cell previously passed `proAB`, which is never defined in
# this notebook and raises NameError on a fresh kernel. `proB` is the only
# proteomics table loaded here and is what the cc_rxns cell uses -- confirm
# that no other table was intended.
data = []
for rxn_id in pyr_rxns:
    temp = {'RXN':rxn_id}
    gpr = model.reactions.get_by_id( rxn_id ).gene_reaction_rule
    for cond in ['pH6.5','pH5.5','pH5','pH4.5']:
        expr = get_rxn_expr( gpr, uniprot_info, proB, cond)
        temp[cond] = expr
    data.append(temp)
pyr_rxn_expr = pd.DataFrame(data)
for acid_cond in ['pH5.5','pH5','pH4.5']:
    # A reference value of 0 (get_expr returns 0 for accessions missing from
    # the table) yields inf/NaN here rather than an error.
    pyr_rxn_expr[ 'Log2FC_'+ acid_cond + 'vsRef' ] = list( np.log2( pyr_rxn_expr[acid_cond]/pyr_rxn_expr['pH6.5'] ) )
# pyr_rxn_expr.to_csv('../data/Proteomics/pyr_RXN_lfc.csv' ,index=None)

In [28]:
# Preview the first rows of pyr_rxn_expr (displayed as the cell's last expression).
pyr_rxn_expr.head()

Unnamed: 0,RXN,pH6.5,pH5.5,pH5,pH4.5,Log2FC_pH5.5vsRef,Log2FC_pH5vsRef,Log2FC_pH4.5vsRef
0,ASPCT,1.264764,1.577231,0.54419,0.514559,0.318526,-1.216686,-1.297461
1,DHORTS,1.050922,1.140222,0.427112,0.418967,0.117659,-1.298968,-1.326746
2,DHORD6,1.348214,1.393343,0.232157,0.20072,0.047501,-2.537874,-2.747795
3,ORPT,1.319486,1.388778,0.363979,0.366709,0.07384,-1.85805,-1.847269
4,OMPDC,1.63522,1.770384,0.815709,0.75471,0.114578,-1.003358,-1.115491


# RA relations

In [50]:
# Sector name -> list of protein identifiers. 'T' is filled here with four
# fixed identifiers; 'A' and 'EPS_syn' are filled by later cells; 'C' stays
# empty in this section.
sectors = {
    'C': [],
    'T': ['F9UST3', 'F9UR54', 'F9UN59', 'F9UN60'],
    'A': [],
    'EPS_syn': [],
}

In [11]:
# Fill sector 'A' with the accession of every proB row whose protein name
# mentions a ribosomal protein.
#
# The two columns are walked once in parallel instead of rebuilding
# list(proB[...]) for every row, non-string names (e.g. NaN) are skipped
# instead of raising TypeError, and the sector is assigned rather than
# appended to so that re-running the cell does not duplicate entries.
ribosomal_keywords = ('ribosomal subunit protein', 'ribosomal protein')
sectors['A'] = [
    accession.strip()
    for protein_name, accession in zip(proB['Protein Name'], proB['Accession'])
    if isinstance(protein_name, str)
    and any(keyword in protein_name for keyword in ribosomal_keywords)
]

In [51]:
# Parse a tab-separated BLAST result file into a DataFrame and store the
# distinct subject accessions as sector 'EPS_syn'.
column_names = ('query acc.ver, subject acc.ver, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score').split(', ')
data = []
# The context manager closes the file even if parsing fails part-way.
with open('../data/Genome_HMX2/eps_blast_uniprot.tsv', "rt") as ffile:
    for line in ffile:
        line = line.strip()
        # Skip blank lines (they would become rows of missing values) and
        # comment lines. Only a leading '#' marks a comment; a '#' inside a
        # data row no longer discards that row.
        if not line or line.startswith('#'):
            continue
        data.append(line.split('\t'))
blast_result = pd.DataFrame(data ,columns=column_names)
# Sorted so the sector has a reproducible order across runs; dropna guards
# against rows too short to have a subject column.
sectors['EPS_syn'] = sorted(set(blast_result['subject acc.ver'].dropna()))