In [1]:
import os
import pickle as pkl
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from functions import *
import cobra
from cobra.io import read_sbml_model,load_json_model
from cobra import Model, Reaction, Metabolite

In [2]:
# Inputs for the whole notebook (paths are relative to the notebook's directory):
#   model        - metabolic model read from JSON via cobra.io.load_json_model
#   proAB        - proteomics table; later cells read its 'ID' column and
#                  '<condition>-A<replicate>' value columns
#   uniprot_info - tab-separated UniProt export; later cells read its
#                  'Gene Names (ordered locus)' and 'Entry' columns
model = load_json_model('../models/LP_WCFS1.json')
proAB = pd.read_csv('../data/Proteomics/Proteomics_AB.csv')
uniprot_info = pd.read_csv('../data/Proteomics/uniprotkb_taxonomy_id_220668.tsv',sep='\t')

# Map expression to reactions
* Central carbon metabolism
* Purine and pyrimidine metabolism

In [3]:
# Reaction IDs to look up in the model (via model.reactions.get_by_id).
# NOTE(review): the group labels below are the reviewer's reading of the
# BiGG-style IDs and of the original row layout -- confirm before relying on them.
rxns = [
    # sugar uptake and sugar-phosphate / nucleotide-sugar interconversion (presumably)
    'GLCpts', 'PGMT', 'GALU', 'UDPG4E', 'MAN6PI', 'PMANM',
    # glycolysis (presumably)
    'PGI', 'PFK', 'FBA', 'TPI', 'GAPD', 'PGK', 'PGM', 'ENO', 'PYK',
    # pyruvate dissipation (presumably)
    'PDH', 'PTAr', 'ACKr', 'PFL', 'LDH_D', 'LDH_L',
    # fatty-acid synthesis entry steps (presumably)
    'ACCOAC', 'MACPMT', 'kaasIII',
    # pentose phosphate pathway (presumably)
    'G6PDH2r', 'PGL', 'GND', 'RPE', 'RPI', 'TKT1', 'TKT2', 'TALA',
]

In [15]:
def locus2id(locus, uniprot_info):
    """Translate an ordered-locus gene name into its UniProt entry.

    Parameters
    ----------
    locus : str
        Value compared for exact equality with the
        'Gene Names (ordered locus)' column.
    uniprot_info : pandas.DataFrame
        Table providing the 'Gene Names (ordered locus)' and 'Entry' columns.

    Returns
    -------
    The 'Entry' value of the first row whose locus column equals ``locus``.

    Raises
    ------
    IndexError
        If no row matches ``locus``.
    """
    is_match = uniprot_info['Gene Names (ordered locus)'] == locus
    entries = uniprot_info.loc[is_match, 'Entry']
    # Indexing the plain list keeps the IndexError on "no match".
    return entries.tolist()[0]
    
    
def get_expr(gene, table, cond):
    """Average the three '-A' replicate values of one protein in one condition.

    Parameters
    ----------
    gene : hashable
        Identifier looked up in ``table['ID']``.
    table : pandas.DataFrame
        Table with an 'ID' column and value columns named
        '<cond>-A1', '<cond>-A2', '<cond>-A3'.
    cond : str
        Condition prefix used to build the replicate column names.

    Returns
    -------
    0 when ``gene`` is not present in ``table['ID']``; otherwise the mean of
    the three replicate values taken from the first matching row.
    """
    known_ids = list(table['ID'])
    if gene not in known_ids:
        return 0

    matching_rows = table[table['ID'] == gene]
    replicate_values = []
    for replicate in range(1, 4):
        column = cond + '-A' + str(replicate)
        # Only the first matching row contributes, even if the ID is duplicated.
        replicate_values.append(matching_rows[column].iloc[0])
    return sum(replicate_values) / 3

def expr_or( gene_list, uniprot_info, expr_table, cond ):
    """Sum the abundances of every gene in an 'or' group.

    Each entry of ``gene_list`` is stripped of surrounding whitespace, mapped
    to an ID with ``locus2id`` and then to a value with ``get_expr``; the
    values are added up starting from 0.

    Parameters
    ----------
    gene_list : iterable of str
        Locus tags (surrounding whitespace allowed).
    uniprot_info, expr_table, cond
        Passed through unchanged to ``locus2id`` / ``get_expr``.

    Returns
    -------
    The sum of the per-gene values (0 for an empty ``gene_list``).
    """
    # Resolve every locus first so a lookup failure surfaces before any value is read.
    protein_ids = []
    for locus in gene_list:
        protein_ids.append(locus2id(locus.strip(), uniprot_info))

    total = 0
    for protein_id in protein_ids:
        total = total + get_expr(protein_id, expr_table, cond)
    return total

def expr_and( gene_list, uniprot_info, expr_table, cond ):
    """Return the smallest abundance among the genes of an 'and' group.

    Each entry of ``gene_list`` is stripped of surrounding whitespace, mapped
    to an ID with ``locus2id`` and then to a value with ``get_expr``; the
    minimum of those values is returned.

    Parameters
    ----------
    gene_list : iterable of str
        Locus tags (surrounding whitespace allowed).
    uniprot_info, expr_table, cond
        Passed through unchanged to ``locus2id`` / ``get_expr``.

    Raises
    ------
    ValueError
        If ``gene_list`` is empty (``min`` of nothing).
    """
    # Materialise the ID list so every locus is resolved before any value is read.
    protein_ids = list(map(lambda locus: locus2id(locus.strip(), uniprot_info), gene_list))
    return min(map(lambda protein_id: get_expr(protein_id, expr_table, cond), protein_ids))

def get_rxn_expr( gpr, uniprot_info, expr_table, cond):
    """Reduce a gene-reaction rule (GPR) string to a single abundance value.

    The rule is evaluated as a boolean expression over locus tags with the
    usual precedence: 'and' binds tighter than 'or', parentheses group.
    Each locus tag is resolved with ``locus2id`` and ``get_expr``; 'or'
    operands are summed and 'and' operands are reduced with ``min`` -- the
    same aggregation ``expr_or`` / ``expr_and`` apply to flat gene lists.

    Compared with plain substring splitting this
      * matches 'and' / 'or' only as whole tokens, so locus tags that merely
        contain those letters are left intact,
      * evaluates every 'or' clause rather than only the first two,
      * handles arbitrarily nested parentheses, e.g. '(a or b) and c'.

    Parameters
    ----------
    gpr : str
        Rule such as 'a or b', 'a and b' or '(a and b) or (c and d)'.
        Operators must be lowercase.
    uniprot_info, expr_table, cond
        Passed through unchanged to ``locus2id`` / ``get_expr``.

    Returns
    -------
    The abundance value obtained by evaluating the rule.

    Raises
    ------
    ValueError
        If ``gpr`` is empty or syntactically malformed (unbalanced
        parentheses, dangling operator, two operands without an operator).
    Any exception raised by ``locus2id`` / ``get_expr`` for an individual
    locus propagates unchanged.
    """
    # Pad parentheses so a whitespace split yields clean, whole tokens.
    tokens = gpr.replace('(', ' ( ').replace(')', ' ) ').split()
    if not tokens:
        raise ValueError('empty gene-reaction rule')
    pos = 0

    def parse_or():
        # or_expr := and_expr ('or' and_expr)*   -> sum of operands
        nonlocal pos
        terms = [parse_and()]
        while pos < len(tokens) and tokens[pos] == 'or':
            pos += 1
            terms.append(parse_and())
        return sum(terms)

    def parse_and():
        # and_expr := atom ('and' atom)*         -> minimum of operands
        nonlocal pos
        factors = [parse_atom()]
        while pos < len(tokens) and tokens[pos] == 'and':
            pos += 1
            factors.append(parse_atom())
        return min(factors)

    def parse_atom():
        # atom := '(' or_expr ')' | locus_tag
        nonlocal pos
        if pos >= len(tokens):
            raise ValueError('unexpected end of gene-reaction rule: %r' % gpr)
        token = tokens[pos]
        pos += 1
        if token == '(':
            value = parse_or()
            if pos >= len(tokens) or tokens[pos] != ')':
                raise ValueError('unbalanced parentheses in gene-reaction rule: %r' % gpr)
            pos += 1
            return value
        if token in (')', 'and', 'or'):
            raise ValueError('unexpected %r in gene-reaction rule: %r' % (token, gpr))
        return get_expr(locus2id(token, uniprot_info), expr_table, cond)

    expr = parse_or()
    if pos != len(tokens):
        # Leftover tokens mean e.g. a stray ')' or two loci with no operator between them.
        raise ValueError('unexpected %r in gene-reaction rule: %r' % (tokens[pos], gpr))
    return expr

In [16]:
# Build one record per reaction: {'RXN': <id>, '<condition>': <value>, ...}.
data = []
for rxn_id in rxns:
    temp = {'RXN':rxn_id}
    # GPR string of this reaction as stored in the model.
    gpr = model.reactions.get_by_id( rxn_id ).gene_reaction_rule
    # Condition names double as the column prefixes get_expr looks up in proAB
    # ('<cond>-A1' ... '<cond>-A3').
    for cond in ['pH6.5','pH5.5','pH5','pH4.5']:
        expr = get_rxn_expr( gpr, uniprot_info, proAB, cond)
        temp[cond] = expr
    data.append(temp)

In [26]:
# One row per record in `data`.
# Assumes column order follows the dict insertion order of the records, i.e.
# column 0 is 'RXN' and columns 1-4 are the four pH conditions.
rxn_expr = pd.DataFrame(data)
# Index labels of rows whose values in columns 1-4 sum to more than 0.
idx_keep = rxn_expr.index[ rxn_expr[rxn_expr.columns[1:5]].sum(axis=1)>0 ]
# idx_keep holds labels but is used positionally with .iloc; this is only
# equivalent because the freshly built frame has the default 0..n-1 index.
rxn_expr = (rxn_expr.iloc[idx_keep]).reset_index().drop(['index'],axis=1)

In [34]:
# Add one 'Log2FC_<cond>vsRef' column per acidic condition: log2 of that
# condition's value divided by the 'pH6.5' value, row by row.
# NOTE(review): nothing in this cell guards against zeros. A zero in the acidic
# column gives -inf, a zero in the 'pH6.5' column gives +inf, and 0/0 gives NaN
# (each with a NumPy RuntimeWarning). Decide whether such rows should be masked
# or given a pseudocount before the result is exported.
for acid_cond in ['pH5.5','pH5','pH4.5']:
    rxn_expr[ 'Log2FC_'+ acid_cond + 'vsRef' ] = list( np.log2( rxn_expr[acid_cond]/rxn_expr['pH6.5'] ) )

In [37]:
# rxn_expr.to_csv('../data/Proteomics/RXN_lfc.csv',index=None)