In [1]:
import pandas as pd
import cobra
from collections import OrderedDict
from copy import deepcopy

from custom_functions_scRBA import *

#### Load data

In [2]:
# Genome-scale metabolic model (COBRApy JSON format)
model = cobra.io.load_json_model('./input/GSM_iSace1144_rba.json')

# Protein table, row-labelled by its own `id` column (the column itself is kept)
df_pro = pd.read_excel('./input/PROTEIN_stoich_curation.xlsx')
df_pro.index = df_pro['id'].tolist()

# Amino-acid map, row-labelled by amino-acid abbreviation
df_aamap = pd.read_csv('./input/PROTEIN_amino_acid_map.txt', sep='\t')
df_aamap.index = df_aamap['aa_abbv'].tolist()

# Dummy-protein worksheet, row-labelled by amino-acid abbreviation.
# 'Unnamed: 5' is the label pandas gives to a column whose header cell is empty.
# NOTE(review): rows 'A' and 'C' of that column are read as the dummy protein's
# median length and molecular weight respectively -- confirm against the workbook.
df_aa_dummy = pd.read_excel('./input/PROTEIN_dummy_prot_calc.xlsx')
df_aa_dummy.index = df_aa_dummy['aa_abbv'].tolist()
dummy_medianL = int(round(df_aa_dummy.loc['A', 'Unnamed: 5'], 0))
# Pad the rounded MW by 1e-5 g/mmol so that rounding cannot under-report the protein mass
dummy_MW = round(df_aa_dummy.loc['C', 'Unnamed: 5'], 5) + 1e-5

# Enzyme table (one row per reaction/enzyme pairing)
df_enz = pd.read_excel('./input/ENZYME_stoich_curation_2021-10-01.xlsx')

# RNA table, row-labelled by `RNAid`
df_rnas = pd.read_excel('./input/RNA_stoich.xlsx')
df_rnas.index = df_rnas['RNAid'].tolist()

# Ribosome composition tables; only rows whose `paralog` cell is empty are kept
df_ribo_nuc = pd.read_excel('./input/RIBOSOME_nucleus.xlsx')
df_ribo_nuc = df_ribo_nuc.loc[df_ribo_nuc['paralog'].isna()]

df_ribo_mit = pd.read_excel('./input/RIBOSOME_mitochondria.xlsx')
df_ribo_mit = df_ribo_mit.loc[df_ribo_mit['paralog'].isna()]

# Biomass reactions (sheet 'RBABioRxns')
df_biom = pd.read_excel('./input/BIOMASS_RBA_v1_2022-05-09.xlsx', sheet_name='RBABioRxns')

#### Assemble reactions

In [3]:
# Table of all RBA reactions. Rows are appended one at a time under the integer
# label `c`, which always holds the label of the most recently written row.
df_eqn = pd.DataFrame(columns=['id', 'type', 'coupling_type', 'coupling_species', 'reaction'])
c = -1  # no rows written yet

### Metabolic network reaction
# Exchange reactions: every model reaction whose id starts with 'EX_' is written
# as a pair of one-sided reactions (FWD and REV) on its first listed metabolite.
for rxn in model.reactions:
    if not rxn.id.startswith('EX_'):
        continue

    met = list(rxn.metabolites.keys())[0]

    # NOTE(review): the REV equation has no space between the arrow and the
    # metabolite, unlike the FWD form -- confirm the downstream parser accepts it.
    for suffix, equation in (
        ('_FWD-SPONT', 'MET-' + met.id + ' -->'),
        ('_REV-SPONT', '-->' + 'MET-' + met.id),
    ):
        c += 1
        new_id = 'RXN-' + rxn.id + suffix
        df_eqn.loc[c, 'id'] = new_id
        df_eqn.loc[c, 'type'] = 'metabolic'
        df_eqn.loc[c, 'reaction'] = equation
        

# Non-exchange reactions: one row per entry of the enzyme table, with the
# stoichiometry taken from the matching reaction of the metabolic model.
for rxn_id in df_enz['id']:
    _, rxn_base_id, rxn_dir, enz_id = extract_details_from_rxnid(rxn_id)

    # Exchange reactions are written by the loop over model.reactions instead
    if rxn_base_id.startswith('EX_'):
        continue

    c += 1
    rxn_base = model.reactions.get_by_id(rxn_base_id)

    # Parse the model equation, discard the empty-string key and tag every
    # species with the 'MET-' prefix
    parsed = metabolites_dict_from_reaction_equation_RBA(rxn_base.reaction)
    met_dict = {'MET-' + k: v for k, v in parsed.items() if k != ''}

    # A reverse-direction entry gets every coefficient negated; a forward one is
    # used as parsed. Any other tag is only reported and the row is still written.
    if rxn_dir == 'REV':
        met_dict = {k: -v for k, v in met_dict.items()}
    elif rxn_dir != 'FWD':
        print("Unknown ID that indicate reaction direction, only accepting 'FWD' and 'REV'")

    # Only reactions with a real enzyme (not 'SPONT' / 'UNKNOWN') are coupled to one
    if enz_id not in ('SPONT', 'UNKNOWN'):
        df_eqn.loc[c, 'coupling_type'] = 'rxn_enz'
        df_eqn.loc[c, 'coupling_species'] = enz_id

    df_eqn.loc[c, 'id'] = rxn_id
    df_eqn.loc[c, 'type'] = 'metabolic'
    df_eqn.loc[c, 'reaction'] = build_reaction_equation_from_metabolites_dict_RBA(met_dict, arrow='-->')
    
### Enzyme synthesis network reaction
# Map each enzyme to its protein-stoichiometry string. An enzyme listed on several
# rows keeps its first position and the value of its last row.
enz_stoich = OrderedDict(zip(df_enz['enz'], df_enz['protein_stoich']))

# ENZSYN-<enzyme>: consume the listed proteins, produce one unit of the enzyme.
# Enzymes flagged 'zeroCost' get no synthesis reaction.
c = len(df_eqn.index) - 1
for enz_id, prot_str in enz_stoich.items():
    if prot_str == 'zeroCost':
        continue

    c += 1
    # The stoichiometry string is a comma-separated list of '<protein>:<count>'
    coeffs = OrderedDict()
    for entry in prot_str.split(','):
        parts = entry.split(':')
        coeffs['PRO-' + parts[0]] = -int(parts[1])
    coeffs['ENZ-' + enz_id] = 1

    df_eqn.loc[c, 'id'] = 'ENZSYN-' + enz_id
    df_eqn.loc[c, 'type'] = 'enzyme'
    df_eqn.loc[c, 'reaction'] = build_reaction_equation_from_metabolites_dict_RBA(coeffs, arrow='-->')

# ENZLOAD-<reaction>: one enzyme-consuming reaction per enzyme-table row,
# again skipping 'zeroCost' rows.
for load_rxn_id, load_enz, load_stoich in zip(df_enz['id'], df_enz['enz'], df_enz['protein_stoich']):
    if load_stoich == 'zeroCost':
        continue

    c += 1
    coeffs = OrderedDict([('ENZ-' + load_enz, -1)])

    # The first four characters of the reaction id are dropped
    # (presumably the 'RXN-' prefix -- confirm against the enzyme table)
    df_eqn.loc[c, 'id'] = 'ENZLOAD-' + load_rxn_id[4:]
    df_eqn.loc[c, 'type'] = 'enzymeRxnLoad'
    df_eqn.loc[c, 'reaction'] = build_reaction_equation_from_metabolites_dict_RBA(coeffs, arrow='-->')
    
    
### Ribosome
# rRNA synthesis: one RIBOSYN-<rna> reaction per rRNA. It consumes one NTP per
# nucleotide, releases one ppi per nucleotide, and produces the RIBO-<rna>
# species plus its molecular weight as BIO-rrna.
rnas = ['rrna18s_c', 'rrna5s_c', 'rrna58s_c', 'rrna25s_c', 'rrna15s_m', 'rrna21s_m']
c = len(df_eqn.index) - 1
for rna in rnas:
    c += 1
    rna_stoich = OrderedDict()
    # Zero-coefficient placeholder entry for the rRNA metabolite itself
    rna_stoich['MET-' + rna] = 0
    for base, ntp in (('A', 'MET-atp_c'), ('C', 'MET-ctp_c'),
                      ('G', 'MET-gtp_c'), ('U', 'MET-utp_c')):
        rna_stoich[ntp] = -int(df_rnas.loc[rna, base])
    rna_stoich['MET-ppi_c'] = int(df_rnas.loc[rna, ['A', 'C', 'G', 'U']].sum())
    rna_stoich['RIBO-' + rna] = 1
    rna_stoich['BIO-rrna'] = df_rnas.loc[rna, 'MW (g/mmol)']

    df_eqn.loc[c, 'id'] = 'RIBOSYN-' + rna
    df_eqn.loc[c, 'type'] = 'ribosome'
    df_eqn.loc[c, 'reaction'] = build_reaction_equation_from_metabolites_dict_RBA(rna_stoich, arrow='-->')

# Ribosome assembly: one reaction per ribosome (nuclear, then mitochondrial) that
# consumes one unit of every listed component. Components named in the rRNA list
# are referenced as 'RIBO-' species, all others as 'PRO-' species.
for df_ribo, rnas, ribo_rxn_id in (
    (df_ribo_nuc, ['rrna18s_c', 'rrna5s_c', 'rrna58s_c', 'rrna25s_c'], 'RIBOSYN-ribonuc'),
    (df_ribo_mit, ['rrna15s_m', 'rrna21s_m'], 'RIBOSYN-ribomito'),
):
    c += 1
    ribo_stoich = OrderedDict()
    for comp_id in df_ribo['id']:
        prefix = 'RIBO-' if comp_id in rnas else 'PRO-'
        ribo_stoich[prefix + comp_id] = -1

    df_eqn.loc[c, 'id'] = ribo_rxn_id
    df_eqn.loc[c, 'type'] = 'ribosome'
    df_eqn.loc[c, 'reaction'] = build_reaction_equation_from_metabolites_dict_RBA(ribo_stoich, arrow='-->')

### Protein
# Proteins coupled to the mitochondrial ribosome; every other protein is coupled
# to the nuclear one.
prot_mitoexp = ['Q0045', 'Q0080', 'Q0085', 'Q0105', 'Q0140', 'Q0250', 'Q0275']

c = len(df_eqn.index) - 1
for i in df_pro.index:
    c += 1

    # Start every energy/water species at zero so the cost updates below can accumulate
    prot_st = OrderedDict.fromkeys(
        ['MET-atp_c', 'MET-h2o_c', 'MET-adp_c', 'MET-pi_c', 'MET-h_c', 'MET-gtp_c', 'MET-gdp_c'],
        0,
    )

    # Sequence without its last character (presumably a stop symbol -- confirm)
    seq = df_pro.sequence[i][:-1]

    # Each residue consumes its tRNA_in species and releases its tRNA_out species
    for aa in df_aamap.index:
        n_aa = seq.count(aa)
        prot_st[df_aamap.tRNA_in[aa]] = -n_aa
        prot_st[df_aamap.tRNA_out[aa]] = n_aa

    # Optional cofactors: comma-separated '<metabolite>:<count>' entries, all consumed
    cofs_str = df_pro.cofactor_stoich[i]
    if not pd.isnull(cofs_str):
        for entry in cofs_str.split(','):
            parts = entry.split(':')
            prot_st['MET-' + parts[0]] = -int(parts[1])

    prot_st['PRO-' + df_pro.id[i]] = 1

    # Couple the protein to the ribosome that translates it
    df_eqn.loc[c, 'coupling_type'] = 'prot_ribo'
    df_eqn.loc[c, 'coupling_species'] = 'ribomito' if df_pro.id[i] in prot_mitoexp else 'ribonuc'

    # Book the protein's MW under the mitochondrial or the cytosolic protein pool
    bio_key = 'BIO-protmito' if df_pro.subloc_assigned[i] in ['m', 'mm'] else 'BIO-protcyt'
    prot_st[bio_key] = df_pro.loc[i, 'MW (g/mmol)']

    # Cost: Initiation: 1 ATP + 2 GTP (initiate and bind Methionine)
    # Elongation: 2 GTP / cycle
    # NOTE(review): the code charges 1 ATP once and 2 GTP for every residue in
    # `seq` (first residue included) -- confirm this matches the intended
    # initiation/elongation accounting.
    n_gtp = 2 * len(seq)
    for met, delta in (
        ('MET-atp_c', -1), ('MET-h2o_c', -1),
        ('MET-adp_c', 1), ('MET-pi_c', 1), ('MET-h_c', 1),
        ('MET-gtp_c', -n_gtp), ('MET-h2o_c', -n_gtp),
        ('MET-gdp_c', n_gtp), ('MET-pi_c', n_gtp), ('MET-h_c', n_gtp),
    ):
        prot_st[met] += delta

    df_eqn.loc[c, 'id'] = 'PROSYN-' + df_pro.id[i]
    df_eqn.loc[c, 'type'] = 'protein'
    df_eqn.loc[c, 'reaction'] = build_reaction_equation_from_metabolites_dict_RBA(prot_st, arrow='-->')
    
### Dummy protein
# Synthesis reaction for the dummy protein: amino-acid usage comes from the N_AA
# column of the dummy-protein table, translation cost from `dummy_medianL`, and
# mass from `dummy_MW`. It is written in the row right after the last row
# label held in `c`.
# (An unused `seq` assignment that read the previous loop's leftover index `i`
# was removed: nothing in this section used it.)
prot_st = OrderedDict.fromkeys(
    ['MET-atp_c', 'MET-h2o_c', 'MET-adp_c', 'MET-pi_c', 'MET-h_c', 'MET-gtp_c', 'MET-gdp_c'],
    0,
)

# Amino-acid usage (rounded to 4 decimals): consume tRNA_in, release tRNA_out
for aa in df_aamap.index:
    n_aa = round(df_aa_dummy.N_AA[aa], 4)
    prot_st[df_aamap.tRNA_in[aa]] = -n_aa
    prot_st[df_aamap.tRNA_out[aa]] = n_aa

# One ATP hydrolysed once per protein
for met in ['MET-atp_c', 'MET-h2o_c']:
    prot_st[met] -= 1
for met in ['MET-adp_c', 'MET-pi_c', 'MET-h_c']:
    prot_st[met] += 1

# Two GTP hydrolysed per residue of the dummy length
for met in ['MET-gtp_c', 'MET-h2o_c']:
    prot_st[met] -= 2*dummy_medianL
for met in ['MET-gdp_c', 'MET-pi_c', 'MET-h_c']:
    prot_st[met] += 2*dummy_medianL

c += 1
prot_st['BIO-protdummy'] = dummy_MW
df_eqn.loc[c, 'id'] = 'PROSYN-PROTDUMMY'
df_eqn.loc[c, 'coupling_type'] = 'prot_ribo'
df_eqn.loc[c, 'coupling_species'] = 'ribonuc'
df_eqn.loc[c, 'type'] = 'protein'
# Coefficients here are non-integer, so the equation is built with floatdecimal=6
df_eqn.loc[c, 'reaction'] = build_reaction_equation_from_metabolites_dict_RBA(prot_st,
                                                        arrow='-->', floatdecimal=6)
# Protein waste reactions: PROWASTE-<protein> consumes one unit of the protein
# and produces its molecular weight as BIO-protwasted.
c = len(df_eqn.index) - 1
for i in df_pro.index:
    c += 1
    prot_id = df_pro.id[i]
    prot_st = OrderedDict([
        ('PRO-' + prot_id, -1),
        ('BIO-protwasted', df_pro.loc[i, 'MW (g/mmol)']),
    ])

    df_eqn.loc[c, 'id'] = 'PROWASTE-' + prot_id
    df_eqn.loc[c, 'type'] = 'proteinWaste'
    df_eqn.loc[c, 'reaction'] = build_reaction_equation_from_metabolites_dict_RBA(prot_st, arrow='-->')

# Pseudoreaction representing mass of waste protein (for kapps): PROWASTE-TOTALPROTEIN.
# It drains the BIO-protwasted pool filled by the reactions above.
c += 1
total_waste_eqn = build_reaction_equation_from_metabolites_dict_RBA({'BIO-protwasted':-1}, arrow='-->')
df_eqn.loc[c, 'id'] = 'PROWASTE-TOTALPROTEIN'
df_eqn.loc[c, 'type'] = 'proteinWaste'
df_eqn.loc[c, 'reaction'] = total_waste_eqn

### Biomass
# Biomass reactions are copied verbatim (id and equation) from the biomass sheet,
# continuing the row numbering right after the last row label held in `c`.
for c, i in enumerate(df_biom.index, start=c + 1):
    df_eqn.loc[c, 'id'] = df_biom.loc[i, 'rxn_id']
    df_eqn.loc[c, 'type'] = 'biomass'
    df_eqn.loc[c, 'reaction'] = df_biom.loc[i, 'rxn_equation']

#### Save Excel file

In [None]:
# Write the assembled reaction table to disk without the integer row labels
df_eqn.to_excel('./model/RBA_stoichiometry.xlsx', index=False)