In [1]:
import cobra,rba,lxml
from lxml import etree
import pandas as pd
from collections import OrderedDict

### Opening
The provided information is separated into two categories: processes and processing maps.

In [2]:
# Root of the processes document; it holds one container per category.
proET = etree.Element('RBAProcesses')

elems = ['listOfProcesses', 'listOfProcessingMaps']
for section_tag in elems:
    etree.SubElement(proET, section_tag)

### List of processes
#### Declare process entries

In [3]:
prolistET = proET.find('listOfProcesses')

# (id, name) of every process, in the order they are written to the file.
process_entries = [
    ('PRO-TRNSL', 'Translation process'),
    ('PRO-TRNSC', 'Transcription process'),
    ('PRO-REPL', 'Replication process'),
    ('PRO-RNADEG', 'RNA degradation process'),
]
for process_id, process_name in process_entries:
    att_dict = OrderedDict([('id', process_id), ('name', process_name)])
    etree.SubElement(prolistET, 'process', attrib=att_dict)

#### Translation

Declare entries

In [4]:
# Select the translation process and give it its two child containers.
ET = [proc for proc in prolistET if proc.attrib['id'] == 'PRO-TRNSL'][0]
elems = ['machinery', 'processings']
for child_tag in elems:
    etree.SubElement(ET, child_tag)

Machinery composition

In [5]:
ETmach = ET.find('machinery')
# The machinery holds its composition and a capacity that refers to
# 'ribosome_capacity' by name.
etree.SubElement(ETmach, 'machineryComposition')
etree.SubElement(ETmach, 'capacity', attrib={'value': 'ribosome_capacity'})

In [6]:
ETmachcomp = ETmach.find('machineryComposition')
# Keep only the machinery rows whose `paralog` column is empty.
mach_table = pd.read_excel('./input/translation_elongation_machinery.xlsx')
df_mach = mach_table[mach_table['paralog'].isna()]

In [7]:
# Container for the machinery components.
etree.SubElement(ETmachcomp, 'listOfReactants')

In [8]:
ETmcLOR = ETmachcomp.find('listOfReactants')
# One reference per remaining machinery row, each with stoichiometry 1.
for row_label in df_mach.index:
    att_dict = OrderedDict([
        ('species', 'MAC-' + df_mach.loc[row_label, 'id']),
        ('stoichiometry', '1'),
    ])
    etree.SubElement(ETmcLOR, 'speciesReference', attrib=att_dict)

Processings (reference to the translation processing map)

In [9]:
prg = ET.find('processings')
etree.SubElement(prg, 'listOfProductions')

In [10]:
prgLOP = prg.find('listOfProductions')
# Processing of the 'protein' set that refers to processing map PRM-TRNSL.
etree.SubElement(
    prgLOP, 'processing',
    attrib=OrderedDict([('processingMap', 'PRM-TRNSL'), ('set', 'protein')]),
)

In [11]:
pLPp = prgLOP.find('processing')
etree.SubElement(pLPp, 'listOfInputs')

In [12]:
pLPpLOI = pLPp.find('listOfInputs')
df_prot = pd.read_excel('./curation/prot_stoich_curation.xlsx')
# Every row of the protein table becomes an input of the translation processing.
# The reference is carried by the `species` attribute, as in every other
# speciesReference written by this notebook (it was written as `id` here).
for i in df_prot.index:
    att_dict = OrderedDict([
        ('species', 'MAC-' + df_prot.id[i]),
        ('stoichiometry', '1'),
    ])
    pLPpLOI.append(etree.Element('speciesReference', attrib=att_dict))

#### Transcription
Processings

In [13]:
# Re-point ET at the transcription process and add its <processings> child.
ET = [proc for proc in prolistET if proc.attrib['id'] == 'PRO-TRNSC'][0]
etree.SubElement(ET, 'processings')

In [14]:
ETpg = ET.find('processings')
etree.SubElement(ETpg, 'listOfProductions')

In [15]:
ETpgLOP = ETpg.find('listOfProductions')
# Processing of the 'rna' set that refers to processing map PRM-TRNSC.
etree.SubElement(
    ETpgLOP, 'processing',
    attrib=OrderedDict([('processingMap', 'PRM-TRNSC'), ('set', 'rna')]),
)

In [16]:
EpLPp = ETpgLOP.find('processing')
etree.SubElement(EpLPp, 'listOfInputs')

In [17]:
EpLPpLOI = EpLPp.find('listOfInputs')

# Load the RNA table, label the rows by RNA id and keep the first 25 rows.
rna_table = pd.read_excel('./input/scRNA/RNA_stoich.xlsx', sheet_name='RNAs')
rna_table.index = rna_table['RNAid'].to_list()
df_rnas = rna_table.head(25)

# One input reference per retained RNA, each with stoichiometry 1.
for rna_label in df_rnas.index:
    att_dict = OrderedDict([
        ('species', 'MAC-' + df_rnas.loc[rna_label, 'RNAid']),
        ('stoichiometry', '1'),
    ])
    etree.SubElement(EpLPpLOI, 'speciesReference', attrib=att_dict)

#### Replication
Processings

In [18]:
# Re-point ET at the replication process and add its <processings> child.
ET = [proc for proc in prolistET if proc.attrib['id'] == 'PRO-REPL'][0]
etree.SubElement(ET, 'processings')

In [19]:
ETpg = ET.find('processings')
etree.SubElement(ETpg, 'listOfProductions')

In [20]:
ETpgLOP = ETpg.find('listOfProductions')
# Processing of the 'dna' set that refers to processing map PRM-REPL.
etree.SubElement(
    ETpgLOP, 'processing',
    attrib=OrderedDict([('processingMap', 'PRM-REPL'), ('set', 'dna')]),
)

In [21]:
EpLPp = ETpgLOP.find('processing')
etree.SubElement(EpLPp, 'listOfInputs')

In [22]:
EpLPpLOI = EpLPp.find('listOfInputs')
# The single input of the replication processing is MAC-dna.
etree.SubElement(
    EpLPpLOI, 'speciesReference',
    attrib=OrderedDict([('species', 'MAC-dna'), ('stoichiometry', '1')]),
)

#### RNA degradation
Processings

In [23]:
# Re-point ET at the RNA degradation process and add its <processings> child.
ET = [proc for proc in prolistET if proc.attrib['id'] == 'PRO-RNADEG'][0]
etree.SubElement(ET, 'processings')

In [24]:
ETpg = ET.find('processings')
# Unlike the other processes, this one gets a list of degradations.
etree.SubElement(ETpg, 'listOfDegradations')

In [25]:
ETpgLOD = ETpg.find('listOfDegradations')
# Processing of the 'rna' set that refers to processing map PRM-RNADEG.
etree.SubElement(
    ETpgLOD, 'processing',
    attrib=OrderedDict([('processingMap', 'PRM-RNADEG'), ('set', 'rna')]),
)

In [26]:
EpLDp = ETpgLOD.find('processing')
etree.SubElement(EpLDp, 'listOfInputs')

In [27]:
EpLDpLOI = EpLDp.find('listOfInputs')

# Load the RNA table, label the rows by RNA id and keep the first 25 rows.
df_rnas = pd.read_excel('./input/scRNA/RNA_stoich.xlsx', sheet_name='RNAs')
df_rnas.index = df_rnas.RNAid.to_list()
df_rnas = df_rnas.iloc[:25, :]

# One input reference per retained RNA. The reference is carried by the
# `species` attribute, exactly as in the transcription inputs built from the
# same table (it was written as `id` here).
for i in df_rnas.index:
    att_dict = OrderedDict([
        ('species', 'MAC-' + df_rnas.RNAid[i]),
        ('stoichiometry', '1'),
    ])
    EpLDpLOI.append(etree.Element('speciesReference', attrib=att_dict))

### List of processing maps
#### Declare processing map entries

In [28]:
prmLET = proET.find('listOfProcessingMaps')

# Declare one empty processing map per process, in output order.
for map_id in ('PRM-TRNSL', 'PRM-TRNSC', 'PRM-REPL', 'PRM-RNADEG'):
    att_dict = OrderedDict([('id', map_id)])
    etree.SubElement(prmLET, 'processingMap', attrib=att_dict)

#### Translation
Constant processing: Energetic cost of initiation and formylation of Met-tRNA

In [29]:
# Re-point ET at the translation processing map.
ET = [pmap for pmap in prmLET if pmap.attrib['id'] == 'PRM-TRNSL'][0]

In [30]:
# Constant processing of the translation map, starting with its reactant list.
etree.SubElement(ET, 'constantProcessing')
ETpmCP = ET.find('constantProcessing')
etree.SubElement(ETpmCP, 'listOfReactants')

In [31]:
ETpmCPlor = ETpmCP.find('listOfReactants')
# Reactants of the constant processing, each with stoichiometry 1.
for species_id in ('MET-fmettrna_c', 'MET-atp_c', 'MET-h2o_c'):
    att_dict = OrderedDict([('species', species_id), ('stoichiometry', '1')])
    etree.SubElement(ETpmCPlor, 'speciesReference', attrib=att_dict)

In [32]:
# Product list of the constant processing.
etree.SubElement(ETpmCP, 'listOfProducts')

In [33]:
ETpmCPlop = ETpmCP.find('listOfProducts')
# Products of the constant processing, each with stoichiometry 1.
constant_products = (
    'MET-mettrna_c',
    'MET-for_c',
    'MET-adp_c',
    'MET-pi_c',
    'MET-h_c',
)
for species_id in constant_products:
    att_dict = OrderedDict([('species', species_id), ('stoichiometry', '1')])
    etree.SubElement(ETpmCPlop, 'speciesReference', attrib=att_dict)

Component processing: incoming amino acid and energetic cost of translation elongation

In [34]:
# One-letter amino-acid code paired with the three-letter stem that appears
# in the corresponding tRNA metabolite identifiers.
aa_stems = [
    ('A', 'ala'), ('R', 'arg'), ('N', 'asn'), ('D', 'asp'), ('C', 'cys'),
    ('Q', 'gln'), ('E', 'glu'), ('G', 'gly'), ('H', 'his'), ('I', 'ile'),
    ('L', 'leu'), ('K', 'lys'), ('M', 'met'), ('F', 'phe'), ('P', 'pro'),
    ('S', 'ser'), ('T', 'thr'), ('W', 'trp'), ('Y', 'tyr'), ('V', 'val'),
]

# (amino-acid letter, '<stem>trna_c' metabolite, 'trna<stem>_c' metabolite)
trnaL = [(aa, stem + 'trna_c', 'trna' + stem + '_c') for aa, stem in aa_stems]

In [35]:
# Container for the per-component processings of the translation map.
etree.SubElement(ET, 'listOfComponentProcessings')

In [36]:
ETpmLCP = ET.find('listOfComponentProcessings')
# One component processing per amino acid, with machinery cost 1.
# Each metabolite is written as <speciesReference species="..." stoichiometry="..."/>,
# the same structure used by the constant processing and the cofactor
# processings of this map (tag and attribute names were swapped here).
for aa, m_in, m_out in trnaL:
    att_dict = OrderedDict([('component', 'COM-' + aa), ('machineryCost', '1')])
    ETpmLCPcp = etree.SubElement(ETpmLCP, 'componentProcessing', attrib=att_dict)

    # Consumed: the m_in metabolite, 2 GTP and 2 H2O.
    ETpmLCPcpLOR = etree.SubElement(ETpmLCPcp, 'listOfReactants')
    for met_id, stoich in [(m_in, '1'), ('gtp_c', '2'), ('h2o_c', '2')]:
        att_dict = OrderedDict([
            ('species', 'MET-' + met_id),
            ('stoichiometry', stoich),
        ])
        etree.SubElement(ETpmLCPcpLOR, 'speciesReference', attrib=att_dict)

    # Released: the m_out metabolite, 2 GDP, 2 Pi and 3 H+.
    ETpmLCPcpLOP = etree.SubElement(ETpmLCPcp, 'listOfProducts')
    for met_id, stoich in [(m_out, '1'), ('gdp_c', '2'), ('pi_c', '2'), ('h_c', '3')]:
        att_dict = OrderedDict([
            ('species', 'MET-' + met_id),
            ('stoichiometry', stoich),
        ])
        etree.SubElement(ETpmLCPcpLOP, 'speciesReference', attrib=att_dict)

In [37]:
# Reload the protein table and label its rows by protein id.
df_prot = pd.read_excel('./curation/prot_stoich_curation.xlsx')
df_prot.index = df_prot.id.to_list()

# `cofactor_stoich` entries are comma-separated items; the part of each item
# before the first ':' is collected as a cofactor id.
cofactor_ids = set()
for prot_id in df_prot.index:
    cofs = df_prot.loc[prot_id, 'cofactor_stoich']
    if pd.notnull(cofs):
        cofactor_ids.update(item.split(':')[0] for item in cofs.split(','))

# Unique cofactor ids in sorted order.
cofs_all = sorted(cofactor_ids)

In [38]:
# One component processing per cofactor, with machinery cost 0, whose only
# reactant is the cofactor metabolite itself.
for cofactor in cofs_all:
    ETpmLCPcp = etree.SubElement(
        ETpmLCP, 'componentProcessing',
        attrib=OrderedDict([('component', 'COM-' + cofactor), ('machineryCost', '0')]),
    )

    ETpmLCPcpLOR = etree.SubElement(ETpmLCPcp, 'listOfReactants')
    etree.SubElement(
        ETpmLCPcpLOR, 'speciesReference',
        attrib=OrderedDict([('species', 'MET-' + cofactor), ('stoichiometry', '1')]),
    )

#### Transcription
Component processing: each incoming nucleoside triphosphate is consumed and pyrophosphate is released

In [39]:
# Re-point ET at the transcription processing map and attach its container.
ET = [pmap for pmap in prmLET if pmap.attrib['id'] == 'PRM-TRNSC'][0]
ETlcp = etree.Element('listOfComponentProcessings')
ET.append(ETlcp)

In [40]:
# (RNA component, '<base>tp_c' metabolite) for the four bases A, C, G, U.
nurL = [('nur' + base, base.lower() + 'tp_c') for base in 'ACGU']

In [41]:
# One component processing per RNA nucleotide, with machinery cost 0.
# Each metabolite is written as <speciesReference species="..." stoichiometry="..."/>,
# matching the translation constant processing (tag and attribute names were
# swapped here).
for nur, m_in in nurL:
    att_dict = OrderedDict([('component', 'COM-' + nur), ('machineryCost', '0')])
    ETlcpCP = etree.SubElement(ETlcp, 'componentProcessing', attrib=att_dict)

    # Consumed: the m_in metabolite and one H2O.
    ETlcpCPlor = etree.SubElement(ETlcpCP, 'listOfReactants')
    for met_id in (m_in, 'h2o_c'):
        att_dict = OrderedDict([('species', 'MET-' + met_id), ('stoichiometry', '1')])
        etree.SubElement(ETlcpCPlor, 'speciesReference', attrib=att_dict)

    # Released: pyrophosphate and one H+.
    ETlcpCPlop = etree.SubElement(ETlcpCP, 'listOfProducts')
    for met_id in ('ppi_c', 'h_c'):
        att_dict = OrderedDict([('species', 'MET-' + met_id), ('stoichiometry', '1')])
        etree.SubElement(ETlcpCPlop, 'speciesReference', attrib=att_dict)

#### RNA degradation
Component processing: each RNA nucleotide is degraded into its nucleoside monophosphate

In [42]:
# Re-point ET at the RNA degradation processing map and attach its container.
ET = [pmap for pmap in prmLET if pmap.attrib['id'] == 'PRM-RNADEG'][0]
ETlcp = etree.Element('listOfComponentProcessings')
ET.append(ETlcp)

In [43]:
# (RNA component, '<base>mp_c' metabolite) for the four bases A, C, G, U.
nurL = [('nur' + base, base.lower() + 'mp_c') for base in 'ACGU']

In [44]:
# One component processing per RNA nucleotide, with machinery cost 0.
# Each metabolite is written as <speciesReference species="..." stoichiometry="..."/>,
# matching the translation constant processing (tag and attribute names were
# swapped here).
for nur, m_out in nurL:
    att_dict = OrderedDict([('component', 'COM-' + nur), ('machineryCost', '0')])
    ETlcpCP = etree.SubElement(ETlcp, 'componentProcessing', attrib=att_dict)

    # Consumed: one H2O.
    ETlcpCPlor = etree.SubElement(ETlcpCP, 'listOfReactants')
    att_dict = OrderedDict([('species', 'MET-h2o_c'), ('stoichiometry', '1')])
    etree.SubElement(ETlcpCPlor, 'speciesReference', attrib=att_dict)

    # Released: the m_out metabolite and one H+.
    ETlcpCPlop = etree.SubElement(ETlcpCP, 'listOfProducts')
    for met_id in (m_out, 'h_c'):
        att_dict = OrderedDict([('species', 'MET-' + met_id), ('stoichiometry', '1')])
        etree.SubElement(ETlcpCPlop, 'speciesReference', attrib=att_dict)

#### Replication
Duplicate DNA

In [45]:
# Re-point ET at the replication processing map and attach its container.
ET = [pmap for pmap in prmLET if pmap.attrib['id'] == 'PRM-REPL'][0]
ETlcp = etree.Element('listOfComponentProcessings')
ET.append(ETlcp)

In [46]:
# (DNA component, '<base>mp_c' metabolite) for the letters A, C, G, U.
# NOTE(review): these are the same metabolites as the RNA degradation
# products, and the fourth component is 'nudU' rather than a thymine one.
# For replication one would expect the deoxynucleotide precursors; confirm
# against the metabolite and DNA component ids used by the model.
nudL = [('nud' + base, base.lower() + 'mp_c') for base in 'ACGU']

In [47]:
# One component processing per DNA nucleotide, with machinery cost 0.
# Each metabolite is written as <speciesReference species="..." stoichiometry="..."/>,
# matching the translation constant processing (tag and attribute names were
# swapped here).
# NOTE(review): the reactants taken from nudL are monophosphates while the
# products include pyrophosphate; the ids in nudL should be confirmed.
for nud, m_in in nudL:
    att_dict = OrderedDict([('component', 'COM-' + nud), ('machineryCost', '0')])
    ETlcpCP = etree.SubElement(ETlcp, 'componentProcessing', attrib=att_dict)

    # Consumed: the m_in metabolite and one H2O.
    ETlcpCPlor = etree.SubElement(ETlcpCP, 'listOfReactants')
    for met_id in (m_in, 'h2o_c'):
        att_dict = OrderedDict([('species', 'MET-' + met_id), ('stoichiometry', '1')])
        etree.SubElement(ETlcpCPlor, 'speciesReference', attrib=att_dict)

    # Released: pyrophosphate and one H+.
    ETlcpCPlop = etree.SubElement(ETlcpCP, 'listOfProducts')
    for met_id in ('ppi_c', 'h_c'):
        att_dict = OrderedDict([('species', 'MET-' + met_id), ('stoichiometry', '1')])
        etree.SubElement(ETlcpCPlop, 'speciesReference', attrib=att_dict)

#### Save

In [48]:
# Serialize the whole tree as indented XML and write the bytes to disk.
with open('./rbaModel/processes.xml', 'wb') as xml_file:
    xml_bytes = etree.tostring(proET, pretty_print=True)
    xml_file.write(xml_bytes)