In [1]:
import cobra,rba,lxml
from lxml import etree
import pandas as pd
from collections import OrderedDict

from Bio import Seq,SeqIO

#### Writing requirement file

In [2]:
# Load the genome-scale metabolic model from its JSON file with COBRApy.
# Note: the cells below that assemble rnas.xml do not reference rtGSM.
rtGSM = cobra.io.load_json_model('./gsmodel/iRhtoC_r2_2.json')

Academic license - for non-commercial use only


In [3]:
# Root <RBARnas> element with its two (initially empty) child containers,
# created in this order: listOfComponents, then listOfMacromolecules.
rnaET = etree.Element('RBARnas')
for section_tag in ('listOfComponents', 'listOfMacromolecules'):
    etree.SubElement(rnaET, section_tag)

In [4]:
comET = rnaET.find('listOfComponents')

# Nucleotide building blocks of the RNA macromolecules: (component id, weight).
# The fourth entry is 'COM-nurU' (it was previously registered as 'COM-nurT'):
# the macromolecule compositions are written with 'COM-nur' + base for
# base in 'ACGU', so every RNA references 'COM-nurU' and that id must exist
# in this component list.
rna_components = [
    ('COM-nurA', '2.9036'),
    ('COM-nurC', '2.7017'),
    ('COM-nurG', '3.0382'),
    ('COM-nurU', '2.7102'),
]

for com_id, com_weight in rna_components:
    # OrderedDict keeps the attribute order id / type / weight in the XML.
    att_dict = OrderedDict([('id', com_id), ('type', 'Nucleotide'), ('weight', com_weight)])
    comET.append(etree.Element('component', attrib=att_dict))

In [5]:
# Read the 'RNAs' sheet, label the rows with the values of the RNAid column,
# and keep only the first 25 rows for the XML export.
df_rnas = pd.read_excel('./data/SaceRNA/RNA_stoich.xlsx', sheet_name='RNAs')
df_rnas.index = df_rnas['RNAid'].tolist()
df_rnas = df_rnas.head(25)

In [6]:
macET = rnaET.find('listOfMacromolecules')

# One <macromolecule> per row of df_rnas; its <composition> child holds one
# <componentReference> per base, with the stoichiometry taken from the
# A / C / G / U columns of that row.
for i in df_rnas.index:
    att_dict = OrderedDict([('id', 'MAC-' + i), ('compartment', 'PART-c')])
    macE = etree.SubElement(macET, 'macromolecule', attrib=att_dict)

    macE_com = etree.SubElement(macE, 'composition')
    for base in 'ACGU':
        stoich = float(df_rnas.loc[i, base])
        if pd.isna(stoich):
            # Fail loudly instead of writing 'nan' into the model file.
            raise ValueError('Missing stoichiometry for base ' + base + ' of RNA ' + i)

        # Limit the written value to 6 decimal places. round() leaves values
        # with fewer decimals unchanged, and (unlike inspecting the text after
        # the '.' in str(stoich)) it also works for floats whose repr has no
        # '.' at all, such as '1e-07'.
        stoich_in = round(stoich, 6)

        att_dict = {'component': 'COM-nur' + base, 'stoichiometry': str(stoich_in)}
        macE_com.append(etree.Element('componentReference', attrib=att_dict))

In [7]:
# Serialize the assembled tree (pretty-printed) and write the bytes to rnas.xml.
with open('./rnas.xml', 'wb') as xml_out:
    xml_bytes = etree.tostring(rnaET, pretty_print=True)
    xml_out.write(xml_bytes)

#### Working scripts

In [65]:
# Reload the complete 'RNAs' sheet (no row cut here) and label the rows with
# the values of the RNAid column.
df_rnas = pd.read_excel('./data/SaceRNA/RNA_stoich.xlsx', sheet_name='RNAs')
df_rnas.index = df_rnas['RNAid'].tolist()

In [74]:
# Count the bases of one RNA's stored sequence and print them tab-separated
# in A, C, G, U order. count_rna is defined in another cell of this notebook
# and must have been run first.
rnaid = 'rrna25s_c'
base_dict = count_rna(df_rnas.loc[rnaid, 'Sequence'])
print(*(base_dict[base] for base in 'ACGU'), sep='\t')

899	662	966	869


In [66]:
# Display the RNA table to inspect its columns (RNAid, A, C, G, U, Reference, Notes, Sequence).
df_rnas

Unnamed: 0,RNAid,A,C,G,U,Reference,Notes,Sequence
trnaala_c,trnaala_c,10.500000,19.5,26.000000,17.000000,,,
trnaarg_c,trnaarg_c,14.666667,19.0,22.333333,16.333333,,,
trnaasn_c,trnaasn_c,16.000000,18.0,22.000000,18.000000,,,
trnaasp_c,trnaasp_c,12.000000,16.5,24.000000,19.000000,,,
trnacys_c,trnacys_c,13.000000,16.0,23.000000,20.000000,,,
...,...,...,...,...,...,...,...,...
Tyr|GTA,Tyr|GTA,16.000000,19.0,23.000000,17.000000,tRNAdb:tdbD00003654,,CUCUCGGUAGCCAAGUUGGUUUAAGGCGCAAGACUGUAAAUCUUGA...
Tyr|GTA,Tyr|GTA,15.000000,19.0,23.000000,18.000000,tRNAdb:tdbD00003655,,CUCUCGGUAGCCAAGUUGGUUUAAGGCGCAAGACUGUAAUUCUUGA...
Val|AAC,Val|AAC,15.000000,19.0,20.000000,20.000000,tRNAdb:tdbD00003245,,GGUUUCGUGGUCUAGUCGGUUAUGGCAUCUGCUUAACACGCAGAAC...
Val|AAC,Val|AAC,14.000000,19.0,21.000000,20.000000,tRNAdb:tdbD00003246,,GGUUUCGUGGUCUAGUCGGUUAUGGCAUCUGCUUAACACGCAGAAC...


In [49]:
def count_dna(seq):
    """Count how often each DNA base occurs in ``seq``.

    Args:
        seq: Object exposing ``count(substring)`` (e.g. a ``str``).

    Returns:
        dict mapping 'A', 'C', 'G' and 'T' (in that order) to their number of
        occurrences. Matching is case-sensitive; other symbols are ignored.
    """
    return {base: seq.count(base) for base in 'ACGT'}

def count_rna(seq):
    """Count how often each RNA base occurs in ``seq``.

    Args:
        seq: Object exposing ``count(substring)`` (e.g. a ``str``).

    Returns:
        dict mapping 'A', 'C', 'G' and 'U' (in that order) to their number of
        occurrences. Matching is case-sensitive; other symbols (including 'T'
        in an untranscribed DNA sequence) are ignored.
    """
    return {base: seq.count(base) for base in 'ACGU'}

In [30]:
# Read the FASTA record stored in the RDN5-1 coding-sequence file.
# SeqIO.read (as opposed to SeqIO.parse) expects the file to hold exactly one record.
x = SeqIO.read('./data/Sace_rrna/S288C_RDN5-1_RDN5-1_coding.fsa', 'fasta')

In [31]:
# Transcribe the DNA sequence to RNA (T -> U) and show it as a plain string.
str(x.seq.transcribe())

'GGUUGCGGCCAUAUCUACCAGAAAGCACCGUUUCCCGUCCGAUCAACUGUAGUUAAGCUGGUAAGAGCCUGACCGAGUAGUGUAGUGGGUGACCAUACGCGAAACUCAGGUGCUGCAAUCU'

In [36]:
# Collect every record of the tRNA FASTA file into a list.
# NOTE: the loop variable `seq` stays bound to the last record after the loop,
# and another cell of this notebook (count_rna(str(seq.seq))) reads it.
seqs = []
for seq in SeqIO.parse('./data/SaceRNA/tRNA_64_codons.fst', 'fasta'):
    seqs.append(seq)

In [55]:
# Show the identifier of record x1 (x1 is assigned in another cell as seqs[0]).
x1.id

'tdbD00000219|Saccharomyces_cerevisiae|4932|Ala|AGC'

In [58]:
# Base counts for whatever record `seq` was last bound to by a loop over the records.
# NOTE: there is no .transcribe() here; count_rna only tallies A/C/G/U, so an
# untranscribed (T-containing) sequence presumably explains the 'U': 0 shown below.
count_rna(str(seq.seq))

{'A': 16, 'C': 20, 'G': 21, 'U': 0}

In [60]:
# For every collected record: transcribe it, count its bases, and print the
# counts tab-separated in A, C, G, U order (one record per line).
for seq in seqs:
    rna_text = str(seq.seq.transcribe())
    base_dict = count_rna(rna_text)
    print(*(base_dict[b] for b in 'ACGU'), sep='\t')

9	20	27	17
12	19	25	17
15	20	23	15
14	19	23	16
15	18	21	18
16	18	22	18
12	16	24	19
12	17	24	19
13	16	23	20
14	18	19	20
13	17	20	22
13	18	20	22
13	22	20	17
15	21	19	17
15	21	20	16
12	19	22	18
12	19	22	18
17	14	20	21
15	20	23	16
13	21	20	19
14	21	27	10
15	22	25	10
20	18	23	21
15	21	29	19
15	20	29	20
14	18	22	19
14	17	23	18
14	16	22	21
19	17	20	17
19	18	20	16
17	16	23	17
16	16	24	17
15	18	22	18
16	17	22	18
8	19	25	20
8	20	25	19
16	17	27	22
14	21	27	20
13	22	27	19
13	20	27	22
14	20	27	21
20	15	20	18
15	18	23	16
15	15	21	21
16	16	20	20
15	16	20	22
16	19	23	17
15	19	23	18
15	19	20	20
14	19	21	20
16	20	21	17
9	20	27	17
12	19	25	17
13	20	25	15
15	20	23	15
10	18	24	19
14	19	23	16
15	18	21	18
16	18	22	18
14	18	21	17
12	17	24	19
13	16	23	20
14	18	19	20
14	15	19	23
13	17	20	22
15	15	18	23
13	22	20	17
15	21	20	16
15	21	19	17
12	19	22	18
12	18	22	19
12	19	22	18
11	19	22	19
17	14	20	21
15	20	23	16
13	21	20	19
14	21	20	18
20	18	23	21
20	18	23	21
20	18	23	21
20	18	23	21
20	18	23	21
12	22	29	18
15	20	29

In [40]:
# Keep the first two collected records at hand for interactive inspection.
x1, x2 = seqs[0], seqs[1]

In [50]:
# Base counts of the transcribed first record. The Seq object is passed
# directly because count_rna only calls .count() on its argument.
count_rna(x1.seq.transcribe())

{'A': 9, 'C': 20, 'G': 27, 'U': 17}

In [51]:
# Base counts of the transcribed second record. The Seq object is passed
# directly because count_rna only calls .count() on its argument.
count_rna(x2.seq.transcribe())

{'A': 12, 'C': 19, 'G': 25, 'U': 17}

In [48]:
# Display the SeqRecord repr of the first record (sequence, id, name, description).
x1

SeqRecord(seq=Seq('GGGCGTGTGGCGTAGTCGGTAGCGCGCTCCCTTAGCATGGGAGAGGTCTCCGGT...CCA', SingleLetterAlphabet()), id='tdbD00000219|Saccharomyces_cerevisiae|4932|Ala|AGC', name='tdbD00000219|Saccharomyces_cerevisiae|4932|Ala|AGC', description='tdbD00000219|Saccharomyces_cerevisiae|4932|Ala|AGC', dbxrefs=[])

In [42]:
# Display the SeqRecord repr of the second record (sequence, id, name, description).
x2

SeqRecord(seq=Seq('GGGCACATGGCGCAGTTGGTAGCGCGCTTCCCTTGCAAGGAAGAGGTCATCGGT...CCA', SingleLetterAlphabet()), id='tdbD00000218|Saccharomyces_cerevisiae|4932|Ala|TGC', name='tdbD00000218|Saccharomyces_cerevisiae|4932|Ala|TGC', description='tdbD00000218|Saccharomyces_cerevisiae|4932|Ala|TGC', dbxrefs=[])

In [52]:
# Show the '|'-separated identifier string of the first record.
x1.id

'tdbD00000219|Saccharomyces_cerevisiae|4932|Ala|AGC'

In [2]:
# Load the genome-scale metabolic model from its JSON file with COBRApy
# (same file as the load_json_model call near the top of this notebook).
rtGSM = cobra.io.load_json_model('./gsmodel/iRhtoC_r2_2.json')

Academic license - for non-commercial use only


In [3]:
# Print the id of every model metabolite whose id begins with 'trna'.
for met in rtGSM.metabolites:
    if not met.id.startswith('trna'):
        continue
    print(met.id)

trnaala_c
trnaarg_c
trnaasn_c
trnaasn_m
trnaasp_c
trnacys_c
trnagln_c
trnaglu_c
trnaglu_m
trnagly_c
trnahis_c
trnahis_m
trnaile_c
trnaile_m
trnaleu_c
trnalys_c
trnamet_c
trnamet_m
trnaphe_c
trnaphe_m
trnapro_c
trnaser_c
trnathr_c
trnatrp_c
trnatrp_m
trnatyr_c
trnatyr_m
trnaval_c
trnaval_m
