In [1]:
import re

import cobra
from cobra.io import read_sbml_model,load_json_model
from cobra import Model, Reaction, Metabolite
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord

# Extract tRNA from Lactobacillus plantarum WCFS1

In [18]:
# NOTE: this whole cell is disabled (commented out) and is kept only as a record
# of how the tRNA FASTA was produced. As written, it would:
#   1. parse the RNA FASTA '../LP_HMX2/220668.9.PATRIC.frn',
#   2. keep records whose description contains 'tRNA', skipping any that contain '?',
#   3. label each record with the second '   '-separated field of its description
#      plus a running per-label index (_0, _1, ...),
#   4. write the relabelled records to '../LP_HMX2/LP_WCFS1_trna.fa'.
# NOTE(review): the `70` shown as this cell's output is presumably the record count
# returned by SeqIO.write on an earlier run - confirm before relying on it.
# trna_counts = {}
# rna_records = SeqIO.parse( '../LP_HMX2/220668.9.PATRIC.frn', "fasta")
# trna_records=[]
# for r in rna_records:
#     if 'tRNA' in r.description:
#         if '?' in r.description:
#             continue
#         seq = str(r.seq)
#         des = str(r.description).split('   ')[1].strip()
#         if des not in trna_counts:
#             trna_counts[des] = 1
#             name = des + '_0'
#         else:
#             trna_counts[des] += 1
#             name = des + '_' + str( trna_counts[des]-1 )
#         trna_records.append( SeqRecord(Seq(seq), id = "", name="",description=name) )
# SeqIO.write( trna_records, '../LP_HMX2/LP_WCFS1_trna.fa' ,"fasta")

70

# Extract chaperones

In [17]:
# Select chaperone-related rows from the protein annotation table.
# A row is kept when its Nr description contains the substring 'chaper'
# (case-sensitive) OR its KEGG gene name is exactly one of the listed genes.
pro_annot = pd.read_csv('../LP_HMX2/BCH.all_annotation.csv')
pro_annot = pro_annot[['Gene_ID','GO_terms','Kegg_Gene_Name','Nr_Description']]

# Missing descriptions are treated as "no match" instead of raising TypeError
# on `'chaper' in nan`.
desc_has_chaper = (
    pro_annot['Nr_Description'].fillna('').astype(str).str.contains('chaper', regex=False)
)
name_is_chaperone = pro_annot['Kegg_Gene_Name'].isin(
    ['tig','groL','groS','dnaK','grpE','dnaJ']
)

# Positional indices of the kept rows, in file order. The masks are evaluated
# once per column rather than rebuilding a Python list on every iteration, and
# the order no longer depends on set iteration order.
keep_ind = np.flatnonzero((desc_has_chaper | name_is_chaperone).to_numpy()).tolist()
cpr_annot = pro_annot.iloc[keep_ind].reset_index(drop=True)
cpr_annot

Unnamed: 0,Gene_ID,GO_terms,Kegg_Gene_Name,Nr_Description
0,1_449,cytoplasm,hslO,MULTISPECIES: Hsp33 family molecular chaperone...
1,1_549,ATP binding,"groES, HSPE1",MULTISPECIES: co-chaperone GroES [Lactobacillus]
2,1_550,ATP binding,"groEL, HSPD1",MULTISPECIES: chaperonin GroEL [Lactobacillus]
3,1_1801,protein folding,tig,MULTISPECIES: trigger factor [Lactobacillus]
4,1_2927,metal ion transport,--,MULTISPECIES: copper chaperone [Lactobacillales]
5,1_1716,ATP binding,dnaJ,MULTISPECIES: molecular chaperone DnaJ [Lactob...
6,1_1717,ATP binding,dnaK,MULTISPECIES: molecular chaperone DnaK [Lactob...
7,1_1718,adenyl-nucleotide exchange factor activity,GRPE,co-chaperone GrpE [Lactobacillus plantarum WJL]
8,1_1627,ATP binding,clpB,MULTISPECIES: ATP-dependent chaperone ClpB [La...


In [23]:
# Gene_ID -> short chaperone name for the proteins to export.
# Gene 1_2927 (annotated as "copper chaperone" in the table above) is not listed here.
chaperones = {'1_449':'hslO','1_549':'groES','1_550':'groEL','1_1801':'tig',
              '1_1716':'dnaJ','1_1717':'dnaK','1_1718':'GRPE','1_1627':'clpB'}
prot_records = SeqIO.parse( '../LP_HMX2/BCH.protein.fa', "fasta")
cpr_records=[]
for r in prot_records:
    if r.id in chaperones:
        # Header layout: rba|<Gene_ID>|<name>. Biopython's FASTA writer emits the
        # leading '>' itself, so the id must NOT start with '>' (otherwise the
        # file would contain '>>rba|...' headers).
        ID = 'rba|'+r.id+'|'+chaperones[r.id]
        cpr_records.append( SeqRecord(Seq(str(r.seq)), id = ID, name="",description="") )
# SeqIO.write returns the number of records written, which becomes the cell output.
SeqIO.write( cpr_records, '../LP_HMX2/chaperones.fasta' ,"fasta")

8

# Extract ribosomal proteins

In [41]:
def _lettered(prefix, first_letter, count):
    """Return `count` names made of `prefix` plus consecutive letters from `first_letter`."""
    start = ord(first_letter)
    return [prefix + chr(start + offset) for offset in range(count)]


# Gene names used to recognise ribosomal / translation-related proteins:
# rpsA..rpsU, sra, rplA..rplF, rplI..rplY, rpmA..rpmJ, infA..infC, plus a
# fixed list of six further gene names.
rb_prots = (
    _lettered('rps', 'A', 21)
    + ['sra']
    + _lettered('rpl', 'A', 6)
    + _lettered('rpl', 'I', 17)
    + _lettered('rpm', 'A', 10)
    + _lettered('inf', 'A', 3)
    + ['tsf', 'tuf', 'fusA', 'efp', 'prfA', 'prfB']
)

In [61]:
# Select ribosomal / translation-related rows from the protein annotation table.
# Rows whose KEGG gene name is exactly 'rimI', 'rimL' or 'ysxB' are always dropped.
# Of the rest, a row is kept when its Nr description contains 'ribosomal' OR its
# KEGG gene name contains any entry of `rb_prots` as a substring.
pro_annot = pd.read_csv('../LP_HMX2/BCH.all_annotation.csv')
pro_annot = pro_annot[['Gene_ID','GO_terms','Kegg_Gene_Name','Nr_Description']]

# Missing values become empty strings so the substring tests cannot raise
# TypeError on NaN; an empty string matches nothing.
kegg_names = pro_annot['Kegg_Gene_Name'].fillna('').astype(str)
descriptions = pro_annot['Nr_Description'].fillna('').astype(str)

is_excluded = kegg_names.isin(['rimI', 'rimL', 'ysxB'])
desc_is_ribosomal = descriptions.str.contains('ribosomal', regex=False)
# any() stops at the first matching gene name instead of scanning them all.
name_matches_rb_prot = kegg_names.map(
    lambda kegg_name: any(ID in kegg_name for ID in rb_prots)
).astype(bool)

# Positional indices of the kept rows, in file order (each column is evaluated
# once; the order no longer depends on set iteration order).
keep_ind = np.flatnonzero(
    (~is_excluded & (desc_is_ribosomal | name_matches_rb_prot)).to_numpy()
).tolist()
rb_annot = pro_annot.iloc[keep_ind].reset_index(drop=True)

In [71]:
def _collapse_to_rb_prot(kegg_name):
    """Reduce a KEGG gene-name string to a matching `rb_prots` entry, if any.

    Walks `rb_prots` in order; whenever an entry is a substring of the current
    label, the label is replaced by that entry and the scan continues against
    the replaced label. A string with no match is returned unchanged.
    """
    label = kegg_name
    for rbp in rb_prots:
        if rbp in label:
            label = rbp
    return label


# Short name per row, derived from the KEGG gene name.
names = [_collapse_to_rb_prot(kegg_name) for kegg_name in list(rb_annot['Kegg_Gene_Name'])]
rb_annot['name'] = names

# Strip surrounding whitespace from the gene ids.
ID_list = list(rb_annot['Gene_ID'])
rb_annot['Gene_ID'] = [gene_id.strip() for gene_id in ID_list]

In [73]:
# Collect the protein sequences of every gene listed in rb_annot, relabelled
# as rba|<Gene_ID>|<name>.
prot_records = SeqIO.parse( '../LP_HMX2/BCH.protein.fa', "fasta")

# Build the Gene_ID -> name lookup once instead of rebuilding a list and
# re-filtering the frame for every protein. setdefault keeps the FIRST name
# when a Gene_ID occurs more than once, matching the previous `[0]` selection.
name_by_gene = {}
for gene_id, rb_name in zip(rb_annot['Gene_ID'], rb_annot['name']):
    name_by_gene.setdefault(gene_id, rb_name)

rbp_records=[]
for r in prot_records:
    if r.id in name_by_gene:
        temp_name = name_by_gene[r.id]
        # Biopython's FASTA writer emits the leading '>' itself, so the id must
        # NOT start with '>' (otherwise headers come out as '>>rba|...').
        ID = 'rba|'+r.id+'|'+temp_name
        rbp_records.append( SeqRecord(Seq(str(r.seq)), id = ID, name="",description="") )

In [76]:
# Write the collected ribosomal-protein records to a FASTA file.
# SeqIO.write returns the number of records written, which becomes this cell's output.
SeqIO.write( rbp_records, '../LP_HMX2/ribosomal_proteins.fasta' ,"fasta")

60

Ref: Analysis of Growth of Lactobacillus plantarum WCFS1 on a Complex Medium Using a Genome-scale Metabolic Model
    DOI: https://doi.org/10.1074/jbc.M606263200

In [8]:
# Load the lphmx2 model from its SBML file.
lphmx2 = read_sbml_model('../models/lphmx2.xml')
# Optional JSON export of the same model (left disabled):
# cobra.io.save_json_model(lphmx2, '../models/lphmx2.json')

In [24]:
lphmx2

0,1
Name,lphmx2
Memory address,0x07f9d0acbac40
Number of metabolites,1010
Number of reactions,1537
Number of groups,0
Objective expression,1.0*Growth - 1.0*Growth_reverse_699ae
Compartments,"cytosol, periplasm, extracellular space"


In [36]:
lphmx2.reactions.LacR

0,1
Reaction identifier,LacR
Name,Lactate racemase
Memory address,0x07f9d0a1d9a60
Stoichiometry,lac__L_c <=> lac__D_c  L-Lactate <=> D-Lactate
GPR,1_87 or 1_875
Lower bound,-1000.0
Upper bound,1000.0


In [37]:
# Match 'or' / 'and' only as whole words. A plain substring test or
# str.split('or') would also hit gene ids that merely contain those letters
# (e.g. 'orf19'), splitting the id and mislabelling the rule.
_GPR_OR = re.compile(r'\bor\b')
_GPR_AND = re.compile(r'\band\b')


def _split_gpr(rule):
    """Split a gene-reaction rule string into (relation, gene strings).

    A rule containing the word 'or' is split on 'or' and labelled 'isozyme';
    otherwise a rule containing the word 'and' is split on 'and' and labelled
    'subunit'; otherwise the whole rule is a 'single' gene. Parentheses are
    NOT interpreted: for a nested rule the returned pieces are raw
    sub-expressions, not individual gene ids.
    """
    if _GPR_OR.search(rule):
        relation, pieces = 'isozyme', _GPR_OR.split(rule)
    elif _GPR_AND.search(rule):
        relation, pieces = 'subunit', _GPR_AND.split(rule)
    else:
        relation, pieces = 'single', [rule]
    return relation, [piece.strip() for piece in pieces]


complex_gpr=[]   # ids of reactions whose rule contains parentheses
enzymes=[]       # one {'Gene', 'Reaction', 'Relation'} record per rule piece
for r in lphmx2.reactions:
    rule = r.gene_reaction_rule
    if rule == '' or rule == 'spontaneous':
        continue
    if '(' in rule:
        # Recorded for separate inspection; the rule is still split below.
        complex_gpr.append(r.id)
    relation, genes = _split_gpr(rule)
    for g in genes:
        enzymes.append( {'Gene':g,'Reaction':r.id,'Relation':relation})

In [41]:
# Tabulate the collected records: one row per entry of `enzymes`,
# with columns Gene / Reaction / Relation.
met_tb = pd.DataFrame(enzymes)

In [42]:
met_tb

Unnamed: 0,Gene,Reaction,Relation
0,1_2564,13PPDH,single
1,1_2191,2MAHMP,single
2,1_1478,3A2OA,single
3,1_1437,3HAD120,single
4,1_1437,3HAD140,single
...,...,...,...
1684,1_852,r0963,subunit
1685,1_853,r0963,subunit
1686,1_854,r0963,subunit
1687,1_1317,r2465_1,isozyme


In [45]:
complex_gpr

['ACCOAC',
 'ACGApts',
 'ACTNabc',
 'AEPabcpp',
 'AIRC1',
 'ALAabc',
 'ANS',
 'ARBTpts',
 'ARGabc',
 'ASPabc',
 'CBPS',
 'CELBpts',
 'CELLBpts_2',
 'CHLabc',
 'CHOLSabc_1',
 'CITL',
 'COabc',
 'CYSabc',
 'CYSabcpp',
 'CYTB_B2',
 'Cut1',
 'DHAPT',
 'DRIBabc',
 'FADH2D',
 'FE2abc',
 'FRUpts',
 'G3PCabcpp',
 'G3PEabcpp',
 'G3PGabcpp',
 'G3PIabcpp',
 'G3PSabcpp',
 'GALTpts',
 'GLCabc',
 'GLCpts',
 'GLNabc',
 'GLUabc',
 'GLYBabc',
 'GLYC3Pabc',
 'GLYC3Pabcpp',
 'GTHRDt2_1',
 'HMPPtr',
 'IG3PS',
 'IG3PS_1',
 'ILEabc',
 'ILEabcpp',
 'KAS1',
 'KAS11',
 'KAS12',
 'KAS13',
 'KAS2',
 'KAS3',
 'KAS4',
 'KAS6',
 'KAS7',
 'KAS8',
 'LEUabc',
 'LEUabcpp',
 'MALTabc',
 'MALTpts',
 'MANpts',
 'METDabc',
 'METSOXR2',
 'METabc',
 'MNLpts',
 'MNLptspp',
 'MNabc',
 'NI2uabcpp',
 'NTP3',
 'PDH',
 'PIabc',
 'PRFGS_1',
 'PROabc',
 'PTRCabc',
 'RBFSa',
 'RBFSb',
 'RIBFLVt2',
 'RIBabc',
 'RIBabc1',
 'RIBabcpp',
 'RNDR1',
 'RNDR2',
 'RNDR3',
 'RNDR4',
 'SALCpts',
 'SBTpts',
 'SERD_L',
 'SERGLUGLNtr',
 'SPMDabc',
