# Curating a genome scale model

This notebook has been tested on [jprime.lbl.gov](jprime.lbl.gov) with the biodesign_3.7 kernel.

It starts with the model that gets output by the annotation_gr.ipynb notebook.

In [1]:
%matplotlib inline
from matplotlib import pyplot as plt
from IPython.display import IFrame
import numpy as np
import pandas as pd
import json
import urllib
import cobra
import cplex
import os
import requests
import collections
import itertools

# Getting and preparing the genome-scale model

## Load *R.opacus* NCBI model generated by CarveMe

In [2]:
model = cobra.io.read_sbml_model("GSMs/Ropacus_some_reaction_deletions.xml")
model

0,1
Name,ropacus_curated_with_some_deleted_reactions
Memory address,0x07fed972eefd0
Number of metabolites,1581
Number of reactions,2375
Number of groups,0
Objective expression,1.0*Growth - 1.0*Growth_reverse_699ae
Compartments,"cytosol, periplasm, extracellular space"


## Starting MEMOTE Output
Need to update

In [3]:
IFrame('memotes/ropacus_carveme_grampos.htm', 1500, 800)

# Look into Various methods to play with the GSM

#### follow simulating FBA documentation page 

In [11]:
model.objective = 'Growth'

In [12]:
solution = model.optimize()
print(solution)

<Solution 0.000 at 0x7fee49ebfdd0>


In [13]:
solution.objective_value

0.0

In [15]:
%%time
model.optimize().objective_value

CPU times: user 28 ms, sys: 0 ns, total: 28 ms
Wall time: 27.6 ms


0.0

In [16]:
%%time
model.slim_optimize()

CPU times: user 4 ms, sys: 0 ns, total: 4 ms
Wall time: 1.04 ms


0.0

In [17]:
model.summary()

Unnamed: 0_level_0,IN_FLUXES,IN_FLUXES,OUT_FLUXES,OUT_FLUXES,OBJECTIVES,OBJECTIVES
Unnamed: 0_level_1,ID,FLUX,ID,FLUX,ID,FLUX
0,12ppd__S_e,0.0,,,Growth,0.0
1,14glucan_e,0.0,,,,
2,2hxmp_e,0.0,,,,
3,2pglyc_e,0.0,,,,
4,34dhcinm_e,0.0,,,,
5,3hcinnm_e,0.0,,,,
6,3hoxpac_e,0.0,,,,
7,3hpppn_e,0.0,,,,
8,3mb_e,0.0,,,,
9,4abut_e,0.0,,,,


In [18]:
model.metabolites.nadh_c.summary()

Unnamed: 0_level_0,Unnamed: 1_level_0,PERCENT,FLUX,REACTION_STRING
RXN_STAT,ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
PRODUCING,3HCINNMH,,0.0,3hcinnm_c + h_c + nadh_c + o2_c --> dhcinnm_c ...
PRODUCING,3HPAOX,,0.0,3hoxpac_c + nadh_c + o2_c --> 34dhpha_c + h2o_...
PRODUCING,3HPPPNH,,0.0,3hpppn_c + h_c + nadh_c + o2_c --> dhpppn_c + ...
PRODUCING,3MBZALDH,,0.0,3mbzalc_c + nad_c --> 3mbzald_c + h_c + nadh_c
PRODUCING,3MBZDH,,0.0,3mbzald_c + h2o_c + nad_c --> 2.0 h_c + m_tol_...
PRODUCING,4MBZALDH,,0.0,4mbzalc_c + nad_c --> 4mbzald_c + h_c + nadh_c
PRODUCING,4MBZDH,,0.0,4mbzald_c + h2o_c + nad_c --> 2.0 h_c + nadh_c...
PRODUCING,ABUTD,,0.0,4abutn_c + h2o_c + nad_c --> 4abut_c + 2.0 h_c...
PRODUCING,ACALD,,0.0,acald_c + coa_c + nad_c <=> accoa_c + h_c + na...
PRODUCING,ACOAD1,,0.0,b2coa_c + h_c + nadh_c --> btcoa_c + nad_c


In [19]:

model.metabolites.atp_c.summary()

Unnamed: 0_level_0,Unnamed: 1_level_0,PERCENT,FLUX,REACTION_STRING
RXN_STAT,ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
PRODUCING,14GLUCANabcpp,,0.0,14glucan_p + atp_c + h2o_c --> 14glucan_c + ad...
PRODUCING,2AGPEAT120,,0.0,2agpe120_c + atp_c + ddca_c --> amp_c + pe120_...
PRODUCING,2AGPEAT140,,0.0,2agpe140_c + atp_c + ttdca_c --> amp_c + pe140...
PRODUCING,2AGPEAT141,,0.0,2agpe141_c + atp_c + ttdcea_c --> amp_c + pe14...
PRODUCING,2AGPEAT160,,0.0,2agpe160_c + atp_c + hdca_c --> amp_c + pe160_...
PRODUCING,2AGPEAT161,,0.0,2agpe161_c + atp_c + hdcea_c --> amp_c + pe161...
PRODUCING,2AGPEAT180,,0.0,2agpe180_c + atp_c + ocdca_c --> amp_c + pe180...
PRODUCING,2AGPEAT181,,0.0,2agpe181_c + atp_c + ocdcea_c --> amp_c + pe18...
PRODUCING,2AGPGAT120,,0.0,2agpg120_c + atp_c + ddca_c --> amp_c + pg120_...
PRODUCING,2AGPGAT140,,0.0,2agpg140_c + atp_c + ttdca_c --> amp_c + pg140...


In [20]:
biomass_rxn = model.reactions.get_by_id("Growth")

In [21]:
from cobra.util.solver import linear_reaction_coefficients
linear_reaction_coefficients(model)

{<Reaction Growth at 0x7fee496f96d0>: 1.0}

In [22]:
# change the objective to ATPM
model.objective = "ATPM"

# The upper bound should be 1000, so that we get
# the actual optimal value
model.reactions.get_by_id("ATPM").upper_bound = 1000.
linear_reaction_coefficients(model)

{<Reaction ATPM at 0x7fee496f9910>: 1.0}

In [23]:
model.optimize().objective_value

65.0

In [24]:
model.summary()

Unnamed: 0_level_0,IN_FLUXES,IN_FLUXES,OUT_FLUXES,OUT_FLUXES,OBJECTIVES,OBJECTIVES
Unnamed: 0_level_1,ID,FLUX,ID,FLUX,ID,FLUX
0,glc__D_e,10.0,acald_e,20.0,ATPM,65.0
1,o2_e,10.0,co2_e,20.0,,
2,,,h2o_e,20.0,,


In [25]:
from cobra.flux_analysis import flux_variability_analysis

In [26]:
flux_variability_analysis(model, model.reactions[:10])

Unnamed: 0,minimum,maximum
12DGR120tipp,0.0,0.0
12DGR140tipp,0.0,0.0
12DGR141tipp,0.0,0.0
12DGR161tipp,0.0,0.0
12DGR180tipp,0.0,0.0
12DGR181tipp,0.0,0.0
12PPDStpp,0.0,0.0
12PPDt,0.0,0.0
14GLUCANabcpp,0.0,0.0
14GLUCANtexi,0.0,0.0


In [27]:
cobra.flux_analysis.flux_variability_analysis(
    model, model.reactions[:10], fraction_of_optimum=0.9)

Unnamed: 0,minimum,maximum
12DGR120tipp,0.0,2.166667
12DGR140tipp,0.0,2.166667
12DGR141tipp,0.0,2.166667
12DGR161tipp,0.0,2.166667
12DGR180tipp,0.0,2.166667
12DGR181tipp,0.0,2.166667
12PPDStpp,0.0,0.0
12PPDt,0.0,0.0
14GLUCANabcpp,0.0,0.0
14GLUCANtexi,0.0,0.0


In [31]:
loop_reactions = [model.reactions[800], model.reactions[1500]]
flux_variability_analysis(model, reaction_list=loop_reactions, loopless=False)

Unnamed: 0,minimum,maximum
FAS140,0.0,0.0
NDPK3,0.0,0.0


In [29]:
model.reactions.get_by_id('FRD7')

KeyError: 'FRD7'

In [32]:
flux_variability_analysis(model, reaction_list=loop_reactions, loopless=True)

Unnamed: 0,minimum,maximum
FAS140,0.0,0.0
NDPK3,0.0,0.0


In [33]:
model.optimize()
model.summary(fva=0.95)

Unnamed: 0_level_0,IN_FLUXES,IN_FLUXES,IN_FLUXES,IN_FLUXES,OUT_FLUXES,OUT_FLUXES,OUT_FLUXES,OUT_FLUXES,OBJECTIVES,OBJECTIVES
Unnamed: 0_level_1,ID,FLUX,FLUX_MIN,FLUX_MAX,ID,FLUX,FLUX_MIN,FLUX_MAX,ID,FLUX
0,o2_e,10.0,9.277778,10.0,h2o_e,20.0,18.0,28.084112,ATPM,65.0
1,glc__D_e,10.0,8.375,10.0,co2_e,20.0,6.136364,23.785095,,
2,nh4_e,-0.0,-0.0,10.0,acald_e,20.0,0.0,20.236364,,
3,so4_e,-0.0,-0.0,1.181818,h_e,-0.0,-1.940299,22.8125,,
4,pi_e,0.0,-0.0,1.083333,ala__D_e,-0.0,0.0,10.0,,
5,,,,,ala__L_e,0.0,0.0,10.0,,
6,,,,,val__L_e,-0.0,0.0,8.362745,,
7,,,,,succ_e,-0.0,0.0,7.375,,
8,,,,,acac_e,-0.0,0.0,6.95,,
9,,,,,leu__L_e,-0.0,0.0,6.745455,,


In [34]:
model.metabolites.pyr_c.summary(fva=0.95)

Unnamed: 0_level_0,Unnamed: 1_level_0,PERCENT,FLUX,FLUX_MIN,FLUX_MAX,REACTION_STRING
RXN_STAT,ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
PRODUCING,PYK,50.0,10.0,0.0,20.75,adp_c + h_c + pep_c --> atp_c + pyr_c
PRODUCING,GLCpts,50.0,10.0,0.0,10.0,glc__D_e + pep_c --> g6p_c + pyr_c
PRODUCING,ALAD_L,0.0,0.0,0.0,1000.0,ala__L_c + h2o_c + nad_c --> h_c + nadh_c + nh...
PRODUCING,PYRt4pp,0.0,0.0,0.0,1000.0,na1_p + pyr_p --> na1_c + pyr_c
PRODUCING,ALATA_D,0.0,0.0,-1000.0,1000.0,akg_c + ala__D_c <=> glu__D_c + pyr_c
PRODUCING,ALATA_L,0.0,0.0,-1000.0,1000.0,akg_c + ala__L_c <=> glu__L_c + pyr_c
PRODUCING,LDH_D,0.0,0.0,-1000.0,1000.0,lac__D_c + nad_c <=> h_c + nadh_c + pyr_c
PRODUCING,LDH_L,0.0,0.0,-1000.0,1000.0,lac__L_c + nad_c <=> h_c + nadh_c + pyr_c
PRODUCING,DHAPT,0.0,0.0,0.0,12.6,dha_c + pep_c --> dhap_c + pyr_c
PRODUCING,GLCptspp,0.0,0.0,0.0,10.0,glc__D_p + pep_c --> g6p_c + pyr_c


In [35]:

model.objective = 'Growth'
fba_solution = model.optimize()
pfba_solution = cobra.flux_analysis.pfba(model)

In [37]:
abs(fba_solution.fluxes["Growth"] - pfba_solution.fluxes[
    "Growth"])

0.0

In [38]:
fba_solution.fluxes["Growth"]

0.0

# Simulating Deletions

In [39]:
import pandas
from time import time

import cobra.test
from cobra.flux_analysis import (
    single_gene_deletion, single_reaction_deletion, double_gene_deletion,
    double_reaction_deletion)

In [None]:
print('complete model: ', cobra_model.optimize())
with cobra_model:
    cobra_model.reactions.PFK.knock_out()
    print('pfk knocked out: ', cobra_model.optimize())

# Growth Media

In [41]:
model.medium

{'EX_glc__D_e': 10.0,
 'EX_h2o_e': 10.0,
 'EX_h_e': 10.0,
 'EX_cl_e': 10.0,
 'EX_pi_e': 10.0,
 'EX_nh4_e': 10.0,
 'EX_fe3_e': 10.0,
 'EX_k_e': 10.0,
 'EX_ca2_e': 10.0,
 'EX_mg2_e': 10.0,
 'EX_mn2_e': 10.0,
 'EX_cobalt2_e': 10.0,
 'EX_zn2_e': 10.0,
 'EX_cu2_e': 10.0,
 'EX_o2_e': 10.0,
 'EX_fe2_e': 10.0,
 'EX_mobd_e': 10.0,
 'EX_so4_e': 10.0}

In [45]:
for x in model.medium:
    print(type(model.medium[x]))

<class 'float'>
<class 'float'>
<class 'float'>
<class 'float'>
<class 'float'>
<class 'float'>
<class 'float'>
<class 'float'>
<class 'float'>
<class 'float'>
<class 'float'>
<class 'float'>
<class 'float'>
<class 'float'>
<class 'float'>
<class 'float'>
<class 'float'>
<class 'float'>


This is a complicated way to set medium composition <br>
The reason is that model.medium is a copy of the medium. Changing it won't effect the medium. You have to assign a full dictionary to the medium to change it

In [48]:
medium = model.medium
medium['EX_o2_e'] = 0
model.medium = medium

model.medium

{'EX_glc__D_e': 10.0,
 'EX_h2o_e': 10.0,
 'EX_h_e': 10.0,
 'EX_cl_e': 10.0,
 'EX_pi_e': 10.0,
 'EX_nh4_e': 10.0,
 'EX_fe3_e': 10.0,
 'EX_k_e': 10.0,
 'EX_ca2_e': 10.0,
 'EX_mg2_e': 10.0,
 'EX_mn2_e': 10.0,
 'EX_cobalt2_e': 10.0,
 'EX_zn2_e': 10.0,
 'EX_cu2_e': 10.0,
 'EX_fe2_e': 10.0,
 'EX_mobd_e': 10.0,
 'EX_so4_e': 10.0}

In [49]:
model.slim_optimize()

0.0

In [50]:
medium = model.medium
medium['EX_o2_e'] = 10
model.medium = medium

model.medium

{'EX_glc__D_e': 10.0,
 'EX_h2o_e': 10.0,
 'EX_h_e': 10.0,
 'EX_cl_e': 10.0,
 'EX_pi_e': 10.0,
 'EX_nh4_e': 10.0,
 'EX_fe3_e': 10.0,
 'EX_k_e': 10.0,
 'EX_ca2_e': 10.0,
 'EX_mg2_e': 10.0,
 'EX_mn2_e': 10.0,
 'EX_cobalt2_e': 10.0,
 'EX_zn2_e': 10.0,
 'EX_cu2_e': 10.0,
 'EX_o2_e': 10,
 'EX_fe2_e': 10.0,
 'EX_mobd_e': 10.0,
 'EX_so4_e': 10.0}

In [51]:
from cobra.medium import minimal_medium

max_growth = model.slim_optimize()
minimal_medium(model, max_growth)

Series([], dtype: float64)

In [52]:
model.medium

{'EX_glc__D_e': 10.0,
 'EX_h2o_e': 10.0,
 'EX_h_e': 10.0,
 'EX_cl_e': 10.0,
 'EX_pi_e': 10.0,
 'EX_nh4_e': 10.0,
 'EX_fe3_e': 10.0,
 'EX_k_e': 10.0,
 'EX_ca2_e': 10.0,
 'EX_mg2_e': 10.0,
 'EX_mn2_e': 10.0,
 'EX_cobalt2_e': 10.0,
 'EX_zn2_e': 10.0,
 'EX_cu2_e': 10.0,
 'EX_o2_e': 10,
 'EX_fe2_e': 10.0,
 'EX_mobd_e': 10.0,
 'EX_so4_e': 10.0}

In [53]:
minimal_medium(model, 0.1, minimize_components=True)

Minimization of medium was infeasible.


In [54]:
minimal_medium(model, 0.8, minimize_components=8, open_exchanges=True)

Minimization of medium was infeasible.


In [56]:
for r in model.exchanges:
    print(r)

EX_12ppd__S_e: 12ppd__S_e --> 
EX_glc__D_e: glc__D_e <=> 
EX_h2o_e: h2o_e <=> 
EX_14glucan_e: 14glucan_e --> 
EX_h_e: h_e <=> 
EX_hco3_e: hco3_e --> 
EX_2hxmp_e: 2hxmp_e --> 
EX_2pglyc_e: 2pglyc_e --> 
EX_34dhcinm_e: 34dhcinm_e --> 
EX_leu__L_e: leu__L_e --> 
EX_3hoxpac_e: 3hoxpac_e --> 
EX_3mb_e: 3mb_e --> 
EX_4hbz_e: 4hbz_e --> 
EX_ala__L_e: ala__L_e --> 
EX_cl_e: cl_e <=> 
EX_LalaDgluMdapDala_e: LalaDgluMdapDala_e --> 
EX_5mtr_e: 5mtr_e --> 
EX_arab__L_e: arab__L_e --> 
EX_4abut_e: 4abut_e --> 
EX_acac_e: acac_e --> 
EX_acald_e: acald_e --> 
EX_acgam_e: acgam_e --> 
EX_ac_e: ac_e --> 
EX_chol_e: chol_e --> 
EX_acmana_e: acmana_e --> 
EX_acnam_e: acnam_e --> 
EX_pi_e: pi_e <=> 
EX_acser_e: acser_e --> 
EX_actn__R_e: actn__R_e --> 
EX_acysbmn_e: acysbmn_e --> 
EX_adn_e: adn_e --> 
EX_ins_e: ins_e --> 
EX_nh4_e: nh4_e <=> 
EX_ad_e: ad_e --> 
EX_akg_e: akg_e --> 
EX_mal__L_e: mal__L_e --> 
EX_asn__L_e: asn__L_e --> 
EX_ala__D_e: ala__D_e --> 
EX_gln__L_e: gln__L_e --> 
EX_gly_e: gly_e -

In [57]:
for r in model.demands:
    print(r)

ATPM: atp_c + h2o_c --> adp_c + h_c + pi_c


In [65]:
for r in model.sinks:
    print(r)
    for m in r.metabolites:
        print(m.name)
    print()

sink_2ohph_c: 2ohph_c --> 
2-Octaprenyl-6-hydroxyphenol

sink_4hba_c: 4hba_c --> 
4-Hydroxy-benzyl alcohol

sink_5drib_c: 5drib_c --> 
5'-deoxyribose

sink_bmocogdp_c: bmocogdp_c --> 
Bis-molybdopterin guanine dinucleotide

sink_lipopb_c: lipopb_c --> 
Lipoate (protein bound)

sink_sheme_c: sheme_c --> 
Siroheme



What is the difference between a sink and an exchange?

Exchange reactions - are reactions that move metabolites across in silico compartments. These in silico compartments are representive of intra- and inter- cellular membranes.<br><br>
Sink reactions - The metabolites, produced in reactions that are outside of an ambit of the system or in unknown reactions, are supplied to the network with reversible sink reactions.<br><br>
Demand reactions - Irreversible reactions added to the model to consume metabolites that are deposited in the system.

In [67]:
for r in model.boundary:
    print(r)

EX_12ppd__S_e: 12ppd__S_e --> 
EX_glc__D_e: glc__D_e <=> 
EX_h2o_e: h2o_e <=> 
EX_14glucan_e: 14glucan_e --> 
EX_h_e: h_e <=> 
EX_hco3_e: hco3_e --> 
EX_2hxmp_e: 2hxmp_e --> 
EX_2pglyc_e: 2pglyc_e --> 
EX_34dhcinm_e: 34dhcinm_e --> 
EX_leu__L_e: leu__L_e --> 
EX_3hoxpac_e: 3hoxpac_e --> 
EX_3mb_e: 3mb_e --> 
EX_4hbz_e: 4hbz_e --> 
EX_ala__L_e: ala__L_e --> 
EX_cl_e: cl_e <=> 
EX_LalaDgluMdapDala_e: LalaDgluMdapDala_e --> 
EX_5mtr_e: 5mtr_e --> 
EX_arab__L_e: arab__L_e --> 
EX_4abut_e: 4abut_e --> 
EX_acac_e: acac_e --> 
EX_acald_e: acald_e --> 
EX_acgam_e: acgam_e --> 
EX_ac_e: ac_e --> 
EX_chol_e: chol_e --> 
EX_acmana_e: acmana_e --> 
EX_acnam_e: acnam_e --> 
EX_pi_e: pi_e <=> 
EX_acser_e: acser_e --> 
EX_actn__R_e: actn__R_e --> 
EX_acysbmn_e: acysbmn_e --> 
EX_adn_e: adn_e --> 
EX_ins_e: ins_e --> 
EX_nh4_e: nh4_e <=> 
EX_ad_e: ad_e --> 
EX_akg_e: akg_e --> 
EX_mal__L_e: mal__L_e --> 
EX_asn__L_e: asn__L_e --> 
EX_ala__D_e: ala__D_e --> 
EX_gln__L_e: gln__L_e --> 
EX_gly_e: gly_e -