## This examples show how to integrate metabolomics data into constraint-based model with thermodynamic MFA

Imports and load CBM

In [305]:
from time import time
import numpy as np
import pandas as pd
from cobra.io import load_json_model

cobra_model = load_json_model('data/models/iJO1366.json')

Load thermodynamic model

In [168]:
import pytfa
from pytfa.io import load_thermoDB, read_lexicon, annotate_from_lexicon, read_compartment_data, apply_compartment_data

thermo_data = load_thermoDB('/data/thermo_data.thermodb')
lexicon = read_lexicon('data/models/iJO1366/lexicon.csv')
compartment_data = read_compartment_data('data/models/iJO1366/compartment_data.json')

tmodel = pytfa.ThermoModel(thermo_data, cobra_model)

annotate_from_lexicon(tmodel, lexicon)
apply_compartment_data(tmodel, compartment_data)

biomass_rxn = 'Ec_biomass_iJO1366_WT_53p95M'

tmodel.objective = biomass_rxn
tmodel.solver.problem.Params.NumericFocus = 3
tmodel.solver.configuration.tolerances.feasibility = 1e-9
tmodel.solver.configuration.presolve = True

tmodel.prepare()
tmodel.convert()

Read LP format model from file C:\Users\riheme\AppData\Local\Temp\tmp318fovi2.lp
Reading time = 0.03 seconds
: 1807 rows, 5170 columns, 20334 nonzeros
Read LP format model from file C:\Users\riheme\AppData\Local\Temp\tmpm1w2zd9y.lp
Reading time = 0.02 seconds
: 1807 rows, 5170 columns, 20334 nonzeros


2020-10-07 17:00:55,082 - thermomodel_None - INFO - # Model initialized with units kcal/mol and temperature 298.15 K
2020-10-07 17:00:55,097 - thermomodel_None - INFO - # Model preparation starting...
2020-10-07 17:00:58,417 - thermomodel_None - INFO - # Model preparation done.
2020-10-07 17:00:58,417 - thermomodel_None - INFO - # Model conversion starting...
2020-10-07 17:01:33,852 - thermomodel_None - INFO - # Model conversion done.
2020-10-07 17:01:33,852 - thermomodel_None - INFO - # Updating cobra_model variables...
2020-10-07 17:01:33,893 - thermomodel_None - INFO - # cobra_model variables are up-to-date


# Data preparation

Load and prune metabolomic data from [McCloskey et al (2018)](https://www.sciencedirect.com/science/article/pii/S1096717617304433)

In [208]:
data = pd.read_csv('data/1-s2.0-S1096717617304433-mmc9.csv', index_col=-1)

# remove ratios
query = 'calculated_concentration_units == "umol*gDW-1"'
data.query(query, inplace = True)
# remove 2nd fragments
data = data[ ~data[ 'component_name' ].str.contains( '_2.Light' ) ]

The lower and upper confidence interval is not always lower/higher than the min and max value.

In [260]:
data[ (data['ci_lb'] < data['min']) | (data['ci_ub'] > data['max']) ][ 'sample_name_abbreviation' ].value_counts()

EColi_W3110     47
EColi_W         44
EColi_Crooks    43
EColi_C         43
EColi_DH5a      40
EColi_MG1655    34
EColi_BL21      32
Name: sample_name_abbreviation, dtype: int64

There are negative values (impossible for concentration!) in some rows:

In [270]:
data[ (data.select_dtypes('number') < 0).any(axis=1) ].head()

Unnamed: 0_level_0,sample_name_abbreviation,component_group_name,component_name,calculated_concentration_units,var,ci_level,max,test_stat,median,used_,ci_lb,iq_1,cv,min,n,iq_3,ci_ub,mean,pvalue_corrected
index_,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
398,EColi_C,oaa,oaa.oaa_1.Light,umol*gDW-1,42.16751,0.95,16.465845,1.946371,1.784942,True,-1.654797,0.876958,125.849068,0.615111,6,7.74681,11.974542,5.159873,0.109176
430,EColi_C,skm,skm.skm_1.Light,umol*gDW-1,0.000128,0.95,0.030046,2.033105,0.005806,True,-0.002482,0.001707,120.480257,0.000191,6,0.012066,0.021263,0.00939,0.097724
477,EColi_Crooks,akg,akg.akg_1.Light,umol*gDW-1,17.479111,0.95,11.206608,2.443862,2.601302,True,-0.216286,1.134987,100.230282,0.698111,6,6.105559,8.558681,4.171197,0.058376
563,EColi_Crooks,gua,gua.gua_1.Light,umol*gDW-1,0.001675,0.95,0.113992,2.559303,0.029614,True,-0.000188,0.019103,95.709247,0.001531,6,0.057048,0.085703,0.042757,0.050689
627,EColi_Crooks,oaa,oaa.oaa_1.Light,umol*gDW-1,9.510722,0.95,8.606953,1.89147,1.164905,True,-0.855013,0.793649,129.501919,0.699329,6,1.77799,5.617793,2.38139,0.117139


According to [Bennett et al (2008)](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2710577/):

>  \[The\] ratio of aqueous volume to cellular dry weight is 0.0023 liter g<sup>−1</sup> for *E. coli*.

In [158]:
# factor to convert umol/gCDW to mol/L
conversion_factor = 1e-6 / .0023

Convert &mu;mol/g<sub>CDW</sub> to M and then apply log.

In [283]:
cols = [ 'sample_name_abbreviation', 'component_group_name', 'min', 'max' ]

data[ cols[-2:] ] = np.log( data[ cols[-2:] ] * conversion_factor )
data = data[ cols ]

Discard metabolites that are not present in model!

In [292]:
names = data[ 'component_group_name' ].unique()

sel_names = [ name for name in names if tmodel.log_concentration.has_id( name + '_c' ) ]

data = data[ data['component_group_name'].isin(sel_names) ]

Set thermo model bounds

In [297]:
# select 1 strain (for test)
strain_name = 'EColi_BL21'
strain = data[ data['sample_name_abbreviation'] == strain_name ]

for name, lb, ub in zip( strain['component_group_name'], strain['min'], strain['max'] ):
    tmodel.log_concentration.get_by_id(name + '_c').variable.set_bounds(lb, ub)

# TFA

In [298]:
fba_solution = cobra_model.optimize()
fba_value = fba_solution.objective_value

In [300]:
tmodel.name = strain_name
tfa_solution = tmodel.optimize()
tfa_value = tfa_solution.objective_value
if tfa_value < 0.1:
    print('Requires relaxation!')

Requires relaxation!


In [309]:
if tfa_value < 0.1:
    start = time()
    
    from pytfa.optim.relaxation import relax_dgo

    tmodel.reactions.get_by_id(biomass_rxn).lower_bound = 0.5*fba_value
    relaxed_model, slack_model, relax_table = relax_dgo(tmodel)

    original_model, tmodel = tmodel, relaxed_model
    
    elapsed_time = time()-start
    print(f'Elapsed time: {elapsed_time:.2f} s')

    print('Relaxation: ')
    print(relax_table)
    
    tfa_solution = tmodel.optimize()
    tfa_value = tfa_solution.objective_value

Read LP format model from file C:\Users\riheme\AppData\Local\Temp\tmpvqi61cty.lp
Reading time = 0.02 seconds
: 1807 rows, 5170 columns, 20304 nonzeros
Read LP format model from file C:\Users\riheme\AppData\Local\Temp\tmpm3uofog_.lp
Reading time = 0.04 seconds
: 1807 rows, 5170 columns, 20304 nonzeros


2020-10-07 18:44:01,617 - thermomodel_EColi_BL21 - INFO - # Model initialized with units kcal/mol and temperature 298.15 K


Read LP format model from file C:\Users\riheme\AppData\Local\Temp\tmp3np58ovs.lp
Reading time = 0.02 seconds
: 1807 rows, 5170 columns, 20304 nonzeros
Read LP format model from file C:\Users\riheme\AppData\Local\Temp\tmp1qp0qedg.lp
Reading time = 0.02 seconds
: 1807 rows, 5170 columns, 20304 nonzeros


2020-10-07 18:44:28,264 - thermomodel_EColi_BL21 - INFO - # Model initialized with units kcal/mol and temperature 298.15 K
2020-10-07 18:44:37,892 - thermomodel_EColi_BL21 - INFO - Adding slack constraints
adding slacks: 100%|███████████████████████████████████████████████████████████████| 1824/1824 [00:33<00:00, 54.29it/s]
2020-10-07 18:45:13,108 - thermomodel_EColi_BL21 - INFO - Optimizing slack model
2020-10-07 18:45:18,468 - thermomodel_EColi_BL21 - INFO - Extracting relaxation
applying slack: 100%|███████████████████████████████████████████████████████████| 2585/2585 [00:00<00:00, 41403.12it/s]
2020-10-07 18:45:18,618 - thermomodel_EColi_BL21 - INFO - Testing relaxation


Elapsed time: 97.18 s
Relaxation: 
              lb_in      ub_in  lb_change  ub_change     lb_out     ub_out
ACONTa     0.580427   2.376633   0.610496   0.000000  -0.030069   2.376633
ADSL1r     7.868318   9.734394   0.342906   0.000000   7.525413   9.734394
AIRC3    -11.941085  -8.988685   0.000000   2.582409 -11.941085  -6.406276
ARGSL     16.770653  18.636729   9.523862   0.000000   7.246792  18.636729
ATPS4rpp  -9.518201  -8.083092   1.211046   0.000000 -10.729247  -8.083092
GALT1     67.754735  69.018800  59.790453   0.000000   7.964282  69.018800
GLCTR3     8.888108  10.152173   0.923826   0.000000   7.964282  10.152173
HEPK2     44.872230  45.345690  35.916725   0.000000   8.955506  45.345690
HEPT1     25.810459  27.074524  16.532603   0.000000   9.277857  27.074524
HEPT2      7.030447   8.294512   8.087680   0.000000  -1.057233   8.294512
MECDPS    26.135683  30.646876  14.579913   0.000000  11.555770  30.646876
MTHTHFSs  13.297862  14.814659   6.891586   0.000000   6.406276  

In [310]:
print('FBA Solution found : {0:.5g}'.format(fba_value))
print('TFA Solution found : {0:.5g}'.format(tfa_value))

FBA Solution found : 0.814
TFA Solution found : 0.76181
