# Toy example of radix-based discretization for multiple conditions: testing the improved Benders implementation

In [1]:
# Notebook-wide switches.
# NOTE(review): SCALE_CROWDING is not referenced by any cell shown in this notebook.
SCALE_CROWDING=False
# Forwarded as the `prevent_zero` argument of opt.to_radix(...) further down.
PREVENT_ZERO = True

In [2]:
# Load the line_profiler IPython extension; it supplies the %lprun magic that
# the (currently commented-out) profiling cell for opt.to_radix relies on.
%load_ext line_profiler

In [3]:
from gurobipy import *

import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Keep text as editable text (rather than outlines) in exported SVG figures.
plt.rcParams['svg.fonttype'] = 'none'
# -1 requests untruncated column contents when displaying DataFrames.
# NOTE(review): newer pandas releases expect None here instead of -1.
pd.set_option('display.max_colwidth', -1)
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

from dynamicme.decomposition import Decomposer
from dynamicme.callback_gurobi import cb_benders
from dynamicme.optimize import Optimizer, StackOptimizer
from dynamicme.optimize import Constraint, Variable

from cobra.io import load_json_model
from cobra import Metabolite, Reaction
from six import iteritems

import numpy as np
import cobra

(<type 'exceptions.ImportError'>, ImportError('No module named cplex',), <traceback object at 0x7f8f60450ef0>)


### Try optimizing using radix for one condition first

In [4]:
# ----- Reference (basal) model and crowding parameters -----------------------
ijomc = load_json_model('/home/laurence/ME/models/BiGG_M/json/e_coli_core.json')
mdl_ref = ijomc

# Uniform starting crowding coefficient assigned to every crowded reaction.
keff0 = 1./65/3600
# Right-hand side of the crowding constraint (0.0003 was an earlier trial value).
crowding_bound0 = 0.001
crowding_bound = crowding_bound0

# Reactions exempt from the crowding constraint.
not_crowded = ['ATPM']

# A reaction is "crowded" when all of its metabolites are in compartment 'c',
# it is not a biomass reaction, and it is not explicitly exempted.
rxns_c = []
for candidate in ijomc.reactions:
    if 'BIOMASS' in candidate.id or candidate.id in not_crowded:
        continue
    if all(met.compartment == 'c' for met in candidate.metabolites):
        rxns_c.append(candidate)
crowding_dict = dict((crowded, keff0) for crowded in rxns_c)

# ----- Crowding constraint ----------------------------------------------------
# Added to the reference model so that the duality gap constraint built later
# includes it: sum(keff * flux) <= crowding_bound.
crowding = Constraint('crowding')
crowding._constraint_sense = 'L'
crowding._bound = crowding_bound
for rxn, keff in iteritems(crowding_dict):
    rxn.add_metabolites({crowding: keff})

In [5]:
# Solve the crowding-constrained reference model and keep its growth rate as
# the unfitted baseline reported later on.
ijomc.optimize()
biomass_ref = ijomc.reactions.get_by_id('BIOMASS_Ecoli_core_w_GAM')
mu_crowd0 = biomass_ref.x
print(mu_crowd0)

0.873921506968


In [6]:
N_CONDS = 3

# Measured growth rates, restricted to substrates whose exchange reaction
# exists in the reference model.
df_meas = pd.read_csv('/home/laurence/ME/data/dynamicME/beg/growth_meas.csv')
measured_ex = df_meas.ex_rxn.unique()
ex_rxns = [rid for rid in measured_ex if mdl_ref.reactions.has_id(rid)]
df_meas = df_meas[df_meas.ex_rxn.isin(ex_rxns)]
conds = df_meas.substrate.unique()

# One row per (condition, exchange reaction): the condition's own substrate may
# be taken up (lb=-10); every other measured substrate is closed (lb=0).
cond_rows = []
for _, meas in df_meas.iterrows():
    for ex_rxn in ex_rxns:
        own_substrate = meas['ex_rxn'] == ex_rxn
        cond_rows.append({
            'cond': meas['substrate'],
            'rxn': ex_rxn,
            'lb': -10 if own_substrate else 0,
            'ub': 1000.,
            'obj': 0.,
        })
df_conds = pd.DataFrame(cond_rows)

# Alternative selection: the first N_CONDS conditions, i.e. conds[0:N_CONDS].
selected = df_conds.cond.isin(['glucose','acetate','succinate'])
df_conds = df_conds[selected]

## Need to allow higher growth for acetate with higher uptake rate

In [7]:
# Rows with lb <= -10 are each condition's own carbon source (all other rows
# were created with lb=0); widen their uptake limit from -10 to -20.
df_conds.loc[ df_conds.lb <= -10, 'lb'] = -20

In [8]:
# Display the per-condition exchange bounds after the uptake adjustment.
df_conds

Unnamed: 0,cond,lb,obj,rxn,ub
0,glucose,-20,0.0,EX_glc__D_e,1000.0
1,glucose,0,0.0,EX_fru_e,1000.0
2,glucose,0,0.0,EX_succ_e,1000.0
3,glucose,0,0.0,EX_mal__L_e,1000.0
4,glucose,0,0.0,EX_ac_e,1000.0
10,succinate,0,0.0,EX_glc__D_e,1000.0
11,succinate,0,0.0,EX_fru_e,1000.0
12,succinate,-20,0.0,EX_succ_e,1000.0
13,succinate,0,0.0,EX_mal__L_e,1000.0
14,succinate,0,0.0,EX_ac_e,1000.0


In [9]:
# Combine the reference model with the condition table into one stacked model.
# Later cells rely on stacker.model (the combined problem) and on
# stacker.model_dict (one sub-model per condition, keyed by condition name).
# NOTE(review): presumably each sub-model is a copy of ijomc with the bounds
# from df_conds applied -- confirm in StackOptimizer.stack_models.
stacker = StackOptimizer()
stacker.stack_models(ijomc, df_conds)

In [10]:
# Solve the stacked model with Gurobi; the returned solution is the cell output.
stacker.model.optimize(solver='gurobi')

<Solution 3.02 at 0x7f8f0018aa90>

In [11]:
# Scratch checks of individual conditions, kept for reference:
# stacker.model_dict['glucose'].optimize()
# print stacker.model_dict['glucose'].reactions.query('BIOMASS')[0].x
# print stacker.model_dict['glucose'].reactions.query('EX_glc')[0].x
# print stacker.model_dict['glucose'].reactions.query('EX_fru')[0].x

# stacker.model_dict['acetate'].optimize()
# print stacker.model_dict['acetate'].reactions.query('BIOMASS')[0].x
# print stacker.model_dict['acetate'].reactions.query('EX_glc')[0].x
# print stacker.model_dict['acetate'].reactions.query('EX_ac')[0].x

# Solve every condition's sub-model on its own and report its objective value
# (printed as mu).
for mdl_ind,mdl in iteritems(stacker.model_dict):
    mdl.optimize()
    print('%s. mu=%g.' % (mdl_ind, mdl.solution.f))

succinate. mu=0.840134.
acetate. mu=0.389313.
glucose. mu=1.79057.


In [12]:
# Add a duality gap constraint to every condition's sub-model, in place, with
# the condition name passed as the index.
# NOTE: `opt` deliberately survives this loop -- the Optimizer created for the
# last condition is the object whose to_radix() is later called on the whole
# stacked model. The return value `gapi` is not used in the cells shown here.
for mdl_ind,mdl in iteritems(stacker.model_dict):
    opt = Optimizer(mdl)
    gapi = opt.add_duality_gap_constraint(INF=1e3, inplace=True, index=mdl_ind)

In [13]:
# Re-solve the stacked model now that the duality gap constraints have been
# added in place to the sub-models.
stacker.model.optimize(solver='gurobi')

<Solution 6.04 at 0x7f8f0018ad90>

In [14]:
import numpy as np

# ----- Radix discretization settings -----------------------------------------
radix = 2.
print('Radix:',radix)
powers = np.arange(-3,4)
print('Powers:', powers)
# np.linspace needs an integer sample count; passing the float radix directly
# is rejected by current numpy releases.
digits_per_power = int(radix)
pwr_max = max(powers)
# De-duplicate and sort: the position of a digit in this list is later used as
# the index `k` in the binary variable ids, so the order must be reproducible.
digits = sorted(set(np.linspace(1, radix-1, digits_per_power)))
print('Digits:', digits)

# ----- Discretize crowding coefficients into radix ---------------------------
# var_cons_dict maps a reference reaction id (the "group") to a list of
# (variable, constraint, a0) tuples, collected over all conditions:
#   * the primal entry: the condition's copy of the reaction in the
#     condition's crowding constraint, and
#   * the dual entries: the dual variable of that crowding constraint in every
#     dual constraint whose id equals the reaction copy's id.
# a0 is the current crowding coefficient of the reaction copy.
var_cons_dict = {}
# Get the group ID from reference model
mdl_ref = ijomc
crowding_ref = mdl_ref.metabolites.crowding
for rxn_ref in crowding_ref.reactions:
    for mdl_ind, mdl in iteritems(stacker.model_dict):
        crowding_p = mdl.metabolites.get_by_id('crowding_%s'%mdl_ind)
        var_d = mdl.reactions.get_by_id('wa_%s'%crowding_p.id)
        rxn_p = mdl.reactions.get_by_id(rxn_ref.id+'_%s'%mdl_ind)
        # Get the coefficient in the dual
        cons_ds = [m for m in var_d.metabolites.keys() if rxn_p.id==m.id]
        a0 = rxn_p.metabolites[crowding_p]
        entries = [(rxn_p, crowding_p, a0)] + [(var_d, cons_d, a0) for cons_d in cons_ds]
        # setdefault replaces the Python-2-only has_key() branch and its
        # duplicated list construction.
        var_cons_dict.setdefault(rxn_ref.id, []).extend(entries)

('Radix:', 2.0)
('Powers:', array([-3, -2, -1,  0,  1,  2,  3]))
('Digits:', [1.0])




In [15]:
# %lprun -f opt.to_radix opt.to_radix(gap, var_cons_dict, radix, powers, digits_per_power, prevent_zero=True)

In [16]:
# Apply the radix discretization to the stacked model and time the call.
# `opt` is the Optimizer left over from the last iteration of the duality-gap
# loop; here it is handed stacker.model explicitly.
%time opt.to_radix(stacker.model, var_cons_dict, radix, powers, digits=digits, prevent_zero=PREVENT_ZERO)

CPU times: user 305 ms, sys: 17.6 ms, total: 323 ms
Wall time: 345 ms


[1.0]

### Now add the absolute-error minimization objective

In [17]:
from dynamicme.optimize import Variable, Constraint

mu_id = 'BIOMASS_Ecoli_core_w_GAM'

# For every condition, replace the objective by the absolute deviation of the
# simulated growth rate from the measured one:
#     mu - sp + sn = mu_meas,   sp, sn >= 0,   minimize sp + sn
for mdl_ind, mdl in iteritems(stacker.model_dict):
    # Measured growth rate: first row recorded for this substrate.
    dfi = df_meas[df_meas.substrate == mdl_ind]
    mu_measi = dfi.growth_rate_1_h.iloc[0]

    # Clear the previous objective.
    for rxn in mdl.reactions:
        rxn.objective_coefficient = 0.

    rxn_mu = mdl.reactions.get_by_id(mu_id + '_%s' % mdl_ind)

    # Equality row that ties the growth rate to the two slack variables.
    cons = Constraint('abs_err_%s' % mdl_ind)
    cons._constraint_sense = 'E'
    cons._bound = mu_measi

    # Positive / negative deviation slacks, both penalized in the objective.
    sp = Variable('sp_%s' % mdl_ind, lower_bound=0., upper_bound=1e3)
    sn = Variable('sn_%s' % mdl_ind, lower_bound=0., upper_bound=1e3)
    for slack in (sp, sn):
        slack.objective_coefficient = 1.

    mdl.add_metabolites(cons)
    mdl.add_reactions([sp, sn])

    # Coefficients of the row: -sp + sn + mu = mu_meas
    for term, coeff in ((sp, -1.), (sn, 1.), (rxn_mu, 1.)):
        term.add_metabolites({cons: coeff})

In [18]:
from cobra.solvers import gurobi_solver
from gurobipy import *

milp = gurobi_solver.create_problem(stacker.model)
milp.ModelSense = GRB.MINIMIZE
milp.Params.IntFeasTol = 1e-9
milp.Params.OutputFlag = 1
# milp.Params.FeasibilityTol = 1e-9
# milp.Params.OptimalityTol = 1e-9
milp.Params.NodefileStart = 3   # Start writing nodes to disk if x GB RAM exceeded (for each thread)
milp.Params.TimeLimit = 2*3600  # Time limit in seconds    
milp.Params.Method = 2 # 2: Barrier
milp.Params.NodeMethod = 2 # 2: Barrier
milp.Params.BarHomogeneous = 1
%time milp.optimize()

Changed value of parameter OutputFlag to 1
   Prev: 0  Min: 0  Max: 1  Default: 1
Changed value of parameter NodefileStart to 3.0
   Prev: 1e+100  Min: 0.0  Max: 1e+100  Default: 1e+100
Changed value of parameter TimeLimit to 7200.0
   Prev: 1e+100  Min: 0.0  Max: 1e+100  Default: 1e+100
Changed value of parameter Method to 2
   Prev: 0  Min: -1  Max: 5  Default: -1
Changed value of parameter NodeMethod to 2
   Prev: -1  Min: -1  Max: 2  Default: -1
Changed value of parameter BarHomogeneous to 1
   Prev: -1  Min: -1  Max: 1  Default: -1
Optimize a model with 8958 rows, 3432 columns and 24549 nonzeros
Variable types: 3096 continuous, 336 integer (0 binary)
Coefficient statistics:
  Matrix range     [5e-07, 1e+03]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00, 1e+03]
  RHS range        [1e-03, 1e+03]
Presolve removed 2199 rows and 390 columns
Presolve time: 0.12s
Presolved: 6759 rows, 3042 columns, 19699 nonzeros
Variable types: 2706 continuous, 336 integer (336 binary)
Pre

In [22]:
# Extract the MILP solution and compare fitted growth rates with measurements.
sol = gurobi_solver.format_solution(milp, stacker.model)
x_dict = sol.x_dict

# Values of all radix binaries (reactions whose id matches 'binary_').
yopt = [x_dict[rxn.id] for rxn in stacker.model.reactions.query('binary_')]
# This count does not depend on the condition, so it is reported once rather
# than after every condition.
print('Number of non-zero binaries: %g' % sum(yopt))

for mdl_ind,mdl in iteritems(stacker.model_dict):
    dfi = df_meas[ df_meas.substrate==mdl_ind]
    mu_measi = dfi.growth_rate_1_h.iloc[0]
    rxn_mu = stacker.model.reactions.get_by_id(mu_id+'_%s'%mdl_ind)
    muopt = x_dict[rxn_mu.id]
    # Relative errors as fractions; both are converted to percent when printed
    # (the fitted error used to be printed as a raw fraction next to a % sign).
    err_fit = (muopt-mu_measi)/mu_measi
    # NOTE: mu_crowd0 is the growth rate of the single reference model, so the
    # "unfitted" value shown is the same for every condition.
    err0 = (mu_crowd0-mu_measi)/mu_measi
    print('%s. mu_meas:%g. Unfitted mu_crowd=%g (error=%.3g%%). Fitted mu: %g (error=%.3g%%)' % (mdl_ind, mu_measi, mu_crowd0, 100*err0, muopt, 100*err_fit))

Number of non-zero binaries: 119
succinate. mu_meas:0.46. Unfitted mu_crowd=0.873922 (error=90%). Fitted mu: 0.46 (error=0%)
Number of non-zero binaries: 119
acetate. mu_meas:0.256. Unfitted mu_crowd=0.873922 (error=241%). Fitted mu: 0.256 (error=0%)
Number of non-zero binaries: 119
glucose. mu_meas:0.74. Unfitted mu_crowd=0.873922 (error=18.1%). Fitted mu: 0.74 (error=0%)
Number of non-zero binaries: 119


In [23]:
# Fitted parameters
# Rebuild each group's fitted crowding coefficient from the radix binaries:
#     kfit = a0 * sum_{l,k} y[k,l] * radix**powers[l] * digits[k]
# where y[k,l] is the solution value of 'binary_<group><k><l>'.
kfit_dict = {}
for group_id, var_dict in iteritems(var_cons_dict):
    # Every entry of the group is a (variable, constraint, a0) tuple; the
    # starting coefficient a0 is read from the first one.
    a0  = var_dict[0][2]
    kfit = 0.
    for l,pwr in enumerate(powers):
        for k,digit in enumerate(digits):
            yid = 'binary_%s%s%s' % (group_id,k,l)
            y   = x_dict[yid]
            # Only report binaries that are switched on.
            if abs(y)>1e-10:
                print('%s. Value=%s. Power=%g. Digit=%g' % (yid, y, pwr, digit))
            kfit += y*a0*radix**pwr*digit
    kfit_dict[group_id] = kfit

binary_G6PDH2r01. Value=1.0. Power=-2. Digit=1
binary_AKGDH05. Value=1.0. Power=2. Digit=1
binary_ME200. Value=1.0. Power=-3. Digit=1
binary_ME202. Value=1.0. Power=-1. Digit=1
binary_ME205. Value=1.0. Power=2. Digit=1
binary_ME206. Value=1.0. Power=3. Digit=1
binary_PGK02. Value=1.0. Power=-1. Digit=1
binary_GLUN03. Value=1.0. Power=0. Digit=1
binary_GLUN06. Value=1.0. Power=3. Digit=1
binary_ME103. Value=1.0. Power=0. Digit=1
binary_ME104. Value=1.0. Power=1. Digit=1
binary_ME105. Value=1.0. Power=2. Digit=1
binary_PGI01. Value=1.0. Power=-2. Digit=1
binary_PGI04. Value=1.0. Power=1. Digit=1
binary_GND00. Value=1.0. Power=-3. Digit=1
binary_ACKr00. Value=1.0. Power=-3. Digit=1
binary_GLNS04. Value=1.0. Power=1. Digit=1
binary_FUM05. Value=1.0. Power=2. Digit=1
binary_FUM06. Value=1.0. Power=3. Digit=1
binary_SUCDi00. Value=1.0. Power=-3. Digit=1
binary_PPC02. Value=1.0. Power=-1. Digit=1
binary_PPC05. Value=1.0. Power=2. Digit=1
binary_PPC06. Value=1.0. Power=3. Digit=1
binary_MDH01.

In [24]:
# List the groups whose fitted coefficient differs from its starting value by
# more than a relative 1e-6, as (group id, fitted value, relative change).
# Each group is compared against its own starting coefficient (third field of
# its first var_cons_dict entry) instead of whatever value a loop variable from
# an earlier cell happened to hold last.
kfit_changed = []
for gid, k_new in iteritems(kfit_dict):
    k_start = var_cons_dict[gid][0][2]
    rel_change = abs(k_new - k_start)/k_start
    if rel_change > 1e-6:
        kfit_changed.append((gid, k_new, rel_change))
print('Changed keffs: %d/%d' % (len(kfit_changed), len(var_cons_dict)))
kfit_changed

Changed keffs: 48/48


[(u'G6PDH2r', 1.0683760683760685e-06, 0.75),
 (u'AKGDH', 1.7094017094017095e-05, 3.0),
 (u'ME2', 5.395299145299146e-05, 11.625),
 (u'MALS', 4.2735042735042735e-05, 9.0),
 (u'GLUN', 3.846153846153846e-05, 8.0),
 (u'ME1', 2.991452991452992e-05, 6.000000000000001),
 (u'GND', 5.341880341880342e-07, 0.875),
 (u'ACKr', 5.341880341880342e-07, 0.875),
 (u'GLNS', 8.547008547008548e-06, 1.0),
 (u'ADK1', 5.1282051282051286e-05, 11.0),
 (u'SUCDi', 5.341880341880342e-07, 0.875),
 (u'PPC', 5.341880341880342e-05, 11.5),
 (u'MDH', 6.73076923076923e-05, 14.749999999999996),
 (u'FUM', 5.1282051282051286e-05, 11.0),
 (u'GLUDy', 7.47863247863248e-06, 0.7500000000000001),
 (u'GLUSy', 1.7094017094017095e-05, 3.0),
 (u'PGL', 2.136752136752137e-06, 0.5),
 (u'PGM', 3.73931623931624e-06, 0.12499999999999994),
 (u'ACALD', 6.784188034188034e-05, 14.874999999999998),
 (u'PGK', 2.136752136752137e-06, 0.5),
 (u'PGI', 9.615384615384616e-06, 1.25),
 (u'PPS', 1.7094017094017095e-05, 3.0),
 (u'PTAr', 4.05982905982906e-0

### Plug back in to be sure

In [25]:
#----------------------------------------
# Starting from basal model
# For each condition: reload the core model, apply the condition's exchange
# bounds, impose the crowding constraint with the fitted coefficients, and
# compare the simulated growth rate with the measurement. The same model is
# then re-solved with the uniform starting coefficient for reference.
csrcs = df_conds.cond.unique()
for csrc in csrcs:
    ijofit = load_json_model('/home/laurence/ME/models/BiGG_M/json/e_coli_core.json')
    crowding = Constraint('crowding')
    crowding._bound = crowding_bound0
    crowding._constraint_sense = 'L'

    # Exchange bounds for this condition.
    df_condi = df_conds[ df_conds.cond==csrc]
    for i,row in df_condi.iterrows():
        rid = row['rxn']
        rxn = ijofit.reactions.get_by_id(rid)
        rxn.lower_bound = row['lb']
        rxn.upper_bound = row['ub']

    # Crowding constraint with the fitted coefficients.
    for rid,kfit in iteritems(kfit_dict):
        rxn = ijofit.reactions.get_by_id(rid)
        rxn.add_metabolites({crowding:kfit})

    ijofit.optimize()

    mu_measi = df_meas[ df_meas.substrate==csrc].growth_rate_1_h.iloc[0]
    mu_fiti = ijofit.reactions.BIOMASS_Ecoli_core_w_GAM.x
    # Record the exchange fluxes of the FITTED solution now: the model is
    # re-solved below, after which rxn.x would describe the unfitted solution.
    uptake_fit = []
    for i,row in df_condi.iterrows():
        ex_id = row['rxn']
        uptake_fit.append((ex_id, ijofit.reactions.get_by_id(ex_id).x))

    # Get unfit
    # Reset every crowding coefficient to the uniform starting value keff0
    # (the value every crowded reaction was given initially) rather than
    # depending on a loop variable left behind by an earlier cell.
    for rxn in ijofit.metabolites.crowding.reactions:
        rxn._metabolites[crowding] = keff0
    ijofit.optimize()
    mu_unfiti = ijofit.reactions.BIOMASS_Ecoli_core_w_GAM.x

    err = 100*(mu_fiti - mu_measi)/mu_measi
    print('Cond=%s. mu_meas=%g. mu_sim=%g (unfit=%g). Error=%.3g%%' % (csrc, mu_measi, mu_fiti, mu_unfiti, err))
    for ex_id, flux in uptake_fit:
        print('\t%s uptake=%g' % (ex_id, flux))

Cond=glucose. mu_meas=0.74. mu_sim=0.740019 (unfit=1.79057). Error=0.00251%
	EX_glc__D_e uptake=-20
	EX_fru_e uptake=0
	EX_succ_e uptake=0
	EX_mal__L_e uptake=0
	EX_ac_e uptake=0
Cond=succinate. mu_meas=0.46. mu_sim=0.460011 (unfit=0.840134). Error=0.00239%
	EX_glc__D_e uptake=0
	EX_fru_e uptake=0
	EX_succ_e uptake=-20
	EX_mal__L_e uptake=0
	EX_ac_e uptake=0
Cond=acetate. mu_meas=0.256. mu_sim=0.256011 (unfit=0.389313). Error=0.00446%
	EX_glc__D_e uptake=0
	EX_fru_e uptake=0
	EX_succ_e uptake=0
	EX_mal__L_e uptake=0
	EX_ac_e uptake=-20
