# Toy Radix Lagrangean decomposition validation

In [1]:
# Settings for the radix fit. The trailing comments are presumably
# alternative values kept for quick switching.
# REG_WEIGHT is passed to RadixEstimator.fit as `reg_weight`.
REG_WEIGHT   = 0. #1e-4
# MAX_NONZERO is passed to RadixEstimator.fit as `max_nonzero_binaries`.
# None presumably means "no cap" -- TODO confirm against RadixEstimator.
MAX_NONZERO  = None #48*2
# NOTE(review): PREVENT_ZERO is not read anywhere in the visible cells.
PREVENT_ZERO = True

In [2]:
%load_ext line_profiler

In [3]:
from gurobipy import *

import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

plt.rcParams['svg.fonttype'] = 'none'
pd.set_option('display.max_colwidth', -1)
%matplotlib inline

from cobra.io import load_json_model
from six import iteritems
import numpy as np
import cobra

In [4]:
# Load the model that the estimators below are fitted against.
# NOTE(review): absolute, user-specific path -- not portable as written.
ijomc = load_json_model('/home/laurence/ME/models/e_coli_core_pc.json')

In [5]:
# Solve the unmodified model and keep its biomass flux as the reference
# growth rate before any fitting.
ijomc.optimize()
mu_crowd0 = ijomc.reactions.BIOMASS_Ecoli_core_w_GAM.x
print(mu_crowd0)

0.873921506968


In [6]:
# Measured growth rates; the code below reads the columns `ex_rxn`,
# `substrate` and (in later cells) `growth_rate_1_h`.
# NOTE(review): absolute, user-specific path -- not portable as written.
df_meas = pd.read_csv('/home/laurence/ME/data/dynamicME/beg/growth_meas.csv')

# Keep only measurements whose exchange reaction exists in the model.
ex_rxns = [r for r in df_meas.ex_rxn.unique() if ijomc.reactions.has_id(r)]
df_meas = df_meas[ df_meas.ex_rxn.isin(ex_rxns)]
conds = df_meas.substrate.unique()

N_CONDS = len(conds)
#N_CONDS = 3

# One row per (condition, exchange reaction). The measured substrate's own
# exchange gets lb=-10; every other exchange gets lb=0.
df_conds = pd.DataFrame([{'cond':r['substrate'], 'rxn':ex_rxn, 'lb':-10 if r['ex_rxn']==ex_rxn else 0, 'ub':1000., 'obj':0.} for i,r in df_meas.iterrows() for ex_rxn in ex_rxns])

# Pick the conditions to keep: small runs use a fixed, hand-picked set,
# larger runs take the first N_CONDS measured conditions.
if N_CONDS==1:
    kept_conds = ['acetate']
elif N_CONDS<=3:
    kept_conds = ['glucose','acetate','succinate'][0:N_CONDS]
else:
    kept_conds = conds[0:N_CONDS]

# Boolean filtering returns a frame derived from the original. Take an
# explicit copy so the in-place assignment below unambiguously targets
# df_conds and cannot raise SettingWithCopyWarning.
df_conds = df_conds[ df_conds.cond.isin(kept_conds)].copy()

# Acetate is given a larger uptake bound than the default -10.
df_conds.loc[ (df_conds.cond=='acetate') & (df_conds.rxn=='EX_ac_e'), 'lb'] = -20

# Make radix problem

In [7]:
from dynamicme.estimate import RadixEstimator

In [8]:
# df_Y: measurements with the growth rate exposed as 'output', the substrate
# as 'cond', and every row tagged with the biomass reaction id.
df_Y = (
    df_meas
    .rename(columns={'growth_rate_1_h':'output', 'substrate':'cond'})
    .assign(output_id='BIOMASS_Ecoli_core_w_GAM')
)
# df_X is the per-condition bounds table (same object as df_conds, not a copy).
df_X = df_conds

In [9]:
import json

# Read a saved list of records and collect the first element of each one.
# NOTE(review): `kfit_changed` is reassigned by a later cell in this notebook.
with open('/home/laurence/ME/data/dynamicME/kfit_changed.json') as fh:
    kfit_changed = json.load(fh)
changed_keffs = [entry[0] for entry in kfit_changed]

In [10]:
# Build the estimation problem over all conditions at once.
# optimize=False defers the solve so solver parameters can be set first.
est = RadixEstimator()
est.fit(ijomc, df_X, df_Y, reg_weight = REG_WEIGHT, max_nonzero_binaries=MAX_NONZERO, optimize=False)

Changed value of parameter OutputFlag to 1
   Prev: 0  Min: 0  Max: 1  Default: 1


In [11]:
# Gurobi parameters for the MILP: scaling off (ScaleFlag=0), solver log on,
# and a two-hour time limit (2*3600 seconds).
est.milp.Params.ScaleFlag = 0
est.milp.Params.OutputFlag = 1
est.milp.Params.TimeLimit = 2*3600.
# Disabled options, presumably kept for tighter-tolerance experiments:
# est.milp.Params.Presolve = 0
# est.milp.Params.FeasibilityTol = 1e-8
# est.milp.Params.OptimalityTol = 1e-8

Changed value of parameter ScaleFlag to 0
   Prev: -1  Min: -1  Max: 3  Default: -1
Parameter OutputFlag unchanged
   Value: 1  Min: 0  Max: 1  Default: 1
Changed value of parameter TimeLimit to 7200.0
   Prev: 1e+100  Min: 0.0  Max: 1e+100  Default: 1e+100


In [12]:
# Solve the estimation MILP; the returned solution object is the cell output.
est.optimize()

Optimize a model with 6802 rows, 3384 columns and 20363 nonzeros
Variable types: 3240 continuous, 144 integer (0 binary)
Coefficient statistics:
  Matrix range     [2e-06, 1e+03]
  Objective range  [6e-01, 8e-01]
  Bounds range     [1e+00, 1e+03]
  RHS range        [1e-03, 1e+03]
Presolve removed 1633 rows and 639 columns
Presolve time: 0.04s
Presolved: 5169 rows, 2745 columns, 16156 nonzeros
Variable types: 2601 continuous, 144 integer (144 binary)
Presolve removed 5 rows and 15 columns

Root relaxation: objective 9.086192e-02, 4698 iterations, 0.26 seconds

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

     0     0    0.09086    0    9          -    0.09086      -     -    0s
H    0     0                       0.3342977    0.09086  72.8%     -    0s
H    0     0                       0.2735587    0.09086  66.8%     -    0s
     0     0    0.09086    0   10    0.27356    0.09086  6

<Solution 0.21 at 0x7f03c8030ad0>

## Validate the fit

In [13]:
# Reload the model from disk so validation uses a separate instance from
# `ijomc`, which was passed to the estimator.
# NOTE(review): absolute, user-specific path -- not portable as written.
base_model = load_json_model('/home/laurence/ME/models/e_coli_core_pc.json')

In [14]:
# Predict the biomass flux for every condition in df_X. The next cell reads
# the 'cond', 'rxn' and 'x' columns of the result.
mu_id = 'BIOMASS_Ecoli_core_w_GAM'
df_pred = est.predict(df_X, base_model, meas_id=mu_id)

In [15]:
# Drop bookkeeping columns, then pair each measurement with its prediction.
excluded_cols = ['order','ex_rxn']
df_Y2 = df_Y[[col for col in df_Y.columns if col not in excluded_cols]]
meas_tbl = df_Y2.rename(columns={'output':'meas'})
pred_tbl = df_pred.rename(columns={'rxn':'output_id','x':'pred'})
df_val = meas_tbl.merge(pred_tbl, on=['cond','output_id'])
# Signed error, and the same error as a percentage of the measured value.
df_val.loc[:,'error'] = df_val.pred - df_val.meas
df_val.loc[:,'perror'] = df_val.error / df_val.meas * 100
df_val

Unnamed: 0,cond,meas,output_id,pred,error,perror
0,glucose,0.74,BIOMASS_Ecoli_core_w_GAM,0.540138,-0.199862,-27.008358
1,fructose,0.55,BIOMASS_Ecoli_core_w_GAM,0.550064,6.4e-05,0.011705
2,succinate,0.46,BIOMASS_Ecoli_core_w_GAM,0.397563,-0.062437,-13.573258
3,malate,0.44,BIOMASS_Ecoli_core_w_GAM,0.370741,-0.069259,-15.740791
4,acetate,0.256,BIOMASS_Ecoli_core_w_GAM,0.255994,-6e-06,-0.002179


In [16]:
# Reference case: every fitted coefficient replaced by one uniform value.
# NOTE(review): 1/65/3600 is presumably the reciprocal of a 65 1/s keff
# converted to hours -- confirm the units.
a0 = 1./65/3600
kfit_dict0 = dict.fromkeys(est.kfit_dict.keys(), a0)
df_ref = est.predict(df_X, base_model, meas_id=mu_id, kfit_dict=kfit_dict0)
# Renamed in place so df_ref itself carries the 'output_id'/'output' columns.
df_ref.rename(columns={'rxn':'output_id','x':'output'}, inplace=True)

In [17]:
# Same validation table as for the fit, built from the uniform-coefficient
# reference prediction; columns carry a trailing 0.
meas_tbl0 = df_Y2.rename(columns={'output':'meas'})
ref_tbl0 = df_ref.rename(columns={'output':'pred0'})
df_val0 = meas_tbl0.merge(ref_tbl0, on=['cond','output_id'])
df_val0.loc[:,'error0'] = df_val0.pred0 - df_val0.meas
df_val0.loc[:,'perror0'] = df_val0.error0 / df_val0.meas * 100

In [18]:
# Fitted and reference results side by side, one row per condition.
df_comp = df_val.merge(df_val0, on=['cond','output_id','meas'])
# Relative change in absolute error versus the reference, in percent
# (negative means the fit reduced the error).
abs_err_fit = df_comp.error.abs()
abs_err_ref = df_comp.error0.abs()
df_comp.loc[:,'error_change'] = (abs_err_fit - abs_err_ref) / abs_err_ref * 100
df_comp

Unnamed: 0,cond,meas,output_id,pred,error,perror,pred0,error0,perror0,error_change
0,glucose,0.74,BIOMASS_Ecoli_core_w_GAM,0.540138,-0.199862,-27.008358,0.873922,0.133922,18.097501,49.23805
1,fructose,0.55,BIOMASS_Ecoli_core_w_GAM,0.550064,6.4e-05,0.011705,0.873922,0.323922,58.894819,-99.98013
2,succinate,0.46,BIOMASS_Ecoli_core_w_GAM,0.397563,-0.062437,-13.573258,0.397563,-0.062437,-13.573258,-2.573605e-09
3,malate,0.44,BIOMASS_Ecoli_core_w_GAM,0.370741,-0.069259,-15.740791,0.370741,-0.069259,-15.740791,0.0
4,acetate,0.256,BIOMASS_Ecoli_core_w_GAM,0.255994,-6e-06,-0.002179,0.389313,0.133313,52.07542,-99.99582


In [19]:
# Summary of the fit against the uniform-coefficient reference.
print('Total abs error: %g' % df_comp.error.abs().sum())
print('Total perc error: %g%%' % df_comp.perror.abs().sum())
print('Total error change: %g%%' % df_comp.error_change.sum())
# A coefficient counts as changed when it differs from its reference value
# by more than 1e-9.
changed = [abs(est.kfit_dict[k]-kfit_dict0[k])>1e-9 for k in est.kfit_dict.keys()]
n_changed = sum(changed)
n_keffs = len(est.kfit_dict)
# The float literal forces true division. With two ints, Python 2 floors the
# quotient, so 45/48 was reported as 93% instead of 93.75%.
print('Number of keffs changed: %d/%d (%g%%)' % (n_changed, n_keffs, 100.*n_changed/n_keffs))

Total abs error: 0.331628
Total perc error: 56.3363%
Total error change: -150.738%
Number of keffs changed: 45/48 (93%)


# Each Lagrange submodel should start with the optimum

In [12]:
from dynamicme.decomposition import LagrangeMaster, LagrangeSubmodel

In [26]:
# Build one Lagrange submodel per condition: fit a RadixEstimator on that
# condition's rows only (optimize=False, so nothing is solved here) and wrap
# its stacked model in a LagrangeSubmodel keyed by the condition name.
conds = df_conds.cond.unique()
sub_dict = {}
for cond in conds:
    df_Xk = df_X[ df_X.cond==cond]
    df_Yk = df_Y[ df_Y.cond==cond]
    estk = RadixEstimator()
    estk.fit(ijomc, df_Xk, df_Yk, reg_weight = REG_WEIGHT, max_nonzero_binaries=MAX_NONZERO, optimize=False)    
    sub = LagrangeSubmodel(estk.stacker.model, cond)
    sub_dict[cond] = sub
# NOTE(review): `estk` and `sub` keep their final-iteration values after the
# loop, and later cells read them.

Changed value of parameter OutputFlag to 1
   Prev: 0  Min: 0  Max: 1  Default: 1
Changed value of parameter OutputFlag to 1
   Prev: 0  Min: 0  Max: 1  Default: 1
Changed value of parameter OutputFlag to 1
   Prev: 0  Min: 0  Max: 1  Default: 1
Changed value of parameter OutputFlag to 1
   Prev: 0  Min: 0  Max: 1  Default: 1
Changed value of parameter OutputFlag to 1
   Prev: 0  Min: 0  Max: 1  Default: 1


In [27]:
# Create the master problem and register every per-condition submodel.
# NOTE(review): `estk` is whatever the submodel loop assigned last, i.e. the
# estimator built for the final condition.
master = LagrangeMaster(estk.stacker.model)
master.add_submodels(sub_dict)

### Two-phase

In [28]:
# Master settings: time limit of 2*3600 (presumably seconds) and a progress
# line every 5 iterations (matches the iteration numbers in the log).
master.time_limit = 2*3600
#master.max_iter = 500
master.verbosity = 1
master.print_iter = 5
# master.gaptol = 0.01
# Two phases: solve the relaxed master first, then run the final optimize
# whose solution is kept in `sol_master`.
master.solve_relaxed()
sol_master = master.optimize()

    Iter                    UB                    LB       gap relgap(%)     delta                       Time(s)
  ------   -------------------   -------------------  -------- ---------  -------- -----------------------------
    Iter       Dual       Feas        Sub       Best                                   total    master       sub
       0       1000     0.1628    0.09086    0.09086     999.9     99.99       0.5  0.265120  0.003842  0.100587
       5       10.2     0.1628     -208.2    0.09086     10.11     99.11    0.0156  1.304455  0.006130  0.060335
      10      5.477     0.1628       -842    0.09086     5.387     98.34  0.000488  2.353585  0.009106  0.055928
      15      3.696     0.1628      -3826    0.09086     3.605     97.54  1.53e-05  3.412504  0.011826  0.055593
      20      2.557     0.1628 -1.761e+04    0.09086     2.466     96.45  4.77e-07  4.525658  0.020271  0.068737
      25       1.24     0.1628 -6.553e+04    0.09086     1.149     92.67  1.49e-08  5.631735  0.

In [29]:
# Pull the radix-encoding metadata off the last-built estimator so the next
# cell can decode the binary solution back into coefficients.
var_cons_dict, powers, digits, radix = (
    estk.var_cons_dict,
    estk.powers,
    estk.digits,
    estk.radix,
)

In [30]:
#sol_masterk = sub_dict['acetate'].x_dict
#sol_masterk = sub_dict['glucose'].x_dict

In [40]:
# Decode the master solution into fitted coefficients. For each group,
#   kfit = sum over powers l and digits k of y[k,l] * a0 * radix**pwr * digit
# where y[k,l] is the solved value of 'binary_<group><k><l>' and a0 is that
# group's baseline coefficient.
kfit_dict = {}
a0_dict = {}   # baseline coefficient of each group, keyed by group id
for group_id, var_dict in iteritems(var_cons_dict):
    a0  = var_dict[0][2]
    a0_dict[group_id] = a0
    kfit = 0.
    for l,pwr in enumerate(powers):
        for k,digit in enumerate(digits):
            yid = 'binary_%s%s%s' % (group_id,k,l)
            y   = sol_master[yid]
            kfit += y*a0*radix**pwr*digit
    kfit_dict[group_id] = kfit

# Compare every fitted value with its OWN baseline. The loop variable `a0`
# only holds the last group's baseline once the loop has finished, so using
# it here would misclassify groups whose baseline differs.
kfit_changed = [(k,v, abs(v-a0_dict[k])/a0_dict[k]) for k,v in iteritems(kfit_dict) if abs(v-a0_dict[k])/a0_dict[k]>1e-6]
print('Changed keffs: %d/%d' % (len(kfit_changed), len(var_cons_dict)))
#----------------------------------------
# Starting from basal model
perrs = []
csrcs = df_conds.cond.unique()
for csrc in csrcs:
    # Fresh model per condition so bounds and coefficients never carry over.
    ijofit = load_json_model('/home/laurence/ME/models/e_coli_core_pc.json')
    crowding = ijofit.metabolites.get_by_id('crowding')
    df_condi = df_conds[ df_conds.cond==csrc]
    for i,row in df_condi.iterrows():
        rid = row['rxn']
        rxn = ijofit.reactions.get_by_id(rid)
        rxn.lower_bound = row['lb']
        rxn.upper_bound = row['ub']

    # Overwrite (combine=False) each fitted reaction's crowding coefficient.
    for rid,kfit in iteritems(kfit_dict):
        rxn = ijofit.reactions.get_by_id(rid)
        rxn.add_metabolites({crowding:kfit}, combine=False)

    ijofit.optimize()

    mu_measi = df_meas[ df_meas.substrate==csrc].growth_rate_1_h.iloc[0]
    mu_fiti = ijofit.reactions.BIOMASS_Ecoli_core_w_GAM.x

    # Get unfit: put the baseline coefficients back and re-solve. Fitted
    # reactions get their own baseline; any other crowding reaction falls
    # back to `a0`, as before.
    for rxn in ijofit.metabolites.crowding.reactions:
        rxn._metabolites[crowding] = a0_dict.get(rxn.id, a0)
    ijofit.optimize()
    mu_unfiti = ijofit.reactions.BIOMASS_Ecoli_core_w_GAM.x
    err0= 100*(mu_unfiti-mu_measi)/mu_measi
    err = 100*(mu_fiti - mu_measi)/mu_measi
    derr= 100*(abs(err)-abs(err0))/abs(err0)
    perrs.append(err)
    print('Cond=%s. mu_meas=%g. mu_sim=%g (unfit=%g, error=%.3g%%). Error=%.3g%% (%.3g%% change)' % (
        csrc, mu_measi, mu_fiti, mu_unfiti, err0, err, derr))
    for i,row in df_condi.iterrows():
        rid = row['rxn']
        rxn = ijofit.reactions.get_by_id(rid)
        print('\t%s uptake=%g' % (rxn.id, rxn.x))
perrs = np.array(perrs)
print("Absolute percent error: %g%% -- %g%%"%(min(abs(perrs)), max(abs(perrs))))
print("Mean abs percent error = %g%%"%(np.mean(abs(perrs))))
print("Median abs percent error = %g%%"%(np.median(abs(perrs))))

Changed keffs: 38/48
Cond=glucose. mu_meas=0.74. mu_sim=0.731132 (unfit=0.873922, error=18.1%). Error=-1.2% (-93.4% change)
	EX_glc__D_e uptake=-10
	EX_fru_e uptake=0
	EX_succ_e uptake=0
	EX_mal__L_e uptake=0
	EX_ac_e uptake=0
Cond=fructose. mu_meas=0.55. mu_sim=0.764636 (unfit=0.873922, error=58.9%). Error=39% (-33.7% change)
	EX_glc__D_e uptake=0
	EX_fru_e uptake=-10
	EX_succ_e uptake=0
	EX_mal__L_e uptake=0
	EX_ac_e uptake=0
Cond=succinate. mu_meas=0.46. mu_sim=0.397563 (unfit=0.397563, error=-13.6%). Error=-13.6% (-4.03e-09% change)
	EX_glc__D_e uptake=0
	EX_fru_e uptake=0
	EX_succ_e uptake=-10
	EX_mal__L_e uptake=0
	EX_ac_e uptake=0
Cond=malate. mu_meas=0.44. mu_sim=0.370741 (unfit=0.370741, error=-15.7%). Error=-15.7% (0% change)
	EX_glc__D_e uptake=0
	EX_fru_e uptake=0
	EX_succ_e uptake=0
	EX_mal__L_e uptake=-10
	EX_ac_e uptake=0
Cond=acetate. mu_meas=0.256. mu_sim=0.286242 (unfit=0.389313, error=52.1%). Error=11.8% (-77.3% change)
	EX_glc__D_e uptake=0
	EX_fru_e uptake=0
	EX_su

## Total error

In [32]:
# Check that the acetate and glucose submodels agree on every binary:
# one printed row per variable, plus a boolean collected in y_sames.
y_sames = []
for y in sub._ys:
    var_name = y.VarName
    y1 = sub_dict['acetate'].x_dict[var_name]
    y2 = sub_dict['glucose'].x_dict[var_name]
    is_same = abs(y1-y2)<1e-10
    y_sames.append(is_same)
    print('%12.10s%12.10s%12.10s%12.10s'%(var_name,y1,y2, is_same))

  binary_G6P         1.0         1.0        True
  binary_G6P         0.0         0.0        True
  binary_G6P         0.0         0.0        True
  binary_AKG         1.0         1.0        True
  binary_AKG         1.0         1.0        True
  binary_AKG         0.0         0.0        True
  binary_ACK         1.0         1.0        True
  binary_ACK         0.0         0.0        True
  binary_ACK         0.0         0.0        True
  binary_PGK         1.0         1.0        True
  binary_PGK         0.0         0.0        True
  binary_PGK         1.0         1.0        True
  binary_GLU         0.0         0.0        True
  binary_GLU         0.0         0.0        True
  binary_GLU         1.0         1.0        True
  binary_ME1         1.0         1.0        True
  binary_ME1         0.0         0.0        True
  binary_ME1         1.0         1.0        True
  binary_GND         1.0         1.0        True
  binary_GND         1.0         1.0        True
  binary_GND        

In [33]:
# Count of binaries on which the two compared submodels agree, out of all
# binaries checked.
print('num same = %s/%s'%(sum(y_sames), len(y_sames)))

num same = 144/144
