# Toy Radix Lagrangean decomposition validation.
# Guaranteed primal recovery via branch and bound.

- The full-space solution still finds a better incumbent if it is solved to a small enough gap.
- The lower bound (LB) from the LP relaxation is the same for the decomposition as for the full-space problem.
- The decomposition reaches a better integer LB sooner.
- To do: incorporate branching strategies and heuristics from the full-space approach to recover a better feasible primal solution.

In [1]:
# Run configuration for the radix estimation experiment.
OBJECTIVE = 'minerr'     # commented-out alternative: 'minsse'
REG_WEIGHT = 0.          # commented-out alternative: 1e-4
MAX_NONZERO = None       # commented-out alternative: 48*2
PREVENT_ZERO = True
POWERS = [-1, 0, 1]
TWO_PHASE = True
SIMPLE_MM = True

# The base model JSON is selected by SIMPLE_MM.
BASE_MODEL_FILE = (
    '/home/laurence/ME/models/e_coli_core_mm_simple.json'
    if SIMPLE_MM
    else '/home/laurence/ME/models/e_coli_core_mm.json'
)

In [2]:
# Overrides the BASE_MODEL_FILE value chosen from SIMPLE_MM in the configuration
# cell; when the cells run in order, this is the file that gets loaded.
BASE_MODEL_FILE = '/home/laurence/ME/models/e_coli_core_pc.json'

In [3]:
%load_ext line_profiler

In [4]:
from gurobipy import *

import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

plt.rcParams['svg.fonttype'] = 'none'
#pd.set_option('display.max_colwidth', -1)
%matplotlib inline

from cobra.io import load_json_model
from six import iteritems
import numpy as np
import cobra

In [5]:
# Load the base model from the JSON file named by BASE_MODEL_FILE.
ijomc = load_json_model(BASE_MODEL_FILE)

In [6]:
# Solve the base model as loaded and keep the solution value (.x) of the
# biomass reaction as the reference growth rate.
ijomc.optimize()
mu_crowd0 = ijomc.reactions.BIOMASS_Ecoli_core_w_GAM.x
print(mu_crowd0)

0.873921506968


In [7]:
# Measured growth data. Columns read in this cell: ex_rxn and substrate.
df_meas = pd.read_csv('/home/laurence/ME/data/dynamicME/beg/growth_meas.csv')

# Keep only the measurements whose exchange reaction exists in the loaded model.
ex_rxns = [r for r in df_meas.ex_rxn.unique() if ijomc.reactions.has_id(r)]
df_meas = df_meas[ df_meas.ex_rxn.isin(ex_rxns)]
conds = df_meas.substrate.unique()

# N_CONDS is either an explicit list of substrate names or a number of
# conditions; it is normalised to a count below.
#N_CONDS = len(conds)
#N_CONDS = ['succinate','malate']
#N_CONDS = ['glucose','acetate']
N_CONDS = ['glucose','acetate','fructose']

# One row per (measurement row, exchange reaction): the exchange reaction named
# in the measurement row gets lower bound -10, every other one gets 0.
df_conds = pd.DataFrame([{'cond':r['substrate'], 'rxn':ex_rxn, 'lb':-10 if r['ex_rxn']==ex_rxn else 0, 'ub':1000., 'obj':0.} for i,r in df_meas.iterrows() for ex_rxn in ex_rxns])

if hasattr(N_CONDS,'__iter__'):
    df_conds = df_conds[ df_conds.cond.isin(N_CONDS)]
    N_CONDS = len(N_CONDS)
else:
    if N_CONDS==1:
        df_conds = df_conds[ df_conds.cond=='acetate']
    elif N_CONDS<=3:
        df_conds = df_conds[ df_conds.cond.isin(['glucose','acetate','succinate'][0:N_CONDS])]
    else:
        df_conds = df_conds[ df_conds.cond.isin(conds[0:N_CONDS])]

# The filtered frame is a slice of the full table. Take an explicit copy so the
# .loc write below modifies an independent frame instead of triggering pandas'
# SettingWithCopyWarning (chained assignment on a slice).
df_conds = df_conds.copy()

# Acetate condition: lower bound of -20 (instead of -10) on EX_ac_e.
df_conds.loc[ (df_conds.cond=='acetate') & (df_conds.rxn=='EX_ac_e'), 'lb'] = -20


In [8]:
def sol_to_kdict(sol_final, estimator=None):
    """Convert a binary radix solution into one fitted coefficient per group.

    For every group in the estimator's ``var_cons_dict`` the fitted value is
    the sum of ``y * a0 * radix**pwr * digit`` over all (power, digit) pairs,
    where ``y = sol_final['binary_<group_id><digit index><power index>']`` and
    ``a0 = var_cons_dict[group_id][0][2]``.

    Also prints how many fitted values differ from their own ``a0`` by more
    than a relative 1e-6. (Previously every group was compared against the
    ``a0`` of the last group iterated, so that count was wrong.)

    Parameters
    ----------
    sol_final : mapping
        Maps binary variable IDs to their solution values.
    estimator : object, optional
        Object providing ``var_cons_dict``, ``powers``, ``digits`` and
        ``radix``. Defaults to the module-level ``estk``, which is what the
        function always used before this parameter existed.

    Returns
    -------
    dict
        ``group_id -> fitted coefficient``.
    """
    if estimator is None:
        estimator = estk

    var_cons_dict = estimator.var_cons_dict
    powers = estimator.powers
    digits = estimator.digits
    radix = estimator.radix

    kfit_dict = {}
    n_changed = 0
    for group_id, var_dict in var_cons_dict.items():
        a0 = var_dict[0][2]
        kfit = 0.
        for l, pwr in enumerate(powers):
            for k, digit in enumerate(digits):
                y = sol_final['binary_%s%s%s' % (group_id, k, l)]
                kfit += y*a0*radix**pwr*digit
        kfit_dict[group_id] = kfit

        # Relative change against this group's own a0. When a0 is zero a
        # relative change is undefined, so fall back to the absolute change.
        delta = abs(kfit - a0)
        if a0 != 0:
            delta = delta / abs(a0)
        if delta > 1e-6:
            n_changed += 1

    print('Changed keffs: %d/%d' % (n_changed, len(var_cons_dict)))

    return kfit_dict

def compute_perf(kfit_dict, ijofit):
    """Compare simulated growth rates before and after applying fitted coefficients.

    For every condition in the module-level ``df_conds`` the exchange bounds
    of that condition are applied to ``ijofit`` and the model is solved twice:
    once as passed in ("unfit") and once after the ``crowding`` coefficient of
    every reaction in ``kfit_dict`` has been replaced by its fitted value
    ("fit"). The biomass solution values are compared with the first measured
    ``growth_rate_1_h`` of that substrate in the module-level ``df_meas``.

    All unfit solves are done before any coefficient is changed. (Previously
    the coefficients written while processing the first condition were still
    on the model when the "unfit" rate of each later condition was solved.)

    Parameters
    ----------
    kfit_dict : dict
        ``reaction id -> fitted crowding coefficient``.
    ijofit : model
        Model to evaluate. It is modified in place: on return it carries the
        fitted coefficients and the bounds of the last condition, as before.

    Returns
    -------
    (dict, pandas.DataFrame)
        ``perf_dict`` with keys ``tot_err``, ``err_change``, ``mape`` and
        ``mdape``, and a frame with one row per condition (``substrate``,
        ``mu_fit``, ``mu_unfit``, ``mu_meas``).
    """
    def _simulate(df_condi):
        # Apply the exchange bounds of one condition, solve, return biomass value.
        for _, row in df_condi.iterrows():
            rxn = ijofit.reactions.get_by_id(row['rxn'])
            rxn.lower_bound = row['lb']
            rxn.upper_bound = row['ub']
        ijofit.optimize(solver='gurobi')
        return ijofit.reactions.BIOMASS_Ecoli_core_w_GAM.x

    csrcs = df_conds.cond.unique()
    media = [(csrc, df_conds[ df_conds.cond==csrc]) for csrc in csrcs]

    #----------------------------------------
    # Pass 1: unfit growth rates, on the model exactly as it was passed in
    mu_unfit = {}
    for csrc, df_condi in media:
        mu_unfit[csrc] = _simulate(df_condi)

    #----------------------------------------
    # Replace the crowding coefficients by the fitted values (once)
    crowding = ijofit.metabolites.crowding
    for rid, kfit in kfit_dict.items():
        rxn = ijofit.reactions.get_by_id(rid)
        rxn.add_metabolites({crowding:kfit}, combine=False)

    #----------------------------------------
    # Pass 2: fitted growth rates and per-condition report
    perrs = []
    errs_fit = []
    errs_unfit = []
    rows = []
    for csrc, df_condi in media:
        mu_unfiti = mu_unfit[csrc]
        mu_fiti = _simulate(df_condi)
        mu_measi = df_meas[ df_meas.substrate==csrc].growth_rate_1_h.iloc[0]

        err0= 100*(mu_unfiti-mu_measi)/mu_measi
        err = 100*(mu_fiti - mu_measi)/mu_measi
        derr= 100*(abs(err)-abs(err0))/abs(err0)
        perrs.append(err)
        errs_unfit.append(mu_unfiti - mu_measi)
        errs_fit.append(mu_fiti - mu_measi)
        print('Cond=%s. mu_meas=%g. mu_sim=%g (unfit=%g, error=%.3g%%). Error=%.3g%% (%.3g%% change)' % (
            csrc, mu_measi, mu_fiti, mu_unfiti, err0, err, derr))
        # Exchange values reported here come from the fitted solve just above.
        for _, row in df_condi.iterrows():
            rxn = ijofit.reactions.get_by_id(row['rxn'])
            print('\t%s uptake=%g' % (rxn.id, rxn.x))

        rows.append({'substrate':csrc, 'mu_fit':mu_fiti, 'mu_unfit':mu_unfiti, 'mu_meas':mu_measi})

    perrs = np.array(perrs)
    errs_fit = np.array(errs_fit)
    errs_unfit = np.array(errs_unfit)
    tot_err = sum(abs(errs_fit))
    tot_err0 = sum(abs(errs_unfit))
    mape = np.mean(abs(perrs))
    mdape = np.median(abs(perrs))
    # Fractional change of the total absolute error relative to the unfit model.
    f_change = (tot_err-tot_err0)/tot_err0
    print("Absolute total error: %g"%(tot_err))
    print("Total error change: %g%%"%( 100*f_change  ))
    print("Absolute percent error: %g%% -- %g%%"%(min(abs(perrs)), max(abs(perrs))))
    print("Mean abs percent error = %g%%"%(mape))
    print("Median abs percent error = %g%%"%(mdape))

    perf_dict = {'tot_err':tot_err, 'err_change':f_change, 'mape':mape, 'mdape':mdape}
    df_results = pd.DataFrame(rows)

    return perf_dict, df_results

# Make radix problem

In [9]:
from dynamicme import decomposition


In [10]:
from dynamicme.estimate import RadixEstimator

In [11]:
# Outputs: the measured growth rates, renamed to the output/cond column names
# and tagged with the biomass reaction ID as the output they refer to.
df_Y = (
    df_meas
    .rename(columns={'growth_rate_1_h':'output', 'substrate':'cond'})
    .assign(output_id='BIOMASS_Ecoli_core_w_GAM')
)
# Inputs: the per-condition exchange bounds table.
df_X = df_conds

# Each Lagrange submodel should start with the optimum

In [12]:
from dynamicme.decomposition.LagrangeMaster import LagrangeMaster
from dynamicme.decomposition.LagrangeSubmodel import LagrangeSubmodel

In [13]:
# Build one radix estimation problem per condition and wrap each one in a
# LagrangeSubmodel keyed by the condition name.
conds = df_conds.cond.unique()
sub_dict = {}
for cond in conds:
    # Inputs and measured outputs restricted to this condition.
    df_Xk = df_X[df_X.cond == cond]
    df_Yk = df_Y[df_Y.cond == cond]
    estk = RadixEstimator()
    # With SIMPLE_MM the constraint is given by name; otherwise pair every
    # 'enz_cap*' metabolite with its reaction whose ID is the metabolite ID
    # with 'enz_cap' replaced by 'e'.
    cons_var_pairs = 'crowding' if SIMPLE_MM else [
        (cons, rxn)
        for cons in ijomc.metabolites.query('^enz_cap')
        for rxn in cons.reactions
        if rxn.id == cons.id.replace('enz_cap', 'e')
    ]
    # optimize=False: the problem is only built here, not solved.
    estk.fit(
        ijomc, df_Xk, df_Yk,
        objective=OBJECTIVE,
        fit_constraint_id=cons_var_pairs,
        reg_weight=REG_WEIGHT,
        max_nonzero_binaries=MAX_NONZERO,
        optimize=False,
        powers=POWERS,
    )
    sub = LagrangeSubmodel(estk.stacker.model, cond, Q=estk.stacker.Q)
    sub_dict[cond] = sub

Changed value of parameter OutputFlag to 1
   Prev: 0  Min: 0  Max: 1  Default: 1
Changed value of parameter OutputFlag to 1
   Prev: 0  Min: 0  Max: 1  Default: 1
Changed value of parameter OutputFlag to 1
   Prev: 0  Min: 0  Max: 1  Default: 1


In [14]:
# Build the master problem from the stacked model of `estk`. At this point
# `estk` is the estimator left over from the last iteration of the submodel
# loop, i.e. the one built for the last condition.
master = LagrangeMaster(estk.stacker.model)
master.add_submodels(sub_dict)
master.covered_dict = estk.covered_dict

In [15]:
# Set both the absolute and the relative MIP gap parameters of every
# submodel's solver model to 1e-3.
for sub in sub_dict.values():
    sub.model.Params.MIPGapAbs = 1e-3
    sub.model.Params.MIPGap    = 1e-3

# B&B to recover primal

In [None]:
from dynamicme.decomposition.LagrangeBB import LagrangeBB

In [None]:
# Settings for the master problem.
master.time_limit = 2*3600   # presumably seconds (2 hours) -- TODO confirm
#master.max_iter = 500
master.verbosity = 1
master.print_iter = 5
# master.gaptol = 0.02
master.gaptol = 1e-3
master.absgaptol = 1e-3
master.delta_min  = 1e-10
master.delta_mult = 0.5
#master.bundle_mult = 0.1
#master.model.Params.NumericFocus = 3   # the QP eventually runs into numerical issues

# Names of the heuristics handed to the branch-and-bound driver.
heuristics = ['reduced_cost','parsimonious','best_rounding']

# Run branch and bound on top of the master problem and keep its iteration log.
# NOTE(review): two_phase is hard-coded to True here although the configuration
# cell defines TWO_PHASE -- confirm whether the constant was meant to be used.
lagrangeBB = LagrangeBB(master, heuristics)
sol_master = lagrangeBB.optimize(two_phase=True, only_relaxed=False, feasible_methods=[])
df_phase2 = pd.DataFrame(master.log_rows)

0         . NODES TO EXPLORE: 1
    Iter                    UB                    LB       gap relgap(%)   penalty                       Time(s)
  ------   -------------------   -------------------  -------- ---------  -------- -----------------------------
               Dual   Feasible        Sub       Best                                   total    master       sub
       0       1000     1e+100          0          0      1000       100         0  0.067074  0.002652  0.051841
       5      0.389     1e+100     -16.59          0     0.389       100     0.178  0.240132  0.001234  0.033139
      10      0.493     1e+100     -92.75          0     0.493       100     0.228  0.424330  0.002034  0.029603
      15     0.2446     1e+100     -386.7          0    0.2446       100     0.102  0.580271  0.002876  0.029099
      20     0.1057     1e+100      -1561          0    0.1057       100    0.0325  0.740524  0.004740  0.027673
      25    0.04335     1e+100      -1538          0   0.04335  



     127  0.0002135     1e+100    -0.3392          0         0         0  1.84e-13  7.412101  0.080729  0.040247
    Iter                    UB                    LB       gap relgap(%)   penalty                       Time(s)
  ------   -------------------   -------------------  -------- ---------  -------- -----------------------------
               Dual   Feasible        Sub       Best                                   total    master       sub
       0 -8.106e-05     1e+100   -0.03789   -0.03789   0.03781 4.664e+04  2.95e-05  1.373549  0.248305  1.105695
       5 -0.0001053     1e+100  -0.007994  -0.005423  0.005318      5049  5.37e-07  6.505131  0.244554  0.459359
       9 -1.882e-05     1e+100 -0.0004033 -0.0004033         0         0  4.75e-10  9.412105  0.240757  0.143959
0 NODES DROPPED BY PRUNING: z > 0.192654
BRANCHING ON y[0]: binary_G6PDH2r00
1         . NODES TO EXPLORE: 2
    Iter                    UB                    LB       gap relgap(%)   penalty                  

## Feasible?

In [None]:
# Display the nodes of the branch-and-bound tree.
lagrangeBB.tree.nodes

In [None]:
# Print the yopt attribute of every submodel.
for sub in sub_dict.values():
    print(sub.yopt)

## Check best feasible solution

In [None]:
# Keep a copy of the solution returned by branch and bound.
# NOTE(review): sol_final is only (re)defined when sol_master is truthy;
# otherwise it is undefined or left over from an earlier run.
if sol_master:
    sol_final = sol_master.copy()

In [None]:
# Evaluate the recovered solution on a freshly loaded copy of the base model:
# convert the solution to fitted coefficients, then compare growth rates.
# NOTE(review): this passes sol_master, not the sol_final copy made in the
# previous cell, and does not check that sol_master is non-empty.
mdl0 = load_json_model(BASE_MODEL_FILE)
kd = sol_to_kdict(sol_master)
perf, df_results = compute_perf(kd, mdl0)

In [None]:
from scipy import stats

# Long format: one row per (substrate, mu_meas, variable) where variable is one
# of the remaining df_results columns (mu_fit / mu_unfit) and value its number.
dsplot = df_results.melt(['substrate','mu_meas'])
dsplot.loc[dsplot.variable=='mu_fit','model'] = 'Fit'
dsplot.loc[dsplot.variable=='mu_unfit','model'] = 'Unfit'
# Per variable: median absolute percent error, sum of squared errors, and the
# Spearman (rho) and Pearson (r) correlations between measured and simulated.
df_perf = dsplot.groupby('variable').apply(lambda x: pd.Series({
    'mdape':100*np.median(abs(x['value']-x['mu_meas'])/abs(x['mu_meas'])),
    'sse':sum( (x['value']-x['mu_meas'])**2 ),
    'rho':stats.spearmanr(x['mu_meas'],x['value'])[0],
    'r':stats.pearsonr(x['mu_meas'],x['value'])[0]
}))

# Categorical columns so that the unfit panel is placed before the fit panel.
dsplot.variable = dsplot.variable.astype('category')
dsplot.model = dsplot.model.astype('category')
# NOTE(review): inplace= on reorder_categories and size= on FacetGrid are not
# accepted by newer pandas / seaborn releases -- confirm against the installed
# versions before re-running.
dsplot.variable.cat.reorder_categories(['mu_unfit','mu_fit'], inplace=True)
g = sns.FacetGrid(dsplot, col='variable', hue='model', size=4)
g.map(plt.plot, 'mu_meas','value', linestyle='None', marker='o', markeredgecolor='#000000', lw=0.2, markersize=8, alpha=0.75)
for ax in g.axes.flat:
    # Reference line y = x from (0, 0) to (2, 2).
    ax.plot([0,2],[0,2], zorder=1, color='#333333')
    # Recover the variable name by stripping the 'variable = ' title prefix,
    # then look up its metrics for the new title.
    mdl  = ax.get_title().replace('variable = ','')
    mdape= df_perf.loc[mdl].mdape
    sse  = df_perf.loc[mdl].sse
    r    = df_perf.loc[mdl].r
    rho  = df_perf.loc[mdl].rho    
    ax.set_title('%s (MdAPE=%.3g%%, SSE=%.3g)\n(r=%.3g, rho=%.3g)'%(mdl,mdape, sse, r,rho))
g.set_xlabels('Measured growth rate (h^{-1})')
g.set_ylabels('Simulated growth rate (h^{-1})')
g.add_legend()