# Load libraries

In [4]:
from dingo import MetabolicNetwork
import numpy as np

import sys
sys.path.insert(0, '../scripts/')
from load_modify_sample_utils import load_model, get_objective_functions, get_reaction_bounds, modify_model, sample_gapsplit, plot_grid_95_reactions
from load_modify_sample_utils import sample_dingo, sample_optgp, sampling_statistics
from graphs_utils import construct_graph, plot_graph
from distributions_comparison_utils import significantly_altered_reactions
from loopless_utils import get_loopless_solutions_from_samples, calculate_affected_samples, set_bounds_from_loopless_solution_samples
from loopless_utils import calculate_distances_from_samples, calculate_distances_from_reactions, violin_plot_samples_distances

Add constraints and variables to the model's mathematical problem.

Useful for variables and constraints that can not be expressed with reactions and simple lower and upper bounds.

Additions are reversed upon exit if the model itself is used as context.

# Load and Modify the model to get optimal percentage 0% with biomass as the objective function

In [21]:

# Load the E. coli core model and unpack the four values load_model returns.
(
    ec_cobra_model,
    ec_cobra_reactions,
    ec_dingo_model,
    ec_dingo_reactions,
) = load_model("../ext_data/models/e_coli_core.xml")

# Show what get_objective_functions reports for the freshly loaded cobra model.
objective_functions = get_objective_functions(ec_cobra_model)
print(objective_functions)

# Show the reaction bounds of the model as loaded (printed in full).
default_reaction_bounds = get_reaction_bounds(ec_cobra_model)
print(default_reaction_bounds)

# Derive the cobra/dingo model pair used by the rest of the notebook:
# biomass as objective function with optimal_percentage=0.
(
    ec_cobra_model_biomass_obj_0_opt,
    ec_dingo_model_biomass_obj_0_opt,
) = modify_model(
    ec_cobra_model,
    objective_function="BIOMASS_Ecoli_core_w_GAM",
    optimal_percentage=0,
)


['BIOMASS_Ecoli_core_w_GAM']
{'PFK': (0.0, 1000.0), 'PFL': (0.0, 1000.0), 'PGI': (-1000.0, 1000.0), 'PGK': (-1000.0, 1000.0), 'PGL': (0.0, 1000.0), 'ACALD': (-1000.0, 1000.0), 'AKGt2r': (-1000.0, 1000.0), 'PGM': (-1000.0, 1000.0), 'PIt2r': (-1000.0, 1000.0), 'ALCD2x': (-1000.0, 1000.0), 'ACALDt': (-1000.0, 1000.0), 'ACKr': (-1000.0, 1000.0), 'PPC': (0.0, 1000.0), 'ACONTa': (-1000.0, 1000.0), 'ACONTb': (-1000.0, 1000.0), 'ATPM': (8.39, 1000.0), 'PPCK': (0.0, 1000.0), 'ACt2r': (-1000.0, 1000.0), 'PPS': (0.0, 1000.0), 'ADK1': (-1000.0, 1000.0), 'AKGDH': (0.0, 1000.0), 'ATPS4r': (-1000.0, 1000.0), 'PTAr': (-1000.0, 1000.0), 'PYK': (0.0, 1000.0), 'BIOMASS_Ecoli_core_w_GAM': (0.0, 1000.0), 'PYRt2': (-1000.0, 1000.0), 'CO2t': (-1000.0, 1000.0), 'RPE': (-1000.0, 1000.0), 'CS': (0.0, 1000.0), 'RPI': (-1000.0, 1000.0), 'SUCCt2_2': (0.0, 1000.0), 'CYTBD': (0.0, 1000.0), 'D_LACt2': (-1000.0, 1000.0), 'ENO': (-1000.0, 1000.0), 'SUCCt3': (0.0, 1000.0), 'ETOHt2r': (-1000.0, 1000.0), 'SUCDi': (0.0, 10


# First, sample the default (loopy) model so that its loopy samples serve as the control for loopy distances


In [22]:

# Control run: sample the unrestricted dingo model.
# `ess` is presumably the target effective sample size (the sampler log reports
# "total ess") -- TODO confirm against sample_dingo.
samples_dingo_default = sample_dingo(
    ec_dingo_model_biomass_obj_0_opt,
    reaction_in_rows=True,
    ess=2000,
)


Set parameter Username
Set parameter LicenseID to value 2642044
Academic license - for non-commercial use only - expires 2026-03-25
phase 1: number of correlated samples = 500, effective sample size = 11, ratio of the maximum singilar value over the minimum singular value = 1078.31
phase 2: number of correlated samples = 500, effective sample size = 117, ratio of the maximum singilar value over the minimum singular value = 2.12516
phase 3: number of correlated samples = 2400, effective sample size = 1031
phase 4: number of correlated samples = 2300, effective sample size = 912
[5]total ess 2071: number of correlated samples = 5700




[5]maximum marginal PSRF: 1.00204


In [23]:

# Compute loopless solutions for the default samples.
samples_dingo_default_loopless_solutions = get_loopless_solutions_from_samples(
    samples_dingo_default,
    ec_cobra_model_biomass_obj_0_opt,
)




0.025743780269525507
0.025203957184659176
0.011159023969419035
0.04947571325979527
0.005001239809718228
0.03761561261089741
0.06702013517820526
0.0849477076303915
0.00895707117259581
0.036413051719075606
0.031531424469063984
0.007656036145861189
0.006437541905496946
0.024519532480179018
0.04407472833113468
0.06312059611919896
0.012272264279518774
0.03643448830973328
0.04012124189926891
0.12771630601349104
0.015301167011361638
0.09153324544335206
0.0661249940254588
0.017921494581063846
0.011794924497033676
0.0069921340999902215
0.022747130220280853
0.0864149587054297
0.05558066839368525
0.024674080418681688
0.02064044065350823
0.05936059839709551
0.025174695118946655
0.031241675338856578
0.025412394324818677
0.08540651163553425
0.02286029293381331
0.017757559613022833
0.0937818218425247
0.13944924899767197
0.005491437802671251
0.003111555626700069
0.04586377154370391
0.07093184354193394
0.0006468673052677509
0.10772357386277598
0.016148054660285016
0.001474663411826338
0.019943444012287

In [25]:
# Position of FRD7 in the model's reaction list.
frd7 = ec_cobra_reactions.index("FRD7")
print(frd7)

# Mean loopless flux of FRD7. The row is selected with the index looked up
# above instead of a hard-coded 67, so this stays correct if the reaction
# order changes. Assumes the rows of the loopless-solution array follow the
# order of `ec_cobra_reactions` (the original literal index relied on the same
# assumption).
np.mean(samples_dingo_default_loopless_solutions[frd7])

67


0.0002196159929228532

In [9]:

# Distances between default samples and their loopless solutions, computed
# with the per-reaction helper, shown as a violin plot.
distances_array = calculate_distances_from_reactions(
    samples_dingo_default,
    samples_dingo_default_loopless_solutions,
    ec_cobra_model_biomass_obj_0_opt,
)
violin_plot_samples_distances(distances_array)

# Same comparison with the per-sample helper. This rebinds `distances_array`.
distances_array = calculate_distances_from_samples(
    samples_dingo_default,
    samples_dingo_default_loopless_solutions,
    ec_cobra_model_biomass_obj_0_opt,
)
violin_plot_samples_distances(distances_array)


In [16]:
# Recompute the per-reaction distances: the previous cell left
# `distances_array` bound to the per-sample result.
distances_array = calculate_distances_from_reactions(
    samples_dingo_default,
    samples_dingo_default_loopless_solutions,
    ec_cobra_model_biomass_obj_0_opt,
)

# Flag reactions whose distance is at least one standard deviation above the mean.
avg = np.mean(distances_array)
std = np.std(distances_array)
tol = avg + 1 * std

print(avg, std, tol)

# Map the positions of the flagged distances back to model reactions.
indices = np.where(distances_array >= tol)[0]
reactions_in_loops = [ec_cobra_reactions[position] for position in indices]
print(reactions_in_loops)


1306.8428585585177 6100.985305823758 7407.828164382276
[<Reaction SUCDi at 0x78be66cdeb00>, <Reaction FRD7 at 0x78be65e66950>]


# Second, sample a modified (loopless) model so that its loopless samples serve as the control for loopless distances

In [4]:

# Work on a copy so the control model is not altered.
modified_cobra_model_frd7_removal = ec_cobra_model_biomass_obj_0_opt.copy()

# Block FRD7 by fixing both bounds to zero. Assigning `bounds` sets the lower
# and upper bound together, so the result does not depend on the order of two
# separate assignments, and the reaction is looked up only once.
modified_cobra_model_frd7_removal.reactions.get_by_id("FRD7").bounds = (0, 0)

# Convert the modified cobra model to a dingo network and sample it.
modified_dingo_model_frd7_removal = MetabolicNetwork.from_cobra_model(
    modified_cobra_model_frd7_removal
)
samples_modified_dingo_model_frd7_removal = sample_dingo(
    modified_dingo_model_frd7_removal,
    reaction_in_rows=True,
    ess=2000,
)


Read LP format model from file /tmp/tmpll0wdalq.lp
Reading time = 0.00 seconds
: 72 rows, 190 columns, 720 nonzeros
Set parameter Username
Set parameter LicenseID to value 2642044
Academic license - for non-commercial use only - expires 2026-03-25
phase 1: number of correlated samples = 500, effective sample size = 47, ratio of the maximum singilar value over the minimum singular value = 418.239
phase 2: number of correlated samples = 500, effective sample size = 154, ratio of the maximum singilar value over the minimum singular value = 2.18439
phase 3: number of correlated samples = 2300, effective sample size = 767
phase 4: number of correlated samples = 2300, effective sample size = 781
phase 5: number of correlated samples = 2300, effective sample size = 752
[5]total ess 2501: number of correlated samples = 7900




[5]maximum marginal PSRF: 1.00139


In [7]:

# Loopless solutions for the samples drawn from the FRD7-blocked model.
# NOTE(review): the unmodified model is passed here, not
# `modified_cobra_model_frd7_removal` -- confirm this is intended.
samples_modified_dingo_model_frd7_removal_loopless_solutions = get_loopless_solutions_from_samples(
    samples_modified_dingo_model_frd7_removal,
    ec_cobra_model_biomass_obj_0_opt,
)




In [24]:

# Per-reaction distances for the FRD7-blocked samples, shown as a violin plot.
distances_array = calculate_distances_from_reactions(
    samples_modified_dingo_model_frd7_removal,
    samples_modified_dingo_model_frd7_removal_loopless_solutions,
    ec_cobra_model_biomass_obj_0_opt,
)
violin_plot_samples_distances(distances_array)

# List the reactions whose distance reaches the fixed 0.004 cut-off.
indices = np.where(distances_array >= 0.004)[0]
reactions_in_loops = [ec_cobra_reactions[position] for position in indices]
print(reactions_in_loops)

# Per-sample distances for the same samples. This rebinds `distances_array`.
distances_array = calculate_distances_from_samples(
    samples_modified_dingo_model_frd7_removal,
    samples_modified_dingo_model_frd7_removal_loopless_solutions,
    ec_cobra_model_biomass_obj_0_opt,
)
violin_plot_samples_distances(distances_array)


[<Reaction PPC at 0x7cacdb991cc0>, <Reaction PYK at 0x7cacdb993fa0>, <Reaction MDH at 0x7cace0629c90>, <Reaction ME1 at 0x7cace0629cc0>]


# LoopLess Sampling Approach no.1: gapsplit

In [3]:

# Approach 1: gapsplit sampling with add_loopless_cobrapy turned off.
samples_gapsplit = sample_gapsplit(
    ec_cobra_model_biomass_obj_0_opt,
    n_samples=3000,
    reaction_in_rows=True,
    add_loopless_cobrapy=False,
    fraction_of_optimum=0,
)



Read LP format model from file /tmp/tmpujnwxi5v.lp
Reading time = 0.01 seconds
: 72 rows, 190 columns, 720 nonzeros
Calculating feasible ranges using FVA.
Targeting 87/95 unblocked primary variables.
Targeting 4 secondary variables.

   Sample   Coverage   MinGap   Median   MaxGap     Elapsed     Remaining   Infeasible
 300/3000     94.60%   0.0144   0.0497   0.1379       12.15        109.34            0
 600/3000     97.14%   0.0072   0.0251   0.1099       25.25        101.00            0
 900/3000     98.04%   0.0057   0.0185   0.0553       38.07         88.83            0
1200/3000     98.52%   0.0045   0.0133   0.0490       53.31         79.97            0
1500/3000     98.83%   0.0035   0.0116   0.0282       65.87         65.87            0
1800/3000     99.01%   0.0027   0.0087   0.0276       80.02         53.35            0
2100/3000     99.15%   0.0023   0.0079   0.0268       94.56         40.52            0
2400/3000     99.26%   0.0020   0.0071   0.0183      112.46         28

In [4]:

# Loopless solutions for the gapsplit samples.
samples_gapsplit_loopless_solutions = get_loopless_solutions_from_samples(
    samples_gapsplit,
    ec_cobra_model_biomass_obj_0_opt,
)




In [5]:

# Per-reaction distances between gapsplit samples and their loopless solutions.
distances_array = calculate_distances_from_reactions(
    samples_gapsplit,
    samples_gapsplit_loopless_solutions,
    ec_cobra_model_biomass_obj_0_opt,
)
violin_plot_samples_distances(distances_array)

# Per-sample distances for the same data. This rebinds `distances_array`.
distances_array = calculate_distances_from_samples(
    samples_gapsplit,
    samples_gapsplit_loopless_solutions,
    ec_cobra_model_biomass_obj_0_opt,
)
violin_plot_samples_distances(distances_array)


# LoopLess Sampling Approach no.2: add_loopless + gapsplit

In [6]:

# Approach 2: same gapsplit call as approach 1, with add_loopless_cobrapy turned on.
samples_add_loopless_gapsplit = sample_gapsplit(
    ec_cobra_model_biomass_obj_0_opt,
    n_samples=3000,
    reaction_in_rows=True,
    add_loopless_cobrapy=True,
    fraction_of_optimum=0,
)


Read LP format model from file /tmp/tmp063ao1b3.lp
Reading time = 0.00 seconds
: 72 rows, 190 columns, 720 nonzeros
Calculating feasible ranges using FVA.
Targeting 87/95 unblocked primary variables.
Targeting 4 secondary variables.

   Sample   Coverage   MinGap   Median   MaxGap     Elapsed     Remaining   Infeasible
 300/3000     94.56%   0.0138   0.0540   0.1390       41.36        372.28            0
 600/3000     97.18%   0.0079   0.0291   0.0701       80.32        321.28            0
 900/3000     98.07%   0.0050   0.0187   0.0469      121.39        283.25            0
1200/3000     98.59%   0.0036   0.0144   0.0349      159.50        239.25            0
1500/3000     98.81%   0.0033   0.0121   0.0347      204.13        204.13            0
1800/3000     98.99%   0.0028   0.0096   0.0248      247.25        164.83            0
2100/3000     99.15%   0.0024   0.0083   0.0218      290.59        124.54            0
2400/3000     99.27%   0.0022   0.0073   0.0177      334.28         83

In [7]:

# Loopless solutions for the add_loopless + gapsplit samples.
samples_add_loopless_gapsplit_loopless_solutions = get_loopless_solutions_from_samples(
    samples_add_loopless_gapsplit,
    ec_cobra_model_biomass_obj_0_opt,
)



Solver status is 'infeasible'.



In [9]:

# Per-reaction distances for the add_loopless + gapsplit samples.
distances_array = calculate_distances_from_reactions(
    samples_add_loopless_gapsplit,
    samples_add_loopless_gapsplit_loopless_solutions,
    ec_cobra_model_biomass_obj_0_opt,
)
violin_plot_samples_distances(distances_array)

# Per-sample distances for the same data. This rebinds `distances_array`.
distances_array = calculate_distances_from_samples(
    samples_add_loopless_gapsplit,
    samples_add_loopless_gapsplit_loopless_solutions,
    ec_cobra_model_biomass_obj_0_opt,
)
violin_plot_samples_distances(distances_array)


# LoopLess Sampling Approach no.3: bounds from loopless_solutions

In [3]:

# Approach 3, step 1: draw unrestricted samples with dingo ...
samples_dingo = sample_dingo(
    ec_dingo_model_biomass_obj_0_opt,
    reaction_in_rows=True,
    ess=3000,
)

# ... and with optgp. In the cells visible here the optgp samples are only
# referenced from commented-out code.
samples_optgp = sample_optgp(
    ec_cobra_model_biomass_obj_0_opt,
    n_samples=3000,
    thinning=100,
    reaction_in_rows=True,
)


Set parameter Username
Set parameter LicenseID to value 2642044
Academic license - for non-commercial use only - expires 2026-03-25
phase 1: number of correlated samples = 500, effective sample size = 20, ratio of the maximum singilar value over the minimum singular value = 1043.6
phase 2: number of correlated samples = 500, effective sample size = 120, ratio of the maximum singilar value over the minimum singular value = 2.06053
phase 3: number of correlated samples = 2400, effective sample size = 1056
phase 4: number of correlated samples = 2400, effective sample size = 1028
phase 5: number of correlated samples = 2400, effective sample size = 992
[5]total ess 3216: number of correlated samples = 8200




[5]maximum marginal PSRF: 1.0003


In [4]:

# Approach 3, step 2: loopless solutions for the dingo samples.
samples_dingo_loopless_solutions = get_loopless_solutions_from_samples(
    samples_dingo,
    ec_cobra_model_biomass_obj_0_opt,
)

# Disabled optgp counterpart, kept for reference:
#samples_optgp_loopless_solutions = get_loopless_solutions_from_samples(samples_optgp, ec_cobra_model_biomass_obj_0_opt)




In [5]:

# Approach 3, step 3: build a cobra model with bounds set from the loopless
# solutions of the dingo samples.
loopless_solutions_bounds_ec_cobra_model_biomass_obj_0_opt = set_bounds_from_loopless_solution_samples(
    samples_dingo_loopless_solutions,
    ec_cobra_model_biomass_obj_0_opt,
)

# Convert the re-bounded cobra model into a dingo network ...
loopless_solutions_bounds_ec_dingo_model_biomass_obj_0_opt = MetabolicNetwork.from_cobra_model(
    loopless_solutions_bounds_ec_cobra_model_biomass_obj_0_opt
)

# ... and sample it with the same settings as the unrestricted dingo run.
samples_dingo_loopless_solutions_constrained_bounds = sample_dingo(
    loopless_solutions_bounds_ec_dingo_model_biomass_obj_0_opt,
    reaction_in_rows=True,
    ess=3000,
)

# Disabled optgp counterpart, kept for reference:
#loopless_solutions_bounds_ec_cobra_model_biomass_obj_0_opt = set_bounds_from_loopless_solution_samples(samples_optgp_loopless_solutions, ec_cobra_model_biomass_obj_0_opt)

#samples_optgp_loopless_solutions_constrained_bounds = sample_optgp(loopless_solutions_bounds_ec_cobra_model_biomass_obj_0_opt, 
#                             n_samples = 3000, 
#                             thinning = 100, 
#                             reaction_in_rows = True)


Read LP format model from file /tmp/tmpit_2hrgj.lp
Reading time = 0.00 seconds
: 72 rows, 190 columns, 720 nonzeros


phase 1: number of correlated samples = 400, effective sample size = 2, ratio of the maximum singilar value over the minimum singular value = 1650.34
phase 2: number of correlated samples = 400, effective sample size = 2, ratio of the maximum singilar value over the minimum singular value = 1487.62
phase 3: number of correlated samples = 400, effective sample size = 64, ratio of the maximum singilar value over the minimum singular value = 40.5631
phase 4: number of correlated samples = 400, effective sample size = 146, ratio of the maximum singilar value over the minimum singular value = 2.67891
phase 5: number of correlated samples = 1800, effective sample size = 622
phase 6: number of correlated samples = 1800, effective sample size = 954
phase 7: number of correlated samples = 1800, effective sample size = 971
phase 8: number of correlated samples = 600, effective sample size = 267
[5]total ess 3028: number of correlated samples = 7600




[5]maximum marginal PSRF: 1.08475


In [6]:

# Loopless solutions for the samples drawn from the re-bounded model. The
# re-bounded cobra model is the one passed to the helper here.
samples_dingo_loopless_solutions_constrained_bounds_loopless_solutions = get_loopless_solutions_from_samples(
    samples_dingo_loopless_solutions_constrained_bounds,
    loopless_solutions_bounds_ec_cobra_model_biomass_obj_0_opt,
)

# Disabled optgp counterpart, kept for reference:
#samples_optgp_loopless_solutions_constrained_bounds_loopless_solutions = get_loopless_solutions_from_samples(samples_optgp_loopless_solutions_constrained_bounds, loopless_solutions_bounds_ec_cobra_model_biomass_obj_0_opt)




In [7]:

# Per-reaction distances for the samples drawn from the re-bounded model.
# NOTE(review): the distance helpers receive the original model while the
# loopless solutions above were computed with the re-bounded one -- confirm
# this is intended.
distances_array = calculate_distances_from_reactions(
    samples_dingo_loopless_solutions_constrained_bounds,
    samples_dingo_loopless_solutions_constrained_bounds_loopless_solutions,
    ec_cobra_model_biomass_obj_0_opt,
)
violin_plot_samples_distances(distances_array)

# Per-sample distances for the same data. This rebinds `distances_array`.
distances_array = calculate_distances_from_samples(
    samples_dingo_loopless_solutions_constrained_bounds,
    samples_dingo_loopless_solutions_constrained_bounds_loopless_solutions,
    ec_cobra_model_biomass_obj_0_opt,
)
violin_plot_samples_distances(distances_array)

# Disabled optgp counterparts, kept for reference:
#distances_array = calculate_distances_from_reactions(samples_optgp_loopless_solutions_constrained_bounds, samples_optgp_loopless_solutions_constrained_bounds_loopless_solutions, ec_cobra_model_biomass_obj_0_opt)
#violin_plot_samples_distances(distances_array)


#distances_array = calculate_distances_from_samples(samples_optgp_loopless_solutions_constrained_bounds, samples_optgp_loopless_solutions_constrained_bounds_loopless_solutions, ec_cobra_model_biomass_obj_0_opt)
#violin_plot_samples_distances(distances_array)
