### Load libraries

In [1]:
from scipy.spatial.distance import jensenshannon
import numpy as np
import sys

sys.path.insert(0, '../scripts/')
from load_modify_sample_utils import load_model, get_objective_functions, get_reaction_bounds, modify_model
from load_modify_sample_utils import sample_dingo, sample_optgp
from correlations_utils import compute_copula
from correlations_utils import plot_copula

### Load model

In [2]:
# Load the E. coli core model from its SBML/XML file. load_model returns four
# objects: a cobra-side model with its reaction list and a dingo-side model with
# its reaction list (the dingo reaction list is used later to look up row
# indices in the sample matrices).
ec_cobra_model, ec_cobra_reactions, ec_dingo_model, ec_dingo_reactions = load_model("../ext_data/models/e_coli_core.xml")

# Objective function(s) reported for the cobra model; the recorded output is
# ['BIOMASS_Ecoli_core_w_GAM'], the id reused in the modify_model calls below.
objective_functions = get_objective_functions(ec_cobra_model)
print(objective_functions)

# Per-reaction bounds of the unmodified model, keyed by reaction id.
# NOTE(review): values look like (lower, upper) pairs — confirm in load_modify_sample_utils.
default_reaction_bounds = get_reaction_bounds(ec_cobra_model)
print(default_reaction_bounds)


Set parameter Username
Set parameter LicenseID to value 2634947
Academic license - for non-commercial use only - expires 2026-03-11
['BIOMASS_Ecoli_core_w_GAM']
{'PFK': (0.0, 1000.0), 'PFL': (0.0, 1000.0), 'PGI': (-1000.0, 1000.0), 'PGK': (-1000.0, 1000.0), 'PGL': (0.0, 1000.0), 'ACALD': (-1000.0, 1000.0), 'AKGt2r': (-1000.0, 1000.0), 'PGM': (-1000.0, 1000.0), 'PIt2r': (-1000.0, 1000.0), 'ALCD2x': (-1000.0, 1000.0), 'ACALDt': (-1000.0, 1000.0), 'ACKr': (-1000.0, 1000.0), 'PPC': (0.0, 1000.0), 'ACONTa': (-1000.0, 1000.0), 'ACONTb': (-1000.0, 1000.0), 'ATPM': (8.39, 1000.0), 'PPCK': (0.0, 1000.0), 'ACt2r': (-1000.0, 1000.0), 'PPS': (0.0, 1000.0), 'ADK1': (-1000.0, 1000.0), 'AKGDH': (0.0, 1000.0), 'ATPS4r': (-1000.0, 1000.0), 'PTAr': (-1000.0, 1000.0), 'PYK': (0.0, 1000.0), 'BIOMASS_Ecoli_core_w_GAM': (0.0, 1000.0), 'PYRt2': (-1000.0, 1000.0), 'CO2t': (-1000.0, 1000.0), 'RPE': (-1000.0, 1000.0), 'CS': (0.0, 1000.0), 'RPI': (-1000.0, 1000.0), 'SUCCt2_2': (0.0, 1000.0), 'CYTBD': (0.0, 1000.

### Modify model and perform sampling

In [3]:
# Build two dingo models from the same cobra model, differing only in
# optimal_percentage (100 vs 0) for the biomass objective. The first return
# value of modify_model is discarded.
# NOTE(review): optimal_percentage presumably forces the objective to reach at
# least that percentage of its optimum — confirm in load_modify_sample_utils.
_, ec_dingo_model_condition_100 = modify_model(ec_cobra_model, objective_function="BIOMASS_Ecoli_core_w_GAM", optimal_percentage=100)
_, ec_dingo_model_condition_0 = modify_model(ec_cobra_model, objective_function="BIOMASS_Ecoli_core_w_GAM", optimal_percentage=0)


# Sample each condition with dingo. reaction_in_rows = True is requested, and
# later cells index the result by reaction index along the first axis.
# NOTE(review): ess is presumably the target effective sample size — confirm.
samples_dingo_condition_100 = sample_dingo(ec_dingo_model_condition_100, reaction_in_rows = True, ess=2000)
print(samples_dingo_condition_100.shape)

samples_dingo_condition_0 = sample_dingo(ec_dingo_model_condition_0, reaction_in_rows = True, ess=2000)
print(samples_dingo_condition_0.shape)


Read LP format model from file /tmp/tmphsmp28dh.lp
Reading time = 0.00 seconds
: 72 rows, 190 columns, 720 nonzeros
Read LP format model from file /tmp/tmpc3drhwnh.lp
Reading time = 0.00 seconds
: 72 rows, 190 columns, 720 nonzeros
Set parameter Username
Set parameter LicenseID to value 2634947
Academic license - for non-commercial use only - expires 2026-03-11
phase 1: number of correlated samples = 500, effective sample size = 5, ratio of the maximum singilar value over the minimum singular value = 5070.45
phase 2: number of correlated samples = 500, effective sample size = 14, ratio of the maximum singilar value over the minimum singular value = 171.93
phase 3: number of correlated samples = 500, effective sample size = 3, ratio of the maximum singilar value over the minimum singular value = 239.065
phase 4: number of correlated samples = 500, effective sample size = 49, ratio of the maximum singilar value over the minimum singular value = 34.9643
phase 5: number of correlated sampl

[5]maximum marginal PSRF: 1.01349


phase 1: number of correlated samples = 500, effective sample size = 3, ratio of the maximum singilar value over the minimum singular value = 4645.94
phase 2: number of correlated samples = 500, effective sample size = 18, ratio of the maximum singilar value over the minimum singular value = 136.71
phase 3: number of correlated samples = 500, effective sample size = 22, ratio of the maximum singilar value over the minimum singular value = 140.524
phase 4: number of correlated samples = 500, effective sample size = 49, ratio of the maximum singilar value over the minimum singular value = 46.892
phase 5: number of correlated samples = 500, effective sample size = 142, ratio of the maximum singilar value over the minimum singular value = 2.56059
phase 6: number of correlated samples = 2400, effective sample size = 1135
phase 7: number of correlated samples = 1500, effective sample size = 757
[5]total ess 2126: number of correlated samples = 6400


(95, 6400)


[5]maximum marginal PSRF: 1.00704


The test function below shows how the corresponding `copula_tail_dependence` function in `correlations_utils.py` works.

It prints which _cells_ (positions) are selected in each corner of the copula.

In [5]:
import plotly.io as pio
# Set plotly's default renderer to 'browser' for every figure shown from here on.
# This is a kernel-wide setting, not local to this cell.
# NOTE(review): presumably so that plot_copula figures open in a browser tab — confirm.
pio.renderers.default = 'browser'

def copula_tail_dependence_test(copula, cop_coeff_1, cop_coeff_2, cop_coeff_3):
    """Print which copula cells fall into each corner region and their masses.

    Cells inside the band ``|row - col| <= cop_coeff_1 * rows`` contribute to the
    "red" mass when they lie near the top-left (``row + col < cop_coeff_2 * rows - 1``)
    or near the bottom-right (``row + col > cop_coeff_3 * rows - 1``).
    Cells outside that band contribute to the "blue" mass when
    ``cop_coeff_2 * rows - 1 <= row + col <= cop_coeff_3 * rows - 1``; they are
    reported as top-right when ``row < rows / 2`` and bottom-left otherwise.

    Parameters
    ----------
    copula : 2-D array exposing ``.shape`` and ``copula[row][col]`` indexing.
    cop_coeff_1 : half-width of the band around ``row == col``, as a fraction of ``rows``.
    cop_coeff_2 : lower ``row + col`` threshold, as a fraction of ``rows``.
    cop_coeff_3 : upper ``row + col`` threshold, as a fraction of ``rows``.

    Returns
    -------
    None. Three things are printed: the four lists of selected ``(row, col)``
    cells, the four cell counts, and ``red_mass blue_mass red_mass/blue_mass``.
    When the blue mass is zero the ratio is printed as ``inf`` (red mass non-zero)
    or ``nan`` (both zero) instead of raising ``ZeroDivisionError``.

    Note: every threshold is scaled by the number of rows, exactly as in the
    original implementation, so the regions are only symmetric for square copulas.
    """
    rows, cols = copula.shape

    red_mass = 0
    blue_mass = 0

    top_left = []
    bottom_right = []
    top_right = []
    bottom_left = []

    # Loop-invariant thresholds, computed once.
    band_half_width = cop_coeff_1 * rows
    lower_sum = cop_coeff_2 * rows - 1
    upper_sum = cop_coeff_3 * rows - 1

    for row in range(rows):
        for col in range(cols):
            cell_sum = row + col

            # Cells in the band around row == col: top-left / bottom-right corners.
            if -band_half_width <= row - col <= band_half_width:
                if cell_sum < lower_sum:
                    red_mass = red_mass + copula[row][col]
                    top_left.append((row, col))
                elif cell_sum > upper_sum:
                    red_mass = red_mass + copula[row][col]
                    bottom_right.append((row, col))

            # Cells off that band but in the band around the other diagonal:
            # top-right / bottom-left corners.
            elif lower_sum <= cell_sum <= upper_sum:
                if row < rows / 2:
                    blue_mass = blue_mass + copula[row][col]
                    top_right.append((row, col))
                else:
                    blue_mass = blue_mass + copula[row][col]
                    bottom_left.append((row, col))

    # Guard the ratio: with no blue mass a plain division either raises
    # ZeroDivisionError (Python numbers) or emits a RuntimeWarning (numpy floats).
    if blue_mass != 0:
        mass_ratio = red_mass / blue_mass
    elif red_mass != 0:
        mass_ratio = float("inf")
    else:
        mass_ratio = float("nan")

    print(top_left, "\n", bottom_right, "\n", top_right, "\n" , bottom_left)
    print(len(top_left), len(bottom_right), len(top_right), len(bottom_left))
    print(red_mass, blue_mass, mass_ratio)

    

# parameters for the width of the copula's diagonal
cop_coeff = 0.2
cop_coeff_1 = cop_coeff
cop_coeff_2 = 1 - cop_coeff
cop_coeff_3 = 1 + cop_coeff

# Row indices of the two reactions in the dingo sample matrix.
n1 = ec_dingo_reactions.index("PGK")
n2 = ec_dingo_reactions.index("PFK")

# Use the signed flux samples. The earlier np.absolute(...) assignments were
# dead code: they were overwritten by these two lines before ever being read,
# so the copula below has always been computed on the signed values.
flux1 = samples_dingo_condition_0[n1]
flux2 = samples_dingo_condition_0[n2]

# plot_copula takes [samples, reaction name] pairs.
data_flux1=[flux1, ec_dingo_reactions[n1]]
data_flux2=[flux2, ec_dingo_reactions[n2]]


# The same n is passed to both the plot and compute_copula so the plotted
# copula and the tested copula match.
n = 10
plot_copula(data_flux1, data_flux2, n = n)

copula = compute_copula(flux1, flux2, n = n)
copula_tail_dependence_test(copula, cop_coeff_1, cop_coeff_2, cop_coeff_3)

[(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2), (1, 3), (2, 0), (2, 1), (2, 2), (2, 3), (2, 4), (3, 1), (3, 2), (3, 3), (4, 2)] 
 [(5, 7), (6, 6), (6, 7), (6, 8), (7, 5), (7, 6), (7, 7), (7, 8), (7, 9), (8, 6), (8, 7), (8, 8), (8, 9), (9, 7), (9, 8), (9, 9)] 
 [(0, 7), (0, 8), (0, 9), (1, 6), (1, 7), (1, 8), (1, 9), (2, 5), (2, 6), (2, 7), (2, 8), (2, 9), (3, 6), (3, 7), (3, 8), (4, 7)] 
 [(5, 2), (6, 1), (6, 2), (6, 3), (7, 0), (7, 1), (7, 2), (7, 3), (7, 4), (8, 0), (8, 1), (8, 2), (8, 3), (9, 0), (9, 1), (9, 2)]
16 16 16 16
0.06203124999999999 0.6773437500000001 0.09158016147635521


### Toy arrays representing copulas, created to show how the Jensen-Shannon distance (`jensenshannon`) behaves

Note: As long as you do not describe what we are looking at, it's hard to follow. 
Please describe the main conclusions from the `jensenshannon` experiments, as well as of the copulas above. 

It's not clear what you want to show in each case and/or why you are doing what you're doing.

In [6]:


cells = 5

# Copula 1: uniform, every one of the cells*cells cells carries the same mass.
reference_copula = np.full((cells, cells), 1 / (cells * cells))
reference_copula_1_flat = reference_copula.flatten()

# Copula 2: all mass split evenly over the 2x2 top-left corner.
reference_copula_2 = np.zeros((cells, cells))
reference_copula_2[:2, :2] = 0.25
reference_copula_2_flat = reference_copula_2.flatten()

# Copula 3: all mass in the single top-left cell (integer-valued array).
reference_copula_3 = np.zeros((cells, cells), dtype=int)
reference_copula_3[0, 0] = 1
reference_copula_3_flat = reference_copula_3.flatten()

# Jensen-Shannon distance of the uniform copula to itself, to copula 2 and to
# copula 3, printed in that order. No `base` is passed, so SciPy uses the
# natural logarithm and the distance ranges from 0 to sqrt(ln 2) ~= 0.83
# (it only ranges from 0 to 1 when base=2 is given).
for dist in (
    jensenshannon(reference_copula_1_flat, other_flat)
    for other_flat in (
        reference_copula_1_flat,
        reference_copula_2_flat,
        reference_copula_3_flat,
    )
):
    print(dist)



0.0
0.6785698641197397
0.7799839057460577


In [7]:
import numpy as np
cells = 100

# All-zero grid; the two concentrated copulas below start from its shape and dtype.
a = np.zeros((cells, cells))

# b: the whole mass sits in the single top-left cell.
b = np.zeros_like(a)
b[0, 0] = 1
b_flat = b.flatten()

# c: the mass is spread evenly over the 10x10 top-left corner (100 cells of 0.01).
c = np.zeros_like(a)
c[:10, :10] = 0.01
c_flat = c.flatten()

# Uniform copula on the same grid, used as the reference distribution.
reference_copula = np.full_like(a, 1 / (cells * cells))
reference_copula_flat = reference_copula.flatten()

In [8]:
from scipy.spatial.distance import jensenshannon

# Distance of each concentrated copula (single-cell b, then 10x10-corner c)
# to the uniform reference, printed in that order.
for dist in (
    jensenshannon(concentrated_flat, reference_copula_flat)
    for concentrated_flat in (b_flat, c_flat)
):
    print(dist)

0.8322479564657576
0.8155344336992489
