### Load libraries

In [1]:

import sys
sys.path.insert(0, '../src/')

from load_modify_sample_utils import load_model, get_objective_functions, get_reaction_bounds, modify_model
from load_modify_sample_utils import sample_dingo, sample_optgp

from correlations_utils import correlated_reactions, plot_correlation_matrix, split_forward_reverse
from correlations_utils import find_reactants_products, sharing_metabolites_square_matrix

from pathways_utils import sort_reactions_in_pathways_by_reactions_in_model_order, subset_sampling_df_from_reaction_ids
from pathways_utils import map_model_to_kegg_reactions, read_json_file, bigg_to_kegg_id, fill_missing_kegg_ids_in_dict
from pathways_utils import get_kegg_pathways_from_reaction_ids, subset_model_reactions_from_pathway_info


  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


### Load model

In [2]:
# Load the model file; load_model returns four values that are unpacked into
# the cobra/dingo model and reaction variables used by the rest of the notebook.
(
    ec_cobra_model,
    ec_cobra_reactions,
    ec_dingo_model,
    ec_dingo_reactions,
) = load_model("../ext_data/models/e_coli_core.xml")

# Show which objective function(s) the loaded model reports.
objective_functions = get_objective_functions(ec_cobra_model)
print(objective_functions)

# Look up the default bounds and print only the entry of the biomass reaction.
default_reaction_bounds = get_reaction_bounds(ec_cobra_model)
print(default_reaction_bounds.get("BIOMASS_Ecoli_core_w_GAM"))

Set parameter Username
Set parameter LicenseID to value 2642044
Academic license - for non-commercial use only - expires 2026-03-25
['BIOMASS_Ecoli_core_w_GAM']
(0.0, 1000.0)


### Modify model and sample

In [3]:
# Derive two model variants from the same base model; the calls differ only in
# the `optimal_percentage` passed to modify_model.
ec_cobra_model_condition_100, ec_dingo_model_condition_100 = modify_model(
    ec_cobra_model,
    objective_function="BIOMASS_Ecoli_core_w_GAM",
    optimal_percentage=100,
)
# NOTE(review): the variables are named "condition_0" but optimal_percentage
# is 10 — confirm which of the two is intended.
ec_cobra_model_condition_0, ec_dingo_model_condition_0 = modify_model(
    ec_cobra_model,
    objective_function="BIOMASS_Ecoli_core_w_GAM",
    optimal_percentage=10,
)

# Alternative sampling with sample_dingo (currently disabled):
#samples_dingo_condition_100 = sample_dingo(ec_dingo_model_condition_100, reaction_in_rows = True, ess=2000)
#print(samples_dingo_condition_100.shape)
#samples_dingo_condition_0 = sample_dingo(ec_dingo_model_condition_0, reaction_in_rows = True, ess=2000)
#print(samples_dingo_condition_0.shape)

# Sample both variants with sample_optgp using identical settings.
samples_optgp_condition_100 = sample_optgp(
    ec_cobra_model_condition_100,
    n_samples=3000,
    thinning=100,
    reaction_in_rows=True,
)
samples_optgp_condition_0 = sample_optgp(
    ec_cobra_model_condition_0,
    n_samples=3000,
    thinning=100,
    reaction_in_rows=True,
)



Read LP format model from file /tmp/tmpw9ft__ik.lp
Reading time = 0.02 seconds
: 72 rows, 190 columns, 720 nonzeros


Read LP format model from file /tmp/tmpjkas_u51.lp
Reading time = 0.01 seconds
: 72 rows, 190 columns, 720 nonzeros
Read LP format model from file /tmp/tmpb0hn354j.lp
Reading time = 0.01 seconds
: 72 rows, 190 columns, 720 nonzeros
Read LP format model from file /tmp/tmppgq_pjnw.lp
Reading time = 0.00 seconds
: 72 rows, 190 columns, 720 nonzeros


### Map BiGG reactions to their corresponding KEGG pathways

In [4]:
# First pass: build the BiGG -> KEGG dictionary from the cobra model itself.
initial_bigg_to_kegg_dictionary = map_model_to_kegg_reactions(ec_cobra_model)

# Read the reactions JSON file; the second returned value is used below to
# fill in entries missing from the first pass.
reactions_json, reactions_pandas = read_json_file(
    "../ext_data/reactions/reactions.json"
)

final_bigg_to_kegg_dictionary = fill_missing_kegg_ids_in_dict(
    initial_bigg_to_kegg_dictionary,
    reactions_pandas,
)

# Resolve KEGG pathway information for the mapped reaction IDs.
df_kegg_pathways = get_kegg_pathways_from_reaction_ids(
    final_bigg_to_kegg_dictionary
)




### Subset reactions from specific pathways in the sampling dataframe

In [5]:

# Define pathways from KEGG
glycolysis = subset_model_reactions_from_pathway_info(df_kegg_pathways, "Glycolysis")
ppp = subset_model_reactions_from_pathway_info(df_kegg_pathways, "Pentose")


# Alternatively define pathways manually
#glycolysis = ["PGI", "PFK", "FBA", "TPI", "GAPD", "PGK", "PGM", "ENO", "PYK"]
#ppp = ["G6PDH2r", "PGL", "GND", "RPE", "RPI", "TKT1", "TKT2", "TALA"]


# Combine the reactions of both pathways; the result may list a reaction more
# than once, which is handled by the de-duplication step below.
reactions_in_pathways_ordered_duplicates = sort_reactions_in_pathways_by_reactions_in_model_order(ec_dingo_reactions, glycolysis, ppp)

# Remove duplicates while keeping the first occurrence of each reaction in
# its original position. dict keys are unique and preserve insertion order, so
# this replaces the former side-effect list comprehension (which built a
# throwaway list of None and did a linear membership scan per element).
# Assumes the reaction IDs are hashable (e.g. strings) — TODO confirm.
reactions_in_pathways_ordered = list(dict.fromkeys(reactions_in_pathways_ordered_duplicates))


# Restrict the condition-100 OptGP samples to the selected pathway reactions.
subset_pathways_optgp_condition_100 = subset_sampling_df_from_reaction_ids(
    samples_optgp_condition_100,
    ec_dingo_reactions,
    subset_reactions=reactions_in_pathways_ordered,
)



### Split reactions having both positive and negative flux values (reversible reactions) into two separate reactions (forward and reverse).

In [6]:

# Alternative input (disabled): the full condition_0 samples with all reactions.
#extended_steady_states, extended_reactions = split_forward_reverse(samples_optgp_condition_0, reactions=ec_dingo_reactions)
extended_steady_states, extended_reactions = split_forward_reverse(
    subset_pathways_optgp_condition_100,
    reactions=reactions_in_pathways_ordered,
)

# Collect reversibility, reactants and products for the extended reaction list.
(
    reversibility_list_all_reactions,
    reactants_list_all_reactions,
    products_list_all_reactions,
) = find_reactants_products(ec_cobra_model, extended_reactions)

# Print the lengths of two of the returned lists as a quick consistency check.
print(len(reversibility_list_all_reactions), len(reactants_list_all_reactions))



21 21


### Calculate a boolean square matrix whose dimension equals the number of reactions (here, the reactions after the forward/reverse split). A "True" entry in the 2D array marks a pair of reactions that share a reactant or product. This matrix is used to filter out correlations between reactions that do not share any metabolites.

In [7]:

# Build the boolean reaction-by-reaction matrix from the lists computed above.
boolean_sharing_metabolites_matrix = sharing_metabolites_square_matrix(
    extended_reactions,
    reversibility_list_all_reactions,
    reactants_list_all_reactions,
    products_list_all_reactions,
)

# Inspect the matrix contents and its dimensions.
print(boolean_sharing_metabolites_matrix)
print(boolean_sharing_metabolites_matrix.shape)


[[ True False False False False False False False False False False False
  False False False False False  True  True False False]
 [False  True  True False  True  True False False False False False False
  False False False False False False False  True False]
 [False  True  True False  True  True False False False False False False
  False False False False False False False  True False]
 [False False False  True False False False False False False False False
  False False False False False False False False  True]
 [False  True  True False  True  True False False False False False False
  False False  True False False False False False False]
 [False  True  True False  True  True False False False False False False
  False False  True False False False False False False]
 [False False False False False False  True  True False False False False
  False False False False False False False False False]
 [False False False False False False  True  True False False False False
  False F

### Compute pairwise linear and non-linear correlations from flux samples and filter (remove) correlations between reactions not sharing any metabolites

In [8]:


# Pairwise correlations, passing the sharing-metabolites matrix so that
# correlated_reactions can use it as a filter.
(
    linear_correlation_matrix,
    non_linear_correlation_matrix,
    mixed_correlation_matrix,
    correlations_dictionary,
) = correlated_reactions(
    steady_states=extended_steady_states,
    boolean_sharing_metabolites_matrix=boolean_sharing_metabolites_matrix,
    reactions=extended_reactions,
    linear_corr_cutoff=0.3,
    indicator_cutoff=1.2,
    jensenshannon_cutoff=0.05,
    std_cutoff=1e-2,
    include_non_linear=True,
    cells=5,
    cop_coeff=0.2,
    lower_triangle=False,
    verbose=True,
)




Completed the process of 13 from 103 copulas
Completed the process of 14 from 103 copulas
Completed the process of 15 from 103 copulas
Completed the process of 16 from 103 copulas
Completed the process of 17 from 103 copulas
Completed the process of 18 from 103 copulas
Completed the process of 30 from 103 copulas
Completed the process of 31 from 103 copulas
Completed the process of 32 from 103 copulas
Completed the process of 33 from 103 copulas
Completed the process of 34 from 103 copulas
Completed the process of 35 from 103 copulas
Completed the process of 38 from 103 copulas
Completed the process of 42 from 103 copulas
Completed the process of 44 from 103 copulas
Completed the process of 47 from 103 copulas
Completed the process of 49 from 103 copulas
Completed the process of 52 from 103 copulas
Completed the process of 54 from 103 copulas
Completed the process of 57 from 103 copulas
Completed the process of 59 from 103 copulas
Completed the process of 62 from 103 copulas
Completed 

### Show some pairs of reactions with extreme non-linear dependencies

In [9]:

# Keep pairs whose 'jensenshannon' value exceeds 0.10 and whose 'indicator'
# exceeds 1.2.
filtered_positive = dict(
    (pair, metrics)
    for pair, metrics in correlations_dictionary.items()
    if metrics['jensenshannon'] > 0.10 and metrics['indicator'] > 1.2
)

print(filtered_positive)
print(filtered_positive.keys())


# Keep pairs whose 'jensenshannon' value is below -0.10 and whose 'indicator'
# is below 1.2.
# NOTE(review): the indicator bound here is the same 1.2 used for the positive
# filter — confirm whether a reciprocal cutoff (1 / 1.2) was intended.
filtered_negative = dict(
    (pair, metrics)
    for pair, metrics in correlations_dictionary.items()
    if metrics['jensenshannon'] < -0.10 and metrics['indicator'] < 1.2
)

print(filtered_negative)
print(filtered_negative.keys())


{'PYK~PGK': {'pearson': 0, 'jensenshannon': 0.1205120608688758, 'indicator': 1.558629773813057, 'classification': 'positive_upper_lower_tail'}, 'PYK~PGL': {'pearson': 0, 'jensenshannon': 0.12099520444165601, 'indicator': 1.5646437972375575, 'classification': 'positive_upper_lower_tail'}, 'PYK~PGM': {'pearson': 0, 'jensenshannon': 0.11982182093593541, 'indicator': 1.556578945171399, 'classification': 'positive_upper_lower_tail'}, 'RPE~PYK': {'pearson': 0, 'jensenshannon': 0.12072898313275361, 'indicator': 1.5667106978862195, 'classification': 'positive_upper_lower_tail'}, 'RPI_rev~PYK': {'pearson': 0, 'jensenshannon': 0.12099520444165604, 'indicator': 1.5646437972375578, 'classification': 'positive_upper_lower_tail'}, 'TKT1~PYK': {'pearson': 0, 'jensenshannon': 0.12089054595870595, 'indicator': 1.5667106978862195, 'classification': 'positive_upper_lower_tail'}, 'GND~PYK': {'pearson': 0, 'jensenshannon': 0.12099520444165604, 'indicator': 1.5646437972375578, 'classification': 'positive_up

### Plot 3 correlation matrices, (a) only with linear correlations, (b) only with non-linear correlations and (c) with both linear and non-linear correlations

In [10]:

# String IDs of every reaction in the cobra model.
# NOTE(review): cobra_reactions_str is not used by the plotting calls in this
# cell — confirm whether it is still needed.
cobra_reactions_str = [str(rxn.id) for rxn in ec_cobra_model.reactions]

# One plot per matrix: linear, non-linear, and mixed correlations.
plot_correlation_matrix(
    linear_correlation_matrix,
    extended_reactions,
    label_font_size=8,
)
plot_correlation_matrix(
    non_linear_correlation_matrix,
    extended_reactions,
    label_font_size=8,
)
plot_correlation_matrix(
    mixed_correlation_matrix,
    extended_reactions,
    label_font_size=8,
)


### Calculate pairwise correlations without filtering for shared metabolites

In [11]:

# Same call as the filtered run, except that no sharing-metabolites matrix is
# supplied (boolean_sharing_metabolites_matrix=None).
(
    linear_correlation_matrix_no_metabolites,
    non_linear_correlation_matrix_no_metabolites,
    mixed_correlation_matrix_no_metabolites,
    correlations_dictionary_no_metabolites,
) = correlated_reactions(
    steady_states=extended_steady_states,
    boolean_sharing_metabolites_matrix=None,
    reactions=extended_reactions,
    linear_corr_cutoff=0.3,
    indicator_cutoff=1.2,
    jensenshannon_cutoff=0.05,
    std_cutoff=1e-2,
    include_non_linear=True,
    cells=5,
    cop_coeff=0.2,
    lower_triangle=False,
    verbose=True,
)


Completed the process of 13 from 103 copulas
Completed the process of 14 from 103 copulas
Completed the process of 15 from 103 copulas
Completed the process of 16 from 103 copulas
Completed the process of 17 from 103 copulas
Completed the process of 18 from 103 copulas
Completed the process of 30 from 103 copulas
Completed the process of 31 from 103 copulas
Completed the process of 32 from 103 copulas
Completed the process of 33 from 103 copulas
Completed the process of 34 from 103 copulas
Completed the process of 35 from 103 copulas
Completed the process of 38 from 103 copulas
Completed the process of 42 from 103 copulas
Completed the process of 44 from 103 copulas
Completed the process of 47 from 103 copulas
Completed the process of 49 from 103 copulas
Completed the process of 52 from 103 copulas
Completed the process of 54 from 103 copulas
Completed the process of 57 from 103 copulas
Completed the process of 59 from 103 copulas
Completed the process of 62 from 103 copulas
Completed 

In [12]:

# Pairs from the unfiltered run whose 'jensenshannon' value exceeds 0.1 and
# whose 'indicator' exceeds 1.2.
filtered_g = dict(
    (pair, metrics)
    for pair, metrics in correlations_dictionary_no_metabolites.items()
    if metrics['jensenshannon'] > 0.1 and metrics['indicator'] > 1.2
)

print(filtered_g)
print(filtered_g.keys())


# Pairs from the unfiltered run whose 'jensenshannon' value is below -0.1 and
# whose 'indicator' is below 1.2.
# NOTE(review): the indicator bound is the same 1.2 as in the filter above —
# confirm whether a reciprocal cutoff (1 / 1.2) was intended.
filtered_l = dict(
    (pair, metrics)
    for pair, metrics in correlations_dictionary_no_metabolites.items()
    if metrics['jensenshannon'] < -0.1 and metrics['indicator'] < 1.2
)

print(filtered_l)
print(filtered_l.keys())


{'PYK~PGK': {'pearson': 0, 'jensenshannon': 0.1205120608688758, 'indicator': 1.558629773813057, 'classification': 'positive_upper_lower_tail'}, 'PYK~PGL': {'pearson': 0, 'jensenshannon': 0.12099520444165601, 'indicator': 1.5646437972375575, 'classification': 'positive_upper_lower_tail'}, 'PYK~PGM': {'pearson': 0, 'jensenshannon': 0.11982182093593541, 'indicator': 1.556578945171399, 'classification': 'positive_upper_lower_tail'}, 'RPE~PYK': {'pearson': 0, 'jensenshannon': 0.12072898313275361, 'indicator': 1.5667106978862195, 'classification': 'positive_upper_lower_tail'}, 'RPI_rev~PYK': {'pearson': 0, 'jensenshannon': 0.12099520444165604, 'indicator': 1.5646437972375578, 'classification': 'positive_upper_lower_tail'}, 'TKT1~PYK': {'pearson': 0, 'jensenshannon': 0.12089054595870595, 'indicator': 1.5667106978862195, 'classification': 'positive_upper_lower_tail'}, 'GND~PYK': {'pearson': 0, 'jensenshannon': 0.12099520444165604, 'indicator': 1.5646437972375578, 'classification': 'positive_up

In [13]:

# String IDs of every reaction in the cobra model.
# NOTE(review): cobra_reactions_str is not used by the plotting calls in this
# cell — confirm whether it is still needed.
cobra_reactions_str = [str(rxn.id) for rxn in ec_cobra_model.reactions]

# One plot per matrix from the run without the sharing-metabolites filter.
plot_correlation_matrix(
    linear_correlation_matrix_no_metabolites,
    extended_reactions,
    label_font_size=8,
)
plot_correlation_matrix(
    non_linear_correlation_matrix_no_metabolites,
    extended_reactions,
    label_font_size=8,
)
plot_correlation_matrix(
    mixed_correlation_matrix_no_metabolites,
    extended_reactions,
    label_font_size=8,
)
