In [2]:

import sys
sys.path.insert(0, '../src/')

from load_modify_sample_utils import load_model, get_objective_functions, get_reaction_bounds, modify_model
from load_modify_sample_utils import sample_dingo, sample_optgp

from correlations_utils import correlated_reactions, plot_correlation_matrix, split_forward_reverse
from correlations_utils import find_reactants_products, sharing_metabolites_square_matrix

from clustering_utils import clustering_of_correlation_matrix, plot_dendrogram

from pathways_utils import sort_reactions_in_pathways_by_reactions_in_model_order, subset_sampling_df_from_reaction_ids
from pathways_utils import map_model_to_kegg_reactions, read_json_file, bigg_to_kegg_id, fill_missing_kegg_ids_in_dict
from pathways_utils import get_kegg_pathways_from_reaction_ids, subset_model_reactions_from_pathway_info
from pathways_utils import dictionary_bigg_id_to_pathway_names, dictionary_forward_reverse_bigg_id_to_pathway_names
from pathways_utils import reaction_in_pathway_binary_matrix, plot_reaction_in_pathway_heatmap


  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


In [3]:
# Load the E. coli core model. `load_model` hands back four objects, which the
# names below treat as a cobra model + its reactions and a dingo model + its
# reactions (naming follows the helper's return order as used in this notebook).
model_file = "../ext_data/models/e_coli_core.xml"
(
    ec_cobra_model,
    ec_cobra_reactions,
    ec_dingo_model,
    ec_dingo_reactions,
) = load_model(model_file)

# Objective function(s) currently set on the cobra model.
objective_functions = get_objective_functions(ec_cobra_model)
print(objective_functions)

# Default flux bounds, looked up for the biomass reaction only.
default_reaction_bounds = get_reaction_bounds(ec_cobra_model)
biomass_bounds = default_reaction_bounds.get("BIOMASS_Ecoli_core_w_GAM")
print(biomass_bounds)

Set parameter Username
Set parameter LicenseID to value 2642044
Academic license - for non-commercial use only - expires 2026-03-25
['BIOMASS_Ecoli_core_w_GAM']
(0.0, 1000.0)


In [4]:
# Settings shared by both conditions, named once so the two calls cannot drift apart.
biomass_objective = "BIOMASS_Ecoli_core_w_GAM"
optgp_settings = dict(n_samples=3000, thinning=100, reaction_in_rows=True)

# Condition "100": biomass objective with optimal_percentage=100.
ec_cobra_model_condition_100, ec_dingo_model_condition_100 = modify_model(
    ec_cobra_model,
    objective_function=biomass_objective,
    optimal_percentage=100,
)

# NOTE(review): the variables are suffixed `_condition_0`, but the call uses
# optimal_percentage=10 — confirm whether the name or the value is the intended one.
ec_cobra_model_condition_0, ec_dingo_model_condition_0 = modify_model(
    ec_cobra_model,
    objective_function=biomass_objective,
    optimal_percentage=10,
)

# Draw OptGP samples from each modified cobra model.
# NOTE(review): no random seed is passed here, so repeated runs may produce
# different samples — check whether `sample_optgp` exposes a seed argument.
samples_optgp_condition_100 = sample_optgp(ec_cobra_model_condition_100, **optgp_settings)
samples_optgp_condition_0 = sample_optgp(ec_cobra_model_condition_0, **optgp_settings)


Read LP format model from file /tmp/tmpbv_ixsdo.lp
Reading time = 0.00 seconds
: 72 rows, 190 columns, 720 nonzeros


Read LP format model from file /tmp/tmpipxqr25j.lp
Reading time = 0.01 seconds
: 72 rows, 190 columns, 720 nonzeros
Read LP format model from file /tmp/tmpxb_hypcv.lp
Reading time = 0.00 seconds
: 72 rows, 190 columns, 720 nonzeros
Read LP format model from file /tmp/tmp0m4nod9u.lp
Reading time = 0.00 seconds
: 72 rows, 190 columns, 720 nonzeros


### Map KEGG pathway information to reaction BiGG IDs

In [5]:
# Step 1: initial BiGG -> KEGG dictionary derived from the cobra model.
initial_bigg_to_kegg_dictionary = map_model_to_kegg_reactions(ec_cobra_model)

# Step 2: read the reactions JSON file; the helper returns it in two forms
# (named here as the raw JSON and a pandas version).
reactions_file = "../ext_data/reactions/reactions.json"
reactions_json, reactions_pandas = read_json_file(reactions_file)

# Step 3: fill the KEGG ids that are still missing, using the pandas reactions table.
final_bigg_to_kegg_dictionary = fill_missing_kegg_ids_in_dict(
    initial_bigg_to_kegg_dictionary,
    reactions_pandas,
)

# Step 4: pathway information for the completed BiGG -> KEGG dictionary.
df_kegg_pathways = get_kegg_pathways_from_reaction_ids(final_bigg_to_kegg_dictionary)



### Subset sampling dataframe to include only reactions from pathways of interest

In [6]:

# Select the reactions of each pathway of interest automatically from the KEGG
# pathway table; the second argument is the pathway name (fragment) to look up.
glycolysis = subset_model_reactions_from_pathway_info(df_kegg_pathways, "Glycolysis")
ppp = subset_model_reactions_from_pathway_info(df_kegg_pathways, "Pentose")

# Combine both pathway reaction lists, ordered by the helper relative to
# `ec_dingo_reactions`. A reaction may appear more than once in the result
# (presumably when it belongs to both pathways — hence the de-duplication below).
reactions_in_pathways_ordered_duplicates = sort_reactions_in_pathways_by_reactions_in_model_order(
    ec_dingo_reactions,
    glycolysis,
    ppp,
)

# Remove duplicates while keeping the first occurrence of every reaction id.
# `dict.fromkeys` preserves insertion order, so this yields the same list as an
# explicit "append if not seen" loop, without building a throwaway list of
# `None` values and without quadratic membership tests.
# Assumes the reaction ids are hashable (e.g. strings).
reactions_in_pathways_ordered = list(dict.fromkeys(reactions_in_pathways_ordered_duplicates))

# Restrict the condition-100 OptGP samples to the selected pathway reactions.
subset_pathways_optgp_condition_100 = subset_sampling_df_from_reaction_ids(
    samples_optgp_condition_100,
    ec_dingo_reactions,
    subset_reactions=reactions_in_pathways_ordered,
)



### Split into forward and reverse reactions

In [7]:

# Split the subset of sampled reactions into forward and reverse entries; the
# helper returns the extended samples together with the extended reaction list.
extended_steady_states, extended_reactions = split_forward_reverse(
    subset_pathways_optgp_condition_100,
    reactions=reactions_in_pathways_ordered,
)

# Look up, for every extended reaction, its reversibility, reactants and products.
(
    reversibility_list_all_reactions,
    reactants_list_all_reactions,
    products_list_all_reactions,
) = find_reactants_products(ec_cobra_model, extended_reactions)

# Sanity check: both lists are expected to have the same length.
print(
    len(reversibility_list_all_reactions),
    len(reactants_list_all_reactions),
)


21 21


### Create a dictionary that maps reaction BiGG IDs to their KEGG pathway names

In [8]:

# Dictionary from BiGG reaction id to pathway name(s); each keyword argument
# supplies the reactions of one pathway.
bigg_to_pathway_dict = dictionary_bigg_id_to_pathway_names(
    glycolysis=glycolysis,
    ppp=ppp,
)

# Same mapping, built for the extended (forward/reverse) reaction list.
group_map = dictionary_forward_reverse_bigg_id_to_pathway_names(
    bigg_to_pathway_dict,
    extended_reactions,
)


In [9]:


# Reaction-in-pathway matrix built from the reaction -> pathway group map.
binary_df = reaction_in_pathway_binary_matrix(group_map)

# Heatmap of the matrix above; the title is intentionally left empty here.
plot_reaction_in_pathway_heatmap(
    binary_df,
    font_size=8,
    fig_width=600,
    fig_height=400,
    title="",
)



### Compute correlation matrix

In [10]:

# Cutoffs and options forwarded unchanged to `correlated_reactions`, grouped in
# one place so they are easy to tune.
correlation_settings = dict(
    linear_corr_cutoff=0.3,
    indicator_cutoff=1.2,
    jensenshannon_cutoff=0.05,
    std_cutoff=1e-2,
    include_non_linear=True,
    cells=5,
    cop_coeff=0.2,
    lower_triangle=False,
    verbose=True,  # prints one progress line per processed copula
)

# No metabolite-sharing matrix is supplied (None), which is what the
# `_no_metabolites` suffix on the result names refers to.
correlation_results = correlated_reactions(
    steady_states=extended_steady_states,
    boolean_sharing_metabolites_matrix=None,
    reactions=extended_reactions,
    **correlation_settings,
)

# The helper returns three correlation matrices and a dictionary, in this order.
(
    linear_correlation_matrix_no_metabolites,
    non_linear_correlation_matrix_no_metabolites,
    mixed_correlation_matrix_no_metabolites,
    correlations_dictionary_no_metabolites,
) = correlation_results


Completed the process of 30 from 103 copulas
Completed the process of 31 from 103 copulas
Completed the process of 32 from 103 copulas
Completed the process of 33 from 103 copulas
Completed the process of 34 from 103 copulas
Completed the process of 35 from 103 copulas
Completed the process of 44 from 103 copulas
Completed the process of 49 from 103 copulas
Completed the process of 54 from 103 copulas
Completed the process of 59 from 103 copulas
Completed the process of 64 from 103 copulas
Completed the process of 69 from 103 copulas
Completed the process of 74 from 103 copulas
Completed the process of 75 from 103 copulas
Completed the process of 76 from 103 copulas
Completed the process of 77 from 103 copulas
Completed the process of 78 from 103 copulas
Completed the process of 79 from 103 copulas
Completed the process of 84 from 103 copulas
Completed the process of 85 from 103 copulas
Completed the process of 86 from 103 copulas
Completed the process of 87 from 103 copulas
Completed 

### Clustering and Dendrogram plot

In [11]:

# Cluster the mixed correlation matrix with Ward linkage.
# NOTE(review): this call uses t = 1.0 while the dendrogram below uses t = 5 —
# confirm that the two thresholds are meant to differ.
dissimilarity_matrix_1, labels_1, clusters_1 = clustering_of_correlation_matrix(
    mixed_correlation_matrix_no_metabolites,
    reactions=extended_reactions,
    linkage="ward",
    t=1.0,
    correction=False,
)

# Dendrogram of the dissimilarity matrix, with labels grouped via `group_map`.
# NOTE(review): running this cell emits scipy.cluster's "looks suspiciously like
# an uncondensed distance matrix" warning; presumably the helpers pass a square
# matrix to scipy's linkage — verify inside clustering_utils.
plot_dendrogram(
    dissimilarity_matrix_1,
    extended_reactions,
    show_labels=True,
    t=5,
    linkage="ward",
    group_map=group_map,
    label_fontsize=7,
)



scipy.cluster: The symmetric non-negative hollow observation matrix looks suspiciously like an uncondensed distance matrix


scipy.cluster: The symmetric non-negative hollow observation matrix looks suspiciously like an uncondensed distance matrix

