### Load libraries and functions

In [1]:

import sys
sys.path.insert(0, '../src/')

from loopless_utils import loops_enumeration_from_fva

from load_modify_sample_utils import load_model, get_objective_functions, get_reaction_bounds, modify_model
from load_modify_sample_utils import sample_optgp

from distributions_comparison_utils import significantly_altered_reactions, plot_volcano, dictionary_reaction_to_all_pathways 
from distributions_comparison_utils import plot_pathway_enrichment, hypergeometric_test_pathway_enrichment

from pathways_utils import dictionary_reaction_id_to_kegg_id, get_kegg_pathways_from_reaction_ids
from pathways_utils import map_model_to_kegg_reactions_dictionary, read_json_file, fill_missing_kegg_ids_in_initial_dictionary


### Load and inspect model (for more info see `load_modify_samply.ipynb`)

In [2]:

# Load the E. coli core model. load_model returns three objects, which are
# stored under the names used by the rest of the notebook.
(
    ec_cobra_model,
    ec_cobra_reactions,
    ec_cobra_reaction_ids,
) = load_model("../ext_data/models/e_coli_core.xml")

# Show the objective function(s) currently set on the loaded model.
objective_functions = get_objective_functions(ec_cobra_model)
print(objective_functions)

# Show the default bounds of the model's reactions (a dict keyed by reaction ID).
default_reaction_bounds = get_reaction_bounds(ec_cobra_model)
print(default_reaction_bounds)


Set parameter Username
Set parameter LicenseID to value 2642044
Academic license - for non-commercial use only - expires 2026-03-25
['BIOMASS_Ecoli_core_w_GAM']
{'PFK': (0.0, 1000.0), 'PFL': (0.0, 1000.0), 'PGI': (-1000.0, 1000.0), 'PGK': (-1000.0, 1000.0), 'PGL': (0.0, 1000.0), 'ACALD': (-1000.0, 1000.0), 'AKGt2r': (-1000.0, 1000.0), 'PGM': (-1000.0, 1000.0), 'PIt2r': (-1000.0, 1000.0), 'ALCD2x': (-1000.0, 1000.0), 'ACALDt': (-1000.0, 1000.0), 'ACKr': (-1000.0, 1000.0), 'PPC': (0.0, 1000.0), 'ACONTa': (-1000.0, 1000.0), 'ACONTb': (-1000.0, 1000.0), 'ATPM': (8.39, 1000.0), 'PPCK': (0.0, 1000.0), 'ACt2r': (-1000.0, 1000.0), 'PPS': (0.0, 1000.0), 'ADK1': (-1000.0, 1000.0), 'AKGDH': (0.0, 1000.0), 'ATPS4r': (-1000.0, 1000.0), 'PTAr': (-1000.0, 1000.0), 'PYK': (0.0, 1000.0), 'BIOMASS_Ecoli_core_w_GAM': (0.0, 1000.0), 'PYRt2': (-1000.0, 1000.0), 'CO2t': (-1000.0, 1000.0), 'RPE': (-1000.0, 1000.0), 'CS': (0.0, 1000.0), 'RPI': (-1000.0, 1000.0), 'SUCCt2_2': (0.0, 1000.0), 'CYTBD': (0.0, 1000.

### Modify the model to create two different conditions (for more info see `load_modify_samply.ipynb`)

In [3]:

# Arguments shared by both conditions; only `optimal_percentage` differs.
biomass_objective_settings = {
    "cobra_model": ec_cobra_model,
    "objective_function": "BIOMASS_Ecoli_core_w_GAM",
    "objective_direction": "max",
}

# Condition 1: optimal percentage set to 100.
ec_cobra_model_condition_100 = modify_model(
    optimal_percentage=100,
    **biomass_objective_settings,
)

# Inspect the objective and the biomass reaction bounds of the modified model.
updated_objective_functions = get_objective_functions(ec_cobra_model_condition_100)
print(updated_objective_functions)

updated_reaction_bounds = get_reaction_bounds(ec_cobra_model_condition_100)
print(updated_reaction_bounds.get("BIOMASS_Ecoli_core_w_GAM"))

# -----------

# Condition 2: optimal percentage set to 0.
ec_cobra_model_condition_0 = modify_model(
    optimal_percentage=0,
    **biomass_objective_settings,
)

# Same inspection for the second condition. The `updated_*` names are reused,
# so after this cell they describe the 0 % model.
updated_objective_functions = get_objective_functions(ec_cobra_model_condition_0)
print(updated_objective_functions)

updated_reaction_bounds = get_reaction_bounds(ec_cobra_model_condition_0)
print(updated_reaction_bounds.get("BIOMASS_Ecoli_core_w_GAM"))


Read LP format model from file /tmp/tmpdnrl67ek.lp
Reading time = 0.00 seconds
: 72 rows, 190 columns, 720 nonzeros
['BIOMASS_Ecoli_core_w_GAM']
(0.872922, 1000)
Read LP format model from file /tmp/tmp4ezbatcq.lp
Reading time = 0.00 seconds
: 72 rows, 190 columns, 720 nonzeros
['BIOMASS_Ecoli_core_w_GAM']
(0.0, 1000)


### Identify loopy reactions in model (for more info see `loopless.ipynb`)

In [None]:

# Condition 100: enumerate loopy reactions with fraction_of_optimum=0.999.
loopy_reactions_fva_100 = loops_enumeration_from_fva(
    ec_cobra_model_condition_100,
    fraction_of_optimum=0.999,
)
print(loopy_reactions_fva_100)

# Keep only the first element of every returned entry (the reaction ID).
loopy_reactions_100 = [reaction_id for reaction_id, *_ in loopy_reactions_fva_100]
print(loopy_reactions_100)


# Condition 0: same enumeration with fraction_of_optimum=0.
loopy_reactions_fva_0 = loops_enumeration_from_fva(
    ec_cobra_model_condition_0,
    fraction_of_optimum=0,
)
print(loopy_reactions_fva_0)

loopy_reactions_0 = [reaction_id for reaction_id, *_ in loopy_reactions_fva_0]
print(loopy_reactions_0)


[('SUCDi', 994.7794007141792), ('FRD7', 995.0539767141795)]
[('SUCDi', 980.0), ('FRD7', 1000.0)]


### Remove the loopy reaction `FRD7` from the 2 models created above to reduce thermodynamically infeasible solutions during sampling

In [5]:

# Block the loopy reaction "FRD7" in both condition models by fixing its
# bounds to (0, 0).
for condition_model in (ec_cobra_model_condition_100, ec_cobra_model_condition_0):
    condition_model.reactions.get_by_id("FRD7").bounds = (0, 0)


### Perform sampling on the modified models with the loopy reaction "FRD7" removed. (for more info see `load_modify_samply.ipynb`)

In [6]:

# Sampling settings shared by both conditions.
# NOTE(review): no random seed is passed, so repeated runs presumably yield
# different samples; check whether sample_optgp accepts a seed.
optgp_sampling_settings = {
    "n_samples": 3000,
    "thinning": 100,
    "reaction_in_rows": True,
}

# Sample the 100 % condition first, then the 0 % condition.
samples_optgp_condition_100 = sample_optgp(
    ec_cobra_model_condition_100,
    **optgp_sampling_settings,
)


samples_optgp_condition_0 = sample_optgp(
    ec_cobra_model_condition_0,
    **optgp_sampling_settings,
)


Read LP format model from file /tmp/tmpuzst9nbq.lp
Reading time = 0.00 seconds
: 72 rows, 190 columns, 720 nonzeros


Read LP format model from file /tmp/tmpmbtyrfpm.lp
Reading time = 0.00 seconds
: 72 rows, 190 columns, 720 nonzeros


### Map KEGG pathway information to reaction BiGG IDs

Functions that are used to map information from KEGG pathways to the model reactions (BiGG/SEED ids). For more info see `pathways.ipynb`

In [7]:

# Step 1: initial reaction-to-KEGG dictionary built from the loaded model.
initial_bigg_to_kegg_dictionary = map_model_to_kegg_reactions_dictionary(
    ec_cobra_model
)

# Step 2: read the external reactions file; it is returned in two forms.
reactions_json, reactions_pandas = read_json_file(
    "../ext_data/reactions/reactions.json"
)

# Step 3: derive the BiGG and SEED lookup tables to KEGG IDs from that file.
bigg_to_kegg, seed_to_kegg = dictionary_reaction_id_to_kegg_id(reactions_pandas)

# Step 4: fill KEGG IDs missing from the initial dictionary using the lookups
# (the model uses BiGG identifiers, hence modeltype="BiGG").
final_bigg_to_kegg_dictionary = fill_missing_kegg_ids_in_initial_dictionary(
    initial_bigg_to_kegg_dictionary,
    modeltype="BiGG",
    bigg_to_kegg=bigg_to_kegg,
    seed_to_kegg=seed_to_kegg,
)

# Step 5: collect the KEGG pathway information for the mapped reactions.
df_kegg_pathways = get_kegg_pathways_from_reaction_ids(
    final_bigg_to_kegg_dictionary
)


### Compare flux distributions of reactions across conditions with a Kolmogorov–Smirnov (KS) non-parametric test

The `significantly_altered_reactions` function takes as input at least 2 flux sampling conditions to compare and identifies significantly altered reactions.

It performs a Kolmogorov–Smirnov (KS) non-parametric test and corrects the p-values for multiple comparisons.

It additionally calculates a fold change that together with the p-value classifies reactions as significantly altered or not.

It returns 2 lists: one with the significantly changed reactions (`significant_diff_reactions`) and one with the not significantly changed reactions (`not_significant_diff_reactions`). It also returns 2 dictionaries mapping reaction IDs to corrected p-values (`pval_dict`) and fold change values (`fold_change_dict`).

In [8]:

# Sampling results to compare: index 0 is the 100 % condition, index 1 the 0 % one.
conditions = [samples_optgp_condition_100, samples_optgp_condition_0]

# Pairs of indices into `conditions` that should be compared.
selected_comparisons = [(0, 1)]

# Run the comparison, then unpack the four returned objects.
differential_flux_results = significantly_altered_reactions(
    conditions,
    selected_comparisons,
    cobra_model=ec_cobra_model,
    fold_change_cutoff=0.6,
    std_cutoff=1e-3,
)
(
    significant_diff_reactions,
    not_significant_diff_reactions,
    pval_dict,
    fold_change_dict,
) = differential_flux_results

# Report which reactions were flagged as significantly altered, and how many.
print(significant_diff_reactions)
print(len(significant_diff_reactions))


['PFL', 'ACALD', 'AKGt2r', 'PIt2r', 'ALCD2x', 'ACALDt', 'ACKr', 'PPC', 'PPCK', 'ACt2r', 'PPS', 'ADK1', 'PTAr', 'PYK', 'BIOMASS_Ecoli_core_w_GAM', 'PYRt2', 'SUCCt2_2', 'D_LACt2', 'SUCCt3', 'ETOHt2r', 'THD2', 'EX_ac_e', 'EX_acald_e', 'EX_akg_e', 'EX_etoh_e', 'EX_for_e', 'EX_glu__L_e', 'EX_lac__D_e', 'EX_nh4_e', 'EX_pi_e', 'EX_pyr_e', 'EX_succ_e', 'FBP', 'FORt2', 'FORt', 'GLNS', 'GLUDy', 'GLUN', 'GLUSy', 'GLUt2r', 'ICL', 'LDH_D', 'MALS', 'MDH', 'ME1', 'ME2', 'NADTRHD', 'NH4t']
48


### Volcano plot of the differential flux analysis results

Volcano plot with Fold Change on the x-axis and -log10(p_value) on the y-axis. Users can provide a reactions list in the `annotate` parameter, to show reaction IDs on the plot. Also, lines showing the significance cutoffs may optionally be added, when providing `p_value_cutoff`, `fold_change_cutoff` and `show_cutoff_lines`.

In [9]:

# Reaction IDs that should be labelled on the plot.
reactions_to_annotate = ["NH4t"]

# Volcano plot of the corrected p-values against the fold changes, with the
# cutoff lines drawn at the given p-value and fold-change cutoffs.
plot_volcano(
    pval_dict,
    fold_change_dict,
    p_value_cutoff=0.05,
    fold_change_cutoff=0.6,
    annotate=reactions_to_annotate,
    width=800,
    height=600,
    title="",
    show_cutoff_lines=True,
)


### Create a dictionary mapping reaction IDs (keys) to all the pathways they belong to (values)

In [10]:

# Build the reaction ID -> list of pathway names dictionary from the KEGG
# pathways table, then print it for inspection.
reaction_to_pathways = dictionary_reaction_to_all_pathways(
    df_kegg_pathways
)
print(reaction_to_pathways)


{'PGI': ['Starch and sucrose metabolism', 'Metabolic pathways', 'Biosynthesis of secondary metabolites'], 'PGM': ['Glycolysis / Gluconeogenesis', 'Glycine, serine and threonine metabolism', 'Methane metabolism', 'Metabolic pathways', 'Biosynthesis of secondary metabolites', 'Microbial metabolism in diverse environments', 'Carbon metabolism', 'Biosynthesis of amino acids'], 'PGL': ['Pentose phosphate pathway', 'Metabolic pathways', 'Biosynthesis of secondary metabolites', 'Microbial metabolism in diverse environments', 'Carbon metabolism'], 'PFL': ['Pyruvate metabolism', 'Butanoate metabolism', 'Metabolic pathways', 'Microbial metabolism in diverse environments'], 'PFK': ['Glycolysis / Gluconeogenesis', 'Pentose phosphate pathway', 'Fructose and mannose metabolism', 'Methane metabolism', 'Metabolic pathways', 'Biosynthesis of secondary metabolites', 'Microbial metabolism in diverse environments', 'Carbon metabolism', 'Biosynthesis of amino acids'], 'PGK': ['Glycolysis / Gluconeogenesis'

### Perform hypergeometric test for pathway enrichment

The `hypergeometric_test_pathway_enrichment` function performs a hypergeometric test to identify significantly affected pathways between our sampling conditions. It also calculates fold enrichment and p-values, which are useful for filtering significant changes and for plotting.

In [11]:

# Test pathway enrichment of the significantly altered reactions, passing the
# model's full list of reaction IDs and the reaction-to-pathways dictionary.
hypergeometric_pathway_enrichment_df = hypergeometric_test_pathway_enrichment(
    significant_diff_reactions,
    ec_cobra_reaction_ids,
    reaction_to_pathways,
)

print(hypergeometric_pathway_enrichment_df)


                                         pathway  \
10                           Pyruvate metabolism   
31   Alanine, aspartate and glutamate metabolism   
32                           Nitrogen metabolism   
30                         Arginine biosynthesis   
23            Taurine and hypotaurine metabolism   
5                             Methane metabolism   
17                            Xylene degradation   
26                     Biosynthesis of cofactors   
18             Degradation of aromatic compounds   
16                            Dioxin degradation   
15                          Benzoate degradation   
14                      Phenylalanine metabolism   
34        Nicotinate and nicotinamide metabolism   
11                          Butanoate metabolism   
21       Glyoxylate and dicarboxylate metabolism   
1                             Metabolic pathways   
19                Other carbon fixation pathways   
6   Microbial metabolism in diverse environments   
13          

### Create a bubbleplot of the enrichment results to show affected pathways.

The `plot_pathway_enrichment` function takes as input the `hypergeometric_pathway_enrichment_df` dataframe created with the `hypergeometric_test_pathway_enrichment` function and plots the enrichment results in a bubble plot. Bubble size stands for pathway size (number of reactions) and bubble colour stands for -log10(p-value).

In [12]:

# Bubble plot of the enrichment results.
# NOTE(review): a raw p-value cannot exceed 1, so pval_threshold=2 is presumably
# on the -log10 scale or effectively disables filtering; confirm against
# plot_pathway_enrichment.
plot_pathway_enrichment(
    hypergeometric_pathway_enrichment_df,
    pval_threshold=2,
)
