# Import and Load Libraries

In [1]:
# Standard library imports
import copy         
import re            
import subprocess   
import sys
from pathlib import Path 

# Third-party library imports
import numpy as np             
import pandas as pd             
import matplotlib.pyplot as plt  

# COBRApy 
import cobra                     
from cobra import Reaction       
from cobra.io import (read_sbml_model, save_matlab_model, write_sbml_model)
from cobra.util.array import create_stoichiometric_matrix  # To build stoichiometric matrices from models

# Functions

In [2]:
# Set the project root path by going up from the notebook location
notebook_dir = Path(__file__).parent if '__file__' in globals() else Path().resolve()
project_root = notebook_dir.parent.parent  
sys.path.append(str(project_root))

# Import Helper Functions
from src.utils import (parse_quickcheck_output)

# Define Paths

In [21]:
raw_sbml_path = project_root / "data" / "raw" / "sbml_files"
temp_sbml_path = raw_sbml_path / "temporary_files"
quickcheck_path = project_root / "code"/ "Mplrs_Analysis"/ "quickcheck_sbml.py"
processed_sbml_path = project_root / "data" / "processed" / "sbml_files"

# Import Model

In [4]:
model = read_sbml_model(str(raw_sbml_path / "e_coli_core.xml"))

# Modify Exchange Fluxes

In [5]:
# Run FBA
solution = model.optimize()

# Collect data from exchange reactions
data = []
for rxn in model.exchanges:
    flux = solution.fluxes.get(rxn.id, None)
    included_in_fba = flux is not None and abs(flux) > 1e-20  # Tiny flux ≈ zero

    # Determine Export/Import values
    export = None
    import_ = None
    if included_in_fba:
        export = flux > 0
        import_ = flux < 0
    else:
        # Use bounds as heuristic if not used in FBA
        if rxn.lower_bound == 0 and rxn.upper_bound > 0:
            export = True
            import_ = False
        elif rxn.upper_bound == 0 and rxn.lower_bound < 0:
            export = False
            import_ = True

    data.append({
        'ID': rxn.id,
        'name': rxn.name,
        'LB': rxn.lower_bound,
        'UB': rxn.upper_bound,
        'FBA': included_in_fba,
        'FBA_flux': flux if included_in_fba else None,
        'Export': export,
        'Import': import_
    })

# Convert to DataFrame
exchange_df = pd.DataFrame(data)

# Sort and reset index
exchange_df.sort_values(by='ID', inplace=True)
exchange_df.reset_index(drop=True, inplace=True)

In [6]:
# Version 0: D-Glucose exchange (no need to produce, already there!)

# Version 1: D-Glucose exchange, D-Fructose exchange 

exchange_df.loc[exchange_df['ID'] == "EX_fru_e", 'Import'] = True
exchange_df.loc[exchange_df['ID'] == "EX_fru_e", 'Export'] = False

# Version 2: D-Fructose exchange, L-Glutamine exchange 

exchange_df.loc[exchange_df['ID'] == "EX_gln__L_e", 'Import'] = True
exchange_df.loc[exchange_df['ID'] == "EX_gln__L_e", 'Export'] = False

# Version 3: D-Fructose exchange, L-Glutamine exchange, Fumarate exchange 

exchange_df.loc[exchange_df['ID'] == "EX_fum_e", 'Import'] = True
exchange_df.loc[exchange_df['ID'] == "EX_fum_e", 'Export'] = False

# Version 4: D-Fructose exchange, L-Glutamine exchange, Fumarate exchange, L-Malate exchange 

exchange_df.loc[exchange_df['ID'] == "EX_mal__L_e", 'Import'] = True
exchange_df.loc[exchange_df['ID'] == "EX_mal__L_e", 'Export'] = False

In [7]:
# Check if any IDs are unassigned
missing_ids = exchange_df.loc[exchange_df['Import'].isna(), 'ID'].to_list() # all IDs assigned!

# Modify Directionalities

In [8]:
rev_model = copy.deepcopy(model)

In [9]:
for i, row in exchange_df.iterrows():
    rxn_id = row['ID']
    export = row['Export']
    import_ = row['Import']

    try:
        rxn = rev_model.reactions.get_by_id(rxn_id)
    except KeyError:
        continue

    original_lb = rxn.lower_bound
    original_ub = rxn.upper_bound

    # Handle Export cleanup
    if export and original_lb < 0:
        rxn.lower_bound = 0

    # Handle Import split
    elif import_:
        # Adjust original reaction bounds
        new_bound = abs(original_lb)
        rxn.lower_bound = 0
        rxn.upper_bound = new_bound

        # Create reverse reaction
        rxn_rev = Reaction(id=rxn_id + "_rev")
        rxn_rev.name = rxn.name + " (reversed)"
        rxn_rev.lower_bound = 0
        rxn_rev.upper_bound = new_bound
        rxn_rev.add_metabolites({met: -coeff for met, coeff in rxn.metabolites.items()})
        rev_model.add_reactions([rxn_rev])

        # Handle groups
        for group in rev_model.groups:
            if rxn in group.members:
                group.members.remove(rxn)
                group.members.add(rxn_rev)

        # Remove original reaction from model
        rev_model.reactions.remove(rxn)

In [10]:
write_sbml_model(rev_model, temp_sbml_path / 'revmodel.xml')

In [11]:
# Load models into a list
model_path =  temp_sbml_path / 'revmodel.xml'
models = [read_sbml_model(model_path) for _ in range(5)]

# Define which extra exchanges each model version should allow
# (besides D-glucose which is already present)
exchange_config = {
    1: ["EX_fru_e_rev"],
    2: ["EX_fru_e_rev", "EX_gln__L_e_rev"],
    3: ["EX_fru_e_rev", "EX_gln__L_e_rev", "EX_fum_e_rev"],
    4: ["EX_fru_e_rev", "EX_gln__L_e_rev", "EX_fum_e_rev", "EX_mal__L_e_rev"]
}

# Apply exchange bounds (0 to 5) to the specified reactions
for i, rxn_ids in exchange_config.items():
    for rxn_id in rxn_ids:
        rxn = models[i].reactions.get_by_id(rxn_id)
        rxn.lower_bound = 0
        rxn.upper_bound = 5

# Save each model in the models list with a corresponding filename
for i, model in enumerate(models):
    filename = temp_sbml_path/ f"model_v{i}_quickcheck.xml"
    write_sbml_model(model, filename)

In [12]:
# Run FBA for each model and collect results
results = []

for i, model in enumerate(models):
    solution = model.optimize()  # or pfba(model) if you prefer parsimonious FBA
    results.append({
        "version": f"v{i}",
        "objective_value": solution.objective_value
        # You can add more if needed, like specific fluxes below
        # "EX_glc__D_e": solution.fluxes.get("EX_glc__D_e", None)
    })

# Convert to DataFrame
fba_df = pd.DataFrame(results)

# Integrate QuickCheck Results

In [13]:
# Loop over model versions v0 to v4
for i in range(5):
    model_file = f"{temp_sbml_path}/model_v{i}_quickcheck.xml"
    command = ["python", quickcheck_path, "-s", model_file]
    
    print(f"Running quickcheck for model_v{i}...")
    subprocess.run(command)

Running quickcheck for model_v0...
------------------------------------------------
COBRAPY added 0 exchange reaction
------------------------------------------------
None
reactions: 95 (including reactions added by cobrapy)
reversibilities:  39
metabolites: 72
genes: 137
growth rate: 0.8739215069684295
------------------------------------------------
fraction of optimum: 0.1
------------------------------------------------
Calculating FVA...
Checking reactions...
Writing results to file: /Users/Luca/Desktop/Thesis_Rep/GitHub_Repo/data/raw/sbml_files/temporary_files/model_v0_quickcheck_reactions.txt
------------------------------------------------
Please check the following reactions:
NOTE: FVA results are rounded to 10th decimal, exact values can be found in *_reactions.txt
------------------------------------------------
PGK: 3pg_c + atp_c <=> 13dpg_c + adp_c
lower bound: -1000.0 upper bound: 1000.0
FVA minimum: -19.7676854458 FVA maximum: -1.0959150482 difference: 18.671770397600003

### Round 1

In [14]:
# Loop over model versions v0 to v4
for i in range(5):
    print(f"\n🔧 Processing model_v{i}...")

    # Load model
    model_path = f"{temp_sbml_path}/model_v{i}_quickcheck.xml"
    qc_model = read_sbml_model(model_path)

    # Load quickcheck reaction data
    df_path = f"{temp_sbml_path}/model_v{i}_quickcheck_reactions.txt"
    quickcheck_df = parse_quickcheck_output(df_path)

    # Identify reaction sets
    irrev_list_fwd = quickcheck_df[(quickcheck_df['min'] >= 0) & (quickcheck_df['lb'] < 0)].reaction_id.tolist()
    irrev_list_rev = quickcheck_df[(quickcheck_df['min'] < 0) & (quickcheck_df['max'] <= 0)].reaction_id.tolist()
    delete_list = quickcheck_df[(quickcheck_df['min'] == 0) & (quickcheck_df['max'] == 0)].reaction_id.tolist()

    # 🗑️ Delete blocked reactions
    success_count = 0
    for rxn_id in delete_list:
        try:
            rxn = qc_model.reactions.get_by_id(rxn_id)
            for group in qc_model.groups:
                if rxn in group.members:
                    group.members.remove(rxn)
            metabolites = list(rxn.metabolites.keys())
            qc_model.reactions.remove(rxn)
            for met in metabolites:
                if len(met.reactions) == 0:
                    qc_model.metabolites.remove(met)
            success_count += 1
        except KeyError:
            continue
    print(f"✅ Deleted {success_count} blocked reactions.")

    # 🔁 Process reverse-only irreversible reactions
    success_count = 0
    for rxn_id in irrev_list_rev:
        try:
            rxn = qc_model.reactions.get_by_id(rxn_id)
        except KeyError:
            continue
        new_bound = abs(rxn.lower_bound)
        rxn.lower_bound = 0
        rxn.upper_bound = new_bound
        rxn_rev = Reaction(id=rxn_id + "_rev")
        rxn_rev.name = rxn.name + " (reversed)"
        rxn_rev.lower_bound = 0
        rxn_rev.upper_bound = new_bound
        rxn_rev.add_metabolites({met: -coeff for met, coeff in rxn.metabolites.items()})
        qc_model.add_reactions([rxn_rev])
        for group in qc_model.groups:
            if rxn in group.members:
                group.members.remove(rxn)
                group.members.add(rxn_rev)
        qc_model.reactions.remove(rxn)
        success_count += 1
    print(f"✅ Reversed and replaced {success_count} reverse-only reactions.")

    # ➡️ Adjust forward-only irreversible reactions
    success_count = 0
    for rxn_id in irrev_list_fwd:
        try:
            rxn = qc_model.reactions.get_by_id(rxn_id)
            rxn.lower_bound = 0
            success_count += 1
        except KeyError:
            continue
    print(f"✅ Updated {success_count} forward-only reactions (set lb to 0).")

    # 💾 Save modified model
    export_path = f"{temp_sbml_path}/model_v{i}_quickcheck2.xml"
    write_sbml_model(qc_model, export_path)
    print(f"💾 Saved modified model to {export_path}")



🔧 Processing model_v0...
✅ Deleted 8 blocked reactions.
✅ Reversed and replaced 17 reverse-only reactions.
✅ Updated 10 forward-only reactions (set lb to 0).
💾 Saved modified model to /Users/Luca/Desktop/Thesis_Rep/GitHub_Repo/data/raw/sbml_files/temporary_files/model_v0_quickcheck2.xml

🔧 Processing model_v1...
✅ Deleted 6 blocked reactions.
✅ Reversed and replaced 17 reverse-only reactions.
✅ Updated 10 forward-only reactions (set lb to 0).
💾 Saved modified model to /Users/Luca/Desktop/Thesis_Rep/GitHub_Repo/data/raw/sbml_files/temporary_files/model_v1_quickcheck2.xml

🔧 Processing model_v2...
✅ Deleted 4 blocked reactions.
✅ Reversed and replaced 17 reverse-only reactions.
✅ Updated 9 forward-only reactions (set lb to 0).
💾 Saved modified model to /Users/Luca/Desktop/Thesis_Rep/GitHub_Repo/data/raw/sbml_files/temporary_files/model_v2_quickcheck2.xml

🔧 Processing model_v3...
✅ Deleted 2 blocked reactions.
✅ Reversed and replaced 16 reverse-only reactions.
✅ Updated 7 forward-only r

### Round 2

In [17]:
# Loop over model versions v0 to v4
for i in range(5):
    model_file = f"{temp_sbml_path}/model_v{i}_quickcheck2.xml"
    command = ["python", quickcheck_path, "-s", model_file]
    
    print(f"Running quickcheck for model_v{i}...")
    subprocess.run(command)

Running quickcheck for model_v0...
------------------------------------------------
COBRAPY added 0 exchange reaction
------------------------------------------------
None
reactions: 87 (including reactions added by cobrapy)
reversibilities:  13
metabolites: 72
genes: 137
growth rate: 0.8739215069684311
------------------------------------------------
fraction of optimum: 0.1
------------------------------------------------
Calculating FVA...
Checking reactions...
Writing results to file: /Users/Luca/Desktop/Thesis_Rep/GitHub_Repo/data/raw/sbml_files/temporary_files/model_v0_quickcheck2_reactions.txt
------------------------------------------------
Please check the following reactions:
NOTE: FVA results are rounded to 10th decimal, exact values can be found in *_reactions.txt
------------------------------------------------
PTAr: accoa_c + pi_c <=> actp_c + coa_c
lower bound: -1000.0 upper bound: 1000.0
FVA minimum: 0.0 FVA maximum: 18.6717703976 difference: 18.6717703976
reversibility

In [18]:
# Define version-specific reaction lists
irrev_lists_rev = {
    0: ['H2Ot'],
    1: ['H2Ot'],
    2: ['H2Ot'],
    3: [],  # v3 has none
    4: ['SUCOAS', 'FORt', 'H2Ot']
}

irrev_lists_fwd = {
    0: ['PTAr'],
    1: ['PTAr'],
    2: ['PTAr'],
    3: ['PTAr'],  # Note: was labeled as v4 in your input, corrected here
    4: []  # v4 has none
}

# Process each model v0 to v4
for i in range(5):
    print(f"\n🔧 Processing model_v{i}_quickcheck2.xml")

    # Load model
    model_path = f"{temp_sbml_path}/model_v{i}_quickcheck2.xml"
    qc_model2 = read_sbml_model(model_path)

    # Get version-specific reaction lists
    irrev_list_rev = irrev_lists_rev[i]
    irrev_list_fwd = irrev_lists_fwd[i]

    # 🔁 Process reverse-only irreversible reactions
    success_count = 0
    for rxn_id in irrev_list_rev:
        try:
            rxn = qc_model2.reactions.get_by_id(rxn_id)
        except KeyError:
            continue
        new_bound = abs(rxn.lower_bound)
        rxn.lower_bound = 0
        rxn.upper_bound = new_bound

        rxn_rev = Reaction(id=rxn_id + "_rev")
        rxn_rev.name = rxn.name + " (reversed)"
        rxn_rev.lower_bound = 0
        rxn_rev.upper_bound = new_bound
        rxn_rev.add_metabolites({met: -coeff for met, coeff in rxn.metabolites.items()})
        qc_model2.add_reactions([rxn_rev])

        for group in qc_model2.groups:
            if rxn in group.members:
                group.members.remove(rxn)
                group.members.add(rxn_rev)

        qc_model2.reactions.remove(rxn)
        success_count += 1
    print(f"✅ Reversed and replaced {success_count} reactions from 'irrev_list_rev'.")

    # ➡️ Adjust forward-only irreversible reactions
    success_count = 0
    for rxn_id in irrev_list_fwd:
        try:
            rxn = qc_model2.reactions.get_by_id(rxn_id)
            rxn.lower_bound = 0
            success_count += 1
        except KeyError:
            continue
    print(f"✅ Set lower bound to 0 for {success_count} reactions from 'irrev_list_fwd'.")

    # Save the modified model
    output_path = f"{temp_sbml_path}/model_v{i}_quickcheck3.xml"
    write_sbml_model(qc_model2, output_path)
    print(f"💾 Saved model to: {output_path}")


🔧 Processing model_v0_quickcheck2.xml
✅ Reversed and replaced 1 reactions from 'irrev_list_rev'.
✅ Set lower bound to 0 for 1 reactions from 'irrev_list_fwd'.
💾 Saved model to: /Users/Luca/Desktop/Thesis_Rep/GitHub_Repo/data/raw/sbml_files/temporary_files/model_v0_quickcheck3.xml

🔧 Processing model_v1_quickcheck2.xml
✅ Reversed and replaced 1 reactions from 'irrev_list_rev'.
✅ Set lower bound to 0 for 1 reactions from 'irrev_list_fwd'.
💾 Saved model to: /Users/Luca/Desktop/Thesis_Rep/GitHub_Repo/data/raw/sbml_files/temporary_files/model_v1_quickcheck3.xml

🔧 Processing model_v2_quickcheck2.xml
✅ Reversed and replaced 1 reactions from 'irrev_list_rev'.
✅ Set lower bound to 0 for 1 reactions from 'irrev_list_fwd'.
💾 Saved model to: /Users/Luca/Desktop/Thesis_Rep/GitHub_Repo/data/raw/sbml_files/temporary_files/model_v2_quickcheck3.xml

🔧 Processing model_v3_quickcheck2.xml
✅ Reversed and replaced 0 reactions from 'irrev_list_rev'.
✅ Set lower bound to 0 for 1 reactions from 'irrev_list_

### Round 3

In [19]:
# Loop over model versions v0 to v4
for i in range(5):
    model_file = f"{temp_sbml_path}/model_v{i}_quickcheck3.xml"
    command = ["python", quickcheck_path, "-s", model_file]
    
    print(f"Running quickcheck for model_v{i}...")
    subprocess.run(command)

Running quickcheck for model_v0...
------------------------------------------------
COBRAPY added 0 exchange reaction
------------------------------------------------
None
reactions: 87 (including reactions added by cobrapy)
reversibilities:  11
metabolites: 72
genes: 137
growth rate: 0.8739215069684311
------------------------------------------------
fraction of optimum: 0.1
------------------------------------------------
Calculating FVA...
Checking reactions...
Writing results to file: /Users/Luca/Desktop/Thesis_Rep/GitHub_Repo/data/raw/sbml_files/temporary_files/model_v0_quickcheck3_reactions.txt
------------------------------------------------
All reactions seem to be fine
Exact FVA results can be found in *_reactions.txt
Running quickcheck for model_v1...
------------------------------------------------
COBRAPY added 0 exchange reaction
------------------------------------------------
None
reactions: 89 (including reactions added by cobrapy)
reversibilities:  11
metabolites: 72
g

### Information on models passed on to EFMlrs

In [23]:
# 📊 Collect and print summary stats for all modified models
model_stats = []

print("\n📊 Model Summary (After Modifications):")

for i in range(5):
    model_path = temp_sbml_path / f"model_v{i}_quickcheck3.xml"
    model = read_sbml_model(model_path)

    stoich = create_stoichiometric_matrix(model)
    rank = np.linalg.matrix_rank(stoich)
    num_rxns = len(model.reactions)
    num_exchanges = sum(1 for r in model.reactions if r.id.startswith("EX_"))
    dof = num_rxns - rank

    model_stats.append({
        "Model": f"model_v{i}",
        "Reactions": num_rxns,
        "Exchange Reactions": num_exchanges,
        "Rank": rank,
        "Degrees of Freedom": dof
    })

# 🖨️ Pretty print summary
print("{:<10} {:<12} {:<20} {:<8} {:<20}".format("Model", "Reactions", "Exchange Reactions", "Rank", "Degrees of Freedom"))
for stat in model_stats:
    print("{:<10} {:<12} {:<20} {:<8} {:<20}".format(
        stat["Model"],
        stat["Reactions"],
        stat["Exchange Reactions"],
        stat["Rank"],
        stat["Degrees of Freedom"]
    ))


📊 Model Summary (After Modifications):
Model      Reactions    Exchange Reactions   Rank     Degrees of Freedom  
model_v0   87           16                   63       24                  
model_v1   89           17                   64       25                  
model_v2   91           18                   65       26                  
model_v3   93           19                   66       27                  
model_v4   95           20                   67       28                  


# Export stuff for EFMlrs

In [24]:
# Conda environment name (replace with your actual environment)
conda_env_name = "efmlrs-env"  # change this to your environment name

# Loop through all model versions v0 to v4
for i in range(5):
    # Load the model
    model_path = f"{temp_sbml_path}/model_v{i}_quickcheck3.xml"
    model = read_sbml_model(model_path)

    # Export to Ecoli_Stuff directory
    export_path = f"{processed_sbml_path}/ecolicore_model_v{i}.xml"
    write_sbml_model(model, export_path)
    print(f"✅ Exported model_v{i} to {export_path}")

    # Command to run efmlrs_pre
    cmd = f"conda run -n {conda_env_name} efmlrs_pre -i {export_path} --bounds"

    print(f"🚀 Running efmlrs_pre on ecolicore_model_v{i}.xml...")
    subprocess.run(cmd, shell=True)

✅ Exported model_v0 to /Users/Luca/Desktop/Thesis_Rep/GitHub_Repo/data/processed/sbml_files/ecolicore_model_v0.xml
🚀 Running efmlrs_pre on ecolicore_model_v0.xml...
                          
           EFMlrs     __ 
    (\   .-.   .-.   /_")
     \\_//^\\_//^\\_//   
      `"´   `"´   `"´    
     start compressions   
                          
Ignoring compartments: None
The following metabolites are orphans (not involved in any reaction) and will be removed from the model
['gln__L_e', 'mal__L_e', 'fru_e', 'fum_e']
no empty reactions
Checking for additional bounds
Found 2 additional bounds:
Lower bound: 8.39 for reaction ATPM
Upper bound: 10.0 for reaction EX_glc__D_e_rev
Uncompressed network size: 90 reactions ( 11 reversible ) and 70 metabolites.
START COMPRESSIONS
*** Compression round: 1 ***
Start deadend compression...
Done deadend compression. Network size: 70 metabolites and 90 reactions ( 11 reversible )
Start many2one compression...
Done many2one compression. Network size: