# In [None]:  (notebook cell marker left over from the Jupyter export)
"""
optimize_metabolites.py

Run slim_optimize on a list of exchange metabolites for each COBRA model
in a fixed folder, then collect the results into a CSV file.

Input folder: set by INPUT_DIR (scanned for .xml model files)
Output CSV: set by OUTPUT_CSV
"""

import os
import logging
from typing import List, Optional

import pandas as pd
import cobra


# -----------------------------------------------------------------------------
# CONFIGURATION (fixed paths for direct run)
# -----------------------------------------------------------------------------
# Folder that process_models() scans for model files ending in ".xml".
INPUT_DIR = 'path/to/output/models'
# File that main() writes the results table to (CSV, no index column).
OUTPUT_CSV = 'path/to/output/data.csv'


# -----------------------------------------------------------------------------
# CONSTANTS
# -----------------------------------------------------------------------------
# Exchange reaction IDs screened for every model.  The order here is the row
# order of the output table, so keep it stable.
METABOLITES_TO_TEST: List[str] = [
    "EX_malt_e",
    "EX_galur_e",
    "EX_glcn_e",
    "EX_inost_e",
    "EX_xyl__D_e",
    "EX_mnl_e",
    "EX_meoh_e",
    "EX_glyc_e",
    "EX_succ_e",
    "EX_confrl_e",
    "EX_bz_e",
    "EX_for_e",
    "EX_fru_e",
    "EX_man_e",
    "EX_orn_e",
    "EX_glc__D_e",
    "EX_gly_e",
    "EX_ac_e",
    "EX_tre_e",
    "EX_pyr_e",
    "EX_mma_e",
    "EX_cellb_e",
    "EX_oxa_e",
    "EX_sucr_e",
    "EX_gal_e",
    "EX_xylan4_e",
    "EX_glu__L_e",
    "EX_ala__L_e",
    "EX_cys__L_e",
    "EX_asp__L_e",
    "EX_phe__L_e",
    "EX_his__L_e",
    "EX_ile__L_e",
    "EX_leu__L_e",
    "EX_lys__L_e",
    "EX_met__L_e",
    "EX_asn__L_e",
    "EX_pro__L_e",
    "EX_gln__L_e",
    "EX_arg__L_e",
    "EX_ser__L_e",
    "EX_thr__L_e",
    "EX_val__L_e",
    "EX_trp__L_e",
    "EX_tyr__L_e",
    "EX_g3pg_e",
    "EX_4ahmmp_e",
    "EX_thm_e",
    "EX_2pglyc_e",
    "EX_dtmp_e",
    "EX_25dkglcn_e",
    "EX_amp_e",
    "EX_glyc3p_e",
]


# -----------------------------------------------------------------------------
# FUNCTION DEFINITIONS
# -----------------------------------------------------------------------------
def optimize_metabolite_in_model(
    model: cobra.Model,
    exchange_id: str,
    uptake_rate: float = 10.0,
    objective: str = "Growth",
) -> Optional[float]:
    """
    Return the objective value with one extra exchange added to the medium.

    The exchange is added to a copy of the model's current medium at
    ``uptake_rate``, ``objective`` is installed as the model objective and
    ``slim_optimize()`` is run.  Both changes are made inside a
    ``with model:`` block so that the medium *and* the objective the caller
    had before the call are back in place afterwards.

    Args:
        model: Model to optimize; left unchanged once the call returns.
        exchange_id: ID of the exchange reaction to add to the medium.
        uptake_rate: Medium value assigned to ``exchange_id``.
        objective: Value assigned to ``model.objective`` for the solve.

    Returns:
        Whatever ``model.slim_optimize()`` returns, or None if changing the
        model or solving raised (a warning is logged in that case).
        NOTE(review): per the COBRApy docs ``slim_optimize`` reports a
        failed solve as NaN by default rather than raising, so callers
        should be prepared for NaN as well as None.
    """
    # Work on a private copy so the dict handed out by the model is untouched.
    medium = dict(model.medium)
    medium[exchange_id] = uptake_rate
    try:
        # The model context records every change made inside the block and
        # reverts it on exit, including when an exception is raised.
        with model:
            model.medium = medium
            model.objective = objective
            return model.slim_optimize()
    except Exception as e:
        # Best effort by design: one bad exchange must not abort the screen.
        logging.warning("Error optimizing %s: %s", exchange_id, e)
        return None


def process_models(
    folder: str,
    metabolites: List[str]
) -> pd.DataFrame:
    """
    Screen every ``.xml`` model in ``folder`` against ``metabolites``.

    Each file whose name ends in ".xml" (case-insensitive) is loaded with
    ``cobra.io.read_sbml_model`` and ``optimize_metabolite_in_model()`` is
    called once per metabolite.

    Args:
        folder: Directory to scan; sub-directories are not searched.
        metabolites: Exchange reaction IDs, one output row each.

    Returns:
        A DataFrame whose first column, "Metabolite", holds the IDs in the
        given order, followed by one float column per model named after the
        file without its extension (files are processed in sorted order).
        A cell is 0.0 when the model could not be loaded, when the
        optimization returned None, or when it returned NaN.
    """
    xml_files = sorted(f for f in os.listdir(folder) if f.lower().endswith(".xml"))
    model_names = [os.path.splitext(f)[0] for f in xml_files]
    df = pd.DataFrame(index=metabolites, columns=model_names, dtype=float)

    for xml_file, model_name in zip(xml_files, model_names):
        path = os.path.join(folder, xml_file)
        logging.info("Loading model: %s", path)
        try:
            model = cobra.io.read_sbml_model(path)
        except Exception as e:
            # Keep the column so the table shape does not depend on which
            # files happened to be readable.
            logging.error("Failed to load %s: %s", xml_file, e)
            df[model_name] = 0.0
            continue

        results = (optimize_metabolite_in_model(model, met) for met in metabolites)
        # pd.isna() is true for both None (helper caught an error) and NaN
        # (solver reported a failed solve); both are recorded as 0.0 so the
        # CSV has no blank cells.
        df[model_name] = [0.0 if pd.isna(res) else res for res in results]

    # Insert the label column explicitly instead of reset_index() + rename:
    # renaming the generated "index" column would also hit a model whose file
    # is called "index.xml" and leave the labels under "level_0".
    df.insert(0, "Metabolite", df.index, allow_duplicates=True)
    return df.reset_index(drop=True)


# -----------------------------------------------------------------------------
# MAIN
# -----------------------------------------------------------------------------
def main():
    """
    Script entry point.

    Sets up timestamped INFO logging, screens every model found in
    INPUT_DIR against METABOLITES_TO_TEST and writes the resulting table to
    OUTPUT_CSV without an index column, floats formatted with six decimals.
    """
    log_settings = {
        "level": logging.INFO,
        "format": "%(asctime)s %(levelname)s: %(message)s",
        "datefmt": "%Y-%m-%d %H:%M:%S",
    }
    logging.basicConfig(**log_settings)

    logging.info("Processing models in %s", INPUT_DIR)
    table = process_models(INPUT_DIR, METABOLITES_TO_TEST)

    logging.info("Writing CSV to %s", OUTPUT_CSV)
    table.to_csv(OUTPUT_CSV, index=False, float_format="%.6f")

    logging.info("Done.")


# Run the screen only when this file is executed as a script, so importing
# the module for its functions has no side effects.
if __name__ == "__main__":
    main()