In [78]:
from metaxime.parser import ParserRP2
from biopathopt import ModelBuilder

In [79]:
# Instantiate the parser from its three CSV inputs (scope, compounds, paths).
# The paths are relative, so the files are expected next to where the kernel runs.
# NOTE(review): "rp2" presumably refers to RetroPath2 output files — confirm.
parser = ParserRP2(
    rp2_scope_path='out_scope.csv',
    rp2_cmp_path='out_compounds.csv',
    rp2_paths_path='out_paths.csv',
)

In [None]:
# Collect every model the parser produces. The result is indexed twice later
# in the notebook (all_models[6][0]), so it is presumably a nested sequence of
# models — TODO confirm the exact structure against ParserRP2.
all_models = parser.return_rp2_models()

In [None]:
# Select the single pathway model used by every following cell.
# NOTE(review): the indices 6 and 0 are hard-coded; what they select depends on
# the ordering returned by return_rp2_models() — confirm and consider naming them.
model = all_models[6][0]

In [None]:
# Load 'iML1515.xml' through biopathopt's ModelBuilder with the progress bar on.
# The loaded COBRA model is accessed below as `biopath_iml1515.model`.
biopath_iml1515 = ModelBuilder(path_to_model='iML1515.xml', use_progressbar=True)

In [18]:
from typing import Any, Dict, Iterable, Set

def annotations_overlap(
    a: Dict[str, Any],
    b: Dict[str, Any],
    ignore_keys: Iterable[str] = (),
) -> bool:
    """Return True if two annotation dicts share a value under a common key.

    Only keys present in both dicts are compared. Values are compared
    case-insensitively and with surrounding whitespace removed. Empty or
    whitespace-only values never count as a match, since an empty identifier
    carries no information.

    Args:
        a: First annotation mapping. Each value may be a single value or a
            list/tuple/set/frozenset of values; ``None`` entries are skipped.
        b: Second annotation mapping, same conventions as ``a``.
        ignore_keys: Keys to leave out of the comparison, e.g. generic keys
            that classify an entity rather than identify it. Defaults to none,
            which compares every shared key.

    Returns:
        bool: True when at least one shared, non-ignored key has at least one
        normalised value in common; False otherwise (including when either
        mapping is empty or ``None``).
    """
    def _to_norm_set(v: Any) -> Set[str]:
        """Normalize a value (scalar or collection) to a set of lowercase, stripped strings."""
        if v is None:
            return set()
        if isinstance(v, (list, tuple, set, frozenset)):
            items: Iterable[Any] = v
        else:
            items = [v]
        normalized = {str(x).strip().lower() for x in items if x is not None}
        # An empty identifier would otherwise "match" any other empty identifier.
        normalized.discard("")
        return normalized

    if not a or not b:
        return False

    # A bare string is a single key, not an iterable of one-character keys.
    skipped = {ignore_keys} if isinstance(ignore_keys, str) else set(ignore_keys)

    for key in (set(a) & set(b)) - skipped:
        if _to_norm_set(a[key]) & _to_norm_set(b[key]):
            return True
    return False

In [77]:
import logging
from cobra import Model, Reaction, Metabolite
from typing import Dict

def merge_models(
    source_model: Model,
    input_target_model: Model,
) -> Model:
    """Merge the reactions of one COBRApy model into a copy of another.

    Each metabolite of the source model is matched against the target model's
    metabolites with ``annotations_overlap``. Among the matching candidates, one
    in the same compartment as the source metabolite is preferred; if none
    exists, the first matching candidate is used. Source reactions are then
    rebuilt on top of the matched target metabolites and added to the copy.
    Metabolites without a match are added as detached copies, so the source
    model's own objects are never attached to the merged model.

    Args:
        source_model (Model): The model whose reactions should be merged in.
            It is only read, never modified.
        input_target_model (Model): The model receiving the reactions. It is
            not modified; the merge is performed on ``input_target_model.copy()``.

    Returns:
        Model: A copy of ``input_target_model`` with the source reactions added.
    """
    target_model = input_target_model.copy()
    gen_ori_convert_metabolites: Dict[str, str] = {}

    # Match metabolites based on annotation overlap, preferring the same compartment
    for gen_m in source_model.metabolites:
        first_match_id = None
        chosen_id = None
        for ori_m in target_model.metabolites:
            if not annotations_overlap(gen_m.annotation, ori_m.annotation):
                continue
            if first_match_id is None:
                first_match_id = ori_m.id
            # The same compound usually exists once per compartment with the same
            # annotations; taking the first hit can cross compartments.
            if ori_m.compartment == gen_m.compartment:
                chosen_id = ori_m.id
                break
        if chosen_id is None:
            chosen_id = first_match_id
        if chosen_id is not None:
            logging.debug(f"{gen_m.id} matches {chosen_id}")
            gen_ori_convert_metabolites[gen_m.id] = chosen_id

    # Detached copies of unmatched source metabolites, shared across reactions
    new_metabolites: Dict[str, Metabolite] = {}

    # Copy reactions with mapped metabolites
    new_reactions = []
    for r in source_model.reactions:
        reaction = Reaction(r.id)
        reaction.name = r.name
        # Assign both bounds at once so an intermediate lower > upper state
        # against the default bounds cannot raise.
        reaction.bounds = r.bounds

        reac_meta_dict = {}
        for met, coeff in r.metabolites.items():
            mapped_id = gen_ori_convert_metabolites.get(met.id, met.id)
            try:
                meta = target_model.metabolites.get_by_id(mapped_id)
            except KeyError:
                meta = new_metabolites.get(met.id)
                if meta is None:
                    # Copy so the source model keeps ownership of its metabolite.
                    meta = met.copy()
                    new_metabolites[met.id] = meta
            # Two source metabolites may map to the same target metabolite;
            # sum their coefficients instead of overwriting one with the other.
            reac_meta_dict[meta] = reac_meta_dict.get(meta, 0) + coeff

        reaction.add_metabolites(reac_meta_dict)
        new_reactions.append(reaction)

    target_model.add_reactions(new_reactions)
    logging.info(f"Added {len(new_reactions)} reactions to {target_model.id or 'target model'}")
    return target_model

In [38]:
# Map every metabolite of the pathway model to the first iML1515 metabolite
# that shares an annotation value with it, and report the outcome per metabolite.
to_add_metabolites = []
gen_ori_convert_metabolites = {}
for gen_m in model.metabolites:
    for ori_m in biopath_iml1515.model.metabolites:
        if not annotations_overlap(gen_m.annotation, ori_m.annotation):
            continue
        print(f'{gen_m.id} matches {ori_m.id}')
        gen_ori_convert_metabolites[gen_m.id] = ori_m.id
        break
    else:
        # Inner loop ran to completion: no candidate overlapped.
        print(f'Could not find {gen_m.id}')

Could not find TARGET_0000000001_c
Could not find MNXM732923_c
Could not find CMPD_0000000011_c
MNXM26_c matches ac_c
WATER_c matches h2o_p
Could not find CMPD_0000000014_c
MNXM159_c matches 2obut_c
MNXM729302_c matches nh4_c
MNXM418_c matches acser_c
Could not find MNXM741485_c


In [39]:
# Display the pathway-id -> iML1515-id mapping built by the matching loop.
gen_ori_convert_metabolites

{'MNXM26_c': 'ac_c',
 'WATER_c': 'h2o_p',
 'MNXM159_c': '2obut_c',
 'MNXM729302_c': 'nh4_c',
 'MNXM418_c': 'acser_c'}

In [68]:
# Rebuild the ModelBuilder from 'iML1515.xml', replacing the earlier
# `biopath_iml1515` object. The recorded progress output shows the annotation
# updates take several minutes, so avoid re-running this cell needlessly.
biopath_iml1515 = ModelBuilder(path_to_model='iML1515.xml', use_progressbar=True)

Updating the metabolite annotations: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1877/1877 [02:41<00:00, 11.61it/s]
Updating the reaction annotations: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2712/2712 [00:44<00:00, 61.56it/s]


In [69]:
from cobra import Model, Reaction, Metabolite

# Step 1: map each pathway metabolite to the first iML1515 metabolite that
# shares an annotation value with it.
to_add_metabolites = []
gen_ori_convert_metabolites = {}
for gen_m in model.metabolites:
    for ori_m in biopath_iml1515.model.metabolites:
        if not annotations_overlap(gen_m.annotation, ori_m.annotation):
            continue
        print(f'{gen_m.id} matches {ori_m.id}')
        gen_ori_convert_metabolites[gen_m.id] = ori_m.id
        break
    else:
        # Inner loop ran to completion: no candidate overlapped.
        print(f'Could not find {gen_m.id}')

# Step 2: rebuild every pathway reaction on top of the mapped metabolites.
to_add_reacs = []
for r in model.reactions:
    reaction = Reaction(r.id)
    reaction.name = r.name
    reaction.lower_bound = r.lower_bound
    reaction.upper_bound = r.upper_bound

    stoichiometry = {}
    for met, coeff in r.metabolites.items():
        # Unmapped metabolites keep their own id for the lookup.
        target_id = gen_ori_convert_metabolites.get(met.id, met.id)
        try:
            resolved = biopath_iml1515.model.metabolites.get_by_id(target_id)
        except KeyError:
            # Not in iML1515: fall back to the pathway model's metabolite.
            resolved = model.metabolites.get_by_id(met.id)
        stoichiometry[resolved] = coeff
    reaction.add_metabolites(stoichiometry)
    to_add_reacs.append(reaction)

# Step 3: attach the rebuilt reactions to the iML1515 model (in place).
biopath_iml1515.model.add_reactions(to_add_reacs)

In [73]:
# List the reactions of the selected pathway model.
model.reactions

[<Reaction TRS_0_0_5 at 0x30a3e3ac0>,
 <Reaction TRS_0_1_0 at 0x30a3e3cd0>,
 <Reaction TRS_0_2_29 at 0x30a3e3e80>]

In [74]:
# Inspect the reaction as it was added to the iML1515 model.
# NOTE(review): the recorded stoichiometry uses h2o_p although the pathway
# metabolite is WATER_c — presumably a cross-compartment match; confirm.
biopath_iml1515.model.reactions.get_by_id('TRS_0_1_0')

0,1
Reaction identifier,TRS_0_1_0
Name,
Memory address,0x177ffd390
Stoichiometry,2obut_c + CMPD_0000000014_c + nh4_c --> CMPD_0000000011_c + h2o_p  2-Oxobutanoate + CMPD_0000000014 + Ammonium --> CMPD_0000000011 + H2O H2O
GPR,
Lower bound,0.0
Upper bound,1000.0


In [75]:
# Show the metabolite -> coefficient mapping of the merged reaction.
biopath_iml1515.model.reactions.get_by_id('TRS_0_1_0').metabolites

{<Metabolite CMPD_0000000014_c at 0x177ffec50>: -1,
 <Metabolite h2o_p at 0x178096860>: 1,
 <Metabolite 2obut_c at 0x179f89db0>: -1,
 <Metabolite nh4_c at 0x52dc45240>: -1,
 <Metabolite CMPD_0000000011_c at 0x177fff580>: 1}

In [76]:
# Same reaction in the original pathway model, for comparison with the merged one.
model.reactions.get_by_id('TRS_0_1_0')

0,1
Reaction identifier,TRS_0_1_0
Name,
Memory address,0x30a3e3cd0
Stoichiometry,CMPD_0000000014_c + MNXM159_c + MNXM729302_c --> CMPD_0000000011_c + WATER_c  CMPD_0000000014 + 2-oxobutanoate + NH4(+) --> CMPD_0000000011 + H2O
GPR,
Lower bound,0.0
Upper bound,1000.0


In [55]:
# NOTE(review): `reaction` is whatever the last reaction-building loop left bound
# in the kernel, and this adds it to the pathway `model` itself rather than to
# the iML1515 model. The cell has an out-of-order execution count — confirm it
# is still needed.
model.add_reactions([reaction])

0,1
Reaction identifier,TRS_0_0_5
Name,
Memory address,0x178158d90
Stoichiometry,CMPD_0000000011_c + MNXM26_c --> MNXM732923_c + TARGET_0000000001_c  CMPD_0000000011 + acetate --> L-selenocysteine + TARGET_0000000001
GPR,
Lower bound,0.0
Upper bound,1000.0


In [43]:
# Probe with an id that does not exist: get_by_id raises KeyError (see the
# recorded output). The merge code relies on this in its `except KeyError` fallback.
model.reactions.get_by_id('dwsdsd')

KeyError: 'dwsdsd'