### Setup

In [None]:
import os, sys
sys.path.append('..')

from cobra import io
from cobra.core import Model, Reaction, Gene, GPR
from scripts.helpers.model import rxn_in_model

# Load models

# Load wildtype from manual directory (adjust path for notebooks directory)
# Use io.read_sbml_model for local files instead of io.load_model
wildtype = io.read_sbml_model('../data/fill/xmls/MNL_iCre1355_auto_GAPFILL.xml')

models = {
    "Wildtype": wildtype
}

# Get altered reactions of wildtype - using correct directory name
altered_dir = '../data/altered/xmls/MNL_iCre1355_auto_GAPFILL/'

for root, dirs, files in os.walk(altered_dir):

    # Exclude the /h directory from search
    if root.endswith('/h'):
        continue

    for file in files:
        if file.endswith('.xml'):
            # Get file name and remove .xml extension
            model_name = file[:-4]
            full_path = os.path.join(root, file)
            print(f"Loading model: {model_name} from {full_path}")
            models[model_name] = io.read_sbml_model(full_path)

No objective coefficients in model. Unclear what should be optimized


Loading model: SQE+MVA from ../data/altered/xmls/MNL_iCre1355_auto_GAPFILL/SQE+MVA.xml


No objective coefficients in model. Unclear what should be optimized


Loading model: SQE from ../data/altered/xmls/MNL_iCre1355_auto_GAPFILL/SQE.xml


No objective coefficients in model. Unclear what should be optimized


Loading model: SQS+MVA from ../data/altered/xmls/MNL_iCre1355_auto_GAPFILL/SQS+MVA.xml


No objective coefficients in model. Unclear what should be optimized


Loading model: SQS+SQE+MVA from ../data/altered/xmls/MNL_iCre1355_auto_GAPFILL/SQS+SQE+MVA.xml


No objective coefficients in model. Unclear what should be optimized


Loading model: SQS+SQE from ../data/altered/xmls/MNL_iCre1355_auto_GAPFILL/SQS+SQE.xml


No objective coefficients in model. Unclear what should be optimized


Loading model: SQS from ../data/altered/xmls/MNL_iCre1355_auto_GAPFILL/SQS.xml


No objective coefficients in model. Unclear what should be optimized


### Uniprot Gene IDs

In [21]:
"""
The following reactions need to have gene associations updated to UniProt IDs for GECKO Kcat values:
    - ALT_MVAS, ALT_MVAD, ALT_MVAE, ALT_MVD, ALT_MVK, ALT_PMK, ALT_IDLI (MVA pathway)
    - SS, SMO, ERG5, ERG3, ERG (Sterol pathway)
    - ALT_PSPPS2, ALT_SQS2, ALT_SQE2 (Overexpressions)
    - DXS, CMK, CMS, HDS (MEP pathway)
    - Additional SQS, SQE strains with their kcat values

Concepts to keep in mind:
    - One gene may regulate multiple reactions
    - One reaction may be regulated by the conjunction or disjunction of multiple genes

Ensure:
    - Did we cover ever necessary reaction for proper constraining?
"""
reac_gene = {
    # MVA Pathway -- Note: what strain are we using for MVA?
    'ALT_MVAS': ['MVAS'],
    'ALT_MVAD': ['MVAD'],
    'ALT_MVAE': ['MVAE'],
    'ALT_MVK': ['MVK'],
    'ALT_PMK': ['PMK'],
    'ALT_IDLI': ['IDLI'],
    # MEP Pathway
    'DXS': ['Cre07.g356350.t1.1'],
    'CMK': ['Cre02.g145050.t1.2'],
    'CMS': ['Cre16.g679669.t1.1'],
    'HDS': ['Cre12.g490350.t1.1'],
    # Sterol Pathway
    'SS': ['Cre03.g175250.t1.2', 'Cre03.g175250.t2.1'],
    'SMO': ['Cre17.g734644.t1.1'],
    'ERG5': ['ERG5'],
    'ERG3': ['ERG3'],
    'ERG': ['ERG4'],
    # Overexpressions
    'ALT_PSPPS2': ['SQS2'],
    'ALT_SQS2': ['SQS2'],
    'ALT_SQE2': ['SQE2'],
}

gene_uniprot = {
    # MVA Pathway
    'MVAS': 'Q9FD71', # https://www.uniprot.org/uniprotkb/Q9FD71/entry
    'MVAE': 'Q9FD65', # https://www.uniprot.org/uniprotkb/Q9FD65/publications
    'MVK': '',
    'PMK': '',
    'MVAD': '',
    'IDLI': '',
    # MEP Pathway
    'Cre07.g356350.t1.1': 'O81954', # https://www.uniprot.org/uniprotkb/O81954/entry
    'Cre02.g145050.t1.2': 'O81014', # https://www.uniprot.org/uniprotkb/O81014/entry (assumes CMK/ISPE of Arabidopsis thaliana)
    'Cre16.g679669.t1.1': '',
    'Cre12.g490350.t1.1': '',
    # Sterol Pathway
    'Cre03.g175250.t1.2': 'B4DWP0', # https://www.uniprot.org/uniprotkb/B4DWP0/entry (assumes sqs of homo sapiens)
    'Cre03.g175250.t2.1': 'B4DWP0', # ... (likewise)
    'Cre17.g734644.t1.1': 'P52020', # https://www.uniprot.org/uniprotkb/P52020/publications (assumes sqe of rattus norvegicus)
    # 'ERG3': '',
    # 'ERG5': '',
    # 'ERG4': '',
    # Overexpressions (these will be storing an object with different gene id for strains)
    'SQS2': {},
    'SQE2': {},
}

The following script rebuilds the model gene associations where the above stated gene ids are replaced with the uniprot equivalent:

In [32]:
def rxns_with_gene(m: Model, gene_id: str) -> list[Reaction]:

    return [rxn for rxn in m.reactions if gene_id in list(map(lambda x: x.id, rxn.genes))]

def rebuild_model(m: Model) -> Model:

    new_model: Model = m.copy()
    
    for gene in new_model.genes:

        if gene.id not in gene_uniprot.keys(): continue

        # Special handling for this case
        if gene.id in ['SQS2', 'SQE2']:
            continue
        
        old_gid = gene.id
        new_gid = gene_uniprot[old_gid]
        if new_gid == "": continue
        gene.id = new_gid

        print(f"New gene: {new_gid} (old: {old_gid})")

        # Find all reactions that are associated with the gene and update the gpr / gene_reaction
        rxns = rxns_with_gene(new_model, gene.id)
        for rxn in rxns:
            print(f"\t{rxn.gene_reaction_rule}")
            if old_gid in rxn.gene_reaction_rule:
                rxn.gene_reaction_rule = rxn.gene_reaction_rule.replace(old_gid, new_gid)
        print('\n')
    
    return new_model

# Models updated with UniProt IDs
updated_models = {k: rebuild_model(v) for k, v in models.items()}

New gene: P52020 (old: Cre17.g734644.t1.1)
	Cre17.g734644.t1.1


New gene: O81014 (old: Cre02.g145050.t1.2)
	Cre02.g145050.t1.2


New gene: O81954 (old: Cre07.g356350.t1.1)
	Cre07.g356350.t1.1


New gene: B4DWP0 (old: Cre03.g175250.t2.1)
	Cre03.g175250.t1.2 or Cre03.g175250.t2.1
	Cre03.g175250.t1.2 or Cre03.g175250.t2.1


New gene: B4DWP0 (old: Cre03.g175250.t1.2)
	Cre03.g175250.t1.2 or B4DWP0
	Cre03.g175250.t1.2 or B4DWP0


New gene: P52020 (old: Cre17.g734644.t1.1)
	Cre17.g734644.t1.1


New gene: O81014 (old: Cre02.g145050.t1.2)
	Cre02.g145050.t1.2


New gene: O81954 (old: Cre07.g356350.t1.1)
	Cre07.g356350.t1.1


New gene: B4DWP0 (old: Cre03.g175250.t2.1)
	Cre03.g175250.t1.2 or Cre03.g175250.t2.1
	Cre03.g175250.t1.2 or Cre03.g175250.t2.1


New gene: B4DWP0 (old: Cre03.g175250.t1.2)
	Cre03.g175250.t1.2 or B4DWP0
	Cre03.g175250.t1.2 or B4DWP0


New gene: Q9FD71 (old: MVAS)
	MVAS


New gene: Q9FD65 (old: MVAE)
	MVAE


New gene: P52020 (old: Cre17.g734644.t1.1)
	Cre17.g734644.t1.1


New

### Testing New Models

In [31]:
for k, m in updated_models.items():

    print(f"Model: {k}")

    for rid in reac_gene.keys():

        if not rxn_in_model(m, rid): continue

        rxn = m.reactions.get_by_id(rid)

        print(f"\tReaction: {rid}\n\t\tGPR:{rxn.gpr}\n\t\tGenes:{rxn.genes}\n\t\tGene-Reaction:{rxn.gene_reaction_rule}\n")

Model: Wildtype
	Reaction: DXS
		GPR:O81954
		Genes:frozenset({<Gene O81954 at 0x1fcea43b770>})
		Gene-Reaction:O81954

	Reaction: CMK
		GPR:O81014
		Genes:frozenset({<Gene O81014 at 0x1fcea43bef0>})
		Gene-Reaction:O81014

	Reaction: CMS
		GPR:Cre16.g679669.t1.1
		Genes:frozenset({<Gene Cre16.g679669.t1.1 at 0x1fcfa075cd0>})
		Gene-Reaction:Cre16.g679669.t1.1

	Reaction: HDS
		GPR:Cre12.g490350.t1.1
		Genes:frozenset({<Gene Cre12.g490350.t1.1 at 0x1fcf98afb00>})
		Gene-Reaction:Cre12.g490350.t1.1

	Reaction: SS
		GPR:B4DWP0 or B4DWP0
		Genes:frozenset({<Gene B4DWP0 at 0x1fcea43a4e0>})
		Gene-Reaction:B4DWP0 or B4DWP0

	Reaction: SMO
		GPR:P52020
		Genes:frozenset({<Gene P52020 at 0x1fcea439ca0>})
		Gene-Reaction:P52020

	Reaction: ERG5
		GPR:ERG5
		Genes:frozenset({<Gene ERG5 at 0x1fcfa148200>})
		Gene-Reaction:ERG5

	Reaction: ERG3
		GPR:ERG3
		Genes:frozenset({<Gene ERG3 at 0x1fcfa1481d0>})
		Gene-Reaction:ERG3

	Reaction: ERG
		GPR:ERG4
		Genes:frozenset({<Gene ERG4 at 0x1fcfa14823

### Save Models

In [None]:
from scripts.helpers.model import add_single_gene_reaction_pair, met_in_model

save_path = os.path.join('..', 'data', 'gecko', 'prev', 'xmls')
os.makedirs(save_path, exist_ok=True)
for k, m in updated_models.items():

    # Add ergosterol & orthophosphate sink reactions if not present
    # Add ergosterol exchange reaction
    ERG = "ergosterol_c"
    ERGEXCH = "ERGOSTEROLEXCH"
    if not rxn_in_model(m, ERGEXCH):
        add_single_gene_reaction_pair(
            model=m, 
            gene_id="EXCHERG_GENE",
            reaction_id=ERGEXCH,
            reaction_name="Ergosterol exchange (assumption)", 
            reaction_subsystem="Exchange", 
            metabolites=[(-1, ERG)],
            reversible=True
        )

    ORTHOP = "orthop_c"
    EXCHORTHOP = "ORTHOPHOSPHATEEXCH"
    if not rxn_in_model(m, EXCHORTHOP) and met_in_model(m, ORTHOP):
        add_single_gene_reaction_pair(
            model=m,
            gene_id="EXCHORTHOP",
            reaction_id=EXCHORTHOP,
            reaction_name="Orthophosphate exchange (assumption)",
            reaction_subsystem="Exchange",
            metabolites=[(-1, ORTHOP)],
            reversible=True
        )
    
    io.write_sbml_model(m, os.path.join(save_path, f"{k}_updated.xml"))

### Build GECKO Model

In [None]:
import geckopy as gp

### Gap-fill Missing Kcat

### FBA