# Autopacmen
This notebook collects the individual steps of the AutoPACMEN workflow and runs them in order.

In [1]:
import pathlib
import os
import cobra
import sys

In [2]:
# This notebook expects the following project layout
# (the notebook itself lives in thesis_scripts/sMOMENT/):
'''
project/
├── autopacmen/
│   ├── __init__.py
│   ├── ... .py
│   └── submodules/
│       ├── __init__.py
│       └── ... .py
├── thesis_scripts/
│   ├── models/
│   │   └── model.mat/model.xml
│   └── sMOMENT
│       └── autopacmen_workflow.ipynb
├── datasets/
│   ├── autopacmen_setup/
│   │   ├── bigg_models_metabolites.txt
│   │   └── brenda_2023_1.txt
│   ├── autopacmen_input/
│   └── autopacmen_output/
'''

# Project root = two levels above the working directory of the notebook.
project_root = pathlib.Path(os.getcwd()).parent.parent

# Enabling local imports.
# `from autopacmen.submodules... import ...` only resolves if the folder that
# CONTAINS the autopacmen package is on sys.path, i.e. project/ for the layout
# above. project/autopacmen is kept as the first entry so that a checkout in
# which the package is nested one level deeper keeps working as before.
# The membership check avoids duplicate entries when the cell is re-run.
for import_root in (project_root / "autopacmen", project_root):
    if str(import_root) not in sys.path:
        sys.path.append(str(import_root))

# Dataset folders used by the following cells
setup_dir = project_root / "datasets" / "autopacmen_setup"
input_dir = project_root / "datasets" / "autopacmen_input"
print(setup_dir)

c:\Users\phili\OneDrive - uni-bielefeld.de\Uni\SoSe24\BA\datasets\autopacmen_setup


In [3]:
# Preparation for ftINIT models

# Path of the MATLAB model and of the SBML file derived from it
# (same folder, file name "<stem>_prep.xml").
model_dir = pathlib.Path(os.getcwd()).parent / "models" / "A375_ftINIT_model.mat"  # change for different model
xml_model_dir = model_dir.with_name(model_dir.stem + "_prep.xml")

# Only when the prepared SBML file does not exist yet:
# load the MATLAB model, copy the annotations over from Human-GEM.xml
# (located next to the model) and save the result as SBML.
if not xml_model_dir.is_file():
    model = cobra.io.load_matlab_model(model_dir)
    ihuman_xml = cobra.io.read_sbml_model(model_dir.parent / "Human-GEM.xml")

    # Reactions: take over the annotations of the Human-GEM reaction with the same id
    for reaction in model.reactions:
        reference_reaction = ihuman_xml.reactions.get_by_id(reaction.id)
        reaction.annotation.update(reference_reaction.annotation)

    # Genes: same procedure, matched by gene id
    for gene in model.genes:
        reference_gene = ihuman_xml.genes.get_by_id(gene.id)
        gene.annotation.update(reference_gene.annotation)

    cobra.io.write_sbml_model(model, xml_model_dir)

In [4]:
# Run the data preparation scripts

# Input files of the preparation steps
bigg_dir = setup_dir / "bigg_models_metabolites.txt"
brenda_dir = setup_dir / "brenda_2023_1.txt"

# Step 1: convert the BIGG metabolites file to JSON
# inputs:   - path to the "bigg_models_metabolites.txt" file (as string)
#           - path to the folder the JSON file should be saved in (as string)
# outputs:  - JSON file named "bigg_id_name_mapping.json"
# BIGG metabolites data downloaded from http://bigg.ucsd.edu/data_access (accessed: 09/12/2024)
from autopacmen.submodules.parse_bigg_metabolites_file import parse_bigg_metabolites_file

bigg_txt_path = str(bigg_dir)
bigg_json_folder = str(setup_dir)
parse_bigg_metabolites_file(bigg_txt_path, bigg_json_folder)

# Step 2: convert the sMOMENT-relevant data of the BRENDA text file to JSON
# inputs:   - path to the "brenda_2023_1.txt" file (as string)
#           - path to the folder holding the "bigg_id_name_mapping.json" written by the
#             previous step (as string; the call passes the folder, not the file)
#           - path to the file (not the folder!) the JSON should be saved to (as string)
# outputs:  - JSON file named "brenda.json" (if not stated otherwise in the inputs)
# Brenda database data downloaded from https://www.brenda-enzymes.org/download.php (accessed: 09/12/2024)
from autopacmen.submodules.parse_brenda_textfile import parse_brenda_textfile

brenda_txt_path = str(brenda_dir)
bigg_mapping_folder = str(setup_dir)
brenda_json_path = str(setup_dir / "brenda.json")
parse_brenda_textfile(brenda_txt_path, bigg_mapping_folder, brenda_json_path)

# Step 3: make the previously created "brenda.json" specific for the model
# inputs:   - path to the model sbml file (as string)
#           - path to the "brenda.json" file of the previous step (as string)
#           - path to the output file (not folder) (as string)
# outputs:  - JSON file named "brenda_model_specific.json" (if not stated otherwise in the inputs)
from autopacmen.submodules.parse_brenda_json_for_model import parse_brenda_json_for_model

full_brenda_json = str(setup_dir / "brenda.json")
model_brenda_json = str(setup_dir / "brenda_model_specific.json")
parse_brenda_json_for_model(str(xml_model_dir), full_brenda_json, model_brenda_json)





In [5]:
# Maps kcat values derived from the SABIO-RK database to the EC numbers of the model,
# allowing for "wildcards" (meaning values from related proteins are used if no kcat values are available)
# inputs:   - path to the model sbml file (as string)
#           - path to the output file (not folder) (as string)
#           - path to the "bigg_id_name_mapping.json" file created during data preparation (as string)
# outputs:  - JSON file named "sabio_rk.json" (if not stated otherwise in the inputs)
# SABIO-RK was accessed on 10/12/2024
from autopacmen.submodules.parse_sabio_rk_for_model import parse_sabio_rk_for_model_with_sbml
parse_sabio_rk_for_model_with_sbml(
    # converted to str like in every other step; the Path object was passed directly before
    str(xml_model_dir),
    str(setup_dir / "sabio_rk.json"),
    str(setup_dir / "bigg_id_name_mapping.json")
)

KeyboardInterrupt: 

In [6]:
# Merge the kcat values mapped to EC numbers from SABIO-RK and BRENDA into a single JSON
# inputs:   - path to the SABIO-RK JSON mapping kcat values to the EC numbers of the model reactions (as string)
#           - path to the model-specific BRENDA kcat JSON (as string)
#           - path to the output file (not folder) (as string)
# output:   - combined JSON file named "sabio.json"
from autopacmen.submodules.create_combined_kcat_database import create_combined_kcat_database

sabio_rk_json = str(setup_dir / "sabio_rk.json")
brenda_specific_json = str(setup_dir / "brenda_model_specific.json")
combined_kcat_json = str(setup_dir / "sabio.json")
create_combined_kcat_database(sabio_rk_json, brenda_specific_json, combined_kcat_json)

In [11]:
# Generates the initial spreadsheets for the project
# inputs:   - path to the model sbml file (as string)
#           - path to the project folder (as string)
#           - project name
# NOTE(review): presumably the spreadsheets are written into the project folder - confirm in the submodule.
# The step prints an INFO line for every reaction without a KEGG ID annotation.
from autopacmen.submodules.get_initial_spreadsheets import get_initial_spreadsheets_with_sbml

get_initial_spreadsheets_with_sbml(
    str(xml_model_dir),
    str(input_dir),
    "A375"
)

INFO: Reaction MAR08360 does not have a KEGG ID annotation
INFO: Reaction MAR05396 does not have a KEGG ID annotation
INFO: Reaction MAR09727 does not have a KEGG ID annotation
INFO: Reaction MAR05397 does not have a KEGG ID annotation
INFO: Reaction MAR05398 does not have a KEGG ID annotation
INFO: Reaction MAR05399 does not have a KEGG ID annotation
INFO: Reaction MAR05400 does not have a KEGG ID annotation
INFO: Reaction MAR05401 does not have a KEGG ID annotation
INFO: Reaction MAR08578 does not have a KEGG ID annotation
INFO: Reaction MAR08591 does not have a KEGG ID annotation
INFO: Reaction MAR08592 does not have a KEGG ID annotation
INFO: Reaction MAR04130 does not have a KEGG ID annotation
INFO: Reaction MAR08762 does not have a KEGG ID annotation
INFO: Reaction MAR08727 does not have a KEGG ID annotation
INFO: Reaction MAR08728 does not have a KEGG ID annotation
INFO: Reaction MAR08500 does not have a KEGG ID annotation
INFO: Reaction MAR08501 does not have a KEGG ID annotati

In [None]:
# Builds a JSON with the UniProt protein masses of all proteins that occur
# in the gene rules of the given metabolic model
# inputs:   - path to the model sbml file (as string)
#           - path to the project folder (as string)
#           - project name
# outputs:  - JSON file with the mapping, named project name + "_protein_id_mass_mapping.json"
from autopacmen.submodules.get_protein_mass_mapping import get_protein_mass_mapping_with_sbml

sbml_model_path = str(xml_model_dir)
project_folder_path = str(input_dir)
get_protein_mass_mapping_with_sbml(sbml_model_path, project_folder_path, "A375")

{'O60762': ['ENSG00000000419'], 'Q9BTY2': ['ENSG00000001036'], 'P48506': ['ENSG00000001084'], 'Q16850': ['ENSG00000001630'], 'P28838': ['ENSG00000002549'], 'O14792': ['ENSG00000002587'], 'P19801': ['ENSG00000002726'], 'Q9NR63': ['ENSG00000003137'], 'Q9Y216': ['ENSG00000003987'], 'P52569': ['ENSG00000003989'], 'P54819': ['ENSG00000004455'], 'P28907': ['ENSG00000004468'], 'O14561': ['ENSG00000004779'], 'Q16654': ['ENSG00000004799'], 'Q86VW1': ['ENSG00000004809'], 'Q9UJS0': ['ENSG00000004864'], 'P02730': ['ENSG00000004939'], 'P05141': ['ENSG00000005022'], 'P52435': ['ENSG00000005075'], 'Q53FZ2': ['ENSG00000005187'], 'P05164': ['ENSG00000005381'], 'P27169': ['ENSG00000005421'], 'Q9UKG9': ['ENSG00000005469'], 'P21439': ['ENSG00000005471'], 'Q15119': ['ENSG00000005882'], 'Q09428': ['ENSG00000006071'], 'Q53H12': ['ENSG00000006530'], 'P43353': ['ENSG00000006534'], 'O75223': ['ENSG00000006625'], 'P51689': ['ENSG00000006756'], 'P41247': ['ENSG00000006757'], 'O95045': ['ENSG00000007001'], 'P43034

In [None]:
# Assigns kcat values from the BRENDA/SABIO-RK databases (and optional custom values)
# to the reactions of the model
# inputs:   - sbml_path: path to the SBML of the metabolic model
#           - project_folder: path to the output folder
#           - project_name: project name
#           - organism: scientific name of the organism (used for the taxonomy-dependent search of kcat values)
#           - kcat_database_path: path to the combined SABIO-RK & BRENDA kcat<->reaction mapping JSON
#             (from data_create_combined_kcat_database.py)
#           - protein_kcat_database_path: *optional* path to the custom user-defined
#             kcat<->protein JSON (default: "")
#           - type_of_kcat_selection: can be "mean", "median" or "random"
# output:   - JSON with kcat values mapped to the reactions
from autopacmen.submodules.get_reactions_kcat_mapping import get_reactions_kcat_mapping

combined_kcat_database = str(setup_dir / "sabio.json")

get_reactions_kcat_mapping(
    sbml_path=str(xml_model_dir),
    project_folder=str(input_dir),
    project_name="A375",
    organism="Homo sapiens",
    kcat_database_path=combined_kcat_database,
    protein_kcat_database_path="",
    type_of_kcat_selection="mean",
)

# Applies the sMOMENT method to the given SBML model
# inputs:   - input_sbml_path: path to the SBML of the metabolic model (as string)
#           - output_sbml_name: name of the resulting sMOMENT model
#           - project_folder: path to the project folder (as string)
#           - project_name: project name
#           - type_of_default_kcat_selection: here "mean", as in the kcat mapping step
# NOTE(review): upstream AutoPACMEN may additionally expect an `excluded_reactions`
# argument for this function - TODO confirm against the local autopacmen copy.
from autopacmen.submodules.create_smoment_model_reaction_wise import create_smoment_model_reaction_wise_with_sbml

smoment_model_name = "A375_sMOMENT"

create_smoment_model_reaction_wise_with_sbml(
    input_sbml_path=str(xml_model_dir),
    output_sbml_name=smoment_model_name,
    project_folder=str(input_dir),
    project_name="A375",
    type_of_default_kcat_selection="mean",
)

ModuleNotFoundError: No module named 'autopacmen'