# Autopacmen Workflow
This notebook collects the individual AutoPACMEN scripts into one workflow that turns an SBML model into an sMOMENT (protein-constraint-enhanced) model.

In [1]:
# setup code: execute and continue below

# general imports
import pathlib
import os
import cobra
import sys

# Resolve every location relative to the notebook's working directory so that
# no absolute, machine-specific path is needed.
current_dir = pathlib.Path(os.getcwd())
project_root = current_dir.parent.parent

# enabling local imports of autopacmen scripts
#   - project_root / "autopacmen" is the entry this notebook has always added;
#     it is kept so that existing setups keep working.
#   - project_root itself is additionally required for the package-style imports
#     used in the cells below (`from autopacmen.submodules... import ...`) when
#     the autopacmen package sits directly inside the project folder.
# The membership check keeps sys.path free of duplicates when the cell is re-run.
for import_root in (project_root / "autopacmen", project_root):
    if str(import_root) not in sys.path:
        sys.path.append(str(import_root))

# create folders for input & output data
smoment_data_dir = current_dir.parent / "datasets" / "sMOMENT"
setup_dir = smoment_data_dir / "autopacmen_setup"    # downloaded databases + derived JSON files
input_dir = smoment_data_dir / "autopacmen_input"    # project folder handed to the autopacmen steps
output_dir = smoment_data_dir / "autopacmen_output"
for data_dir in (setup_dir, input_dir, output_dir):
    # parents=True mirrors os.makedirs: missing intermediate folders are created too
    data_dir.mkdir(parents=True, exist_ok=True)

In [6]:
'''
In order for sMOMENT to work, your project folder should look like this:

project/
├── autopacmen/
│   ├── __init__.py
│   ├── ... .py
│   └── submodules/
│       ├── __init__.py
│       └── ... .py
└── thesis_scripts/
    ├── datasets/
    │   └── sMOMENT/
    │       ├── autopacmen_setup/     (created by the setup cell; holds the downloaded databases)
    │       ├── autopacmen_input/     (created by the setup cell)
    │       └── autopacmen_output/    (created by the setup cell)
    ├── models/
    │   └── your_ftINIT_model.xml     (SBML file, see model_name below)
    └── sMOMENT/
        └── autopacmen_workflow.ipynb


Before executing the scripts, there are a few necessary precautions
(all paths are relative to the thesis_scripts folder):
-   download the BIGG metabolites data from http://bigg.ucsd.edu/data_access (last accessed: 09/12/2024) and
    save it as "datasets/sMOMENT/autopacmen_setup/bigg_models_metabolites.txt"
-   download the BRENDA database data from https://www.brenda-enzymes.org/download.php (last accessed: 09/12/2024) and
    save it as "datasets/sMOMENT/autopacmen_setup/brenda_2023_1.txt"
-   enter the file name of your model created in ftINIT (located in "models/"), the project name and
    the file name of the sMOMENT model to be written below:
'''
model_name = "A375_ftINIT_1+1_model.xml"            # input SBML model inside thesis_scripts/models/
project_name = "A375_11"                            # passed as project name to the autopacmen steps
output_model_name = "A375_1+1_sMOMENT_model.xml"    # name of the protein-constraint-enhanced SBML




In [3]:
# Location of the input model inside the sibling "models" folder.
# Note: despite its name, model_dir is the path of the model FILE, not of a directory.
model_dir = current_dir.parent.joinpath("models", model_name)

In [4]:
# Data preparation: convert the downloaded BIGG and BRENDA files into JSON files

# raw database files expected inside the setup folder
# (despite the *_dir names, both variables point to files)
bigg_dir = setup_dir.joinpath("bigg_models_metabolites.txt")
brenda_dir = setup_dir.joinpath("brenda_2023_1.txt")

# JSON files written and re-used by the steps of this cell
brenda_json_path = str(setup_dir.joinpath("brenda.json"))
brenda_model_json_path = str(setup_dir.joinpath("brenda_model_specific.json"))

# Step 1 - BIGG metabolites file -> JSON
#   arguments: - "bigg_models_metabolites.txt" file (as string)
#              - folder that receives the JSON file (as string)
#   result:    - JSON file named "bigg_id_name_mapping.json"
#   data source: http://bigg.ucsd.edu/data_access (accessed: 09/12/2024)
from autopacmen.submodules.parse_bigg_metabolites_file import parse_bigg_metabolites_file
parse_bigg_metabolites_file(str(bigg_dir), str(setup_dir))

# Step 2 - sMOMENT-relevant BRENDA data -> JSON
#   arguments: - "brenda_2023_1.txt" file (as string)
#              - setup folder (as string), i.e. the folder step 1 wrote
#                "bigg_id_name_mapping.json" into
#                NOTE(review): the earlier note described this argument as the path to the
#                JSON file itself, but the call passes the folder - confirm against the
#                signature of parse_brenda_textfile
#              - output FILE (not folder) for the resulting JSON (as string)
#   result:    - JSON file "brenda.json"
#   data source: https://www.brenda-enzymes.org/download.php (accessed: 09/12/2024)
from autopacmen.submodules.parse_brenda_textfile import parse_brenda_textfile
parse_brenda_textfile(str(brenda_dir), str(setup_dir), brenda_json_path)

# Step 3 - restrict "brenda.json" to the given model
#   arguments: - SBML file of the model
#              - "brenda.json" from step 2 (as string)
#              - output FILE (not folder)
#   result:    - JSON file "brenda_model_specific.json"
from autopacmen.submodules.parse_brenda_json_for_model import parse_brenda_json_for_model
parse_brenda_json_for_model(str(model_dir), brenda_json_path, brenda_model_json_path)





In [8]:
# Maps kcat values derived from the SABIO-RK database to the EC numbers of the model,
# allowing for "wildcards" (i.e. values from related proteins are used if no kcat values are available)
# inputs:   - path to the model SBML file (as string)
#           - path to the output file (not folder) (as string)
#           - path to the "bigg_id_name_mapping.json" file created during data preparation (as string)
# outputs:  - JSON file named "sabio_rk.json" (if not stated otherwise in the inputs)
# SABIO-RK was accessed on 10/12/2024
from autopacmen.submodules.parse_sabio_rk_for_model import parse_sabio_rk_for_model_with_sbml
parse_sabio_rk_for_model_with_sbml(
    str(model_dir),  # converted to str like in every other step; a bare Path object was passed before
    str(setup_dir / "sabio_rk.json"),
    str(setup_dir / "bigg_id_name_mapping.json")
)

Starting EC numbers kcat search in SABIO-RK...
Wildcard level 0...
['2.1.1.201', '1.1.1.9', '4.1.1.6', '2.7.8.11', '2.4.2.11', '2.7.1.140', '3.6.3.8', '2.1.2.2', '1.5.8.4', '1.1.1.181', '2.6.1.1', '3.5.1.4', '2.8.2.4', '1.13.11.33', '6.1.1.3', '5.4.2.6', '1.14.13.30', '1.4.3.10', '4.1.1.50', '2.1.2.10', '2.3.1.20', '2.3.1.51', '1.13.11.31', '1.14.99.-', '3.1.6.8', '1.5.1.2', '1.1.1.41', '2.4.2.3', '3.2.1.46', '4.1.2.13', '2.4.1.101', '2.7.4.10', '2.4.1.212', '3.6.5.4', '1.3.1.20', '2.7.1.74', '3.5.3.1', '4.2.1.74', '3.4.11.1', '2.4.1.175', '3.5.1.49', '1.13.11.52', '3.5.4.16', '3.1.3.53', '3.1.4.17', '3.2.1.31', '2.7.11.2', '1.1.1.30', '3.4.11.6', '2.3.1.50', '1.14.99.9', '1.1.99.1', '1.1.1.237', '2.4.1.79', '1.11.2.2', '2.6.1.5', '1.17.1.4', '4.1.3.4', '3.4.19.9', '1.3.3.2', '3.2.1.23', '1.1.1.153', '2.4.1.-', '2.6.1.22', '2.3.1.86', '2.5.1.22', '2.7.1.24', '2.1.1.103', '1.13.11.27', '2.3.2.2', '1.13.11.6', '1.3.8.8', '1.1.1.178', '2.7.8.1', '3.6.5.6', '1.14.11.2', '3.1.1.1', '2.4.2.1

In [9]:
# Merge the EC-number -> kcat mappings from SABIO-RK and BRENDA into a single JSON
#   arguments: - SABIO-RK JSON with the kcat values for the EC numbers of the model reactions (as string)
#              - model-specific BRENDA kcat JSON (as string)
#              - output FILE (not folder)
#   result:    - combined JSON file named "sabio.json"
from autopacmen.submodules.create_combined_kcat_database import create_combined_kcat_database

sabio_rk_json_path = str(setup_dir.joinpath("sabio_rk.json"))
brenda_specific_json_path = str(setup_dir.joinpath("brenda_model_specific.json"))
combined_kcat_json_path = str(setup_dir.joinpath("sabio.json"))

create_combined_kcat_database(sabio_rk_json_path, brenda_specific_json_path, combined_kcat_json_path)

In [10]:
# Generate the xlsx spreadsheets in which the organism-specific input data is entered
#   arguments: - SBML model file
#              - folder in which the sheets are created
#              - project name
#   result:    - xlsx spreadsheets for the organism data
#                (only the enzyme_stoichiometries and protein_data sheets are essential)
from autopacmen.submodules.get_initial_spreadsheets import get_initial_spreadsheets_with_sbml

spreadsheet_args = (str(model_dir), str(input_dir), str(project_name))
get_initial_spreadsheets_with_sbml(*spreadsheet_args)

INFO: Reaction MAR08360 does not have a KEGG ID annotation
INFO: Reaction MAR05396 does not have a KEGG ID annotation
INFO: Reaction MAR09727 does not have a KEGG ID annotation
INFO: Reaction MAR05397 does not have a KEGG ID annotation
INFO: Reaction MAR05398 does not have a KEGG ID annotation
INFO: Reaction MAR05399 does not have a KEGG ID annotation
INFO: Reaction MAR05400 does not have a KEGG ID annotation
INFO: Reaction MAR05401 does not have a KEGG ID annotation
INFO: Reaction MAR08578 does not have a KEGG ID annotation
INFO: Reaction MAR08591 does not have a KEGG ID annotation
INFO: Reaction MAR08592 does not have a KEGG ID annotation
INFO: Reaction MAR04130 does not have a KEGG ID annotation
INFO: Reaction MAR08762 does not have a KEGG ID annotation
INFO: Reaction MAR08727 does not have a KEGG ID annotation
INFO: Reaction MAR08728 does not have a KEGG ID annotation
INFO: Reaction MAR08500 does not have a KEGG ID annotation
INFO: Reaction MAR08501 does not have a KEGG ID annotati

In [12]:
# Build a JSON holding the UniProt protein masses of all proteins that occur in the
# gene rules of the metabolic model
#   arguments: - SBML model file (as string)
#              - project folder (as string)
#              - project name
#   result:    - JSON mapping file named <project name> + "_protein_id_mass_mapping.json"
from autopacmen.submodules.get_protein_mass_mapping import get_protein_mass_mapping_with_sbml

sbml_path_text = str(model_dir)
project_folder_text = str(input_dir)
get_protein_mass_mapping_with_sbml(sbml_path_text, project_folder_text, str(project_name))

{'O60762': ['ENSG00000000419'], 'Q9BTY2': ['ENSG00000001036'], 'P48506': ['ENSG00000001084'], 'Q16850': ['ENSG00000001630'], 'P28838': ['ENSG00000002549'], 'O14792': ['ENSG00000002587'], 'P19801': ['ENSG00000002726'], 'Q9NR63': ['ENSG00000003137'], 'Q9Y216': ['ENSG00000003987'], 'P52569': ['ENSG00000003989'], 'P54819': ['ENSG00000004455'], 'P28907': ['ENSG00000004468'], 'O14561': ['ENSG00000004779'], 'Q16654': ['ENSG00000004799'], 'Q86VW1': ['ENSG00000004809'], 'Q9UJS0': ['ENSG00000004864'], 'P02730': ['ENSG00000004939'], 'P05141': ['ENSG00000005022'], 'P52435': ['ENSG00000005075'], 'Q53FZ2': ['ENSG00000005187'], 'P05164': ['ENSG00000005381'], 'P27169': ['ENSG00000005421'], 'Q9UKG9': ['ENSG00000005469'], 'P21439': ['ENSG00000005471'], 'Q15119': ['ENSG00000005882'], 'Q09428': ['ENSG00000006071'], 'Q53H12': ['ENSG00000006530'], 'P43353': ['ENSG00000006534'], 'O75223': ['ENSG00000006625'], 'P51689': ['ENSG00000006756'], 'P41247': ['ENSG00000006757'], 'O95045': ['ENSG00000007001'], 'P43034

In [13]:
# Assign kcat values from the BRENDA/SABIO-RK databases (plus optional custom values)
# to the reactions of the model
#   sbml_path                  - SBML file of the metabolic model
#   project_folder             - output folder
#   project_name               - project name
#   organism                   - scientific name of the organism (used for the taxonomy-dependent kcat search)
#   kcat_database_path         - SABIO-RK & BRENDA kcat<->reaction mapping JSON
#                                (from data_create_combined_kcat_database.py)
#   protein_kcat_database_path - *optional* custom user-defined kcat<->protein JSON (default: "")
#   type_of_kcat_selection     - can be "mean", "median" or "random"
#   result: JSON with the kcat values mapped to the reactions

from autopacmen.submodules.get_reactions_kcat_mapping import get_reactions_kcat_mapping

kcat_mapping_settings = {
    "sbml_path": str(model_dir),
    "project_folder": str(input_dir),
    "project_name": str(project_name),
    "organism": 'Homo sapiens',
    "kcat_database_path": str(setup_dir / "sabio.json"),
    "protein_kcat_database_path": "",
    "type_of_kcat_selection": 'mean',
}
get_reactions_kcat_mapping(**kcat_mapping_settings)

***
Reaction: MAR03905
Forward kcat: 2.569637839063218
Reverse kcat: 2.569637839063218

***
Reaction: MAR03907
Forward kcat: 3.944748461345614
Reverse kcat: 3.944748461345614

***
Reaction: MAR04097
Forward kcat: 107.02857142857142
Reverse kcat: 107.02857142857142

***
Reaction: MAR04099
Forward kcat: 107.02857142857142
Reverse kcat: 107.02857142857142

***
Reaction: MAR04108
Forward kcat: 107.02857142857142
Reverse kcat: 107.02857142857142

***
Reaction: MAR04133
Forward kcat: 107.02857142857142
Reverse kcat: 107.02857142857142

***
Reaction: MAR04281
Forward kcat: 220.87253564705884
Reverse kcat: 220.87253564705884

***
Reaction: MAR04388
Forward kcat: 220.87253564705884
Reverse kcat: 220.87253564705884

***
Reaction: MAR04283
Forward kcat: 8.2
Reverse kcat: 8.2

***
Reaction: MAR08357
Forward kcat: 18.170115717216483
Reverse kcat: 18.170115717216483

***
Reaction: MAR04379
Forward kcat: 241.21004633333334
Reverse kcat: 241.21004633333334

***
Reaction: MAR04301
Forward kcat: 241.210

In [None]:
# Run the sMOMENT method on the given SBML model
#   input_sbml_path                - input SBML model
#   output_sbml_name               - name of the output SBML (protein-constraint-enhanced model)
#   project_folder                 - folder with the reaction<->kcat mapping, the protein<->mass mapping
#                                    and the enzyme stoichiometry spreadsheet
#   project_name                   - project name
#   excluded_reactions             - reactions for which the pseudometabolite of the protein pool
#                                    shall not be introduced
#   type_of_default_kcat_selection - can be "mean", "median" or "random"
#   result: protein-constraint-enhanced version of the input model

from autopacmen.submodules.create_smoment_model_reaction_wise import create_smoment_model_reaction_wise_with_sbml

# reactions listed here are excluded from the pseudometabolite constraint (none by default)
reactions_without_protein_pool = []

create_smoment_model_reaction_wise_with_sbml(
    input_sbml_path=str(model_dir),
    output_sbml_name=str(output_model_name),
    project_folder=str(input_dir),
    project_name=str(project_name),
    excluded_reactions=reactions_without_protein_pool,
    type_of_default_kcat_selection="mean",
)

# *** Important Note ***
# Because of the size of the Human1 model, this step might take days to compute on a standard computer. Running it on
# a cluster is therefore recommended. The easiest way to do so is to execute the script from the console with the
# following command (split over several lines here for readability; enter it as a single line):

# python modeling_create_smoment_model.py 
# --input_sbml_path <project_directory>/thesis_scripts/models/<model_sbml_name>.xml 
# --output_sbml_name <output_model_name>.xml 
# --project_folder <project_directory>/thesis_scripts/datasets/sMOMENT/autopacmen_input/ 
# --project_name <project_name> 
# --excluded_reactions <semicolon separated list of reactions to exclude> 
# --type_of_default_kcat_selection 'mean'