### Checks all models given in the dict

In [1]:
# imports
import cobra
import yaml

# config preperation
import sys # append path

sys.path.append('../scripts/')
import helperFunction as hf

config_name = 'model_config'
config_path = f'../config/{config_name}.yaml'

# load config
with open(config_path, 'r') as file:
    config = yaml.safe_load(file)

In [None]:
# uses validation function from cobra and stores the results in a dataframe / csv
# takes 1.30 min
import cobra
import os  
import pandas as pd
import yaml

config_name = 'model_config'
config_path = f'../config/{config_name}.yaml'

# load config
with open(config_path, 'r') as file:
    config = yaml.safe_load(file)

## load models
models = {
    'yli647_corr': config['models']['yli647_corr'], 
    'yli647_uncorr': config['models']['yli647_uncorr'], 
    'iYali4_corr': config['models']['yli4_corr'], 
    'iYali4_uncorr': config['models']['yli4_uncorr'], 
    'iYli_2.0_corr': config['models']['yli2.0_corr'], 
    #'iYli_2.0_uncorr': config['models']['yli2.0_uncorr'], 
    'iMK735_corr': config['models']['yliMK735_corr'], 
    'iMK735_uncorr': config['models']['yliMK735_uncorr'], 
    'iNL895_corr': config['models']['yliNL895_corr'],
    'iNL895_uncorr': config['models']['yliNL895_uncorr'],
    'PpaMBEL1254': config['models']['ppaMBEL1254'],
    'iYli21': config['models']['yli21'],
    'iMT1026v3': config['models']['ppa1026v3'],
    'iMT1026Chan2017': config['models']['ppa1026Chan'],
    'iLC915': config['models']['ppaiLC915'],
}

# create and store error dataframe:
error_dict = { 'Model_Name': [],
'SBML_FATAL': [],
 'SBML_ERROR': [],
 'SBML_SCHEMA_ERROR': [],
 'SBML_WARNING': [],
 'COBRA_FATAL': [],
 'COBRA_ERROR': [],
 'COBRA_WARNING': [],
 'COBRA_CHECK': []}

# iterate the models and store the errors in the error_dict
for model_name, model_path in models.items():
    # load and validate model
    model, report = cobra.io.validate_sbml_model(model_path)
    error_dict['Model_Name'].append(model_name)
    for key, value in report.items():
        error_dict[key].append(value)
error_df = pd.DataFrame.from_dict(error_dict)

os.makedirs('/'.join(config['results']['model_errors'].split('/')[0:-1]), exist_ok=True)  
# save error_df
error_df.to_csv(config['results']['model_errors'], sep='\t', index=False)



In [3]:
# print the error for all models
## load models
models = {
    'yli647_corr': config['models']['yli647_corr'], 
    'yli647_uncorr': config['models']['yli647_uncorr'], 
    'iYali4_corr': config['models']['yli4_corr'], 
    'iYali4_uncorr': config['models']['yli4_uncorr'], 
    'iYli_2.0_corr': config['models']['yli2.0_corr'], 
    #'iYli_2.0_uncorr': config['models']['yli2.0_uncorr'], 
    'iMK735_corr': config['models']['yliMK735_corr'], 
    'iMK735_uncorr': config['models']['yliMK735_uncorr'], 
    'iNL895_corr': config['models']['yliNL895_corr'],
    'iNL895_uncorr': config['models']['yliNL895_uncorr'],
    'PpaMBEL1254': config['models']['ppaMBEL1254'],
    'iYli21': config['models']['yli21'],
    'iMT1026v3': config['models']['ppa1026v3'],
    'iMT1026Chan2017': config['models']['ppa1026Chan'],
    'iLC915': config['models']['ppaiLC915'],
}

# iterater models dictionary
for model_name, model_path in models.items():
    # load and validate model
    model, report = cobra.io.validate_sbml_model(model_path)
    print(model_name)
    for key, value in report.items():
        if "fatal" in key.lower() or "error" in key.lower():
            print(f'{key}: {value}')
    # check if model is non type:
    if isinstance(model, type(None)):
        print('Model is NoneType')
    print('----------------------------------')
## No Error and Warning

# iYli21, iYli647 iYL: no error and no worning

## No Error but Warning
# iYali4, iYli_2.0, iMK735:
# 'No objective in listOfObjectives'
# iNL895: many reactions without any reactants or products
# iMT1026v3: Formula in the notes section is discouraged

## Error
# iMT1026Chan2017: Error in chemical formula



yli647_corr
SBML_FATAL: []
SBML_ERROR: []
SBML_SCHEMA_ERROR: []
COBRA_FATAL: []
COBRA_ERROR: []
----------------------------------
yli647_uncorr
SBML_FATAL: []
SBML_ERROR: ["E0 (Error): XML content (core, L13765); Missing required attribute; Missing a required XML attribute. The http://www.sbml.org/sbml/level2 reaction attribute 'id' is required.\n", "E1 (Error): XML content (core, L13838); Missing required attribute; Missing a required XML attribute. The http://www.sbml.org/sbml/level2 reaction attribute 'id' is required.\n", "E2 (Error): XML content (core, L13765); Missing required attribute; Missing a required XML attribute. The http://www.sbml.org/sbml/level2 reaction attribute 'id' is required.\n", "E3 (Error): XML content (core, L13838); Missing required attribute; Missing a required XML attribute. The http://www.sbml.org/sbml/level2 reaction attribute 'id' is required.\n"]
SBML_SCHEMA_ERROR: []
COBRA_FATAL: []
COBRA_ERROR: ['Required attribute \'id\' cannot be found or parsed in

In [4]:
# check 'PpaMBEL1254' with validation function
model, report = cobra.io.validate_sbml_model(config['models']['ppaMBEL1254'])

# for key, value in report.items():
#     if "fatal" in key.lower() or "error" in key.lower():
#         print(f'{key}: {value}')
# # is not valid SBML

# set the objective for the model and write it again (default objective: R01288)
model.objective = 'R01288'
cobra.io.write_sbml_model(model, config['models']['ppaMBEL1254'])


In [5]:
# iMK735_corr, iYli_2.0_corr, iNL895_corr
# config['models']['yliMK735_corr'], # biomass_C
# config['models']['yli2.0_corr'], # biomass_C
# config['models']['yliNL895_corr'], biomass_C
models = [config['models']['yliMK735_corr'], config['models']['yli2.0_corr'], config['models']['yliNL895_corr']]
for model_path in models:
    model, report = cobra.io.validate_sbml_model(model_path)
    if not isinstance(model, type(None)):
        model.objective = 'biomass_C'
        cobra.io.write_sbml_model(model, model_path)
        print(f'{model_path} is written')
    else:
        print(f'{model_path} is NoneType')


../data/models/Yarrowia_lipolytica/iMK735/iMK735_corr.xml is written
../data/models/Yarrowia_lipolytica/iYli_2.0/iYli_2.0_corr_corr_v2.xml is written
../data/models/Yarrowia_lipolytica/iNL895/iNL895_corr.xml is written


## Investigate Models and their properties

### Number of Reactions, Metabolites, Genes and Compartments -> Table

In [5]:
# helfende funktionen
def numberDf(model, _modelPrefix, _outputDir, _sep, _createOutput, _verbose):
    '''Check numbers of genes, metabolites and reactions and safe them to dataframe'''
    # investigate number and print numbers
    modelName = model.name
    reacNum = len(model.reactions)
    metaboNum = len(model.metabolites)
    geneNum = len(model.genes)
    if _verbose:
        print('The {name} has {reacNum} reactions, {metaboNum} metabolites and {geneNum} genes'.format(name=modelName, reacNum=reacNum, metaboNum=metaboNum, geneNum=geneNum))
    # generate dataframe
    tupleList = [(_modelPrefix,geneNum,metaboNum,reacNum)]
    modelNumbers = pd.DataFrame(tupleList, columns=['modelName','#Genes','#Metabolites','#Reactions'])
    if _createOutput:
        if _verbose:
            print('The model numbers will be saved to a .csv file.', "Path: ", _outputDir+_modelPrefix+'_modelNumbers.csv')
        modelNumbers.to_csv(_outputDir + _modelPrefix + '_modelNumbers.csv', index=False, sep=_sep)
    return modelNumbers

In [17]:
iYli21_model.medium.annotations.compartment_shortlist

AttributeError: 'dict' object has no attribute 'annotations'

In [12]:
# check the model properties and build a dataframe
seperator = '\t'

# Funktionsdefinition: numberDf(model, _modelPrefix, _outputDir, _sep, _createOutput, _verbose)
# #### TEST ####
# iYli21_model = cobra.io.read_sbml_model(config['models']['yli21'])
# test_df = numberDf(iYli21_model, 'iYli21', config['results']['model_numbers_directory'], seperator, True, True)

### next: check how to find compartment number

numberDFs = []
# iterate all models 
for model_name, model_path in models.items():
    # load and validate model
    model, report = cobra.io.validate_sbml_model(model_path)
    # check if model is nontype 
    if isinstance(model,type(None)):
        print(f'{model_name} is NoneType')
        continue
    numberDFs.append(numberDf(model,model_name, config['results']['model_numbers_directory'], seperator, True, False))

# concat all dataframes
modelNumbers = pd.concat(numberDFs)

outpath_numbers = config['results']['model_numbers_directory'] + 'workingModelNumbers.csv'
# save modelNumbers
modelNumbers.to_csv(outpath_numbers, sep='\t', index=False)

        

iMT1026Chan2017 is NoneType


IsADirectoryError: [Errno 21] Is a directory: '../workflow/results/quality/'

### Find reactions for Yli and Ppa models

In [7]:
# load models
iYli21_model = cobra.io.read_sbml_model(models['iYli21'])


In [13]:
# oxcaloacetate (https://www.kegg.jp/entry/C00036)
# participates in malate dehydrogenase (R533) (100% producing)
mdh = iYli21_model.reactions.get_by_id('R533')
mdh # => m77[m]: oxaloacetate[m]
# oxcaloacetate = iYli21_model.metabolites.get_by_id('m77[m]')
# # oxcaloacetate.summary() #=> 100% produced by R533
# hf.formulaWithNames(mdh) # 'NAD_C21H27N7O14P2 + (S)-malate_C4H6O5 <=> H+_p+1 + NADH_C21H29N7O14P2 + oxaloacetate_C4H4O5'

0,1
Reaction identifier,R533
Name,malate dehydrogenase
Memory address,0x2a5a24ca0
Stoichiometry,m27[m] + m538[m] <=> m28[m] + m30[m] + m77[m]  NAD_C21H27N7O14P2 + (S)-malate_C4H6O5 <=> H+_p+1 + NADH_C21H29N7O14P2 + oxaloacetate_C4H4O5
GPR,YALI1D20676g or YALI1E17214g
Lower bound,-1000.0
Upper bound,1000.0
