In [3]:
#import packages
from libchebipy import ChebiEntity
import libchebipy
import pubchempy as pcp
import io
import requests
import json
import cobra
import functions
from functions import *

In [4]:
model=cobra.io.load_matlab_model('Data/iFpraus_v_1_0.mat')
info(model)

Metabolites :  833
Reactions :  1030
Genes :  602
Compartments :  {'c': '', 'e': ''}
Objective function : 
 Maximize
1.0*Biomass_FP - 1.0*Biomass_FP_reverse_ee33b 



### What are the different reactions
e: extracellular

c: cytosol

EX_ : exchange reaction
### Gifu Anaerobic Medium (mGAM) from:
https://hyserve.com/files/05433_GAM-Broth_Modified_final.pdf


- Peptone, Soya Peptone, Proteose Peptone: source of amino acids -> most likely all of them 
- Yeast extract, liver extract, meat extract, digested serum -> amino acids but also a lot of other crap, will leave this till the end 
- Dextrose -> same as glucose (https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI:17634) and so -> glc_D
- Soluble starch -> starch1200 (potatoe starch)
- L-Tryptophane -> trp__L
- L-Cystein Hydrochloride -> cys_L and cl and h (hydrochloride<-> HCL becomes cl and h)
- Sodium Thioglycolate -> thiog and na1
- L-Arginine -> arg_L
- Vitamine K1 -> phllqne https://pubchem.ncbi.nlm.nih.gov/compound/Vitamin-K1#section=Depositor-Supplied-Synonyms 
- Hemin -> Iron and Chlorine  fe3 and cl  and perhaps ppp9 https://pubchem.ncbi.nlm.nih.gov/#query=3-%5B18-(2-carboxyethyl)-8%2C13-bis(ethenyl)-3%2C7%2C12%2C17-tetramethylporphyrin-21%2C23-diid-2-yl%5Dpropanoic%20acid%3Biron(3%2B)%3Bchloride 
- Potassium Dihydrogen Phosphate -> pi, k, h
- Sodium Chloride -> na1 cl

Digested serum : https://www.nebiolabs.com.au/-/media/catalog/datacards-or-manuals/p8108datasheet-lot0021306.pdf	

Generally useful page ; https://pubchem.ncbi.nlm.nih.gov/#query=Hemin

check also: https://opencobra.github.io/cobratoolbox/latest/tutorials/tutorialMetabotoolsI.html 

| Component                      | Concentration (g/L) |
|--------------------------------|---------------------|
| Bacteriological Peptome        | 5.0                 |
| Soy Peptome                    | 3.0                 |
| Proteose Peptome               | 5.0                 |
| Digested Serum                 | 10                  |
| Yeast Extract                  | 2.5                 |
| Meat extract                   | 2.2                 |
| Liver Extract                  | 1.2                 |
| Dextrose                       | 0.5                 |
| L-tryptophan                   | 0.2                 |
| l-cysteine hydrochloride       | 0.3                 |
| L-argenine                     | 1                   |
| Sodium thioglycolate           | 0.3                 |
| L-argenine                     | 1                   |
| Vitamin K1                     | 0.005               |
| Hemin                          | 0.010               |
| Potassium dihydrogen phosphate | 2.5                 |
| NaCl                           | 3                   |

In [5]:
# Get medium from model reactions
root_model = model.copy()
current_medium = [metab.lstrip('EX_') for metab in list(root_model.medium.keys())]

# Check which metabolites control growth by removing one at a time and seeing the results
def growth_stoppers(model, metabolites):
    results = {}
    for metabolite in metabolites:
        metabolites.remove(metabolite.lstrip('EX_'))
        test_model = medium(model, metabolites)
        results[metabolite.lstrip('EX_')] = test_model.slim_optimize()
        metabolites = [metab.lstrip("EX_") for metab in metabolites]
        metabolites.append(metabolite.lstrip('EX_'))

    import pandas as pd
    results_df = pd.DataFrame(results.items(), columns=["metab","growth"]).sort_values('growth', ascending=False)

    return results_df

growth_check_df = growth_stoppers(root_model, current_medium)

# Filtering the dataframe to find the metabolites that hinder growth the most
# (If growth is less than the mean, then it's considered to be enough to consider not removing it from the theoretical medium)
# Don't take these out of the medium if you can help it:
mandatory = growth_check_df[growth_check_df['growth'] < growth_check_df['growth'].mean()]
mandatory

Unnamed: 0,metab,growth
64,glyc3p(e),5.518982
119,pi(e),5.518982
110,nac(e),7.8792e-13
141,trp_L(e),6.209833e-15
87,k(e),3.821436e-15
41,cobalt2(e),3.598136e-15
43,cu2(e),3.598136e-15
50,fol(e),1.340104e-15
131,ser_L(e),-2.866077e-15
39,cl(e),-2.943929e-15


In [6]:
### Remove metabolites from the medium that can't be justified
# This part is where the metabolites for each component of the media are removeed
# This is entirely subjective and should be based on some kind of prior knowledge
# (Please don't delete lines, simply comment them out <3)

# current_medium.remove('EX_3mop(e)')
current_medium.remove('EX_4abz(e)')
current_medium.remove('EX_5oxpro(e)')
# current_medium.remove('EX_Lcyst(e)')
# current_medium.remove('EX_Lcystin(e)')
current_medium.remove('EX_ac(e)')
current_medium.remove('EX_acasp(e)')
current_medium.remove('EX_acgal(e)')
current_medium.remove('EX_acgalglcur(e)')
current_medium.remove('EX_acgam(e)')
current_medium.remove('EX_acglu(e)')
current_medium.remove('EX_acnam(e)')
# current_medium.remove('EX_ade(e)')
current_medium.remove('EX_adocbl(e)')
# current_medium.remove('EX_ala_D(e)')
# current_medium.remove('EX_ala_L(e)')
# current_medium.remove('EX_alaala(e)')
# current_medium.remove('EX_alaasp(e)')
# current_medium.remove('EX_alagln(e)')
# current_medium.remove('EX_alaglu(e)')
# current_medium.remove('EX_alagly(e)')
# current_medium.remove('EX_alahis(e)')
# current_medium.remove('EX_alaleu(e)')
# current_medium.remove('EX_alathr(e)')
current_medium.remove('EX_arab_L(e)')
# current_medium.remove('EX_arg_L(e)')
# current_medium.remove('EX_asn_L(e)')
current_medium.remove('EX_aso3(e)')
current_medium.remove('EX_aso4(e)')
# current_medium.remove('EX_asp_L(e)')
# current_medium.remove('EX_btn(e)')
current_medium.remove('EX_but(e)')
current_medium.remove('EX_butso3(e)')
# current_medium.remove('EX_ca2(e)')
current_medium.remove('EX_cbl1(e)')
current_medium.remove('EX_cbl2(e)')
current_medium.remove('EX_cd2(e)')
current_medium.remove('EX_cellb(e)')
# current_medium.remove('EX_cgly(e)')
current_medium.remove('EX_cit(e)')
# current_medium.remove('EX_cl(e)')
current_medium.remove('EX_co2(e)')
# current_medium.remove('EX_cobalt2(e)') ###
# current_medium.remove('EX_cps_fp(e)') ###
# current_medium.remove('EX_cu2(e)')
# current_medium.remove('EX_cys_L(e)')
current_medium.remove('EX_dhor_S(e)')
current_medium.remove('EX_ethso3(e)')
current_medium.remove('EX_fe2(e)')
# current_medium.remove('EX_fe3(e)')
current_medium.remove('EX_fe3dcit(e)')
# current_medium.remove('EX_fol(e)')
current_medium.remove('EX_for(e)')
current_medium.remove('EX_fru(e)')
current_medium.remove('EX_fum(e)')
current_medium.remove('EX_gal(e)')
current_medium.remove('EX_galur(e)')
# current_medium.remove('EX_gam(e)')
# current_medium.remove('EX_glc(e)')
# current_medium.remove('EX_glcur(e)')
# current_medium.remove('EX_gln_L(e)')
# current_medium.remove('EX_glu_L(e)')
# current_medium.remove('EX_gly(e)')
# current_medium.remove('EX_glyasn(e)')
# current_medium.remove('EX_glyasp(e)')
# current_medium.remove('EX_glyc3p(e)') ###
current_medium.remove('EX_glyc_R(e)')
# current_medium.remove('EX_glygln(e)')
# current_medium.remove('EX_glyglu(e)')
# current_medium.remove('EX_glygly(e)')
# current_medium.remove('EX_glyleu(e)')
# current_medium.remove('EX_glymet(e)')
# current_medium.remove('EX_glyphe(e)')
# current_medium.remove('EX_glypro(e)')
# current_medium.remove('EX_glytyr(e)')
current_medium.remove('EX_gthox(e)')
current_medium.remove('EX_gthrd(e)')
# current_medium.remove('EX_gua(e)')
# current_medium.remove('EX_h(e)')
# current_medium.remove('EX_h2o(e)')
current_medium.remove('EX_h2s(e)')
current_medium.remove('EX_hg2(e)')
current_medium.remove('EX_his_L(e)')
current_medium.remove('EX_hxan(e)')
current_medium.remove('EX_ile_L(e)')
current_medium.remove('EX_ins(e)')
current_medium.remove('EX_inulin(e)')
current_medium.remove('EX_isetac(e)')
# current_medium.remove('EX_k(e)')
current_medium.remove('EX_kesto(e)')
current_medium.remove('EX_kestopt(e)')
current_medium.remove('EX_kestottr(e)')
current_medium.remove('EX_lac_D(e)')
current_medium.remove('EX_lcts(e)')
# current_medium.remove('EX_leu_L(e)')
# current_medium.remove('EX_leugly(e)')
# current_medium.remove('EX_leuleu(e)')
# current_medium.remove('EX_lys_L(e)')
current_medium.remove('EX_mal_L(e)')
current_medium.remove('EX_malt(e)')
current_medium.remove('EX_man(e)')
current_medium.remove('EX_meoh(e)')
# current_medium.remove('EX_met_D(e)')
# current_medium.remove('EX_met_L(e)')
# current_medium.remove('EX_metala(e)')
# current_medium.remove('EX_metsox_R_L(e)')
# current_medium.remove('EX_metsox_S_L(e)')
# current_medium.remove('EX_mg2(e)')
current_medium.remove('EX_mobd(e)')
current_medium.remove('EX_mso3(e)')
# current_medium.remove('EX_na1(e)')
# current_medium.remove('EX_nac(e)') ###
current_medium.remove('EX_ncam(e)')
# current_medium.remove('EX_nh4(e)')
current_medium.remove('EX_o2(e)')
current_medium.remove('EX_orn(e)')
current_medium.remove('EX_orot(e)')
# current_medium.remove('EX_pb(e)')
# current_medium.remove('EX_pect(e)') ###
# current_medium.remove('EX_phe_L(e)')
# current_medium.remove('EX_pi(e)')
current_medium.remove('EX_plac(e)')
# current_medium.remove('EX_pnto_R(e)')
# current_medium.remove('EX_pro_L(e)')
# current_medium.remove('EX_progly(e)')
current_medium.remove('EX_ptrc(e)')
# current_medium.remove('EX_pydam(e)')
# current_medium.remove('EX_pydx(e)') ###
# current_medium.remove('EX_pydxn(e)')
# current_medium.remove('EX_rbflvrd(e)')
# current_medium.remove('EX_ribflv(e)')
current_medium.remove('EX_seln(e)')
# current_medium.remove('EX_ser_L(e)')
# current_medium.remove('EX_so4(e)') ###
current_medium.remove('EX_spmd(e)')
# current_medium.remove('EX_strch1(e)')
current_medium.remove('EX_succ(e)')
current_medium.remove('EX_sulfac(e)')
# current_medium.remove('EX_taur(e)')
# current_medium.remove('EX_thm(e)')
# current_medium.remove('EX_thr_L(e)')
current_medium.remove('EX_thymd(e)')
# current_medium.remove('EX_trp_L(e)')
# current_medium.remove('EX_tyr_L(e)')
current_medium.remove('EX_ura(e)')
current_medium.remove('EX_urate(e)')
current_medium.remove('EX_urea(e)')
# current_medium.remove('EX_val_L(e)')
current_medium.remove('EX_xan(e)')

In [7]:
# Get molar masses from pubchem
# This function takes some time to run, but it gets all of the molar masses for the metabolites in the media
# The result is a dataframe that we'll use to manipulate concentrations and eventually calculate flux lower bounds, but for now we
# need to deal with all of the molar masses that couldn't be found
weights = get_molecular_weights(root_model, current_medium)
weights.head()

Unnamed: 0,query,BiGG_ID,hit,mol_weight
0,L-cysteate,Lcyst(e),23619007,168.15
1,L-Cystine,Lcystin(e),67678,240.3
2,Adenine,ade(e),190,135.13
3,D-Alanine,ala_D(e),71080,89.09
4,L-Alanine,ala_L(e),5950,89.09


In [8]:
# Still missing molar masses:
weights[weights['mol_weight'] == 'NA']

Unnamed: 0,query,BiGG_ID,hit,mol_weight
21,"Capsular polysaccharide (F. prausnitzii, putat...",cps_fp(e),,
37,exchange reaction for glycylglyci,glygly(e),,
40,exchange reaction for glycylphenylalai,glyphe(e),,
48,exchange reaction for leucylglyci,leugly(e),,
61,exchange reaction for pectins,pect(e),,
74,"starch, structure 1 (1,6-{7[1,4-Glc], 4[1,4-Gl...",strch1(e),,


In [9]:
### MANUALLY fill these in
# I just did this by adding together the molar mass of each of the metabolites in the exchange reaction
weights.loc[weights['BiGG_ID'] == 'glygly(e)', 'mol_weight'] = 75.07*2
weights.loc[weights['BiGG_ID'] == 'glyphe(e)', 'mol_weight'] = 75.07+165.19
weights.loc[weights['BiGG_ID'] == 'leugly(e)', 'mol_weight'] = 75.07+131.17
weights.loc[weights['BiGG_ID'] == 'pect(e)', 'mol_weight'] = 194.14
weights.loc[weights['BiGG_ID'] == 'strch1(e)', 'mol_weight'] = 359.33
weights.loc[weights['BiGG_ID'] == 'cps_fp(e)', 'mol_weight'] = 1000

# There should be nothing left; everything now has a molar mass
weights[weights['mol_weight'] == 'NA']

Unnamed: 0,query,BiGG_ID,hit,mol_weight


In [10]:
# Here, we set up the dataframes for some of the main components of the media: peptones and yeast extracts
# For traces, each entry is named EXACTLY after a BiGG metabolite, so the concentration can be added to this metabolite exclusively
# 
# For amino acids (AAs), each entry is named after a substring corresponding to an AA(asp, arg, leu, etc.). 
# This is because we have no way of telling how much of each form of AA and how to differentiate their proportion in the media 
# (ie. how much L-cysteate do we have compared to L-cysteine? We really don't know)
# There is a function that does this for us, and is described in more detail in the cells below
#
# These tables are based on the oxoid data:
# http://www.oxoid.com/UK/blue/prod_detail/prod_detail.asp?pr=LP0037&c=UK&lang=EN&minfo=Y
# All values are in ppm
traces_list = [{'BiGG_ID':'ca2(e)', "conc_ppm_Bact_Peptone_L37": 635, "conc_ppm_Proteose_Peptone_L85": 200, "conc_ppm_soya_peptone_L44": 225, "conc_ppm_yeast_extract": 155},
{'BiGG_ID':'mg2(e)', "conc_ppm_Bact_Peptone_L37": 265, "conc_ppm_Proteose_Peptone_L85": 340, "conc_ppm_soya_peptone_L44": 1530, "conc_ppm_yeast_extract": 205},
{'BiGG_ID':'fe3(e)', "conc_ppm_Bact_Peptone_L37": 22, "conc_ppm_Proteose_Peptone_L85": 42, "conc_ppm_soya_peptone_L44": 90, "conc_ppm_yeast_extract": 52},
{'BiGG_ID':'cu2(e)', "conc_ppm_Bact_Peptone_L37": 1, "conc_ppm_Proteose_Peptone_L85": 10, "conc_ppm_soya_peptone_L44": 2, "conc_ppm_yeast_extract": 2},
{'BiGG_ID':'pb(e)', "conc_ppm_Bact_Peptone_L37": 0.4, "conc_ppm_Proteose_Peptone_L85": 0.1, "conc_ppm_soya_peptone_L44": 0.3, "conc_ppm_yeast_extract": 0.7},
{'BiGG_ID':'mn2(e)', "conc_ppm_Bact_Peptone_L37": 3.4, "conc_ppm_Proteose_Peptone_L85": 0.5, "conc_ppm_soya_peptone_L44": 1.0, "conc_ppm_yeast_extract": 1.3},
{'BiGG_ID':'zn2(e)', "conc_ppm_Bact_Peptone_L37": 9.2, "conc_ppm_Proteose_Peptone_L85": 16, "conc_ppm_soya_peptone_L44": 12, "conc_ppm_yeast_extract": 94},
{'BiGG_ID':'cobalt2(e)', "conc_ppm_Bact_Peptone_L37": 0.1, "conc_ppm_Proteose_Peptone_L85": 0.2, "conc_ppm_soya_peptone_L44": 0.2, "conc_ppm_yeast_extract": 3.1},
{'BiGG_ID':'cl(e)', "conc_ppm_Bact_Peptone_L37": 10000, "conc_ppm_Proteose_Peptone_L85": 80000, "conc_ppm_soya_peptone_L44": 4000, "conc_ppm_yeast_extract": 3000},
{'BiGG_ID':'k(e)', "conc_ppm_Bact_Peptone_L37": 36000, "conc_ppm_Proteose_Peptone_L85": 14000, "conc_ppm_soya_peptone_L44": 33000, "conc_ppm_yeast_extract": 73000},
{'BiGG_ID':'na1(e)', "conc_ppm_Bact_Peptone_L37": 10000, "conc_ppm_Proteose_Peptone_L85": 80000, "conc_ppm_soya_peptone_L44": 4000, "conc_ppm_yeast_extract": 3000}]


aa_list = [{'BiGG_ID':'ala', "conc_ppm_Bact_Peptone_L37": 39200 , "conc_ppm_Proteose_Peptone_L85": 38100, "conc_ppm_soya_peptone_L44": 28700, "conc_ppm_yeast_extract": 9100},
{'BiGG_ID':'arg', "conc_ppm_Bact_Peptone_L37": 49900, "conc_ppm_Proteose_Peptone_L85": 58000, "conc_ppm_soya_peptone_L44": 46400, "conc_ppm_yeast_extract": 33100},
{'BiGG_ID':'asp', "conc_ppm_Bact_Peptone_L37": 60600, "conc_ppm_Proteose_Peptone_L85": 58500, "conc_ppm_soya_peptone_L44": 70600, "conc_ppm_yeast_extract": 70700}, 
{'BiGG_ID':'cys', "conc_ppm_Bact_Peptone_L37": 16600, "conc_ppm_Proteose_Peptone_L85": 1500, "conc_ppm_soya_peptone_L44": 5300, "conc_ppm_yeast_extract": 7600},
{'BiGG_ID':'glu', "conc_ppm_Bact_Peptone_L37": 99300, "conc_ppm_Proteose_Peptone_L85": 137800, "conc_ppm_soya_peptone_L44": 147100, "conc_ppm_yeast_extract": 134900},
{'BiGG_ID':'gly', "conc_ppm_Bact_Peptone_L37": 77100 , "conc_ppm_Proteose_Peptone_L85": 44500, "conc_ppm_soya_peptone_L44": 28300, "conc_ppm_yeast_extract": 59500},
{'BiGG_ID':'ile', "conc_ppm_Bact_Peptone_L37": 38100, "conc_ppm_Proteose_Peptone_L85": 45800, "conc_ppm_soya_peptone_L44": 25100, "conc_ppm_yeast_extract": 48100},
{'BiGG_ID':'leu', "conc_ppm_Bact_Peptone_L37": 37900, "conc_ppm_Proteose_Peptone_L85": 60100, "conc_ppm_soya_peptone_L44": 43100, "conc_ppm_yeast_extract": 60400},
{'BiGG_ID':'lys', "conc_ppm_Bact_Peptone_L37": 43800, "conc_ppm_Proteose_Peptone_L85": 46100, "conc_ppm_soya_peptone_L44": 37700, "conc_ppm_yeast_extract": 54000},
{'BiGG_ID':'met', "conc_ppm_Bact_Peptone_L37": 15800, "conc_ppm_Proteose_Peptone_L85": 10800, "conc_ppm_soya_peptone_L44": 6200, "conc_ppm_yeast_extract": 8000},
{'BiGG_ID':'phe', "conc_ppm_Bact_Peptone_L37": 26000, "conc_ppm_Proteose_Peptone_L85": 46600, "conc_ppm_soya_peptone_L44": 3800, "conc_ppm_yeast_extract": 37800},
{'BiGG_ID':'pro', "conc_ppm_Bact_Peptone_L37": 58300, "conc_ppm_Proteose_Peptone_L85": 59900, "conc_ppm_soya_peptone_L44": 34000, "conc_ppm_yeast_extract": 8800},
{'BiGG_ID':'ser', "conc_ppm_Bact_Peptone_L37": 28100, "conc_ppm_Proteose_Peptone_L85": 21800, "conc_ppm_soya_peptone_L44": 6700, "conc_ppm_yeast_extract": 34200},
{'BiGG_ID':'thr', "conc_ppm_Bact_Peptone_L37": 12500, "conc_ppm_Proteose_Peptone_L85": 27500, "conc_ppm_soya_peptone_L44": 16800, "conc_ppm_yeast_extract": 27300},
{'BiGG_ID':'trp', "conc_ppm_Bact_Peptone_L37": 6600, "conc_ppm_Proteose_Peptone_L85": 7500, "conc_ppm_soya_peptone_L44": 6400, "conc_ppm_yeast_extract": 8500},
{'BiGG_ID':'tyr', "conc_ppm_Bact_Peptone_L37": 3900, "conc_ppm_Proteose_Peptone_L85": 17700, "conc_ppm_soya_peptone_L44": 20900, "conc_ppm_yeast_extract": 49500},
{'BiGG_ID':'val', "conc_ppm_Bact_Peptone_L37": 33300, "conc_ppm_Proteose_Peptone_L85": 41100, "conc_ppm_soya_peptone_L44": 36500, "conc_ppm_yeast_extract": 10000}]

#https://www.sigmaaldrich.com/catalog/product/sial/07533?lang=de&region=DE 
#Vitamins mg/100 g product conversion factor to ppm 1/10000
#Vitamin B6 consists of 3 components see also wikipedia and vitamin B3 2 components https://en.wikipedia.org/wiki/B_vitamins 

vitamins_list=[{'BiGG_ID':'thm(e)', "conc_ppm_Bact_Peptone_L37": 36/10000, "conc_ppm_Proteose_Peptone_L85": 36/10000, "conc_ppm_soya_peptone_L44": 36/10000, "conc_ppm_yeast_extract": 36/10000},
{'BiGG_ID':'ribflv(e)', "conc_ppm_Bact_Peptone_L37": 13/2/10000, "conc_ppm_Proteose_Peptone_L85": 13/2/10000, "conc_ppm_soya_peptone_L44": 13/2/10000, "conc_ppm_yeast_extract": 13/2/10000}, 
{'BiGG_ID':'rbflvrd(e)', "conc_ppm_Bact_Peptone_L37":13/2/10000, "conc_ppm_Proteose_Peptone_L85": 13/2/10000, "conc_ppm_soya_peptone_L44":13/2/10000, "conc_ppm_yeast_extract": 13/2/10000},
{'BiGG_ID':'nac(e)', "conc_ppm_Bact_Peptone_L37": 650/2/10000, "conc_ppm_Proteose_Peptone_L85": 650/2/10000, "conc_ppm_soya_peptone_L44": 650/2/10000, "conc_ppm_yeast_extract": 650/2/10000},
{'BiGG_ID':'ncam(e)', "conc_ppm_Bact_Peptone_L37": 650/2/10000 , "conc_ppm_Proteose_Peptone_L85": 650/2/10000, "conc_ppm_soya_peptone_L44": 650/2/10000, "conc_ppm_yeast_extract":650/2/10000},
{'BiGG_ID':'pydam(e)', "conc_ppm_Bact_Peptone_L37": 3.9/3/10000, "conc_ppm_Proteose_Peptone_L85": 3.9/3/10000, "conc_ppm_soya_peptone_L44": 3.9/3/10000, "conc_ppm_yeast_extract": 3.9/3/10000},
{'BiGG_ID':'pydx(e)', "conc_ppm_Bact_Peptone_L37": 3.9/3/10000+7/10000, "conc_ppm_Proteose_Peptone_L85": 3.9/3/10000+7/10000, "conc_ppm_soya_peptone_L44": 3.9/3/10000+7/10000, "conc_ppm_yeast_extract": 3.9/3/10000+7/10000},
{'BiGG_ID':'pydxn(e)', "conc_ppm_Bact_Peptone_L37": 3.9/3/10000, "conc_ppm_Proteose_Peptone_L85": 3.9/3/10000, "conc_ppm_soya_peptone_L44": 3.9/3/10000, "conc_ppm_yeast_extract": 3.9/3/10000},
{'BiGG_ID':'fol(e)', "conc_ppm_Bact_Peptone_L37": 2.8/10000, "conc_ppm_Proteose_Peptone_L85": 2.8/10000, "conc_ppm_soya_peptone_L44": 2.8/10000, "conc_ppm_yeast_extract": 2.8/10000},
{'BiGG_ID':'pnto_R(e)', "conc_ppm_Bact_Peptone_L37": 11.2/10000 , "conc_ppm_Proteose_Peptone_L85": 11.2/10000, "conc_ppm_soya_peptone_L44": 11.2/10000, "conc_ppm_yeast_extract": 11.2/10000},
{'BiGG_ID':'btn(e)', "conc_ppm_Bact_Peptone_L37": 2/10000, "conc_ppm_Proteose_Peptone_L85": 2/10000, "conc_ppm_soya_peptone_L44": 2/10000, "conc_ppm_yeast_extract": 2/10000}]

medium_trace = pd.DataFrame(traces_list+vitamins_list)
medium_aa = pd.DataFrame(aa_list)

In [11]:
# mGAM uses a "meat extract" so I had to add a df column here
meat_traces_df = pd.DataFrame([{'BiGG_ID':'ca2(e)', 'lab_lemco_meat_extract': 140},
{'BiGG_ID':'mg2(e)', 'lab_lemco_meat_extract': 140},
{'BiGG_ID':'fe(e)', 'lab_lemco_meat_extract': 20},
{'BiGG_ID':'cu2(e)', 'lab_lemco_meat_extract': 2},
{'BiGG_ID':'pb(e)', 'lab_lemco_meat_extract': 0.3},
{'BiGG_ID':'mn2(e)', 'lab_lemco_meat_extract': 0.4},
{'BiGG_ID':'zn2(e)', 'lab_lemco_meat_extract': 18},
{'BiGG_ID':'cobalt2(e)', 'lab_lemco_meat_extract': 0.5},
{'BiGG_ID':'cl(e)', 'lab_lemco_meat_extract': 5500},
{'BiGG_ID':'k(e)', 'lab_lemco_meat_extract': 19000},
{'BiGG_ID':'na1(e)', 'lab_lemco_meat_extract': 5500}])

meat_aa_df = pd.DataFrame([{'BiGG_ID':'ala', 'lab_lemco_meat_extract': 58500},
{'BiGG_ID':'arg', 'lab_lemco_meat_extract': 71000},
{'BiGG_ID':'asp', 'lab_lemco_meat_extract': 51000},
{'BiGG_ID':'cys', 'lab_lemco_meat_extract': 6800},
{'BiGG_ID':'glu', 'lab_lemco_meat_extract': 107100},
{'BiGG_ID':'gly', 'lab_lemco_meat_extract': 108500},
{'BiGG_ID':'leu', 'lab_lemco_meat_extract': 31500},
{'BiGG_ID':'ile', 'lab_lemco_meat_extract': 31700},
{'BiGG_ID':'lys', 'lab_lemco_meat_extract': 47800},
{'BiGG_ID':'met', 'lab_lemco_meat_extract': 26100},
{'BiGG_ID':'phe', 'lab_lemco_meat_extract': 23400},
{'BiGG_ID':'pro', 'lab_lemco_meat_extract': 77900},
{'BiGG_ID':'ser', 'lab_lemco_meat_extract': 18700},
{'BiGG_ID':'thr', 'lab_lemco_meat_extract': 25400},
{'BiGG_ID':'trp', 'lab_lemco_meat_extract': 3400},
{'BiGG_ID':'tyr', 'lab_lemco_meat_extract': 6600},
{'BiGG_ID':'val', 'lab_lemco_meat_extract': 30600}])

medium_trace = medium_trace.merge(meat_traces_df, how='left', on="BiGG_ID")
medium_aa = medium_aa.merge(meat_aa_df, how='left', on="BiGG_ID")


In [12]:
# This cell does a dataframe merge (essentially a SQL left join)
# It combines the columns from the dataframe that contains each of the metabolites in the medium defined earlier and
# the columns for each of the metabolite concentrations in each of the oxoid media ingredients
# Note: this only deals with the "traces" df; the amino acids will be dealt with in the next operation
weights_and_traces = weights.merge(medium_trace, how="left", on="BiGG_ID")
weights_and_traces[weights_and_traces['conc_ppm_Bact_Peptone_L37'] > 0]

Unnamed: 0,query,BiGG_ID,hit,mol_weight,conc_ppm_Bact_Peptone_L37,conc_ppm_Proteose_Peptone_L85,conc_ppm_soya_peptone_L44,conc_ppm_yeast_extract,lab_lemco_meat_extract
16,Biotin,btn(e),171548,244.31,0.0002,0.0002,0.0002,0.0002,
17,Calcium,ca2(e),5460341,40.08,635.0,200.0,225.0,155.0,140.0
19,Chloride,cl(e),312,35.45,10000.0,80000.0,4000.0,3000.0,5500.0
20,Co2+,cobalt2(e),104729,58.9332,0.1,0.2,0.2,3.1,0.5
22,Cu2+,cu2(e),27099,63.55,1.0,10.0,2.0,2.0,2.0
24,Fe3+,fe3(e),29936,55.84,22.0,42.0,90.0,52.0,
25,Folate,fol(e),135398658,441.4,0.00028,0.00028,0.00028,0.00028,
46,K+,k(e),813,39.098,36000.0,14000.0,33000.0,73000.0,19000.0
56,Mg,mg2(e),5462224,24.305,265.0,340.0,1530.0,205.0,140.0
57,Sodium,na1(e),5360545,22.9898,10000.0,80000.0,4000.0,3000.0,5500.0


In [13]:
# Returning to the problem outlined above when we were declaring the dictionaries full of ppm data above:
# How much L-cysteate do we have compared to L-cysteine?
# The solution to this is to take every metabolite containing this substring and assign it to an equal proportion of the given concentration in the media.
# 
# (ie. If the technical data says that there is 16600 ppm of cysteine in bacterial peptone, and we assume that our media contains L-cysteine and L-cysteate that # contain the "cys" substring, they will each get an equal portion of that 16600ppm: 8300ppm each. 
# This scales independent of the number of "cys" variants we assume to be in the media.
# So if there are 4 metabolites containing the "cys" substring, they will each be given 4150ppm from the 16600ppm, and so on.)
#
# The function "split_concentration_proportions()" does this for us. It's in the file "functions.py" and has some insight 
# into what the input and return values should be.
weights_full = split_concentration_proportions(weights_and_traces, medium_aa)
weights_full.head()

Unnamed: 0,query,BiGG_ID,hit,mol_weight,conc_ppm_Bact_Peptone_L37,conc_ppm_Proteose_Peptone_L85,conc_ppm_soya_peptone_L44,conc_ppm_yeast_extract,lab_lemco_meat_extract
0,L-cysteate,Lcyst(e),23619007,168.15,5533.333333,500.0,1766.666667,2533.333333,2266.666667
1,L-Cystine,Lcystin(e),67678,240.3,5533.333333,500.0,1766.666667,2533.333333,2266.666667
2,Adenine,ade(e),190,135.13,0.0,0.0,0.0,0.0,0.0
3,D-Alanine,ala_D(e),71080,89.09,3563.636364,3463.636364,2609.090909,827.272727,5318.181818
4,L-Alanine,ala_L(e),5950,89.09,3563.636364,3463.636364,2609.090909,827.272727,5318.181818


It's easier to have this table here for now, for the cell below

| Component                      | Concentration (g/L) |
|--------------------------------|---------------------|
| Bacteriological Peptome        | 5.0                 |
| Soy Peptome                    | 3.0                 |
| Proteose Peptome               | 5.0                 |
| ~~Digested Serum~~             | ~~10~~              |
| Yeast Extract                  | 2.5                 |
| Meat extract                   | 2.2                 |
| Liver Extract                  | 1.2                 |
| Dextrose                       | 0.5                 |
| L-tryptophan                   | 0.2                 |
| l-cysteine hydrochloride       | 0.3                 |
| L-argenine                     | 1                   |
| Sodium thioglycolate           | 0.3                 |
| L-argenine                     | 1                   |
| Vitamin K1                     | 0.005               |
| Hemin                          | 0.010               |
| Potassium dihydrogen phosphate | 2.5                 |
| NaCl                           | 3                   |

In [14]:
# Assign proportions based on mmcb medium concentrations:
# This is pretty subjective and makes a lot of assumptions about dissociation reactions of these medium components
# (ie. what happens to Cysteine-HCL in the medium? The model doesn't use Cysteine-HCL in any reaction, but it's added intentionally, so it must be a source
# of some variant of cysteine)

# Assign proportions based on mgam medium concentrations:
peptone_columns = {"conc_ppm_Bact_Peptone_L37": 5000, "conc_ppm_Proteose_Peptone_L85": 5000, "conc_ppm_soya_peptone_L44": 3000, "conc_ppm_yeast_extract": 2500, 'lab_lemco_meat_extract': 2200}
update = {'glc': 500, 'cys': 500}
supplements = {'na1(e)': 1500, 'cl(e)':1500, 'trp_L(e)': 200, 'cys_L(e)': 300, 'arg_L(e)': 1000, 'h2o(e)': 100000, 'h(e)': 660, 'so4(e)': 330, 'strch1(e)': 5000, 'pi(e)': 330}

# Scale by peptone columns ~~~
# peptone_columns: each of the components from the Oxoid table. For example, there are 6.5g/L of bacterial peptone in the media, which we've input as 6500 (ppm). 
# We'll use this to scale each of the corresponding columns to this g/L proportion:
total_concentration = sum([sum(i.values()) for i in [peptone_columns, update, supplements]])
for i in peptone_columns.keys():
    scale_value = peptone_columns[i]/total_concentration
    weights_full[i] = weights_full[i]*scale_value

# Update ambiguous supplement metabolites ~~~
# For the rest of the medium components, their concentrations will be added to a new column in the dataframe called "supplements"
# Using the "update" dictionary, we'll search for the key (ie. 'trp', 'cys') that is present in our defined media
# Collecting all of the metabolites corresponding to these substrings, each individual metabolite will be given an extra amount in the "supplements" column
#
# (Similar to before: if L-cysteine and L-cysteate are the two metabolites corresponding to the "cys" substring and there's a 400ppm source of "cys", each 
# metabolite will be assigned 200 (ppm) in the "supplements" column)
weights_full['supplements'] = 0
for i in update.keys():
    aa_list = weights_full.loc[weights_full["BiGG_ID"].str.contains(i), "BiGG_ID"]
    aa_count = len(aa_list)
    proportion = update[i]/aa_count
    for aa in aa_list:
        weights_full.loc[weights_full["BiGG_ID"] == aa, "supplements"] = proportion

# Add supplements ~~~
# Finally, directly add the concentrations of metabolites to the "supplements" column 
for i in supplements.keys():
    weights_full.loc[weights_full["BiGG_ID"] == i, "supplements"] = supplements[i]

In [15]:
# There are still some 0's left in the medium, meaning that there are still some metabolites used in the reaction that don't have a concentration defined in
# the medium from the literature (that I've been able to figure out) so this is where I've taken the mean value for each of the concentration columns
# and used that to fill in any 0's in that column
# This is a huge, baseless assumption; I think we might be better off manually determining which metabolites that have a concentration are similar to metabolites # that don't have a concentration.

# Note: I don't think this would be a huge problem normally, but there are some metabolites here that are necessary for growth 
# according to the "mandatory" metabolites from earlier
print(len(weights_full[weights_full.iloc[:,4:10].sum(axis=1) == 0]))
display(weights_full[weights_full.iloc[:,4:10].sum(axis=1) == 0])

undef_list = weights_full.loc[weights_full.iloc[:,4:10].sum(axis=1) == 0, "BiGG_ID"]
fill_value = {}
# Mean averaging for each of the columns:
for media in weights_full.columns[4:]:
    fill_value[media] = weights_full[media].mean()

for met in undef_list:
    for media in fill_value.keys():
        weights_full.loc[weights_full['BiGG_ID'] == met, media] = fill_value[media]

10


Unnamed: 0,query,BiGG_ID,hit,mol_weight,conc_ppm_Bact_Peptone_L37,conc_ppm_Proteose_Peptone_L85,conc_ppm_soya_peptone_L44,conc_ppm_yeast_extract,lab_lemco_meat_extract,supplements
2,Adenine,ade(e),190.0,135.13,0.0,0.0,0.0,0.0,0.0,0.0
14,L-Asparagine,asn_L(e),6267.0,132.12,0.0,0.0,0.0,0.0,0.0,0.0
21,"Capsular polysaccharide (F. prausnitzii, putat...",cps_fp(e),,1000.0,0.0,0.0,0.0,0.0,0.0,0.0
26,D-Glucosamine,gam(e),439213.0,179.17,0.0,0.0,0.0,0.0,0.0,0.0
29,L-Glutamine,gln_L(e),5961.0,146.14,0.0,0.0,0.0,0.0,0.0,0.0
43,Guanine,gua(e),135398634.0,151.13,0.0,0.0,0.0,0.0,0.0,0.0
59,Ammonia,nh4(e),222.0,17.031,0.0,0.0,0.0,0.0,0.0,0.0
61,exchange reaction for pectins,pect(e),,194.14,0.0,0.0,0.0,0.0,0.0,0.0
75,Taurine,taur(e),1123.0,125.15,0.0,0.0,0.0,0.0,0.0,0.0
81,D-Xylose,xyl_D(e),135191.0,150.13,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
# Making calculations to eventually get to flux:
weights_full['g/L'] = weights_full.iloc[:,4:10].sum(axis=1)/1000 # ppm/1000 = g/L (roughly)
weights_full['mmol/mL'] = weights_full['g/L']/weights_full['mol_weight'] # (g/L)/(molar mass) = mol/L == mmol/mL
weights_full.head()

Unnamed: 0,query,BiGG_ID,hit,mol_weight,conc_ppm_Bact_Peptone_L37,conc_ppm_Proteose_Peptone_L85,conc_ppm_soya_peptone_L44,conc_ppm_yeast_extract,lab_lemco_meat_extract,supplements,g/L,mmol/mL
0,L-cysteate,Lcyst(e),23619007,168.15,213.609224,19.302038,40.920321,48.898497,38.501132,166.666667,0.527898,0.003139
1,L-Cystine,Lcystin(e),67678,240.3,213.609224,19.302038,40.920321,48.898497,38.501132,166.666667,0.527898,0.002197
2,Adenine,ade(e),190,135.13,313.457353,401.149243,164.212573,163.082884,150.092853,1361.626016,2.553621,0.018898
3,D-Alanine,ala_D(e),71080,89.09,137.570891,133.710483,60.432927,15.96805,90.333539,0.0,0.438016,0.004917
4,L-Alanine,ala_L(e),5950,89.09,137.570891,133.710483,60.432927,15.96805,90.333539,0.0,0.438016,0.004917


In [29]:
# Cells per mL of medium: cells_per_ml = 1640992
# file: FP_14.xlsx, sheet: Determination of Cell Count, excel cell: P4
# updated: FP_14.xlsx, Determination of Cell Count, excell cell: P11
cells_per_ml = 1001930111

time = 80
# gDw: based on EColi gDw
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC106103/
# 1172 fg EColi ~= 1172E-15 g
gdw_per_cell = 1172E-15

# Calculating flux per metabolite in the medium based on the reference equation (flux = mmol/gDw)
weights_full['flux'] = weights_full['mmol/mL']/(cells_per_ml * gdw_per_cell * time)


In [30]:
# FINALLY
# Use the "flux" column from the "weights_full" df to set the lower bound for each of the metabolites in the model 
print("mGAM")
for met in root_model.exchanges:
    if 'ac' not in met.id:
        root_model.reactions.get_by_id(met.id).lower_bound = -0.001
    else:
        root_model.reactions.get_by_id(met.id).lower_bound = 0

for id in ["EX_"+metab for metab in list(weights_full['BiGG_ID'])]:
    root_model.reactions.get_by_id(id).upper_bound = 1000
    root_model.reactions.get_by_id(id).lower_bound = weights_full.loc[weights_full["BiGG_ID"] == id.lstrip("EX_"), "flux"].values[0]*-1
root_model.objective = {root_model.reactions.get_by_id('Biomass_FP'): 1}
root_model.optimize()

mGAM


Unnamed: 0,fluxes,reduced_costs
26DAPLLATi,7.901546e-08,0.000000e+00
3HAD100,1.918998e-07,-1.232595e-32
3HAD120,1.143870e-07,-1.232595e-32
3HAD121,7.751277e-08,0.000000e+00
3HAD140,1.091193e-07,0.000000e+00
...,...,...
XYLt2,0.000000e+00,0.000000e+00
YUMPS,0.000000e+00,0.000000e+00
r0502,0.000000e+00,0.000000e+00
r0839,0.000000e+00,0.000000e+00


In [31]:
root_model.slim_optimize()

1.8547097324320782e-07

In [20]:
# Model outputs
# root_model.summary().to_frame().to_csv('mgam_model_summary.csv')

In [21]:
# Model inputs
# weights_full[['BiGG_ID','flux']].to_csv('mgam.csv')

In [28]:
weights_full[['BiGG_ID','flux']]

Unnamed: 0,BiGG_ID,flux
0,Lcyst(e),0.033419
1,Lcystin(e),0.023385
2,ade(e),0.201164
3,ala_D(e),0.215032
4,ala_L(e),0.215032
5,alaala(e),0.119605
6,alaasp(e),0.242113
7,alagln(e),0.088192
8,alaglu(e),0.363958
9,alagly(e),0.169061


In [25]:
# Post-mortem
# Each metabolite in the model, its flux, and how much growth decreases when it's removed from the medium (one of the first steps)
# Sorted by growth (descending), we can take a look at the "most important" metabolites and what their fluxes were in the medium
# weights_full[['query','BiGG_ID','flux']].merge(growth_check_df, how="left", left_on="BiGG_ID", right_on="metab").sort_values(by="BiGG_ID")

Unnamed: 0,query,BiGG_ID,flux,metab,growth
0,L-cysteate,Lcyst(e),0.033419,Lcyst(e),11.03796
1,L-Cystine,Lcystin(e),0.023385,Lcystin(e),11.03796
2,Adenine,ade(e),0.201164,ade(e),11.03796
3,D-Alanine,ala_D(e),0.215032,ala_D(e),11.03796
4,L-Alanine,ala_L(e),0.215032,ala_L(e),11.03796
5,D-Alanyl-D-alanine,alaala(e),0.119605,alaala(e),11.03796
6,L-alanyl-L-aspartate,alaasp(e),0.242113,alaasp(e),11.03796
7,L-alanyl-L-glutamine,alagln(e),0.088192,alagln(e),11.03796
8,L-alanyl-L-glutamate,alaglu(e),0.363958,alaglu(e),11.03796
9,L-alanylglycine,alagly(e),0.169061,alagly(e),11.03796


In [26]:
# Manually extracted outputs
csv_list = []
for rxn in root_model.exchanges:
    csv_dict = {}
    csv_dict["metabolite"] = rxn.id
    csv_dict["lower_bound"] = rxn.lower_bound
    csv_dict["upper_bound"] = rxn.upper_bound
    csv_dict["flux"] = rxn.flux
    csv_list.append(csv_dict)

pd.DataFrame(csv_list).to_csv("mgam_file_final.csv")

Unnamed: 0,reaction,metabolite,factor,flux
DM_4HBA,DM_4HBA,4hba[c],-1.0,0.0
DM_AACALD,DM_AACALD,aacald[c],-1.0,0.0
DM_HMFURN,DM_HMFURN,hmfurn[c],-1.0,0.0
DM_HQN,DM_HQN,hqn[c],-1.0,0.0
DM_NA1,DM_NA1,na1[c],-1.0,-0.444203
DM_SELNP,DM_SELNP,selnp[c],-1.0,0.0
EX_3mop(e),EX_3mop(e),3mop[e],-1.0,0.0
EX_4abz(e),EX_4abz(e),4abz[e],-1.0,0.0
EX_5oxpro(e),EX_5oxpro(e),5oxpro[e],-1.0,0.0
EX_Lcyst(e),EX_Lcyst(e),Lcyst[e],-1.0,0.0


In [53]:
root_model.summary().to_frame().merge(weights_full[['BiGG_ID','flux']], how="left", on="flux").dropna().to_csv("rate_limiting_mgam.csv")
root_model.summary().to_frame().merge(weights_full[['BiGG_ID','flux']], how="left", on="flux").dropna()

Unnamed: 0,reaction,metabolite,factor,flux,BiGG_ID
63,EX_gam(e),gam[e],-1.0,0.151718,gam(e)
64,EX_glc(e),glc-D[e],-1.0,0.085202,glc(e)
65,EX_glcur(e),glcur[e],-1.0,0.079067,glcur(e)
71,EX_glyc3p(e),glyc3p[e],-1.0,0.116487,glyc3p(e)
136,EX_ribflv(e),ribflv[e],-1.0,0.042753,ribflv(e)
