# Biomass composition ➟  biomass reaction
The purpose of this notebook is to convert biomass composition measurements into biomass equations for various R. opacus strains growing on various substrates.

In [1]:
import cobra
import pandas as pd

### Get biomass composition data (the .csv import is temporary; in the future this data will come from EDD)

In [2]:
# Read the biomass composition table, then build a copy without any column
# whose label contains 'Unnamed'.
biomass_comp_filename = '../../EDD_Ropacus_Multiomics/EDD_Ropacus_Multiomics_BiomassComp.csv'
raw_biomass_comp_df = pd.read_csv(biomass_comp_filename)
cols_to_drop = [col for col in raw_biomass_comp_df.columns if 'Unnamed' in col]
biomass_comp_df = raw_biomass_comp_df.drop(columns=cols_to_drop)
biomass_comp_df

Unnamed: 0,Line Name,Measurement Type,Time (hrs),Value,Units
0,RoWT_PHE-R2,Lipid Content,25.5,14.08,%
1,RoWT_PHE-R2,Carbohydrate Content,25.5,17.21,%
2,RoWT_PHE-R2,Protein Content,25.5,38.84,%
3,RoP1_L-R1,Lipid Content,25.5,16.44,%
4,RoP1_L-R1,Carbohydrate Content,25.5,12.47,%
5,RoP1_L-R1,Protein Content,25.5,39.43,%
6,RoP1_H-R3,Lipid Content,46.5,14.94,%
7,RoP1_H-R3,Carbohydrate Content,46.5,13.68,%
8,RoP1_H-R3,Protein Content,46.5,45.23,%
9,RoWT_PV-R2,Lipid Content,27.0,15.28,%


### Break out individual conditions
Define a function that takes a condition name and returns a dictionary of macromolecule compositions.

In [3]:
def composition_name_to_dictionary(name, biomass_df):
    """Return the macromolecule composition recorded for one line.

    Parameters
    ----------
    name : str
        Value of the 'Line Name' column that selects the condition.
    biomass_df : pandas.DataFrame
        Table with 'Line Name', 'Measurement Type' and 'Value' columns.

    Returns
    -------
    dict
        Maps each 'Measurement Type' of the selected rows to its 'Value'
        multiplied by 10. Empty when no row matches ``name``; if a
        measurement type occurs more than once, the last row wins.
    """
    selected_rows = biomass_df.loc[biomass_df['Line Name'] == name]
    measurement_types = selected_rows['Measurement Type'].tolist()
    # The 10 times multiplier makes the units mg macromolecule/g dry cell weight
    scaled_values = [value * 10 for value in selected_rows['Value'].tolist()]
    return dict(zip(measurement_types, scaled_values))

In [4]:
# Display the macromolecule composition dictionary for line 'RoWT_PHE-R2'.
composition_name_to_dictionary('RoWT_PHE-R2', biomass_comp_df)

{'Lipid Content': 140.8,
 'Carbohydrate Content': 172.10000000000002,
 'Protein Content': 388.40000000000003}

In [5]:
# Display the macromolecule composition dictionary for line 'RoP1_L-R1'.
composition_name_to_dictionary('RoP1_L-R1', biomass_comp_df)

{'Lipid Content': 164.4,
 'Carbohydrate Content': 124.7,
 'Protein Content': 394.3}

In [6]:
# Display the macromolecule composition dictionary for line 'RoP1_H-R3'.
composition_name_to_dictionary('RoP1_H-R3', biomass_comp_df)

{'Lipid Content': 149.4,
 'Carbohydrate Content': 136.8,
 'Protein Content': 452.29999999999995}

### Pull up original biomass equation

In [7]:
# Load the SBML model that contains the original biomass reaction.
model = cobra.io.read_sbml_model("../GSMs/Ropacus_annotated_curated.xml")

Split the biomass reaction into substrates and products

In [8]:
# Fetch the 'Growth' reaction and cut its reaction string at the '-->' arrow:
# the text before the arrow is the substrate side, the text after it the
# product side.
original_biomass_reaction = model.reactions.get_by_id('Growth')
reaction_sides = original_biomass_reaction.reaction.split('-->')
substrate_str = reaction_sides[0]
product_str = reaction_sides[1]

Print the biomass equation's metabolites and reaction coefficients

In [9]:
def _parse_reaction_side(terms):
    """Map metabolite id -> coefficient string for the terms of one reaction side.

    Each term is expected to look like '<coefficient> <metabolite id>'. A term
    holding only a metabolite id is given the coefficient '1', and blank terms
    (produced by an empty reaction side) are skipped.
    """
    parsed = {}
    for term in terms:
        tokens = term.split()
        if not tokens:
            continue
        if len(tokens) == 1:
            # NOTE(review): cobra appears to leave out a coefficient of exactly 1
            # when it builds the reaction string - confirm against the cobrapy docs.
            coefficient, met_id = '1', tokens[0]
        else:
            coefficient, met_id = tokens[0], tokens[1]
        parsed[met_id] = coefficient
    return parsed


substrate_list = substrate_str.split('+')
product_list = product_str.split('+')

# Coefficients are kept as strings, exactly as they appear in the reaction
# string; convert with float() wherever a number is needed.
substrate_dict = _parse_reaction_side(substrate_list)
product_dict = _parse_reaction_side(product_list)

print('substrates')
for met, coefficient in substrate_dict.items():
    print(f'{float(coefficient):.6f}', model.metabolites.get_by_id(met).name)
print()
print('products')
for met, coefficient in product_dict.items():
    print(f'{float(coefficient):.6f}', model.metabolites.get_by_id(met).name)

substrates
0.000216 10-Formyltetrahydrofolate
0.498716 L-Alanine
0.000216 S-Adenosyl-L-methionine
0.287170 L-Arginine
0.234029 L-Asparagine
0.234029 L-Aspartate
52.547151 ATP
0.005053 Calcium
0.005053 Chloride
0.000559 Coenzyme A
0.000097 Co2+
0.129616 CTP
0.000688 Cu2+
0.088911 L-Cysteine
0.025403 DATP
0.026229 DCTP
0.026229 DGTP
0.025403 DTTP
0.000216 Flavin adenine dinucleotide oxidized
0.006519 Fe2+
0.007580 Fe3+
0.255489 L-Glutamine
0.255489 L-Glutamate
0.594780 Glycine
0.000971 Glycerol teichoic acid  n45   unlinked  unsubstituted C151H297N2O238P46
0.000971 Glycerol teichoic acid  n45   unlinked  D ala substituted C286H612N47O328P46
0.000971 Glycerol teichoic acid  n45   unlinked  glucose substituted C421H749N2O464P46
0.208826 GTP
47.184845 H2O
0.091976 L-Histidine
0.282060 L-Isoleucine
0.189503 Potassium
0.437399 L-Leucine
0.000049 Lipoteichoic acid  n24   linked  glucose substituted C26236H47072O25500P2400
0.000049 Lipoteichoic acid  n24   linked  N acetyl D glucosamine C31036H

Define the types of each biomass reaction metabolite. These will be used to quantify the amount of dry cell weight each group takes up.

In [10]:
# Hand-assigned category for each substrate of the biomass reaction, keyed by
# metabolite id. The categories are used to group the substrates when their
# mass contributions are summed.
# NOTE(review): atp/ctp/gtp/utp are labelled 'energy molecule' rather than
# 'nucleic acid' - confirm this grouping is intended.
substrate_type_dict = {
    '10fthf_c' : 'carbon carrier',
    'ala__L_c': 'protein',
    'amet_c': 'carbon carrier',
    'arg__L_c': 'protein',
    'asn__L_c': 'protein',
    'asp__L_c': 'protein',
    'atp_c': 'energy molecule',
    'ca2_c': 'salt',
    'cl_c': 'salt',
    'coa_c': 'carbon carrier',
    'cobalt2_c': 'salt',
    'ctp_c': 'energy molecule',
    'cu2_c': 'salt',
    'cys__L_c': 'protein',
    'datp_c': 'nucleic acid',
    'dctp_c': 'nucleic acid',
    'dgtp_c': 'nucleic acid',
    'dttp_c': 'nucleic acid',
    'fad_c': 'energy molecule',
    'fe2_c': 'salt',
    'fe3_c': 'salt',
    'gln__L_c': 'protein',
    'glu__L_c': 'protein',
    'gly_c': 'protein',
    'gtca1_45_BS_c': 'lipid',
    'gtca2_45_BS_c': 'lipid',
    'gtca3_45_BS_c': 'lipid',
    'gtp_c': 'energy molecule',
    'h2o_c': 'water',
    'his__L_c': 'protein',
    'ile__L_c': 'protein',
    'k_c': 'salt',
    'leu__L_c': 'protein',
    'lipo1_24_BS_c': 'lipid',
    'lipo2_24_BS_c': 'lipid',
    'lipo3_24_BS_c': 'lipid',
    'lipo4_24_BS_c': 'lipid',
    'lys__L_c': 'protein',
    'met__L_c': 'protein',
    'mg2_c': 'salt',
    'mlthf_c': 'carbon carrier',
    'mn2_c': 'salt',
    'mql8_c': 'energy molecule',
    'nad_c': 'energy molecule',
    'nadp_c': 'energy molecule',
    'peptido_BS_c': 'carbohydrate',
    'phe__L_c': 'protein',
    'pro__L_c': 'protein',
    'pydx5p_c': 'energy molecule',
    'ribflv_c': 'energy molecule',
    'ser__L_c': 'protein',
    'so4_c': 'salt',
    'thf_c': 'carbon carrier',
    'thmpp_c': 'energy molecule',
    'thr__L_c': 'protein',
    'trp__L_c': 'protein',
    'tyr__L_c': 'protein',
    'utp_c': 'energy molecule',
    'val__L_c': 'protein',
    'zn2_c': 'salt',
}
# Show the distinct categories that were assigned.
set(substrate_type_dict.values())

{'carbohydrate',
 'carbon carrier',
 'energy molecule',
 'lipid',
 'nucleic acid',
 'protein',
 'salt',
 'water'}

Define a dictionary that holds the molar mass of each metabolite in g/mol or mg/mmol (equivalent units). These will be used to quantify the amount of dry cell weight each metabolite makes up.

In [11]:
# Atomic mass of every element that appears in the biomass substrates.
element_masses = {
    'H': 1.008,
    'C': 12.011,
    'N': 14.007,
    'O': 15.999,
    'P': 30.974,
    'S': 32.06,
    'Cl': 35.45,
    'K': 39.098,
    'Ca': 40.078,
    'Cu': 63.546,
    'Mn': 54.938,
    'Fe': 55.845,
    'Co': 58.933,
    'Mg': 24.305,
    'Zn': 65.38
}


def _molar_mass(elements):
    """Add up atomic mass * atom count over an {element symbol: count} mapping.

    Raises KeyError for an element symbol that is missing from element_masses.
    """
    total = 0
    # Accumulate term by term, in mapping order, to keep the float rounding
    # of a plain running sum.
    for symbol, count in elements.items():
        total += element_masses[symbol] * count
    return total


# Molar mass of each biomass substrate, computed from the element counts
# stored on the model's metabolite.
substrate_masses = {}
for met in substrate_dict:
    substrate_masses[met] = _molar_mass(model.metabolites.get_by_id(met).elements)
substrate_masses

{'10fthf_c': 471.42999999999995,
 'ala__L_c': 89.094,
 'amet_c': 399.44599999999997,
 'arg__L_c': 175.212,
 'asn__L_c': 132.119,
 'asp__L_c': 132.095,
 'atp_c': 503.15,
 'ca2_c': 40.078,
 'cl_c': 35.45,
 'coa_c': 763.502,
 'cobalt2_c': 58.933,
 'ctp_c': 479.124,
 'cu2_c': 63.546,
 'cys__L_c': 121.154,
 'datp_c': 487.15099999999995,
 'dctp_c': 463.125,
 'dgtp_c': 503.15,
 'dttp_c': 478.1360000000001,
 'fad_c': 783.5409999999999,
 'fe2_c': 55.845,
 'fe3_c': 55.845,
 'gln__L_c': 146.146,
 'glu__L_c': 146.122,
 'gly_c': 75.067,
 'gtca1_45_BS_c': 7373.617,
 'gtca2_45_BS_c': 11382.847,
 'gtca3_45_BS_c': 14687.976999999999,
 'gtp_c': 519.149,
 'h2o_c': 18.015,
 'his__L_c': 155.157,
 'ile__L_c': 131.175,
 'k_c': 39.098,
 'leu__L_c': 131.175,
 'lipo1_24_BS_c': 844881.272,
 'lipo2_24_BS_c': 943408.472,
 'lipo3_24_BS_c': 669568.472,
 'lipo4_24_BS_c': 455742.872,
 'lys__L_c': 147.198,
 'met__L_c': 149.20800000000003,
 'mg2_c': 24.305,
 'mlthf_c': 455.4309999999999,
 'mn2_c': 54.938,
 'mql8_c': 719

### Calculate the mass that each group makes up in the Default biomass equation

In [12]:
# Mass contributed by each metabolite category in the default biomass
# reaction: reaction coefficient * molar mass, summed per category.
biomass_types = set(substrate_type_dict.values())

# Categories that are left out of total_mass.
excluded_types = ['energy molecule', 'salt', 'water', 'carbon carrier']

# Sorted so the categories print in the same order on every run; iteration
# order over a set of strings can differ between kernel sessions.
mass_by_type = {biomass_type: 0 for biomass_type in sorted(biomass_types)}

total_mass = 0
for met in substrate_dict:
    met_mass = float(substrate_dict[met]) * substrate_masses[met]
    met_type = substrate_type_dict[met]
    mass_by_type[met_type] += met_mass
    # Single pass over the metabolites, so each one is added to the total
    # exactly once (accumulating inside a per-category loop would count every
    # metabolite once per category).
    if met_type not in excluded_types:
        total_mass += met_mass

for biomass_type, mass in mass_by_type.items():
    print(biomass_type, mass)

salt 9.295469045918262
carbohydrate 9.620852655189088
carbon carrier 0.8101036482552552
water 850.0349822009696
energy molecule 26678.746144703928
nucleic acid 49.86635926461023
protein 656.0539383052892
lipid 173.90320979912588


In [13]:
# For every substrate categorised as 'protein', print its reaction
# coefficient, its molar mass, and the product of the two.
for met, coefficient in substrate_dict.items():
    if substrate_type_dict[met] != 'protein':
        continue
    molar_mass_of_met = substrate_masses[met]
    print(float(coefficient), molar_mass_of_met, float(coefficient) * molar_mass_of_met)

0.498715520392769 89.094 44.43256057387336
0.287169982631549 175.212 50.315626996838965
0.234028507069996 132.119 30.919612325580804
0.234028507069996 132.095 30.913995641411123
0.088910541899028 121.154 10.771867793234838
0.255489170191616 146.146 37.338720266823906
0.255489170191616 146.122 37.332588526739315
0.594780263899724 75.067 44.64837007016058
0.0919764896094138 155.157 14.270796198327819
0.282060393397933 131.175 36.999272103973865
0.437398507887213 131.175 57.375749272105175
0.333158227436256 147.198 49.04022476216201
0.149206219068509 149.20800000000003 22.262761534774096
0.179864725321287 165.19199999999998 29.71221370527404
0.214611485471606 115.132 24.70864954531694
0.209501896237989 105.09299999999999 22.017182781338978
0.246292297911539 119.12 29.33833852722253
0.0551860879358643 204.22899999999998 11.270599553053628
0.13387648051658 181.19099999999997 24.25721338127964
0.410827284680896 117.14800000000001 48.12759474579761
