# Biomass composition ➟  biomass reaction
This notebook converts biomass composition measurements into biomass equations for various R. opacus strains growing on phenol and glucose.

### Method
<ol>
<li>Load biomass compositions for different strains and conditions</li>
<li>Calculate the macromolecule breakdown for the <i>Bacillus subtilis</i> growth reaction built into CarveMe</li>
<li>For each macromolecule, calculate the ratio of R. opacus concentration : B. subtilis concentration</li>
<li>Define phenol and glucose R. opacus biomass reactions by finding the coefficient for each precursor: multiply the B. subtilis coefficient by the scaling factor of the macromolecule that the precursor belongs to</li>
</ol>

### Load dependencies

In [1]:
import cobra
import pandas as pd

### Get biomass composition data (the .csv import is temporary; in the future this data will come from EDD)

In [2]:
# location of the exported biomass-composition measurements
biomass_comp_filename = '../../EDD_Ropacus_Multiomics/EDD_Ropacus_Multiomics_BiomassComp.csv'

# read the table, then discard every column whose name contains 'Unnamed'
# (these are the blank columns of the export)
biomass_comp_df = pd.read_csv(biomass_comp_filename)
cols_to_drop = [col for col in biomass_comp_df.columns if 'Unnamed' in col]
biomass_comp_df = biomass_comp_df.drop(columns=cols_to_drop)

# preview the first ten measurements
biomass_comp_df.head(10)

Unnamed: 0,Line Name,Measurement Type,Time (hrs),Value,Units
0,RoWT_PHE-R2,Lipid Content,25.5,14.08,%
1,RoWT_PHE-R2,Carbohydrate Content,25.5,17.21,%
2,RoWT_PHE-R2,Protein Content,25.5,38.84,%
3,RoP1_L-R1,Lipid Content,25.5,16.44,%
4,RoP1_L-R1,Carbohydrate Content,25.5,12.47,%
5,RoP1_L-R1,Protein Content,25.5,39.43,%
6,RoP1_H-R3,Lipid Content,46.5,14.94,%
7,RoP1_H-R3,Carbohydrate Content,46.5,13.68,%
8,RoP1_H-R3,Protein Content,46.5,45.23,%
9,RoWT_PV-R2,Lipid Content,27.0,15.28,%


### Load model and pull up original biomass equation (from Bacillus subtilis)

In [3]:
# Read the starting genome-scale model from its SBML file.
# The bare `model` expression on the last line displays the model summary
# as the cell output.
model = cobra.io.read_sbml_model("../GSMs/ropacus_model_A.xml")
model

0,1
Name,ropacus_model_A
Memory address,0x07fc03cd4e6d0
Number of metabolites,1587
Number of reactions,2390
Number of groups,0
Objective expression,1.0*Growth - 1.0*Growth_reverse_699ae
Compartments,"cytosol, periplasm, extracellular space"


### Load subtilis biomass information

In [4]:
# Table describing the B. subtilis biomass reaction: one row per metabolite with
# its name, model abbreviation, macromolecule category, stoichiometric
# coefficient and molecular weight (see the preview below).
bacillis_df = pd.read_csv('../biomass_equation_data/subtilis_biomass_data.csv')
bacillis_df.head()

Unnamed: 0,name,abbreviation,category,coefficient,molecular_weight
0,10-Formyltetrahydrofolate,10fthf_c,carbon carrier,-0.000216,471.43
1,L-Alanine,ala__L_c,protein,-0.498716,89.094
2,S-Adenosyl-L-methionine,amet_c,carbon carrier,-0.000216,399.446
3,L-Arginine,arg__L_c,protein,-0.28717,175.212
4,L-Asparagine,asn__L_c,protein,-0.234029,132.119


### Define function to get the total mass of a single macromolecule type per g biomass

In [5]:
def get_total_macro_mass(biomass_df, macromolecule):
    """Total coefficient-weighted mass of one macromolecule category's reactants.

    Parameters
    ----------
    biomass_df : pandas.DataFrame
        Biomass table with `coefficient`, `category` and `molecular_weight` columns.
    macromolecule : str
        Category label to select (compared against the `category` column).

    Returns
    -------
    The sum of ``-coefficient * molecular_weight`` over the rows that have a
    negative coefficient and the requested category; 0 when no row qualifies.
    """
    # a negative coefficient marks a reactant of the biomass reaction
    is_reactant = biomass_df.coefficient < 0
    in_category = biomass_df.category == macromolecule
    selected = biomass_df[is_reactant & in_category]

    # flip the sign so that the reported mass is positive
    return sum(
        -1 * coeff * weight
        for coeff, weight in zip(selected.coefficient, selected.molecular_weight)
    )

### Calculate the macromolecule distribution in the Subtilis biomass equation

In [6]:
# define macro categories
macros = ['protein', 'lipid', 'carbohydrate']

# fill a dictionary with bacillis macros
bacillis_macros = {macro:get_total_macro_mass(bacillis_df, macro) for macro in macros}

print(bacillis_macros)

{'protein': 655.9755072570001, 'lipid': 175.241005523, 'carbohydrate': 9.621337439000001}


### Define a function that takes a strain and a macromolecule type and returns the measured value (mg macromolecule / g biomass)

In [7]:
def strain_measurement_value(strain, macro, measurements_df=None):
    """Return one macromolecule measurement of a strain in mg macro / g biomass.

    Parameters
    ----------
    strain : str
        Value to match in the 'Line Name' column (e.g. 'RoWT_PHE-R2').
    macro : str
        Macromolecule category such as 'protein'; it is capitalized and
        suffixed with ' Content' to build the 'Measurement Type' to match.
    measurements_df : pandas.DataFrame, optional
        Table with 'Line Name', 'Measurement Type' and 'Value' columns.
        Defaults to the notebook-level `biomass_comp_df`.

    Returns
    -------
    float
        The measured percentage multiplied by 10 (percent -> mg per g).

    Raises
    ------
    ValueError
        If the strain / measurement pair matches zero rows or more than one
        row, since the value would then be missing or ambiguous.
    """
    if measurements_df is None:
        measurements_df = biomass_comp_df

    # modify macro string to match the measurement dataframe
    measurement_type = macro.capitalize() + ' Content'

    # isolate the value(s) recorded for this strain and measurement type
    is_match = (
        (measurements_df['Line Name'] == strain)
        & (measurements_df['Measurement Type'] == measurement_type)
    )
    matches = measurements_df.loc[is_match, 'Value']

    # float() on a whole Series is deprecated in pandas and gives an opaque
    # TypeError when the selection is empty or ambiguous, so check explicitly
    # and convert the single scalar instead
    if len(matches) != 1:
        raise ValueError(
            "expected exactly one '%s' measurement for strain '%s', found %d"
            % (measurement_type, strain, len(matches))
        )

    # multiply by 10 to convert from percent to mg macro / g biomass
    return 10 * float(matches.iloc[0])

### Get phenol and glucose macro dictionary

In [8]:
# measured composition of the wild-type strain grown on phenol, keyed by macromolecule
phenol_macros = dict(
    (category, strain_measurement_value('RoWT_PHE-R2', category)) for category in macros
)
print('phenol', phenol_macros)

# measured composition of the wild-type strain grown on glucose, keyed by macromolecule
glucose_macros = dict(
    (category, strain_measurement_value('RoWT_GLC-R2', category)) for category in macros
)
print('glucose', glucose_macros)

phenol {'protein': 388.40000000000003, 'lipid': 140.8, 'carbohydrate': 172.10000000000002}
glucose {'protein': 240.39999999999998, 'lipid': 402.59999999999997, 'carbohydrate': 146.4}


### Define a function to get biomass equation from original and new macromolecule dictionaries

In [9]:
def get_new_biomass_metabolites(old_biomass_df, old_macros, new_macros, target_model=None):
    """Build the metabolite -> coefficient mapping of a rescaled biomass reaction.

    Every metabolite whose category has an entry in `old_macros` gets its
    original coefficient multiplied by ``new_macros[category] / old_macros[category]``;
    all other metabolites keep their original coefficient.

    Parameters
    ----------
    old_biomass_df : pandas.DataFrame
        Original biomass table with `abbreviation`, `category` and
        `coefficient` columns.
    old_macros : dict
        Macromolecule category -> total mass in the original biomass reaction.
        Its keys define which categories are rescaled.
    new_macros : dict
        Macromolecule category -> target total mass. Must contain every key
        of `old_macros` (a missing key raises KeyError).
    target_model : optional
        Object whose ``metabolites.get_by_id`` resolves an abbreviation to a
        metabolite. Defaults to the notebook-level `model`.

    Returns
    -------
    dict
        Metabolite object -> coefficient, in the row order of `old_biomass_df`.
    """
    source_model = model if target_model is None else target_model

    # get the multiplier needed to scale each macro
    macro_multipliers = {
        macro: new_macros[macro] / old_total for macro, old_total in old_macros.items()
    }

    # make metabolite dictionary by looping over old dataframe
    metabolite_dictionary = {}
    for _, row in old_biomass_df.iterrows():

        # convert the metabolite id into a metabolite object
        metabolite = source_model.metabolites.get_by_id(row.abbreviation)

        # Scale exactly the categories a multiplier was computed for. A separate
        # hard-coded category list could drift out of sync with `old_macros`;
        # categories without a multiplier default to 1.0, i.e. stay unscaled.
        scale = macro_multipliers.get(row.category, 1.0)
        if row.category in macro_multipliers:
            metabolite_dictionary[metabolite] = scale * row.coefficient
        else:
            metabolite_dictionary[metabolite] = row.coefficient

    return metabolite_dictionary

### Make phenol biomass reaction and add to model

In [10]:
# create the reaction with its identifier and display name already set
phenol_growth_equation = cobra.Reaction(id='Growth_Phenol', name='Phenol biomass reaction')

# B. subtilis coefficients rescaled to the phenol macromolecule measurements
metabolite_dictionary = get_new_biomass_metabolites(bacillis_df, bacillis_macros, phenol_macros)
phenol_growth_equation.add_metabolites(metabolite_dictionary)

# register the reaction with the model
model.add_reactions([phenol_growth_equation])

# show the reaction string as stored in the model
model.reactions.get_by_id('Growth_Phenol').reaction

'0.00021600000000000002 10fthf_c + 0.2952873884117614 ala__L_c + 0.00021600000000000002 amet_c + 0.17003200083856446 arg__L_c + 0.13856746569714248 asn__L_c + 0.13856746569714248 asp__L_c + 52.547151 atp_c + 0.005053 ca2_c + 0.005053 cl_c + 0.000559 coa_c + 9.7e-05 cobalt2_c + 0.129616 ctp_c + 0.000688 cu2_c + 0.052643783217458676 cys__L_c + 0.025403 datp_c + 0.026229000000000002 dctp_c + 0.026229000000000002 dgtp_c + 0.025403 dttp_c + 0.00021600000000000002 fad_c + 0.006519 fe2_c + 0.00758 fe3_c + 0.15127383035220948 gln__L_c + 0.15127383035220948 glu__L_c + 0.3521664291491499 gly_c + 0.0007801644346423031 gtca1_45_BS_c + 0.0007801644346423031 gtca2_45_BS_c + 0.0007801644346423031 gtca3_45_BS_c + 0.208826 gtp_c + 47.184845 h2o_c + 0.05445855524298432 his__L_c + 0.16700639397055922 ile__L_c + 0.189503 k_c + 0.258981882281531 leu__L_c + 3.93697809448742e-05 lipo1_24_BS_c + 3.93697809448742e-05 lipo2_24_BS_c + 3.93697809448742e-05 lipo3_24_BS_c + 3.93697809448742e-05 lipo4_24_BS_c + 0.19

### Make glucose biomass reaction and add to model

In [11]:
# create the reaction with its identifier and display name already set
glucose_growth_equation = cobra.Reaction(id='Growth_Glucose', name='Glucose biomass reaction')

# B. subtilis coefficients rescaled to the glucose macromolecule measurements
metabolite_dictionary = get_new_biomass_metabolites(bacillis_df, bacillis_macros, glucose_macros)
glucose_growth_equation.add_metabolites(metabolite_dictionary)

# register the reaction with the model
model.add_reactions([glucose_growth_equation])

# show the reaction string as stored in the model
model.reactions.get_by_id('Growth_Glucose').reaction

'0.00021600000000000002 10fthf_c + 0.18276799220954537 ala__L_c + 0.00021600000000000002 amet_c + 0.10524122811944102 arg__L_c + 0.08576626867557427 asn__L_c + 0.08576626867557427 asp__L_c + 52.547151 atp_c + 0.005053 ca2_c + 0.005053 cl_c + 0.000559 coa_c + 9.7e-05 cobalt2_c + 0.129616 ctp_c + 0.000688 cu2_c + 0.03258384522522416 cys__L_c + 0.025403 datp_c + 0.026229000000000002 dctp_c + 0.026229000000000002 dgtp_c + 0.025403 dttp_c + 0.00021600000000000002 fad_c + 0.006519 fe2_c + 0.00758 fe3_c + 0.09363086719019349 gln__L_c + 0.09363086719019349 glu__L_c + 0.21797324811394342 gly_c + 0.002230782680305335 gtca1_45_BS_c + 0.002230782680305335 gtca2_45_BS_c + 0.002230782680305335 gtca3_45_BS_c + 0.208826 gtp_c + 47.184845 h2o_c + 0.033707097529385756 his__L_c + 0.10336853015067567 ile__L_c + 0.189503 k_c + 0.16029671601565407 leu__L_c + 0.00011257296738924964 lipo1_24_BS_c + 0.00011257296738924964 lipo2_24_BS_c + 0.00011257296738924964 lipo3_24_BS_c + 0.00011257296738924964 lipo4_24_BS

### Define function to get the mass of a single macromolecule type from a biomass reaction

In [12]:
def reaction_to_single_macro_mass(reaction, old_biomass_df, macro):
    """Mass of one macromolecule category implied by a reaction's coefficients.

    Parameters
    ----------
    reaction
        Object exposing ``get_coefficient(metabolite_id)``.
    old_biomass_df : pandas.DataFrame
        Reference table with `coefficient`, `category`, `abbreviation` and
        `molecular_weight` columns; it decides which metabolites belong to
        `macro` and supplies their molecular weights.
    macro : str
        Category label to select (compared against the `category` column).

    Returns
    -------
    The sum of ``-molecular_weight * reaction coefficient`` over the metabolites
    that are reactants (negative coefficient) of category `macro` in
    `old_biomass_df`; 0 when none qualify.
    """
    # reactants of the requested category, according to the reference table
    is_reactant = old_biomass_df.coefficient < 0
    in_category = old_biomass_df.category == macro
    selected = old_biomass_df[is_reactant & in_category]

    # weight each coefficient of the new reaction by the tabulated molecular
    # weight; the -1 turns the negative reactant coefficients into positive masses
    total = sum(
        -1 * weight * reaction.get_coefficient(met_id)
        for met_id, weight in zip(selected.abbreviation, selected.molecular_weight)
    )
    return total
    
    
# Spot check: protein mass implied by the new glucose biomass reaction.
# The displayed value should agree with glucose_macros['protein'].
reaction_to_single_macro_mass(model.reactions.get_by_id('Growth_Glucose'), bacillis_df, 'protein')

240.39999999999992

### Test new biomass reactions

In [13]:
# get reactions as variables to plug into function
phenol_reaction = model.reactions.get_by_id('Growth_Phenol')
glucose_reaction = model.reactions.get_by_id('Growth_Glucose')

# get reaction macros
phenol_reaction_macros = {macro:reaction_to_single_macro_mass(phenol_reaction, bacillis_df, macro) for macro in ['protein', 'lipid', 'carbohydrate']}
glucose_reaction_macros = {macro:reaction_to_single_macro_mass(glucose_reaction, bacillis_df, macro) for macro in ['protein', 'lipid', 'carbohydrate']}

# compare to expected macros
print('expected phenol macros', phenol_macros)
print('phenol reaction macros', phenol_reaction_macros)
print('\n')
print('expected glucose macros', glucose_macros)
print('glucose reaction macros', glucose_reaction_macros)

expected phenol macros {'protein': 388.40000000000003, 'lipid': 140.8, 'carbohydrate': 172.10000000000002}
phenol reaction macros {'protein': 388.3999999999999, 'lipid': 140.8, 'carbohydrate': 172.10000000000002}


expected glucose macros {'protein': 240.39999999999998, 'lipid': 402.59999999999997, 'carbohydrate': 146.4}
glucose reaction macros {'protein': 240.39999999999992, 'lipid': 402.59999999999997, 'carbohydrate': 146.4}


### Save model with new biomass reactions

In [14]:
# Rename the model and write it, including the two new biomass reactions,
# to a new SBML file.
# NOTE(review): the objective is not changed in this notebook; the summary
# displayed below still lists the original `Growth` reaction as the objective.
# Confirm that downstream notebooks select Growth_Phenol / Growth_Glucose.
model.id = 'ropacus_model_B'
model.name = 'R. opacus model B'
cobra.io.write_sbml_model(model, "../GSMs/ropacus_model_B.xml")
model

0,1
Name,ropacus_model_B
Memory address,0x07fc03cd4e6d0
Number of metabolites,1587
Number of reactions,2392
Number of groups,0
Objective expression,1.0*Growth - 1.0*Growth_reverse_699ae
Compartments,"cytosol, periplasm, extracellular space"
