# Biomass composition ➟  biomass reaction
The purpose of this notebook is to convert biomass composition measurements into biomass equations for various R. opacus strains growing on various substrates

In [1]:
import cobra
import pandas as pd

### Get biomass composition data (.csv import is a temporary. In the future this data will be from EDD)

In [2]:
biomass_comp_filename = '../../EDD_Ropacus_Multiomics/EDD_Ropacus_Multiomics_BiomassComp.csv'
biomass_comp_df = pd.read_csv(biomass_comp_filename)
cols_to_drop = [col for col in biomass_comp_df.columns if 'Unnamed' in col]
biomass_comp_df.drop(cols_to_drop, inplace=True, axis=1)
biomass_comp_df.head()

Unnamed: 0,Line Name,Measurement Type,Time (hrs),Value,Units
0,RoWT_PHE-R2,Lipid Content,25.5,14.08,%
1,RoWT_PHE-R2,Carbohydrate Content,25.5,17.21,%
2,RoWT_PHE-R2,Protein Content,25.5,38.84,%
3,RoP1_L-R1,Lipid Content,25.5,16.44,%
4,RoP1_L-R1,Carbohydrate Content,25.5,12.47,%


### Define a function to take in a condition name and return a dictionary of macromolecule compositions
The 10 times multiplier makes the units mg macromolecule/g dry cell weight

In [3]:
def composition_name_to_dictionary(name, biomass_df):
    biomass_comp_df = biomass_df[biomass_df['Line Name'] == name]
    biomass_dictionary = {}
    for _, row in biomass_comp_df.iterrows():
        biomass_dictionary[row['Measurement Type']] = row['Value']
    return biomass_dictionary

In [4]:
WT_phenol_composition = composition_name_to_dictionary('RoWT_PHE-R2', biomass_comp_df)
WT_glucose_composition = composition_name_to_dictionary('RoWT_GLC-R2', biomass_comp_df)

print('WT with phenol:', WT_phenol_composition)
print('WT with glucose:', WT_glucose_composition)

WT with phenol: {'Lipid Content': 14.08, 'Carbohydrate Content': 17.21, 'Protein Content': 38.84}
WT with glucose: {'Lipid Content': 40.26, 'Carbohydrate Content': 14.64, 'Protein Content': 24.04}


### Pull up original biomass equation

In [5]:
model = cobra.io.read_sbml_model("../GSMs/Ropacus_annotated_curated_with_phenol.xml")
original_biomass_reaction = model.reactions.get_by_id('Growth')
original_biomass_reaction.reaction

'0.000216499790822049 10fthf_c + 0.498715520392769 ala__L_c + 0.000216499790822049 amet_c + 0.287169982631549 arg__L_c + 0.234028507069996 asn__L_c + 0.234028507069996 asp__L_c + 52.5471506268106 atp_c + 0.00505327987098101 ca2_c + 0.00505327987098101 cl_c + 0.000559210222033633 coa_c + 9.70851079919502e-05 cobalt2_c + 0.129616385977893 ctp_c + 0.000688333415662927 cu2_c + 0.088910541899028 cys__L_c + 0.0254032893571737 datp_c + 0.0262294836261852 dctp_c + 0.0262294836261852 dgtp_c + 0.0254032893571737 dttp_c + 0.000216499790822049 fad_c + 0.00651926500165946 fe2_c + 0.00758040523201147 fe3_c + 0.255489170191616 gln__L_c + 0.255489170191616 glu__L_c + 0.594780263899724 gly_c + 0.000970851079919502 gtca1_45_BS_c + 0.000970851079919502 gtca2_45_BS_c + 0.000970851079919502 gtca3_45_BS_c + 0.208826183886365 gtp_c + 47.1848449736869 h2o_c + 0.0919764896094138 his__L_c + 0.282060393397933 ile__L_c + 0.189503334842727 k_c + 0.437398507887213 leu__L_c + 4.85425539959751e-05 lipo1_24_BS_c + 4.8

### Load dataframe with substrate metabolite information

In [6]:
substrate_df = pd.read_csv('../biomass_equation_data/subtitlis_biomass_equation_data.csv')
substrate_df

Unnamed: 0,coefficient,name,abbreviation,category,molecular_weight
0,0.000216,10-Formyltetrahydrofolate,10fthf_c,carbon carrier,471.43
1,0.498716,L-Alanine,ala__L_c,protein,89.094
2,0.000216,S-Adenosyl-L-methionine,amet_c,carbon carrier,399.446
3,0.28717,L-Arginine,arg__L_c,protein,175.212
4,0.234029,L-Asparagine,asn__L_c,protein,132.119
5,0.234029,L-Aspartate,asp__L_c,protein,132.095
6,52.547151,ATP,atp_c,energy molecule,503.15
7,0.005053,Calcium,ca2_c,salt,40.078
8,0.005053,Chloride,cl_c,salt,35.45
9,0.000559,CoenzymeA,coa_c,carbon carrier,763.502


### Calculate the macromolecule distribution in the Subtilis biomass equation

In [7]:
subtilis_mg_protein = 0
subtilis_mg_lipid = 0
subtilis_mg_carbohydrate = 0

for met in list(substrate_df['abbreviation']):
    met_category = list(substrate_df[substrate_df['abbreviation'] == met]['category'])[0]
    met_molar_mass = list(substrate_df[substrate_df['abbreviation'] == met]['molecular_weight'])[0]
    met_coefficient = list(substrate_df[substrate_df['abbreviation'] == met]['coefficient'])[0]
    
    if met_category == 'protein':
        subtilis_mg_protein += met_molar_mass*met_coefficient
    if met_category == 'lipid':
        subtilis_mg_lipid += met_molar_mass*met_coefficient
    if met_category == 'carbohydrate':
        subtilis_mg_carbohydrate += met_molar_mass*met_coefficient

print(f'subtiltis mg of protein / g dry cell weight: {subtilis_mg_protein:.2f} mg')
print(f'subtiltis mg of lipid / g dry cell weight: {subtilis_mg_lipid:.2f} mg')
print(f'subtiltis mg of carbohydrate / g dry cell weight: {subtilis_mg_carbohydrate:.2f} mg')

subtiltis mg of protein / g dry cell weight: 656.05 mg
subtiltis mg of lipid / g dry cell weight: 175.24 mg
subtiltis mg of carbohydrate / g dry cell weight: 9.62 mg


### Make Phenol Biomass Equation

Calculate the scaling factor for each macromolecule

In [8]:
ropacus_phenol_mg_protein = 10* WT_phenol_composition['Protein Content']
ropacus_phenol_mg_lipid = 10* WT_phenol_composition['Lipid Content']
ropacus_phenol_mg_carbohydrate = 10* WT_phenol_composition['Carbohydrate Content']

lipid_multiplier = ropacus_phenol_mg_protein / subtilis_mg_lipid
protein_multiplier = ropacus_phenol_mg_lipid/ subtilis_mg_protein
carbohydrate_multiplier = ropacus_phenol_mg_carbohydrate/ subtilis_mg_carbohydrate

# print(f'R. opacus phenol mg of protein / g dry cell weight: {ropacus_phenol_mg_protein:.2f} mg')
# print(f'R. opacus phenol mg of lipid / g dry cell weight: {ropacus_phenol_mg_lipid:.2f} mg')
# print(f'R. opacus phenol mg of carbohydrate / g dry cell weight: {ropacus_phenol_mg_carbohydrate:.2f} mg\n')

print(f'The lipid content is {lipid_multiplier:.3f} times higher in R. opacus with phenol than in B. Subtilis.')
print(f'The protein content is {protein_multiplier:.3f} times higher in R. opacus with phenol than in B. Subtilis.')
print(f'The carbohydrate content is {carbohydrate_multiplier:.3f} times higher in R. opacus with phenol than in B. Subtilis.')

The lipid content is 2.216 times higher in R. opacus with phenol than in B. Subtilis.
The protein content is 0.215 times higher in R. opacus with phenol than in B. Subtilis.
The carbohydrate content is 17.887 times higher in R. opacus with phenol than in B. Subtilis.


Create a new reaction for phenol biomass

In [9]:
# phenol_growth_equation = model.reactions.get_by_id('Growth').copy()
phenol_growth_equation = cobra.Reaction()
phenol_growth_equation.name = 'Phenol biomass reaction'
phenol_growth_equation.id = 'Growth_Phenol'
phenol_growth_equation

0,1
Reaction identifier,Growth_Phenol
Name,Phenol biomass reaction
Memory address,0x07fac6388e790
Stoichiometry,--> -->
GPR,
Lower bound,0.0
Upper bound,1000.0


In [16]:
for met in list(model.reactions.get_by_id('Growth').metabolites):
    print(met)
# list(model.reactions.get_by_id('Growth').metabolites)

10fthf_c
ala__L_c
amet_c
arg__L_c
asn__L_c
asp__L_c
atp_c
ca2_c
cl_c
coa_c
cobalt2_c
ctp_c
cu2_c
cys__L_c
datp_c
dctp_c
dgtp_c
dttp_c
fad_c
fe2_c
fe3_c
gln__L_c
glu__L_c
gly_c
gtca1_45_BS_c
gtca2_45_BS_c
gtca3_45_BS_c
gtp_c
h2o_c
his__L_c
ile__L_c
k_c
leu__L_c
lipo1_24_BS_c
lipo2_24_BS_c
lipo3_24_BS_c
lipo4_24_BS_c
lys__L_c
met__L_c
mg2_c
mlthf_c
mn2_c
mql8_c
nad_c
nadp_c
peptido_BS_c
phe__L_c
pro__L_c
pydx5p_c
ribflv_c
ser__L_c
so4_c
thf_c
thmpp_c
thr__L_c
trp__L_c
tyr__L_c
utp_c
val__L_c
zn2_c
adp_c
h_c
pi_c
ppi_c


In [17]:
metabolites_to_add = {}

# for met in list(substrate_df['abbreviation']):
for met in list(model.reactions.get_by_id('Growth').metabolites):
    met_category = list(substrate_df[substrate_df['abbreviation'] == met]['category'])[0]
    met_subtilis_coefficient = list(substrate_df[substrate_df['abbreviation'] == met]['coefficient'])[0]
    metabolite = model.metabolites.get_by_id(met)
    
    if met_category == 'protein':
        met_ropacus_coefficient = protein_multiplier*met_subtilis_coefficient
    elif met_category == 'lipid':
        met_ropacus_coefficient = lipid_multiplier*met_subtilis_coefficient
    elif met_category == 'carbohydrate':
        met_ropacus_coefficient = carbohydrate_multiplier*met_subtilis_coefficient
    else:
        met_ropacus_coefficient = met_subtilis_coefficient
        
    metabolites_to_add[metabolite] = met_ropacus_coefficient

IndexError: list index out of range

In [11]:
metabolites_to_add

{<Metabolite 10fthf_c at 0x7fac66b27a90>: 0.00021600000000000002,
 <Metabolite ala__L_c at 0x7fac66a66ed0>: 0.10703270532982996,
 <Metabolite amet_c at 0x7fac66b0e3d0>: 0.00021600000000000002,
 <Metabolite arg__L_c at 0x7fac669b5b50>: 0.06163143350036347,
 <Metabolite asn__L_c at 0x7fac669af110>: 0.05022649563205266,
 <Metabolite asp__L_c at 0x7fac66a56050>: 0.05022649563205266,
 <Metabolite atp_c at 0x7fac66b371d0>: 52.547151,
 <Metabolite ca2_c at 0x7fac669282d0>: 0.005053,
 <Metabolite cl_c at 0x7fac66a72390>: 0.005053,
 <Metabolite coa_c at 0x7fac66b4a710>: 0.000559,
 <Metabolite cobalt2_c at 0x7fac6694de10>: 9.7e-05,
 <Metabolite ctp_c at 0x7fac66a44e90>: 0.129616,
 <Metabolite cu2_c at 0x7fac66992dd0>: 0.000688,
 <Metabolite cys__L_c at 0x7fac66a88c90>: 0.0190817717169301,
 <Metabolite datp_c at 0x7fac669ec4d0>: 0.025403,
 <Metabolite dctp_c at 0x7fac66907090>: 0.026229000000000002,
 <Metabolite dgtp_c at 0x7fac669f0250>: 0.026229000000000002,
 <Metabolite dttp_c at 0x7fac668c07d

### Add glucose biomass equation to model