# Study B
This notebook shows the full workflow for building models, simulating growth, and obtaining SCFA predictions from data collected in the _ex vivo_ study conducted by the Hamaker Lab in 2021 (Study B).

In [None]:
import pandas as pd
import numpy as np 
import os
import micom
import micom.measures
from plotnine import *
import scipy

%matplotlib inline

## Flux Calculation
Here we will calculate production rates from the experimental SCFA measurements and convert them to dicts for later use.

In [None]:
# Read in the GC-MS SCFA measurements.
scfa = pd.read_csv('../data/raw_data/studyB_original_scfas.csv')

# Divide each SCFA measurement by the culturing time (4) to get a production rate.
scfa_columns = ['Acetate', 'Butyrate', 'Propionate']
scfa[scfa_columns] = scfa[scfa_columns] / 4

# Sample IDs have the form <Treatment>_<Donor>_<replicate>, where replicates
# are numbered from 1 in the order the rows appear in the file.
scfa['Donor'] = scfa['Donor'].astype('str')
base_id = scfa[['Treatment', 'Donor']].agg('_'.join, axis=1)
replicate = base_id.groupby(base_id).cumcount().values + 1
scfa['sample_id'] = base_id + '_' + replicate.astype(str)

# Keep only the Blank (control), Pectin and FOS treatments, indexed by sample ID.
is_kept_treatment = scfa['Treatment'].str.contains('Blank|Pectin|FOS')
scfa = scfa[is_kept_treatment].set_index('sample_id')

# sample_id -> measured production rate, one dict per SCFA.
acetateMeasured, butyrateMeasured, propionateMeasured = (
    scfa[column].to_dict() for column in scfa_columns
)

# Save the filtered production-rate table.
scfa.to_csv('../data/studyB_SCFAs.csv')

## Taxonomy Table
Next, we will read in abundance data for all samples. We will convert this into a taxonomy table to use in MICOM.

In [None]:
# Load the abundance table; the first CSV column is used as the row index.
abundance = pd.read_csv('../data/raw_data/studyB_original_abundance.csv', index_col=0)

# Join the treatment and donor into a sample ID of the form <treatment>_<donor>.
abundance['donor'] = abundance['donor'].astype('str')
abundance['sample_id'] = abundance[['treatment_II_B', 'donor']].agg('_'.join, axis=1)
abundance = (abundance.drop(columns=['treatment_II_B', 'donor'])
                      .set_index('sample_id')
                      .sort_index())  # sort so that identical sample IDs are adjacent

# Remove the treatment without SCFA measurements (sample IDs containing 'II_').
# reset_index() returns a new frame, so the assignments below never write into
# a view of the filtered slice.
abundance = abundance[~abundance.index.str.contains('II_')].reset_index()

# Add a 1-based replicate number to each sample ID.
replicate = abundance.groupby('sample_id').cumcount().values + 1
abundance['sample_id'] = abundance['sample_id'] + '_' + replicate.astype(str)

# Melt the wide table (one column per taxon) into a long taxonomy table.
abundance = pd.melt(abundance,
                    id_vars='sample_id',
                    value_vars=abundance.columns.drop('sample_id'),
                    var_name='id',
                    value_name='abundance')

# Strip the rank prefixes from the lineage strings. regex=False is passed
# explicitly because the default of `regex` differs between pandas versions.
lineage = abundance['id']
for rank_prefix in ('k__', 'p__', 'c__', 'o__', 'f__', 'g__', 's__'):
    lineage = lineage.str.replace(rank_prefix, '', regex=False)
abundance['id'] = lineage

# The genus is taken as the second-to-last ';'-separated field of the lineage
# (assumes the lineage strings end with the species field -- TODO confirm).
abundance['genus'] = abundance['id'].str.split(';').str[-2]

# Isolate the Blank, Pectin and FOS samples. The explicit copy makes the
# following column assignment operate on an independent frame instead of a
# filtered slice (avoids SettingWithCopyWarning).
is_kept_sample = abundance['sample_id'].str.contains('Blank|Pectin|FOS')
abundance = abundance[is_kept_sample].copy()
abundance['id'] = abundance['genus']

# Sum the abundances of rows that collapse onto the same genus within a sample.
abundance = (abundance.groupby(['sample_id', 'id', 'genus'])
                      .sum()
                      .reset_index()
                      .dropna())

# Build the sample x genus abundance matrix; missing genera are filled with 0.
table = pd.pivot_table(abundance,
                       index='sample_id',
                       columns='genus',
                       values='abundance').fillna(0.0)
# NOTE(review): absolute, user-specific output path, unlike the relative paths
# used elsewhere in this notebook -- consider moving it under ../data/.
table.to_csv('/users/nbohmann/exvivo/studyB.csv')
abundance

## Build Models
Now, we'll build our models, with a cutoff of 0.001.

In [None]:
# Build the community models from the taxonomy table with MICOM.
# The models are written to ../models/studyB using the given model database,
# a cutoff of 0.001 and 10 threads; the returned manifest is kept.
build_options = {
    'out_folder': '../models/studyB',
    'model_db': '../agora103_refseq216_s_1.qza',
    'cutoff': 0.001,
    'threads': 10,
}
manifest = micom.workflows.build(abundance, **build_options)

## Load Medium
Load the carbon-stripped European diet medium, and construct the intervention media by supplementing it with pectin or FOS.

In [None]:
# Base medium shared by all conditions.
medium = pd.read_csv('../media/studyBmedium.csv')

# The control condition uses the base medium unchanged (same object, not a copy).
controlMedium = medium

# Pectin medium: base medium plus the pectin exchange at flux 1; the
# amylopectin exchange is listed explicitly with flux 0.
pectin_supplement = pd.DataFrame({
    'reaction': ['EX_pect_m', 'EX_amylopect900_m'],
    'flux': [1, 0],
})
pectMedium = pd.concat([medium, pectin_supplement])

# FOS medium: base medium plus three kesto* exchanges, each at flux 100.
fos_supplement = pd.DataFrame({
    'reaction': ['EX_kestopt_m', 'EX_kestottr_m', 'EX_kesto_m'],
    'flux': [100, 100, 100],
})
fosMedium = pd.concat([medium, fos_supplement])

## Grow Models
Now we'll grow the samples using the respective media we constructed

In [None]:
# Reload the manifest written by the model-building step.
manifest = pd.read_csv('../models/studyB/manifest.csv')

# Split the manifest by the treatment prefix of the sample ID.
manifest_ids = manifest['sample_id']
controlManifest = manifest[manifest_ids.str.startswith('Blank')]
pectManifest = manifest[manifest_ids.str.startswith('Pect')]
fosManifest = manifest[manifest_ids.str.startswith('FOS')]

# Settings shared by all three growth simulations.
model_dir = '../models/studyB/'
shared_options = {'tradeoff': 0.7, 'strategy': 'none'}

# Grow each treatment group on its own medium.
controlGrowth = micom.workflows.grow(controlManifest, model_dir,
                                     medium=controlMedium,
                                     threads=10,
                                     **shared_options)

pectGrowth = micom.workflows.grow(pectManifest, model_dir,
                                  medium=pectMedium,
                                  threads=20,
                                  **shared_options)

fosGrowth = micom.workflows.grow(fosManifest, model_dir,
                                 medium=fosMedium,
                                 threads=20,
                                 **shared_options)

## Compare SCFA Fluxes
We can now extract the production fluxes of SCFAs from each growth simulation, and construct a dataframe with both measured and predicted production rates.

In [None]:
# Production rates for each growth simulation, then stacked into one table.
controlProduction = micom.measures.production_rates(controlGrowth)
pectProduction = micom.measures.production_rates(pectGrowth)
fosProduction = micom.measures.production_rates(fosGrowth)
production = pd.concat([controlProduction, pectProduction, fosProduction])

# Keep only the three SCFAs.
is_scfa = production['name'].isin(['butyrate', 'acetate', 'propionate'])
scfas = production[is_scfa]

# One row per sample, one predicted-flux column per SCFA.
res = scfas.pivot(index='sample_id', columns='name', values='flux')
res = res.rename(columns={'acetate': 'acetatePredicted',
                          'butyrate': 'butyratePredicted',
                          'propionate': 'propionatePredicted'})

# Map the measured production rates onto the same sample IDs.
measured_by_column = (('acetateMeasured', acetateMeasured),
                      ('butyrateMeasured', butyrateMeasured),
                      ('propionateMeasured', propionateMeasured))
for measured_column, measured_rates in measured_by_column:
    res[measured_column] = res.index.map(measured_rates)

# The first two '_'-separated fields of the sample ID are treatment and donor.
id_fields = res.index.str.split('_')
res['treatment'] = id_fields.str[0]
res['donor'] = id_fields.str[1]

# Average the replicates of each treatment/donor pair and re-index the result
# by <treatment>_<donor>.
res = res.groupby(['treatment', 'donor']).mean().reset_index()
res['sample_id'] = res['treatment'] + '_' + res['donor']
res = res.set_index('sample_id')

# Every column after 'treatment' and 'donor' holds a rate; store them as floats.
rate_columns = res.columns[2:]
res[rate_columns] = res[rate_columns].astype('float')

# Rename controls.
res['treatment'] = res['treatment'].str.replace('Blank','Control')
res

## Plot Results
Finally, we'll plot the predicted fluxes against the measured fluxes.

In [None]:
# Acetate: predicted flux (y) against measured rate (x) for every row of `res`,
# coloured by treatment, with a dashed linear fit.
acetate_theme = theme(
    text=element_text(size=35, color='black'),
    panel_background=element_rect(fill="white", colour="white",
                                  size=0.5, linetype="solid"),
    panel_grid=element_blank(),
    axis_line=element_line(size=2, linetype="solid", colour="black"),
    legend_position='right',
    axis_text_x=element_text(rotation=20, hjust=1),
)
fig1 = (
    ggplot(res, aes(x='acetateMeasured', y='acetatePredicted'))
    + geom_smooth(method='lm', linetype='--')
    + geom_point(aes(color='treatment'), size=8)
    + scale_color_manual(limits=['Control', 'Pectin', 'FOS'],
                         values=['cornflowerblue', 'mediumseagreen', 'purple'])
    + labs(title='Acetate',
           x='Measured Acetate (mmol/L/h)',
           y='Predicted Acetate (mmol/gDW/h)',
           color='Treatment')
    + acetate_theme
)
fig1

In [None]:
# Linear regression with predicted acetate as x and measured acetate as y.
# NOTE(review): the acetate scatter plot in this notebook puts measured on x,
# so slope and intercept here refer to the opposite orientation; the r-value
# and p-value do not depend on the orientation.
scipy.stats.linregress(x=res['acetatePredicted'],
                       y=res['acetateMeasured'])

In [None]:
# Butyrate: predicted flux (y) against measured rate (x) for every row of `res`,
# coloured by treatment, with a dashed linear fit.
butyrate_theme = theme(
    text=element_text(size=35, color='black'),
    panel_background=element_rect(fill="white", colour="white",
                                  size=0.5, linetype="solid"),
    panel_grid=element_blank(),
    axis_line=element_line(size=2, linetype="solid", colour="black"),
    legend_position='right',
    axis_text_x=element_text(rotation=20, hjust=1),
)
fig2 = (
    ggplot(res, aes(x='butyrateMeasured', y='butyratePredicted'))
    + geom_smooth(method='lm', linetype='--')
    + geom_point(aes(color='treatment'), size=8)
    + scale_color_manual(limits=['Control', 'Pectin', 'FOS'],
                         values=['cornflowerblue', 'mediumseagreen', 'purple'])
    + labs(title='Butyrate',
           x='Measured Butyrate (mmol/L/h)',
           y='Predicted Butyrate (mmol/gDW/h)',
           color='Treatment')
    + butyrate_theme
)
fig2

In [None]:
# Linear regression with predicted butyrate as x and measured butyrate as y.
# NOTE(review): the butyrate scatter plot in this notebook puts measured on x,
# so slope and intercept here refer to the opposite orientation; the r-value
# and p-value do not depend on the orientation.
scipy.stats.linregress(x=res['butyratePredicted'],
                       y=res['butyrateMeasured'])

In [None]:
# Propionate: predicted flux (y) against measured rate (x) for every row of
# `res`, coloured by treatment, with a dashed linear fit.
propionate_theme = theme(
    text=element_text(size=35, color='black'),
    panel_background=element_rect(fill="white", colour="white",
                                  size=0.5, linetype="solid"),
    panel_grid=element_blank(),
    axis_line=element_line(size=2, linetype="solid", colour="black"),
    legend_position='right',
    axis_text_x=element_text(rotation=20, hjust=1),
)
fig3 = (
    ggplot(res, aes(x='propionateMeasured', y='propionatePredicted'))
    + geom_smooth(method='lm', linetype='--')
    + geom_point(aes(color='treatment'), size=8)
    + scale_color_manual(limits=['Control', 'Pectin', 'FOS'],
                         values=['cornflowerblue', 'mediumseagreen', 'purple'])
    + labs(title='Propionate',
           x='Measured Propionate (mmol/L/h)',
           y='Predicted Propionate (mmol/gDW/h)',
           color='Treatment')
    + propionate_theme
)
fig3

In [None]:
# Linear regression with predicted propionate as x and measured propionate as y.
# NOTE(review): the propionate scatter plot in this notebook puts measured on
# x, so slope and intercept here refer to the opposite orientation; the
# r-value and p-value do not depend on the orientation.
scipy.stats.linregress(x=res['propionatePredicted'],
                       y=res['propionateMeasured'])

In [None]:
# Save the table of predicted and measured SCFA rates to CSV.
results_path = '../data/studyB.csv'
res.to_csv(results_path)