# Study A
This notebook shows the full workflow for building models, simulating growth, and obtaining SCFA predictions from data collected in the _ex vivo_ study conducted by the Gibbons Lab in 2019.

In [None]:
import pandas as pd
import micom
import micom.measures
from plotnine import * 
import scipy

%matplotlib inline

## Flux Calculation
Here we calculate the production rate of each SCFA from the experimental measurements for each sample, and convert the rates to dicts for later use when merging with the predicted results.

In [None]:
# Load the raw SCFA measurements, keep only the columns used below and give
# them short, readable names (dict order fixes the resulting column order).
scfa_raw = pd.read_csv('../data/raw_data/studyA_original_scfas.csv')
column_names = {'Unique Sample ID': 'sample_id',
                'Treatment': 'treatment',
                'Time \nPoint': 'timepoint',
                'Subject or Animal ID': 'subject_id',
                'Analyte': 'analyte',
                'Result': 'result'}
scfa = (scfa_raw[list(column_names)]
        .rename(columns = column_names)
        .set_index('sample_id'))

# Composite ID: last character of the subject ID, the treatment, and the
# second character of the raw sample ID, joined by underscores.
scfa['id'] = scfa['subject_id'].str[-1]+'_'+scfa['treatment']+'_'+scfa.index.str[1]

# Production = change in concentration between consecutive rows of the same
# (id, analyte) group.
# NOTE(review): diff() follows the row order of the CSV; this assumes rows are
# already ordered by timepoint within each group - confirm.
scfa['flux'] = scfa.groupby(['id','analyte'])['result'].diff()

# Raw sample ID -> composite ID, captured before rows are dropped so that
# every sample can still be mapped later on.
metadata = scfa['id'].to_dict()

# The first row of every group has no preceding value, so its flux is NaN;
# dropna() removes those rows (and any other row containing a NaN).
scfa = scfa.dropna()
scfa = scfa.pivot_table(index = 'id',
                        columns = 'analyte',
                        values = 'flux') # one row per id, one column per analyte
scfa = scfa.div(6) # divide production by culturing time to get a rate

# Divide by molar mass to convert each rate to molar units.
# NOTE(review): 59.04 g/mol matches the acetate anion, whereas 88.11 and 74.08
# match the protonated acids (acetic acid would be 60.05) - confirm intent.
acetateMeasured = scfa['Acetic acid'].div(59.04).to_dict()
butyrateMeasured = scfa['Butyric acid'].div(88.11).to_dict()
propionateMeasured = scfa['Propionic acid'].div(74.08).to_dict()

# The exported table holds the per-hour rates before the molar conversion.
scfa.to_csv('../data/studyA_SCFAs.csv')

## Taxonomy Table
Next, we will read in abundance data for all samples. We will convert this into a taxonomy table to use in MICOM.

In [None]:
# Read the raw per-sample abundances and translate the raw sample names into
# the composite IDs built in the SCFA cell (`metadata`: raw sample ID -> id).
abundance = pd.read_csv('../data/raw_data/studyA_original_abundance.csv')
abundance['sample_id'] = abundance['sample'].map(metadata)

# Expand the single-letter rank columns into full rank names and call the read
# counts `abundance`; the raw sample column is no longer needed.
abundance = abundance.rename(columns =
                           {'d':'domain',
                            'p':'phylum',
                            'c':'class',
                            'o':'order',
                            'f':'family',
                            'g':'genus',
                            's':'species',
                            'reads':'abundance'}).drop(
    columns = 'sample')

# Taxon identifier: the species name with spaces replaced by underscores.
abundance['id'] = abundance['species'].str.replace(' ', '_')

# Collapse duplicate (sample, species) rows by summing their counts.
# numeric_only=True keeps the sum restricted to numeric columns: since
# pandas 2.0 the default would also "sum" the string rank columns
# (concatenating them, or raising on missing values).
abundance = (abundance
             .groupby(['sample_id','species','id'])
             .sum(numeric_only = True)
             .reset_index())

# Wide sample x species matrix; species absent from a sample get 0.
table = pd.pivot_table(abundance,
               index = 'sample_id',
               columns = 'species',
               values = 'abundance').fillna(0.0) # build abundance matrix

# Written next to the other derived data using a relative path, so the
# notebook does not depend on one user's home directory.
table.to_csv('../data/studyA_abundance.csv')

## Build Models
Now, we'll build our models, with a cutoff of 0.001.

In [None]:
# Build one community model per sample from the long-format abundance table
# (columns `sample_id`, `id`, `species`, `abundance`) prepared in the taxonomy
# cell. The name `taxonomy` was never defined in this notebook, so passing it
# raised a NameError on a fresh kernel; `abundance` is the table to use.
manifest = micom.workflows.build(abundance,
                               out_folder = '../models/studyA/',
                               model_db = '../agora103_refseq216_species_1.qza',
                               cutoff = 0.001,
                               threads = 20)

## Load Medium
Load in the carbon-stripped European Diet, and construct the intervention diet by supplementing it with inulin.

In [None]:
# Base medium; the first CSV column is used as the index.
medium = pd.read_csv('../media/studyAmedium.csv', index_col = 0)

# The control arm uses the base medium unchanged (same object, not a copy).
controlMedium = medium

# Single-row frame describing the inulin exchange added for the intervention
# arm, indexed by its reaction ID.
inulinSupplement = pd.DataFrame(
    {'reaction': ['EX_inulin_m'],
     'metabolite': ['inulin_m'],
     'global_id': ['EX_inulin(e)'],
     'flux': [14]},
    index = ['EX_inulin_m'])

# Intervention medium = base medium with the inulin row appended.
inulinMedium = pd.concat([medium, inulinSupplement])
medium

## Grow Models
Now we'll grow the samples using the respective media we constructed

In [None]:
# Reload the manifest written by the build step and split it by treatment,
# using the treatment tag embedded in each sample ID.
manifest = pd.read_csv('../models/studyA/manifest.csv')
isControl = manifest['sample_id'].str.contains('PBS')
isInulin = manifest['sample_id'].str.contains('inulin')
controlManifest = manifest[isControl]
inulinManifest = manifest[isInulin]

# Both simulations share every setting except the manifest and the medium.
modelFolder = '../models/studyA/'
growSettings = dict(tradeoff = 0.7,
                    strategy = 'none',
                    threads = 20)

controlGrowth = micom.workflows.grow(controlManifest,
                                     modelFolder,
                                     controlMedium,
                                     **growSettings)
inulinGrowth = micom.workflows.grow(inulinManifest,
                                    modelFolder,
                                    inulinMedium,
                                    **growSettings)

## Compare SCFA Fluxes
We can now extract the production fluxes of SCFAs from each growth simulation, and construct a dataframe with both measured and predicted production rates.

In [None]:
# Predicted production rates for both arms, stacked into one long table.
inulinProduction = micom.measures.production_rates(inulinGrowth)
controlProduction = micom.measures.production_rates(controlGrowth)
production = pd.concat([inulinProduction, controlProduction])

# Keep only the three SCFAs and reshape to one row per sample,
# one column per metabolite.
scfaNames = ['butyrate', 'acetate', 'propionate']
res = production[production['name'].isin(scfaNames)]
res = res.pivot(index = 'sample_id',
                columns = 'name',
                values = 'flux')

# Suffix the columns so predictions can sit next to measurements.
res = res.rename(columns = {'acetate':'acetatePredicted',
                            'butyrate':'butyratePredicted',
                            'propionate':'propionatePredicted'})

# Attach the measured rates by sample ID (dicts built in the SCFA cell).
res['acetateMeasured'] = res.index.map(acetateMeasured)
res['butyrateMeasured'] = res.index.map(butyrateMeasured)
res['propionateMeasured'] = res.index.map(propionateMeasured)

# Sample IDs are underscore-separated: first field donor, second treatment.
idParts = res.index.str.split('_')
res['donor'] = idParts.str[0]
res['treatment'] = idParts.str[1]

# Drop samples lacking either a prediction or a measurement, then average
# the remaining replicates of each (treatment, donor) pair.
res = res.dropna()
res = res.groupby(['treatment','donor']).mean().reset_index()
res['sample_id'] = res['treatment']+'_'+res['donor']
res = res.set_index('sample_id')

# Human-readable treatment labels for plotting.
res['treatment'] = res['treatment'].str.replace(
    'PBS','Control').str.replace(
    'inulin','Inulin')

# Everything after the two key columns (treatment, donor) is numeric.
valueColumns = res.columns[2:]
res[valueColumns] = res[valueColumns].astype('float')

## Plot Results
Finally, we'll plot predicted against measured fluxes.

In [None]:
# Acetate: measured production rate (x) against predicted production rate (y).
fig1 = (ggplot(
    res, aes(x = 'acetateMeasured', y = 'acetatePredicted'))
    # pooled linear fit over both treatments
    +geom_smooth(method = 'lm', linetype = '--')
    +geom_point(aes(color = 'treatment'), size = 8)
    # per-treatment linear fits; `group` is the grouping aesthetic
    # (`groups` is not an aesthetic name and was ignored)
    +geom_smooth(aes(group = 'treatment', color = 'treatment', fill = 'treatment'),
                 method = 'lm', linetype = '--')
    +scale_color_manual(limits = ['Control','Inulin'], values = ['cornflowerblue', 'coral'])
    +labs(title='Acetate',
          x='Measured Acetate (mmol/L/h)',
          y = 'Predicted Acetate (mmol/gDW/h)',
          color = 'Treatment')
    # large text, white background, no grid, bold axis lines
    +theme(text = element_text(size=35, color = 'black'),
           panel_background=element_rect(fill = "white", colour = "white",
                                         size = 0.5, linetype = "solid"),
           panel_grid= element_blank(),
           axis_line = element_line(size = 2, linetype = "solid",colour = "black"),
           legend_position='right',
           axis_text_x=element_text(rotation = 20, hjust = 1))
)
fig1

In [None]:
# Linear regression of measured acetate (y) on predicted acetate (x).
# Note the axes are swapped relative to the acetate figure (which plots
# measured on x), so the slope here is not the slope of the plotted line.
scipy.stats.linregress(x = res['acetatePredicted'],
                       y = res['acetateMeasured'])

In [None]:
# Butyrate: measured production rate (x) against predicted production rate (y).
fig2 = (ggplot(
    res, aes(x = 'butyrateMeasured', y = 'butyratePredicted'))
    # pooled linear fit over both treatments
    +geom_smooth(method = 'lm', linetype = '--')
    +geom_point(aes(color = 'treatment'), size = 8)
    # per-treatment linear fits; `group` is the grouping aesthetic
    # (`groups` is not an aesthetic name and was ignored)
    +geom_smooth(aes(group = 'treatment', color = 'treatment', fill = 'treatment'),
                 method = 'lm', linetype = '--')
    # the treatments in `res` are 'Control' and 'Inulin'; limiting the scale
    # to 'Pectin' left the Inulin samples outside the colour scale
    +scale_color_manual(limits = ['Control','Inulin'], values = ['cornflowerblue', 'coral'])
    +labs(title='Butyrate',
          x='Measured Butyrate (mmol/L/h)',
          y = 'Predicted Butyrate (mmol/gDW/h)',
          color = 'Treatment')
    # large text, white background, no grid, bold axis lines
    +theme(text = element_text(size=35, color = 'black'),
           panel_background=element_rect(fill = "white", colour = "white",
                                         size = 0.5, linetype = "solid"),
           panel_grid= element_blank(),
           axis_line = element_line(size = 2, linetype = "solid",colour = "black"),
           legend_position='right',
           axis_text_x=element_text(rotation = 20, hjust = 1))
)
fig2

In [None]:
# Linear regression of measured butyrate (y) on predicted butyrate (x).
# Note the axes are swapped relative to the butyrate figure (which plots
# measured on x), so the slope here is not the slope of the plotted line.
scipy.stats.linregress(x = res['butyratePredicted'],
                       y = res['butyrateMeasured'])

In [None]:
# Propionate: measured production rate (x) against predicted production rate (y).
fig3 = (ggplot(
    res, aes(x = 'propionateMeasured', y = 'propionatePredicted'))
    # pooled linear fit over both treatments
    +geom_smooth(method = 'lm', linetype = '--')
    +geom_point(aes(color = 'treatment'), size = 8)
    # per-treatment linear fits; `group` is the grouping aesthetic
    # (`groups` is not an aesthetic name and was ignored)
    +geom_smooth(aes(group = 'treatment', color = 'treatment', fill = 'treatment'),
                 method = 'lm', linetype = '--', alpha = .5)
    # the treatments in `res` are 'Control' and 'Inulin'; limiting the scale
    # to 'Pectin' left the Inulin samples outside the colour scale
    +scale_color_manual(limits = ['Control','Inulin'], values = ['cornflowerblue', 'coral'])
    +labs(title='Propionate',
          x='Measured Propionate (mmol/L/h)',
          y = 'Predicted Propionate (mmol/gDW/h)',
          color = 'Treatment')
    # large text, white background, no grid, bold axis lines
    +theme(text = element_text(size=35, color = 'black'),
           panel_background=element_rect(fill = "white", colour = "white",
                                         size = 0.5, linetype = "solid"),
           panel_grid= element_blank(),
           axis_line = element_line(size = 2, linetype = "solid",colour = "black"),
           legend_position='right',
           axis_text_x=element_text(rotation = 20, hjust = 1))
)
fig3

In [None]:
# Linear regression of measured propionate (y) on predicted propionate (x).
# Note the axes are swapped relative to the propionate figure (which plots
# measured on x), so the slope here is not the slope of the plotted line.
scipy.stats.linregress(x = res['propionatePredicted'],
                       y = res['propionateMeasured'])

In [None]:
# Persist the merged table of predicted and measured rates
# (one row per treatment/donor pair, indexed by sample_id).
res.to_csv(path_or_buf = '../results/studyA.csv')