# Study C
This notebook shows the full workflow for building models, simulating growth, and obtaining SCFA predictions from data collected in the _ex vivo_ study conducted by the Gurry Lab in 2021 (Study C).

In [None]:
import pandas as pd
import numpy as np 
import micom
import micom.measures
import scipy
from plotnine import *

%matplotlib inline

## Flux Calculation
Here we load the experimental SCFA measurements derived from the GC-MS data, exclude the combined inulin + pectin (`INUL+PECT`) treatment, and average the remaining rows for each sample and treatment into a single dataframe.

In [None]:
# Load the experimental SCFA table for Study C.
scfa_raw = pd.read_csv('../data/raw_data/studyC_original_scfas.csv')

# Exclude rows whose treatment is the combined 'INUL+PECT' arm.
keep_rows = scfa_raw['treatment'] != 'INUL+PECT'

# Average all rows sharing the same index / sample / treatment, then turn the
# grouping keys back into ordinary columns.
scfa = (
    scfa_raw[keep_rows]
    .groupby(['index', 'sample', 'treatment'])
    .mean()
    .reset_index()
)
scfa

## Taxonomy Table
Next, we will read in abundance data for all samples. We will convert this into a taxonomy table to use in MICOM.

In [None]:
# Read the raw per-sample read counts.
raw_abundance = pd.read_csv('../data/raw_data/studyC_original_abundances.csv')

# Build the long-format taxonomy table: the species column doubles as the
# taxon id, only the relevant columns are kept and renamed, and duplicate
# (sample, taxon) rows are collapsed by summing their reads.
abundance = (
    raw_abundance
    .assign(id=raw_abundance['s'])
    .loc[:, ['sample', 'id', 'reads', 's']]
    .rename(columns={'sample': 'sample_id',
                     'reads': 'abundance',
                     's': 'species'})
    .groupby(by=['sample_id', 'id', 'species'])
    .sum()
    .reset_index()
)

# Species names use spaces instead of underscores (the id column is left as-is).
abundance['species'] = abundance['species'].str.replace('_', ' ')

# Wide sample-by-species abundance matrix; taxa absent from a sample get 0.
table = abundance.pivot_table(
    index='sample_id',
    columns='species',
    values='abundance',
).fillna(0.0)
table.to_csv('../data/studyC.csv')
abundance

## Build Models
Now we'll build our community models, using an abundance cutoff of 0.001.

In [None]:
# Build the community models from the taxonomy table and write them to
# ../models/studyC. The call returns a manifest describing the built models.
# Fix: the original call was missing the comma after `out_folder`, which made
# the whole cell a SyntaxError.
manifest = micom.workflows.build(
    abundance,
    out_folder='../models/studyC',
    model_db='../agora103_refseq216_species_1.qza',
    cutoff=0.001,  # abundance cutoff passed to MICOM
    threads=10,
)

## Load Medium
Load in the carbon-stripped European Diet, and construct the intervention diets by augmenting with inulin and pectin

In [None]:
def _with_extra_exchange(base_medium, reaction, metabolite, global_id, flux):
    """Return a new medium table: *base_medium* plus one appended exchange row.

    The appended row is indexed by the metabolite id.
    """
    extra_row = pd.DataFrame(
        {
            'reaction': [reaction],
            'metabolite': [metabolite],
            'global_id': [global_id],
            'flux': [flux],
        },
        index=[metabolite],
    )
    return pd.concat([base_medium, extra_row])


# Base medium shared by all three conditions.
medium = pd.read_csv('../media/studyCmedium.csv')

# The control condition uses the base medium unchanged.
controlMedium = medium

# Intervention media: base medium plus a pectin or an inulin exchange row.
pectinMedium = _with_extra_exchange(
    medium, 'EX_pect_m', 'pect_m', 'EX_pect(e)', 1)
inulinMedium = _with_extra_exchange(
    medium, 'EX_inulin_m', 'inulin_m', 'EX_inulin(e)', 14)

## Grow Models
Now we'll grow the samples using the respective media we constructed

In [None]:
# Reload the manifest written by the build step.
manifest = pd.read_csv('../models/studyC/manifest.csv')

# Settings shared by all three simulations; only the medium differs.
growth_settings = dict(
    model_folder='../models/studyC',
    tradeoff=0.7,
    strategy='none',
    threads=20,
)

# One growth simulation per condition.
controlGrowth = micom.workflows.grow(
    manifest, medium=controlMedium, **growth_settings)
pectinGrowth = micom.workflows.grow(
    manifest, medium=pectinMedium, **growth_settings)
inulinGrowth = micom.workflows.grow(
    manifest, medium=inulinMedium, **growth_settings)

## Compare SCFA Fluxes
We can now extract the production fluxes of SCFAs from each growth simulation, and construct a dataframe with both measured and predicted production rates.

In [None]:
# Predicted production rates for each growth simulation, tagged with the
# treatment code of the medium that was used.
controlProduction = micom.measures.production_rates(controlGrowth)
controlProduction['treatment'] = 'CTRL'
# Fix: the pectin simulation result is named `pectinGrowth`; the original
# referenced the undefined name `pectGrowth`.
pectProduction = micom.measures.production_rates(pectinGrowth)
pectProduction['treatment'] = 'PECT'
inulinProduction = micom.measures.production_rates(inulinGrowth)
inulinProduction['treatment'] = 'INUL'

# Stack the three conditions and make the sample id unique per condition
# ("<sample>_<treatment>").
production = pd.concat([controlProduction, pectProduction, inulinProduction])
production['sample_id'] = production['sample_id'] + '_' + production['treatment']

# Keep only the three SCFAs of interest.
scfas = production[production['name'].isin(['butyrate', 'acetate', 'propionate'])]

# One row per sample/treatment, one predicted-flux column per SCFA.
res = pd.pivot(scfas, index='sample_id', columns='name', values='flux')
res.rename(
    columns={
        'acetate': 'acetatePredicted',
        'butyrate': 'butyratePredicted',
        'propionate': 'propionatePredicted',
    },
    inplace=True,
)

# NOTE(review): `acetateMeasured`, `butyrateMeasured` and `propionateMeasured`
# are not defined anywhere in this notebook as shown. They are presumably
# mappings from "<sample>_<treatment>" to the measured value, built from the
# `scfa` table -- define them before running this cell.
res['acetateMeasured'] = res.index.map(acetateMeasured)
res['butyrateMeasured'] = res.index.map(butyrateMeasured)
res['propionateMeasured'] = res.index.map(propionateMeasured)

# The treatment code is the suffix appended above, so take the LAST
# underscore-separated field; this stays correct even when the sample id
# itself contains underscores.
res['treatment'] = (
    res.index.str.split('_').str[-1]
    .str.replace('CTRL', 'Control')
    .str.replace('INUL', 'Inulin')
    .str.replace('PECT', 'Pectin')
)
res

## Plot Results
Finally, we'll plot predicted vs measured fluxes against each other

In [None]:
# Scatter of measured (x) vs predicted (y) acetate flux with a linear fit,
# points coloured by treatment.
# The axis labels are raw strings: `\d` in a normal string literal is an
# invalid escape sequence and triggers a warning on recent Python versions.
fig1 = (
    ggplot(res, aes(x='acetateMeasured', y='acetatePredicted'))
    + geom_smooth(method='lm', linetype='--')
    + geom_point(aes(color='treatment'), size=8)
    + scale_color_manual(
        limits=['Control', 'Inulin', 'Pectin'],
        values=['cornflowerblue', 'coral', 'mediumseagreen'],
    )
    + labs(
        title='Acetate',
        x=r'Measured ($\dfrac{mmol}{L*h}$)',
        y=r'Predicted ($\dfrac{mmol}{gDCW*h}$)',
        color='Treatment',
    )
    + theme(
        text=element_text(size=35, color='black'),
        panel_background=element_rect(
            fill="white", colour="white", size=0.5, linetype="solid"),
        panel_grid=element_blank(),
        axis_line=element_line(size=2, linetype="solid", colour="black"),
        legend_position='right',
        axis_text_x=element_text(rotation=20, hjust=1),
    )
)
fig1

In [None]:
# Linear regression with the predicted flux as x and the measured flux as y
# (the reverse of the plot's axes).
scipy.stats.linregress(
    x=res['acetatePredicted'],
    y=res['acetateMeasured'],
)

In [None]:
# Scatter of measured (x) vs predicted (y) butyrate flux with a linear fit,
# points coloured by treatment.
# The axis labels are raw strings: `\d` in a normal string literal is an
# invalid escape sequence and triggers a warning on recent Python versions.
fig2 = (
    ggplot(res, aes(x='butyrateMeasured', y='butyratePredicted'))
    + geom_smooth(method='lm', linetype='--')
    + geom_point(aes(color='treatment'), size=8)
    + scale_color_manual(
        limits=['Control', 'Inulin', 'Pectin'],
        values=['cornflowerblue', 'coral', 'mediumseagreen'],
    )
    + labs(
        title='Butyrate',
        x=r'Measured ($\dfrac{mmol}{L*h}$)',
        y=r'Predicted ($\dfrac{mmol}{gDCW*h}$)',
        color='Treatment',
    )
    + theme(
        text=element_text(size=35, color='black'),
        panel_background=element_rect(
            fill="white", colour="white", size=0.5, linetype="solid"),
        panel_grid=element_blank(),
        axis_line=element_line(size=2, linetype="solid", colour="black"),
        legend_position='right',
        axis_text_x=element_text(rotation=20, hjust=1),
    )
)
fig2

In [None]:
# Linear regression with the predicted flux as x and the measured flux as y
# (the reverse of the plot's axes).
scipy.stats.linregress(
    x=res['butyratePredicted'],
    y=res['butyrateMeasured'],
)

In [None]:
# Scatter of measured (x) vs predicted (y) propionate flux with a linear fit,
# points coloured by treatment.
# The axis labels are raw strings: `\d` in a normal string literal is an
# invalid escape sequence and triggers a warning on recent Python versions.
fig3 = (
    ggplot(res, aes(x='propionateMeasured', y='propionatePredicted'))
    + geom_smooth(method='lm', linetype='--')
    + geom_point(aes(color='treatment'), size=8)
    + scale_color_manual(
        limits=['Control', 'Inulin', 'Pectin'],
        values=['cornflowerblue', 'coral', 'mediumseagreen'],
    )
    + labs(
        title='Propionate',
        x=r'Measured ($\dfrac{mmol}{L*h}$)',
        y=r'Predicted ($\dfrac{mmol}{gDCW*h}$)',
        color='Treatment',
    )
    + theme(
        text=element_text(size=35, color='black'),
        panel_background=element_rect(
            fill="white", colour="white", size=0.5, linetype="solid"),
        panel_grid=element_blank(),
        axis_line=element_line(size=2, linetype="solid", colour="black"),
        legend_position='right',
        axis_text_x=element_text(rotation=20, hjust=1),
    )
)
fig3

In [None]:
# Linear regression with the predicted flux as x and the measured flux as y
# (the reverse of the plot's axes).
scipy.stats.linregress(
    x=res['propionatePredicted'],
    y=res['propionateMeasured'],
)

## Save all results

In [None]:
# Persist the combined predicted/measured table for downstream analysis.
results_path = '../results/studyC.csv'
res.to_csv(results_path)