## This notebook shows the full workflow for building models, simulating growth and obtaining SCFA predictions from data collected by the _ex vivo_ study conducted by Gurry et al. 2021 (Study C)

In [None]:
import os
import pandas as pd
from plotnine import *
from tqdm import tqdm
import numpy as np
import scipy
import micom
import micom.measures

%matplotlib inline

## We'll start with the measured SCFA values from the _ex vivo_ experiments, and calculate production flux from the concentrations

In [None]:
# Read in the measured SCFA concentrations (wide form: one column per analyte)
scfas = pd.read_csv('../data/studyC_SCFAs.csv', index_col=0)
# Melt the data into long form: one row per measurement of a single analyte
scfas = pd.melt(scfas,
                id_vars=['subject_id', 'treatment', 'timepoint', 'replicate'],
                value_vars=['butyrate', 'propionate', 'acetate'],
                value_name='concentration',
                var_name='analyte')
# Sort so that rows of the same culture are ordered by timepoint;
# the diff() below depends on this ordering
scfas = scfas.sort_values(by=['subject_id', 'treatment', 'analyte', 'timepoint', 'replicate'])
# Calculate the difference in concentration between consecutive timepoints
scfas['flux'] = scfas.groupby(['subject_id', 'treatment', 'replicate', 'analyte'])['concentration'].diff()
# Divide the SCFA difference by the culturing time to calculate flux
# (4 is presumably hours, matching the per-hour axis labels used later -- confirm)
scfas['flux'] = scfas['flux'] / 4
# Drop rows with NAs (the first timepoint of each group has no previous value to diff against)
scfas.dropna(inplace=True)
# Mean and standard error of the mean (SEM) of the flux across replicates.
# Only 'flux' is aggregated: averaging every remaining column fails on
# pandas >= 2.0 when any of them is non-numeric, and no other column is used.
res = (scfas.groupby(['subject_id', 'treatment', 'analyte'])['flux']
            .agg(['mean', 'sem'])
            .rename(columns={'mean': 'flux'})
            .reset_index())
# Keep only the columns needed downstream, in a fixed order
res = res[['subject_id', 'treatment', 'analyte', 'flux', 'sem']]
# Pivot the table so each analyte gets its own 'flux' and 'sem' column
measured = pd.pivot_table(res, columns='analyte', index=['subject_id', 'treatment'], values=['flux', 'sem'])
measured

## Next, we'll load up abundances and build a taxonomy table for MICOM

In [None]:
# Load the abundance table
taxonomy = pd.read_csv('../data/studyC_abundances.csv')
# Every column after the first is melted as a taxon abundance column
# (assumes 'sample_id' is the first column of the file -- confirm)
taxon_columns = taxonomy.columns[1:]
# Reshape into long form: one row per (sample, taxon) pair
taxonomy = taxonomy.melt(id_vars='sample_id',
                         value_vars=taxon_columns,
                         var_name='id',
                         value_name='abundance')
# Add the genus column MICOM expects: the sixth ';'-separated field of the
# taxon id, truncated at the first '.'
lineage = taxonomy['id'].str.split(';')
taxonomy['genus'] = lineage.str[5].str.split('.').str[0]

## We need a model database to pull our reconstructions from to build models

In [None]:
# Location of the model database passed to MICOM when building models
agora = '../agora/data'

## Now, we'll build our models, with cutoff of 0.001

In [None]:
# Build the community models from the taxonomy table and write them to
# '../models/studyC', using the model database configured above
models = micom.workflows.build(
    taxonomy,
    out_folder='../models/studyC',
    model_db=agora,
    cutoff=0.001,
    threads=20,
)

## Next, we'll gather our medium. We'll load in the carbon-depleted standard european medium that was constructed and functionally completed in the medium_construction.ipynb notebook.

In [None]:
# Load the low-carbon medium and keep only the three columns used below
medium = pd.read_csv('../media/european_lowcarbon.csv', index_col=0)
medium = medium[['reaction', 'metabolite', 'flux']]
# Scale every flux down to 10% of its original value
medium['flux'] = 0.1 * medium['flux']
# Single-row supplements for the two fiber treatments
pectin = pd.DataFrame([{'reaction': 'EX_pect_m', 'metabolite': 'pect_m', 'flux': 0.75}])
inulin = pd.DataFrame([{'reaction': 'EX_inulin_m', 'metabolite': 'inulin_m', 'flux': 10.50}])
# Treatment media: the diluted base medium plus one supplement row each
pectin_medium = pd.concat([medium, pectin])
inulin_medium = pd.concat([medium, inulin])

## Now, we'll use the diet to grow the models we just built. First, we'll do so without the addition of fiber, to simulate control samples 

In [None]:
# Load the model manifest from the model folder
manifest = pd.read_csv('../models/studyC/manifest.csv')
# Simulate growth on the unsupplemented medium with a tradeoff value of 0.7.
# NOTE(review): the whole manifest is passed, so every sample listed in it is
# grown on this medium -- confirm that is intended for the control condition.
growth = micom.workflows.grow(
    manifest,
    '../models/studyC',
    medium=medium,
    tradeoff=0.7,
    strategy='none',
    threads=20,
)
# Collect the production rates from the growth results
control = micom.measures.production_rates(growth)

## We'll do the same with the supplementation of pectin, on the treated samples

In [None]:
# Load the model manifest from the model folder
manifest = pd.read_csv('../models/studyC/manifest.csv')
# Simulate growth on the pectin-supplemented medium with a tradeoff value of 0.7
growth = micom.workflows.grow(
    manifest,
    '../models/studyC',
    medium=pectin_medium,
    tradeoff=0.7,
    strategy='none',
    threads=20,
)
# Collect the production rates from the growth results.
# This rebinds `pectin`, which previously held the pectin supplement row.
pectin = micom.measures.production_rates(growth)

## And again with inulin

In [None]:
# Load the model manifest from the model folder
manifest = pd.read_csv('../models/studyC/manifest.csv')
# Simulate growth on the inulin-supplemented medium with a tradeoff value of 0.7
growth = micom.workflows.grow(
    manifest,
    '../models/studyC',
    medium=inulin_medium,
    tradeoff=0.7,
    strategy='none',
    threads=20,
)
# Collect the production rates from the growth results.
# This rebinds `inulin`, which previously held the inulin supplement row.
inulin = micom.measures.production_rates(growth)

## Now that we have the results, we'll isolate the SCFA production and format it for concatenation with the measured results. Here's a function that will do this for the control, pectin-treated, and inulin-treated samples.


In [None]:
def get_fluxes(df):
    """Summarise predicted SCFA production per subject and treatment.

    Parameters
    ----------
    df : pandas.DataFrame
        Production rates with at least the columns ``sample_id``,
        ``metabolite``, ``name`` and ``flux``. ``sample_id`` is split on
        ``'_'``: the first field is used as the treatment and the second
        as the subject id.

    Returns
    -------
    pandas.DataFrame
        Pivot table indexed by ``(subject_id, treatment)`` whose columns are
        ``flux-predicted`` (mean flux) and ``sem-predicted`` (standard error
        of the mean), each split by metabolite ``name``. The input frame is
        not modified.
    """
    # Isolate SCFA fluxes (butyrate, propionate, acetate exchange metabolites)
    scfa_ids = ['but[e]', 'ppa[e]', 'ac[e]']
    # Copy the filtered rows so the new columns are written to an independent
    # frame instead of a slice of the caller's data (avoids SettingWithCopyWarning)
    df = df[df['metabolite'].isin(scfa_ids)].copy()
    # Map the metadata onto the dataframe to align with the measured samples
    id_fields = df['sample_id'].str.split('_')
    df['subject_id'] = id_fields.str[1]
    df['treatment'] = id_fields.str[0]
    # Mean and standard error of the mean across replicates. Only 'flux' is
    # aggregated: averaging the whole frame fails on pandas >= 2.0 because
    # string columns such as 'sample_id' and 'metabolite' cannot be averaged.
    res = (df.groupby(['subject_id', 'treatment', 'name'])['flux']
             .agg(['mean', 'sem'])
             .rename(columns={'mean': 'flux-predicted', 'sem': 'sem-predicted'})
             .reset_index())
    # Pivot the table for alignment with the measurement dataframe
    predicted = pd.pivot_table(res,
                columns = 'name',
                index = ['subject_id','treatment'],
                values = ['flux-predicted','sem-predicted'])
    return predicted

## Now we can iterate the function over both the control and treatment results, concatenating them into one dataframe

In [None]:
# Start from the control predictions, then stack the pectin and inulin
# predictions underneath, in that order.
# NOTE(review): all three growth runs use the same manifest, so each result
# set yields the same (subject_id, treatment) labels and the stacked index
# contains repeated entries -- confirm this is intended before aligning with
# the measured values.
predicted = get_fluxes(control)
for treated in (pectin, inulin):
    predicted = pd.concat([predicted, get_fluxes(treated)], axis = 0)

## Now we'll concatenate the predictions with the measured scfa values, and rename the columns so they don't overlap

In [None]:
# Place measured and predicted values side by side, aligned on the
# (subject_id, treatment) index
scfasTotal = pd.concat([measured, predicted], axis = 1)
# Collapse the two-level column labels into single strings such as
# 'flux_acetate' or 'flux-predicted_acetate' for plotting
flat_labels = scfasTotal.columns.to_flat_index()
scfasTotal.columns = ['_'.join(label) for label in flat_labels]
# Turn the index levels back into ordinary columns
scfasTotal.reset_index(inplace = True)

## Finally, plot the results

In [None]:
# Measured vs. predicted acetate flux, with SEM error bars on both axes and a
# linear fit. Axis labels are raw strings so that '\d' in '\dfrac' is not
# parsed as an (invalid) escape sequence.
# NOTE(review): only two colours are supplied to scale_color_manual -- confirm
# that `treatment` has at most two levels in scfasTotal.
ac_plot = (ggplot(
    scfasTotal, aes(x = 'flux_acetate', y = 'flux-predicted_acetate'))
    +geom_point(aes(color = 'treatment'),size = 5)
    # Vertical bars: predicted flux +/- SEM of the prediction
    +geom_errorbar(aes(x="flux_acetate",
                        ymin = scfasTotal['flux-predicted_acetate'] - scfasTotal['sem-predicted_acetate'],
                        ymax = scfasTotal['flux-predicted_acetate'] + scfasTotal['sem-predicted_acetate']))
    # Horizontal bars: measured flux +/- SEM of the measurement
    +geom_errorbarh(aes(y ="flux-predicted_acetate",
                        xmin = scfasTotal['flux_acetate'] - scfasTotal['sem_acetate'],
                        xmax = scfasTotal['flux_acetate'] + scfasTotal['sem_acetate']))
    +geom_smooth(method='lm',linetype='--')
    +scale_color_manual(values = ['deepskyblue','darksalmon'])
    +labs(x=r'Measured($\dfrac{mmol}{L*h}$)',y = r'Predicted($\dfrac{mmol}{gDCW*h}$)',title='Acetate')
    +theme(text = element_text(size=20),panel_background=element_rect(fill = "white",
                            colour = "white",size = 0.5, linetype = "solid"),
                            panel_grid=element_line(size = .2, linetype = "solid",colour = "gray"),
                            axis_line = element_line(size = 2, linetype = "solid",colour = "black"),
                            legend_title=element_blank(),legend_position='right'))
ac_plot

In [None]:
# Measured vs. predicted propionate flux, with SEM error bars on both axes and
# a linear fit. Axis labels are raw strings so that '\d' in '\dfrac' is not
# parsed as an (invalid) escape sequence.
# NOTE(review): only two colours are supplied to scale_color_manual -- confirm
# that `treatment` has at most two levels in scfasTotal.
ppa_plot = (ggplot(
    scfasTotal, aes(x = 'flux_propionate', y = 'flux-predicted_propionate'))
    +geom_point(aes(color = 'treatment'),size = 5)
    # Vertical bars: predicted flux +/- SEM of the prediction
    +geom_errorbar(aes(x='flux_propionate',
                        ymin = scfasTotal['flux-predicted_propionate'] - scfasTotal['sem-predicted_propionate'],
                        ymax = scfasTotal['flux-predicted_propionate'] + scfasTotal['sem-predicted_propionate']))
    # Horizontal bars: measured flux +/- SEM of the measurement
    +geom_errorbarh(aes(y = 'flux-predicted_propionate',
                        xmin = scfasTotal['flux_propionate'] - scfasTotal['sem_propionate'],
                        xmax = scfasTotal['flux_propionate'] + scfasTotal['sem_propionate']))
    +geom_smooth(method='lm',linetype='--')
    +scale_color_manual(values = ['deepskyblue','darksalmon'])
    +labs(x=r'Measured($\dfrac{mmol}{L*h}$)',y = r'Predicted($\dfrac{mmol}{gDCW*h}$)',title='Propionate')
    +theme(text = element_text(size=20),panel_background=element_rect(fill = "white",
                            colour = "white",size = 0.5, linetype = "solid"),
                            panel_grid=element_line(size = .2, linetype = "solid",colour = "gray"),
                            axis_line = element_line(size = 2, linetype = "solid",colour = "black"),
                            legend_title=element_blank(),legend_position='right'))
ppa_plot

In [None]:
# Measured vs. predicted butyrate flux, with SEM error bars on both axes and a
# linear fit. Axis labels are raw strings so that '\d' in '\dfrac' is not
# parsed as an (invalid) escape sequence.
# NOTE(review): only two colours are supplied to scale_color_manual -- confirm
# that `treatment` has at most two levels in scfasTotal.
but_plot = (ggplot(
    scfasTotal, aes(x = 'flux_butyrate', y = 'flux-predicted_butyrate'))
    +geom_point(aes(color = 'treatment'),size = 5)
    # Vertical bars: predicted flux +/- SEM of the prediction
    +geom_errorbar(aes(x = 'flux_butyrate',
                        ymin = scfasTotal['flux-predicted_butyrate'] - scfasTotal['sem-predicted_butyrate'],
                        ymax = scfasTotal['flux-predicted_butyrate'] + scfasTotal['sem-predicted_butyrate']))
    # Horizontal bars: measured flux +/- SEM of the measurement
    +geom_errorbarh(aes(y ='flux-predicted_butyrate',
                        xmin = scfasTotal['flux_butyrate'] - scfasTotal['sem_butyrate'],
                        xmax = scfasTotal['flux_butyrate'] + scfasTotal['sem_butyrate']))
    +geom_smooth(method='lm',linetype='--')
    +scale_color_manual(values = ['deepskyblue','darksalmon'])
    +labs(x=r'Measured($\dfrac{mmol}{L*h}$)',y = r'Predicted($\dfrac{mmol}{gDCW*h}$)',title='Butyrate')
    +theme(text = element_text(size=20),panel_background=element_rect(fill = "white",
                            colour = "white",size = 0.5, linetype = "solid"),
                            panel_grid=element_line(size = .2, linetype = "solid",colour = "gray"),
                            axis_line = element_line(size = 2, linetype = "solid",colour = "black"),
                            legend_title=element_blank(),legend_position='right'))
but_plot

## Save the results for future use

In [None]:
# Save the combined measured/predicted table. The original cell wrote a
# variable named `flux`, which is never defined in this notebook.
# NOTE(review): scfasTotal is assumed to be the intended frame -- confirm.
# NOTE(review): the destination is a machine-specific absolute path; consider
# a repository-relative path like the other files used here.
scfasTotal.to_csv('/proj/gibbons/nbohmann/exvivo/scfa_paper/gurry1.csv')