## Here, we'll show the full workflow for building models and getting predictions for SCFA production using MICOM metabolic modeling for the _ex vivo_ study conducted by Cantu-Jungles et al. 2021 (Study B)

In [None]:
import os
import pandas as pd
from plotnine import *
from tqdm import tqdm
import numpy as np
import scipy
import micom
import micom.measures

%matplotlib inline

## We'll start with the measured SCFA values from the _ex vivo_ experiments. Total SCFA production is already calculated in mM

In [None]:
# Load the measured SCFA production values
scfas = pd.read_csv('../data/studyB_SCFAs.csv')
scfa_columns = ['acetate', 'butyrate', 'propionate']
# Estimate flux by dividing total production by the culturing time (24)
scfas[scfa_columns] = scfas[scfa_columns] / 24
# Store the subject ids as strings, for simplicity
scfas['subject_id'] = scfas['subject_id'].astype('str')
# Reshape to long form: one row per measurement of a single analyte
scfas = scfas.melt(id_vars=['subject_id', 'treatment'],
                   value_vars=scfa_columns,
                   var_name='analyte',
                   value_name='flux')
# Summarise the replicates of every treatment/subject/analyte combination
# by their mean flux and the standard error of that mean (SEM)
replicates = scfas.groupby(['treatment', 'subject_id', 'analyte'])['flux']
res = pd.DataFrame({'flux': replicates.mean(), 'sem': replicates.sem()})
# Turn the grouping keys back into ordinary columns, in a fixed column order
res = res.reset_index()[['treatment', 'subject_id', 'analyte', 'flux', 'sem']]
# Pivot to wide form (one column per statistic and analyte) for ease of plotting
measured = res.pivot_table(index=['subject_id', 'treatment'],
                           columns='analyte',
                           values=['flux', 'sem'])
measured

## Next, we will read in abundance data for all samples. As samples are cultured in triplicate, we will add a numerical suffix to each, indicating the replicate number. We also want only those samples treated with PBS or pectin

In [None]:
# Read in the abundance data 
taxonomy = pd.read_csv('../data/studyB_abundances.csv')
# Count the number of times a sample_id appears in the dataframe
vals = taxonomy.groupby(taxonomy['sample_id']).cumcount().values
# Add the replicate number to the end of the sample_id
taxonomy['sample_id'] = (taxonomy['sample_id']+'_'+ (vals+1).astype(str))
# Sort to only control and pectin treated samples
taxonomy = taxonomy[(taxonomy['sample_id'].str.contains('Blank'))|(taxonomy['sample_id'].str.contains('Pectin'))]

## Now we will convert the abundance table to be long form. 

In [None]:
# Melt the table into long form
taxonomy = pd.melt(taxonomy, 
                   id_vars = 'sample_id', 
                   value_vars = taxonomy.columns[1:], 
                   var_name = 'id', 
                   value_name = 'abundance')
# Replace the prefixes in the id column so the data can be read by MICOM
taxonomy['id'] = (taxonomy['id'].str.replace('k__','').str.replace('p__','').
                   str.replace('c__','').str.replace('o__','').str.replace('f__','').
                   str.replace('g__','').str.replace('s__',''))
# Add a genus column, as required by MICOM for the taxonomy table
taxonomy['genus'] = taxonomy['id'].str.split(';').str[-2]

## We need a model database to use as a resource for genome-scale metabolic models. We will pass in the file path to the AGORA db

In [None]:
# Path to the AGORA model database that is handed to MICOM as `model_db`
agora = '../agora/data'

## We'll get to building the models in MICOM, passing in the abundance table, identifying a model folder, passing in the database location, and assigning a relative abundance cutoff

In [None]:
# Build the community models for the samples in the abundance table
build_options = dict(
    out_folder='../models/studyB',  # folder the models are written to
    model_db=agora,                 # model database location
    cutoff=0.001,                   # relative abundance cutoff
    threads=20,
)
models = micom.workflows.build(taxonomy, **build_options)

## Next, we'll gather our medium. We'll load in the carbon-depleted standard european medium that was constructed and functionally completed in the medium_construction.ipynb notebook.

In [None]:
# Read in the medium
medium = pd.read_csv('../media/european_lowcarbon.csv',index_col = 0)[['reaction','metabolite','flux']]
# Dilute to 10% of original content
medium['flux'] = medium['flux']*.1
# We'll also build a treatment medium, including an pectin supplement
pectin = pd.DataFrame({'reaction':['EX_pect_m'],'metabolite':['pect_m'],'flux':[0.75]})
treatment_medium = pd.concat([medium, pectin])

## Now, we'll use the diet to grow the models we just built. First, we'll do so without the addition of pectin, to simulate control samples

In [None]:
# Read the model manifest
manifest = pd.read_csv('../models/studyB/manifest.csv')
# Isolate only the  samples that underwent control treatment
manifest = manifest[manifest.sample_id.str.contains('Blank')]
# Grow the models with a tradeoff value of 0.7
growth = micom.workflows.grow(manifest,'../models/studyB',medium=medium,tradeoff=.7,strategy='none',threads = 20)
# Collect the absolute production rates
control = micom.measures.production_rates(growth)

## We'll do the same with the supplementation of pectin, on the treated samples

In [None]:
# Read the model manifest
manifest = pd.read_csv('../models/studyB/manifest.csv')
# Isolate the samples that underwent treatment
manifest = manifest[manifest.sample_id.str.contains('Pectin')]
# Grow the models with a tradeoff value of 0.7
growth = micom.workflows.grow(manifest,'../models/studyB',medium=treatment_medium,tradeoff=.7,strategy='none',threads = 20)
# Collect the absolute production rates
treatment = micom.measures.production_rates(growth)

## Now that we have the results, we'll isolate the SCFA production and format it for concatenation with the measured results. Here's a function that will do this for both the control and pectin-treated samples.


In [None]:
def get_fluxes(df):
    """Summarise predicted SCFA fluxes per subject and treatment.

    Parameters
    ----------
    df : pandas.DataFrame
        Production rates with at least the columns ``metabolite``,
        ``sample_id``, ``name`` and ``flux``. ``sample_id`` is split on
        ``'_'``: the first field is used as the treatment and the second as
        the subject id. The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Wide table indexed by ``(subject_id, treatment)`` whose columns are
        ``('flux-predicted', name)`` (mean flux over the rows of a group)
        and ``('sem-predicted', name)`` (standard error of that mean).
    """
    # Isolate the SCFA fluxes (butyrate, propionate, acetate). Work on a copy
    # so the columns added below never write into a slice of the caller's frame.
    scfa_ids = ['but[e]', 'ppa[e]', 'ac[e]']
    df = df[df['metabolite'].isin(scfa_ids)].copy()
    # Map the metadata onto the dataframe to align with the measured samples
    id_fields = df['sample_id'].str.split('_')
    df['subject_id'] = id_fields.str[1]
    df['treatment'] = id_fields.str[0]
    # Aggregate only the numeric `flux` column: averaging the whole frame
    # would also hit the string columns, which raises a TypeError on
    # pandas >= 2.0.
    replicates = df.groupby(['subject_id', 'treatment', 'name'])['flux']
    res = pd.DataFrame({
        # Average production of the replicates
        'flux-predicted': replicates.mean(),
        # Standard error of the mean of the replicates
        'sem-predicted': replicates.sem(),
    }).reset_index()
    # Pivot the table for alignment with the measurement dataframe
    predicted = pd.pivot_table(res,
                columns = 'name',
                index = ['subject_id','treatment'],
                values = ['flux-predicted','sem-predicted'])
    return predicted

## Now we can iterate the function over both the control and treatment results, concatenating them into one dataframe

In [None]:
# Control Samples
predicted = get_fluxes(control)
# And predicted samples
predicted = pd.concat([predicted, get_fluxes(treatment)], axis = 0)

## Now we'll concatenate the predictions with the measured scfa values, and rename the columns so they don't overlap

In [None]:
# Concatenate dataframes
scfasTotal = pd.concat([measured,predicted], axis = 1)
# Flatten multiindex
scfasTotal.columns = scfasTotal.columns.to_flat_index()
# Join column names for plotting
scfasTotal.columns = ['_'.join(col) for col in scfasTotal.columns.values]
scfasTotal.reset_index(inplace = True)

## Finally, we can plot the results for all three SCFAs

In [None]:
# Measured vs. predicted acetate flux, one point per row of scfasTotal.
# The axis labels are raw strings so that the LaTeX command `\dfrac` is not
# parsed as an (invalid) Python escape sequence.
ac_plot = (ggplot(
    scfasTotal, aes(x = ('flux_acetate'), y = ('flux-predicted_acetate')))
    +geom_point(aes(color = 'treatment'),size = 5)
    # Vertical bars: predicted flux +/- its SEM
    +geom_errorbar(aes(x="flux_acetate", 
                        ymin = scfasTotal['flux-predicted_acetate'] - scfasTotal['sem-predicted_acetate'],
                        ymax = scfasTotal['flux-predicted_acetate'] + scfasTotal['sem-predicted_acetate']))
    # Horizontal bars: measured flux +/- its SEM
    +geom_errorbarh(aes(y ="flux-predicted_acetate", 
                        xmin = scfasTotal['flux_acetate'] - scfasTotal['sem_acetate'],
                        xmax = scfasTotal['flux_acetate'] + scfasTotal['sem_acetate']))
    # Dashed linear fit through the points
    +geom_smooth(method='lm',linetype='--')
    +scale_color_manual(values = ['deepskyblue','darksalmon'])
    +labs(x=r'Measured($\dfrac{mmol}{L*h}$)',y = r'Predicted($\dfrac{mmol}{gDCW*h}$)',title='Acetate')
    +theme(text = element_text(size=20),panel_background=element_rect(fill = "white",
                            colour = "white",size = 0.5, linetype = "solid"),
                            panel_grid=element_line(size = .2, linetype = "solid",colour = "gray"),
                            axis_line = element_line(size = 2, linetype = "solid",colour = "black"),
                            legend_title=element_blank(),legend_position='right'))
ac_plot

In [None]:
# Measured vs. predicted propionate flux, one point per row of scfasTotal.
# The axis labels are raw strings so that the LaTeX command `\dfrac` is not
# parsed as an (invalid) Python escape sequence.
ppa_plot = (ggplot(
    scfasTotal, aes(x = ('flux_propionate'), y = ('flux-predicted_propionate')))
    +geom_point(aes(color = 'treatment'),size = 5)
    # Vertical bars: predicted flux +/- its SEM
    +geom_errorbar(aes(x='flux_propionate', 
                        ymin = scfasTotal['flux-predicted_propionate'] - scfasTotal['sem-predicted_propionate'],
                        ymax = scfasTotal['flux-predicted_propionate'] + scfasTotal['sem-predicted_propionate']))
    # Horizontal bars: measured flux +/- its SEM
    +geom_errorbarh(aes(y = 'flux-predicted_propionate', 
                        xmin = scfasTotal['flux_propionate'] - scfasTotal['sem_propionate'],
                        xmax = scfasTotal['flux_propionate'] + scfasTotal['sem_propionate']))
    # Dashed linear fit through the points
    +geom_smooth(method='lm',linetype='--')
    +scale_color_manual(values = ['deepskyblue','darksalmon'])
    +labs(x=r'Measured($\dfrac{mmol}{L*h}$)',y = r'Predicted($\dfrac{mmol}{gDCW*h}$)',title='Propionate')
    +theme(text = element_text(size=20),panel_background=element_rect(fill = "white",
                            colour = "white",size = 0.5, linetype = "solid"),
                            panel_grid=element_line(size = .2, linetype = "solid",colour = "gray"),
                            axis_line = element_line(size = 2, linetype = "solid",colour = "black"),
                            legend_title=element_blank(),legend_position='right'))
ppa_plot

In [None]:
# Measured vs. predicted butyrate flux, one point per row of scfasTotal.
# The axis labels are raw strings so that the LaTeX command `\dfrac` is not
# parsed as an (invalid) Python escape sequence.
but_plot = (ggplot(
    scfasTotal, aes(x = ('flux_butyrate'), y = ('flux-predicted_butyrate')))
    +geom_point(aes(color = 'treatment'),size = 5)
    # Vertical bars: predicted flux +/- its SEM
    +geom_errorbar(aes(x = 'flux_butyrate', 
                        ymin = scfasTotal['flux-predicted_butyrate'] - scfasTotal['sem-predicted_butyrate'],
                        ymax = scfasTotal['flux-predicted_butyrate'] + scfasTotal['sem-predicted_butyrate']))
    # Horizontal bars: measured flux +/- its SEM
    +geom_errorbarh(aes(y ='flux-predicted_butyrate', 
                        xmin = scfasTotal['flux_butyrate'] - scfasTotal['sem_butyrate'],
                        xmax = scfasTotal['flux_butyrate'] + scfasTotal['sem_butyrate']))
    # Dashed linear fit through the points
    +geom_smooth(method='lm',linetype='--')
    +scale_color_manual(values = ['deepskyblue','darksalmon'])
    +labs(x=r'Measured($\dfrac{mmol}{L*h}$)',y = r'Predicted($\dfrac{mmol}{gDCW*h}$)',title='Butyrate')
    +theme(text = element_text(size=20),panel_background=element_rect(fill = "white",
                            colour = "white",size = 0.5, linetype = "solid"),
                            panel_grid=element_line(size = .2, linetype = "solid",colour = "gray"),
                            axis_line = element_line(size = 2, linetype = "solid",colour = "black"),
                            legend_title=element_blank(),legend_position='right'))
but_plot

## Save all results

In [None]:
# Write the combined measured/predicted table to disk. The table is named
# `scfasTotal`; no variable called `scfa` is defined in this notebook.
scfasTotal.to_csv('../results/studyB.csv')