## Here, we'll show the full workflow for building models and getting predictions for SCFA production using MICOM metabolic modeling for the _ex vivo_ study conducted by members of the Gibbons Lab in 2019 (Study A)

In [None]:
import os
import pandas as pd
from plotnine import *
from tqdm import tqdm
import numpy as np
import scipy
import micom
import micom.measures

%matplotlib inline

## We'll start with the measured SCFA values from the _ex vivo_ experiments, and calculate production flux of each SCFA

In [None]:
# Load the targeted SCFA metabolomic data. Result column is in mM
scfas = pd.read_csv('../data/studyA_SCFAs.csv')
# Identify relevant metadata: {'subject_id': {sample_id: ...}, 'treatment': {sample_id: ...}}.
# We'll use this in later cells
metadata = scfas.set_index('sample_id')[['subject_id','treatment']].to_dict()
# Find the difference between T1 and T0 concentrations to calculate total production,
# then divide by the 6 hours of the experiment to get an estimated flux.
# NOTE(review): diff() subtracts the previous row within each group, so this assumes
# the T0 row precedes the T1 row in the file -- confirm the input ordering
scfas['flux'] = scfas.groupby(['subject_id','replicate','treatment','analyte'])['result'].diff()/6
# The first row of every group has no predecessor and therefore a null flux.
# Drop only those rows (a null in an unrelated column should not discard a measurement)
scfas.dropna(subset = ['flux'], inplace = True)
# Mean and standard deviation of the flux between replicates. Aggregating only the
# 'flux' column avoids averaging non-numeric columns, which raises in pandas >= 2.0
group_keys = ['treatment','timepoint','subject_id','analyte']
res = scfas.groupby(group_keys)['flux'].agg(['mean','std']).rename(columns = {'mean':'flux'})
# Reset the index for clarity, keeping only the columns we need
res = res.reset_index()[['treatment','subject_id','analyte','flux','std']]
# Pivot the table for ease of plotting
measured = pd.pivot_table(res,columns = 'analyte',index = ['subject_id','treatment'],values = ['flux','std'])

## Next, we will use genus abundance counts from 16S sequencing to construct a taxonomy table, which we will use to construct models in MICOM. This table requires a sample ID column, a taxonomic identifier column, an abundance column, and a column for the lowest taxonomic level at which we are building models, in this case the genus level. 

In [None]:
# Read the abundance data for Study A, give the columns the longer names used
# in the rest of the notebook, and attach the per-sample metadata
taxonomy = (
    pd.read_csv('../data/studyA_abundances.csv')
    .rename(columns = {
        'd': 'domain',
        'p': 'phylum',
        'c': 'class',
        'o': 'order',
        'f': 'family',
        'g': 'genus',
        'reads': 'abundance',
        'sample': 'sample_id',
    })
    .assign(
        # The genus name doubles as the taxonomic identifier
        id = lambda table: table['genus'],
        # Look up each sample's subject and treatment so we can keep track of samples
        subject_id = lambda table: table['sample_id'].map(metadata['subject_id']),
        treatment = lambda table: table['sample_id'].map(metadata['treatment']),
    )
)
taxonomy

## We'll also identify the model database we want to use to construct the models

In [None]:
# Path to the model database used when building the community models
agora = '../agora/data'

## Now, we'll build the models with an abundance cutoff of 0.001

In [None]:
# Build the models from the taxonomy table and the chosen model database,
# writing them to ../models/studyA
models = micom.workflows.build(
    taxonomy,
    model_db = agora,
    out_folder = '../models/studyA',
    cutoff = 0.001,
    threads = 20,
)

## Next, we'll gather our medium. We'll load in the carbon-depleted standard European medium that was constructed and functionally completed in the medium_construction.ipynb notebook.

In [None]:
# Load the medium, keeping only the three columns used below
medium_columns = ['reaction','metabolite','flux']
medium = pd.read_csv('../media/european_lowcarbon.csv',index_col = 0)[medium_columns]
# Scale every flux down to 10% of its original value
medium['flux'] = medium['flux']*.1
# The treatment medium is the diluted medium plus one extra row supplying inulin
inulin = pd.DataFrame([{'reaction':'EX_inulin_m','metabolite':'inulin_m','flux':10.50}])
treatment_medium = pd.concat([medium, inulin], axis = 0)

## Now, we'll use the diet to grow the models we just built. First, we'll do so without the addition of inulin, to simulate control samples 

In [None]:
# Load the manifest of built models and keep only the control samples,
# i.e. the T0 samples whose sample_id contains an 'A' or a 'G'
manifest = pd.read_csv('../models/studyA/manifest.csv').loc[
    lambda m: m['sample_id'].str.contains('A') | m['sample_id'].str.contains('G')
]
# Grow the control models on the plain (inulin-free) medium with a tradeoff value of 0.7
growth = micom.workflows.grow(
    manifest,
    '../models/studyA',
    medium = medium,
    tradeoff = .7,
    strategy = 'none',
    threads = 20,
)
# Collect the absolute production rates for the control samples
control = micom.measures.production_rates(growth)

## We'll do the same with the supplementation of inulin, on the treated samples

In [None]:
# Load the manifest of built models and keep only the inulin-treated samples,
# i.e. the T0 samples whose sample_id contains an 'E' or a 'K'
manifest = pd.read_csv('../models/studyA/manifest.csv').loc[
    lambda m: m['sample_id'].str.contains('E') | m['sample_id'].str.contains('K')
]
# Grow the treated models on the inulin-supplemented medium with a tradeoff value of 0.7
growth = micom.workflows.grow(
    manifest,
    '../models/studyA',
    medium = treatment_medium,
    tradeoff = .7,
    strategy = 'none',
    threads = 20,
)
# Collect the absolute production rates for the treated samples
treatment = micom.measures.production_rates(growth)

## Now that we have the results, we'll isolate the SCFA production and format it for concatenation with the measured results. Here's a function that will do this for both the control and inulin treated samples. 

In [None]:
def get_fluxes(df):
    """Summarise predicted SCFA production per subject and treatment.

    Parameters
    ----------
    df : pandas.DataFrame
        Production rates with at least the columns ``sample_id``,
        ``metabolite``, ``name`` and ``flux``. The input is not modified.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``(subject_id, treatment)`` with two-level columns
        ``('flux-predicted', name)`` and ``('std-predicted', name)`` holding
        the mean and standard deviation of ``flux`` across the samples in
        each group.

    Notes
    -----
    Reads the notebook-level ``metadata`` dict to map each ``sample_id`` to
    its ``subject_id`` and ``treatment``.
    """
    # Isolate SCFA fluxes (butyrate, propionate, acetate exchange metabolites).
    # The copy lets us add columns without writing into a slice of the caller's frame
    scfa_metabolites = ['but[e]', 'ppa[e]', 'ac[e]']
    df = df[df['metabolite'].isin(scfa_metabolites)].copy()
    # Map the metadata onto the dataframe to align with the measured samples
    df['subject_id'] = df['sample_id'].map(metadata['subject_id'])
    df['treatment'] = df['sample_id'].map(metadata['treatment'])
    # Mean and standard deviation of the replicates. Only 'flux' is aggregated:
    # averaging the string columns as well raises a TypeError in pandas >= 2.0
    res = (
        df.groupby(['subject_id', 'treatment', 'name'])['flux']
        .agg(['mean', 'std'])
        .rename(columns={'mean': 'flux-predicted', 'std': 'std-predicted'})
        .reset_index()
    )
    # Pivot the table for alignment with the measurement dataframe
    predicted = pd.pivot_table(res,
                columns = 'name',
                index = ['subject_id','treatment'],
                values = ['flux-predicted','std-predicted'])
    return predicted

## Now we can iterate the function over both the control and treatment results, concatenating them into one dataframe

In [None]:
# Summarise the control samples first, then the inulin-treated samples,
# and stack the two tables row-wise
predicted = pd.concat(
    [get_fluxes(control), get_fluxes(treatment)],
    axis = 0,
)

## Now we'll concatenate the predictions with the measured scfa values, and rename the columns so they don't overlap

In [None]:
# Put measured and predicted values side by side, aligned on (subject_id, treatment)
scfasTotal = pd.concat([measured,predicted], axis = 1)
# Collapse each two-level column label, e.g. ('flux', 'acetate'), into 'flux_acetate'
scfasTotal.columns = ['_'.join(label) for label in scfasTotal.columns.to_flat_index()]
# Turn the index levels back into ordinary columns for plotting
scfasTotal = scfasTotal.reset_index()

## Finally, we can plot the results for all three SCFAs

In [None]:
# Measured vs. predicted acetate flux: one point per row of scfasTotal coloured by
# treatment, +/- one standard deviation error bars on both axes, and a linear fit
ac_plot = (ggplot(
    scfasTotal, aes(x = 'flux_acetate', y = 'flux-predicted_acetate'))
    +geom_point(aes(color = 'treatment'),size = 5)
    # Vertical bars: predicted flux +/- its standard deviation
    +geom_errorbar(aes(x="flux_acetate", ymin = scfasTotal['flux-predicted_acetate'] - scfasTotal['std-predicted_acetate'],
                            ymax=scfasTotal['flux-predicted_acetate'] + scfasTotal['std-predicted_acetate']))
    # Horizontal bars: measured flux +/- its standard deviation
    +geom_errorbarh(aes(y ="flux-predicted_acetate", xmin = scfasTotal['flux_acetate'] - scfasTotal['std_acetate'],
                            xmax=scfasTotal['flux_acetate'] + scfasTotal['std_acetate']))
    +geom_smooth(method='lm',linetype='--')
    +scale_color_manual(values = ['deepskyblue','darksalmon'])
    # Raw strings keep the LaTeX backslashes literal ('\d' is not a valid string escape)
    +labs(x=r'Measured($\dfrac{mmol}{L*h}$)',y = r'Predicted($\dfrac{mmol}{gDCW*h}$)',title='Acetate')
    +theme(text = element_text(size=20),panel_background=element_rect(fill = "white",
                            colour = "white",size = 0.5, linetype = "solid"),
                            panel_grid=element_line(size = .2, linetype = "solid",colour = "gray"),
                            axis_line = element_line(size = 2, linetype = "solid",colour = "black"),
                            legend_title=element_blank(),legend_position='right'))
ac_plot

In [None]:
# Measured vs. predicted butyrate flux: one point per row of scfasTotal coloured by
# treatment, +/- one standard deviation error bars on both axes, and a linear fit
but_plot = (ggplot(
    scfasTotal, aes(x = 'flux_butyrate', y = 'flux-predicted_butyrate'))
    +geom_point(aes(color = 'treatment'),size = 5)
    # Vertical bars: predicted flux +/- its standard deviation
    +geom_errorbar(aes(x='flux_butyrate', ymin = scfasTotal['flux-predicted_butyrate'] - scfasTotal['std-predicted_butyrate'],
                            ymax=scfasTotal['flux-predicted_butyrate'] + scfasTotal['std-predicted_butyrate']))
    # Horizontal bars: measured flux +/- its standard deviation
    +geom_errorbarh(aes(y ='flux-predicted_butyrate', xmin = scfasTotal['flux_butyrate'] - scfasTotal['std_butyrate'],
                            xmax=scfasTotal['flux_butyrate'] + scfasTotal['std_butyrate']))
    +geom_smooth(method='lm',linetype='--')
    +scale_color_manual(values = ['deepskyblue','darksalmon'])
    # Raw strings keep the LaTeX backslashes literal ('\d' is not a valid string escape)
    +labs(x=r'Measured($\dfrac{mmol}{L*h}$)',y = r'Predicted($\dfrac{mmol}{gDCW*h}$)',title='Butyrate')
    +theme(text = element_text(size=20),panel_background=element_rect(fill = "white",
                            colour = "white",size = 0.5, linetype = "solid"),
                            panel_grid=element_line(size = .2, linetype = "solid",colour = "gray"),
                            axis_line = element_line(size = 2, linetype = "solid",colour = "black"),
                            legend_title=element_blank(),legend_position='right'))
but_plot

In [None]:
# Measured vs. predicted propionate flux: one point per row of scfasTotal coloured by
# treatment, +/- one standard deviation error bars on both axes, and a linear fit.
# Stored under its own name so it does not overwrite the butyrate figure
ppa_plot = (ggplot(
    scfasTotal, aes(x = 'flux_propionate', y = 'flux-predicted_propionate'))
    +geom_point(aes(color = 'treatment'),size = 5)
    # Vertical bars: predicted flux +/- its standard deviation
    +geom_errorbar(aes(x='flux_propionate', ymin = scfasTotal['flux-predicted_propionate'] - scfasTotal['std-predicted_propionate'],
                            ymax=scfasTotal['flux-predicted_propionate'] + scfasTotal['std-predicted_propionate']))
    # Horizontal bars: measured flux +/- its standard deviation
    +geom_errorbarh(aes(y ='flux-predicted_propionate', xmin = scfasTotal['flux_propionate'] - scfasTotal['std_propionate'],
                            xmax=scfasTotal['flux_propionate'] + scfasTotal['std_propionate']))
    +geom_smooth(method='lm',linetype='--')
    +scale_color_manual(values = ['deepskyblue','darksalmon'])
    # Raw strings keep the LaTeX backslashes literal ('\d' is not a valid string escape)
    +labs(x=r'Measured($\dfrac{mmol}{L*h}$)',y = r'Predicted($\dfrac{mmol}{gDCW*h}$)',title='Propionate')
    +theme(text = element_text(size=20),panel_background=element_rect(fill = "white",
                            colour = "white",size = 0.5, linetype = "solid"),
                            panel_grid=element_line(size = .2, linetype = "solid",colour = "gray"),
                            axis_line = element_line(size = 2, linetype = "solid",colour = "black"),
                            legend_title=element_blank(),legend_position='right'))
ppa_plot

## Now we'll save the results for later use

In [None]:
# Write the combined measured/predicted table (row index included) to the results folder
scfasTotal.to_csv(path_or_buf = '../results/studyA.csv')