## This notebook shows the full workflow for building models, simulating growth and obtaining SCFA predictions from data collected by the _ex vivo_ study conducted by members of the Gibbons Lab in 2019 (Study A)

In [None]:
import os
import micom as mm
import pandas as pd
from tqdm import tqdm
from scipy import stats
import numpy as np 
from micom.viz import plot_tradeoff
from plotnine import *

%matplotlib inline

## Load the carbon-depleted standard European medium and grow the control-group models

In [None]:
# --- Control medium -------------------------------------------------------
# Read the low-carb European medium, scale every flux to 10% of its listed
# value and drop any inulin exchange so the control medium contains none.
os.chdir('/proj/gibbons/nbohmann/exvivo/diets')
medium = pd.read_csv('european_agora_low_carb.csv')
medium['flux'] = 0.1 * medium['flux']
has_inulin = medium.reaction.str.contains('EX_inulin_m')
medium = medium[~has_inulin]

# --- Control samples ------------------------------------------------------
# Keep only manifest rows whose sample_id contains 'A' or 'G'
# (presumably the two control groups -- confirm against the study design).
os.chdir('/proj/gibbons/nbohmann/exvivo/gibbons_2019/micom/exvivo_build_001/')
manifest = pd.read_csv('data/manifest.csv')
in_group_a = manifest.sample_id.str.contains('A')
in_group_g = manifest.sample_id.str.contains('G')
manifest = manifest[in_group_a | in_group_g]

# --- Simulate growth and summarise export fluxes --------------------------
growth = mm.workflows.grow(
    manifest,
    'data',
    medium=medium,
    tradeoff=.7,
    strategy='none',
    threads=20,
)
fluxes = growth.exchanges
exports = fluxes[fluxes.direction == "export"]
# For every (sample, metabolite, reaction) add up flux * abundance over all
# rows of the group; the resulting value column is labelled 0.
fluxes = (
    exports
    .groupby(["sample_id", "metabolite", "reaction"])
    .apply(lambda grp: sum(grp.flux * grp.abundance))
    .reset_index()
)

os.chdir('/proj/gibbons/nbohmann/exvivo/gibbons_2019/data/scfa_production/')
fluxes.to_csv('ctrl_flux.csv')

## Now augment the medium with inulin and grow again, simulating fiber supplementation. 

In [None]:
# Rebuild the low-carb medium exactly as for the control run (fluxes scaled
# to 10%, any pre-existing inulin exchange removed) ...
os.chdir('/proj/gibbons/nbohmann/exvivo/diets')
medium = pd.read_csv('european_agora_low_carb.csv')
medium['flux'] = medium['flux']*.1
medium = medium[~medium.reaction.str.contains('EX_inulin_m')]
# ... then add a single inulin exchange row with flux 10.5.
# DataFrame.append was removed in pandas 2.0; pd.concat yields the same frame
# (columns missing from the new row are filled with NaN, index is renumbered).
inulin_row = pd.DataFrame({'reaction': ['EX_inulin_m'], 'flux': [10.5]})
medium = pd.concat([medium, inulin_row], ignore_index=True)

# Keep only manifest rows whose sample_id contains 'E' or 'K'
# (presumably the inulin-treated groups -- confirm against the study design).
os.chdir('/proj/gibbons/nbohmann/exvivo/gibbons_2019/micom/exvivo_build_001/')
manifest = pd.read_csv('data/manifest.csv')
manifest = manifest[(manifest.sample_id.str.contains('E'))|(manifest.sample_id.str.contains('K'))]

# Optional tradeoff scan, kept for reference (disabled by default):
# trade = mm.workflows.tradeoff(manifest, model_folder='data',
#                                  medium = medium,tradeoffs=[0,.1,.2,.3,.4,.5,.6,.7,.8,.9,1],
#                                  presolve = True, threads=20)
# pl = plot_tradeoff(trade, filename="tradeoff_inulin.html")

growth = mm.workflows.grow(manifest,'data',medium=medium,tradeoff=.7,strategy='none',threads = 10)
fluxes = growth.exchanges
# For every (sample, metabolite, reaction) sum flux * abundance over the
# export rows of that group; the resulting value column is labelled 0.
fluxes = fluxes[fluxes.direction == "export"].groupby(["sample_id", "metabolite", "reaction"]).apply(
    lambda df: sum(df.flux * df.abundance)).reset_index()
os.chdir('/proj/gibbons/nbohmann/exvivo/gibbons_2019/data/scfa_production/')
fluxes.to_csv('inulin_flux.csv')

## Now we'll load the experimental SCFA measurements and calculate the flux between the culturing start time and the endpoint

In [None]:
# Load the measured SCFA results and keep/rename only the columns used below.
os.chdir('/proj/gibbons/ex_vivos_2019/data')
meta=pd.read_excel('ISBI-0202-19TASA_Results.xlsx')
meta=meta[['Unique Sample ID','Treatment','Time \nPoint','Subject or Animal ID','Analyte','Result']]
meta=meta.rename(columns={'Unique Sample ID':'sample_id','Time \nPoint':'timepoint','Treatment':'treatment'
                          ,'Subject or Animal ID':'subject_id','Analyte':'analyte','Result':'result'})

# Use the time-series concentrations to estimate a flux per sample: for every
# non-T1 row, find the matching T1/T0 pair and take the difference T1 - T0.
# Rows are collected in a list and turned into a DataFrame once, which avoids
# the quadratic cost of concatenating inside the loop and does not depend on
# how pandas treats empty frames when inferring dtypes in pd.concat.
flux_rows = []
for x in tqdm(meta.index):
    if meta.timepoint[x] == 'T1':
        continue
    sb=meta.subject_id[x]
    treat=meta.treatment[x]
    an=meta.analyte[x]
    sp=meta.sample_id[x]
    # NOTE(review): str.match treats sb/an/treat as regular expressions anchored
    # at the start of the string, so values sharing a prefix (or containing
    # regex metacharacters) could match unintended rows -- confirm the labels
    # are safe. The endswith(sp[1]) filter keeps sample ids ending in the second
    # character of the current id (presumably the replicate tag -- confirm).
    df=meta[meta.subject_id.str.match(sb)&meta.analyte.str.match(an)&
            meta.treatment.str.match(treat)&meta.sample_id.str.endswith(sp[1])].reset_index()
    result=(df[df.timepoint.str.contains('T1')].result.reset_index(drop=True)[0] -
            df[df.timepoint.str.contains('T0')].result.reset_index(drop=True)[0])
    flux_rows.append({'sample_id':sp,'subject_id':sb,'treatment':treat,
                      'analyte':an,'result':result})
meta_flux=pd.DataFrame(flux_rows,columns=['sample_id','subject_id','treatment','analyte','result'])

# Drop tryptophan-treated samples and reshape to one row per sample,
# one column per analyte.
meta_flux = meta_flux[~meta_flux.treatment.str.contains("tryptophan")]
meta_flux = pd.pivot_table(meta_flux,columns = 'analyte',index = 'sample_id',values = 'result')
# Presumably the culturing interval in hours (difference -> rate) -- TODO confirm.
meta_flux = meta_flux/6
meta_flux = meta_flux[['Acetic acid','Butyric acid','Propionic acid']]
# The divisors look like molar masses (g/mol) used to convert mass to moles.
# NOTE(review): 59.04 is the mass of the acetate anion (acetic acid is 60.05),
# whereas 88.11 and 74.08 are the masses of the free acids -- confirm which
# convention is intended.
meta_flux['Acetic acid'] = meta_flux['Acetic acid']/59.04
meta_flux['Butyric acid'] = meta_flux['Butyric acid']/88.11
meta_flux['Propionic acid'] = meta_flux['Propionic acid']/74.08
meta_flux

## This function will get the growth results from each sample, and filter down to butyrate, propionate and acetate.

In [None]:
def find_scfas(df, treatment):
    """Return per-sample butyrate, acetate and propionate fluxes in wide form.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format fluxes with columns ``sample_id``, ``metabolite``,
        ``reaction`` and a value column named ``flux``, ``'0'`` (as produced by
        reading the saved CSV) or ``0`` (the in-memory result of
        ``groupby(...).apply(...).reset_index()``).
    treatment : str
        Label written to the ``treatment`` column of every returned row.

    Returns
    -------
    pandas.DataFrame
        One row per ``sample_id`` with one column per metabolite found, plus
        ``sample_id`` and ``treatment`` as regular columns.
    """
    # narrow down to the SCFA exchange reactions of interest
    reactions = df.reaction
    is_scfa = (
        reactions.str.startswith("EX_but(e)")
        | reactions.str.startswith("EX_ac(e)")
        | reactions.str.startswith("EX_ppa(e)")
    )
    new_df = df[is_scfa].reset_index(drop=True)
    # rename the value column for clarity; accept both the string label '0'
    # (frame read back from CSV) and the integer label 0 (in-memory frame)
    new_df = new_df.rename(columns={'0': 'flux', 0: 'flux'})
    # put the data in a pivot table, which makes manipulating it easier
    new_df = pd.pivot_table(new_df, index='sample_id', columns='metabolite', values='flux')
    # add a treatment column so tables from different runs can be concatenated
    new_df['treatment'] = treatment
    # reset the index so the sample ids are not lost when concatenating
    new_df.reset_index(inplace=True)
    return new_df

## Now we'll collect all the predicted SCFA production fluxes, and standard deviations

In [None]:
# Read back the saved control and inulin flux tables.
os.chdir('/proj/gibbons/nbohmann/exvivo/gibbons_2019/data/scfa_production/')
inulin = pd.read_csv('inulin_flux.csv', index_col=0)
ctrl = pd.read_csv('ctrl_flux.csv', index_col=0)

# Stack the SCFA tables of both treatments (control first) and key by sample.
micom = pd.concat([find_scfas(ctrl, 'CTRL'), find_scfas(inulin, 'INUL')])
micom = micom.set_index('sample_id')

# sample_id -> value lookups used to annotate the measured fluxes later on.
ac_micom, but_micom, ppa_micom = (
    micom[column].to_dict() for column in ('ac[e]', 'but[e]', 'ppa[e]')
)
condition = micom['treatment'].to_dict()
micom

## Concatenate predicted fluxes and measured fluxes into one dataframe

In [None]:
# Attach the predicted fluxes and the treatment label to each measured sample
# (lookup by sample_id, which is the index of meta_flux).
meta_flux['Acetic acid - Predicted'] = meta_flux.index.map(ac_micom)
meta_flux['Butyric acid - Predicted'] = meta_flux.index.map(but_micom)
meta_flux['Propionic acid - Predicted'] = meta_flux.index.map(ppa_micom)
meta_flux['condition'] = meta_flux.index.map(condition)
meta_flux['condition'] = meta_flux['condition'].str.replace('CTRL','Control')
meta_flux['condition'] = meta_flux['condition'].str.replace('INUL','Inulin')
# Group samples by the first character of their sample id.
meta_flux['sample_name'] = meta_flux.index.str[0]

# Aggregate only the numeric flux columns. Selecting them explicitly keeps the
# string 'condition' column out of std()/mean(), which pandas >= 2.0 no longer
# drops silently (it raises instead).
value_cols = ['Acetic acid','Butyric acid','Propionic acid',
              'Acetic acid - Predicted','Butyric acid - Predicted','Propionic acid - Predicted']
by_name = meta_flux.groupby('sample_name')
stdevs = by_name[value_cols].std()
# Derive each group's condition from its samples instead of hard-coding a
# list that only fits exactly four groups in a fixed order.
condition_by_name = by_name['condition'].first()
meta_flux = by_name[value_cols].mean().reset_index()
meta_flux['condition'] = meta_flux['sample_name'].map(condition_by_name)

meta_flux['but_dev'] = meta_flux['sample_name'].map(stdevs['Butyric acid - Predicted'].to_dict())
meta_flux['ppa_dev'] = meta_flux['sample_name'].map(stdevs['Propionic acid - Predicted'].to_dict())
meta_flux['ac_dev'] = meta_flux['sample_name'].map(stdevs['Acetic acid - Predicted'].to_dict())
meta_flux['but_dev_meas'] = meta_flux['sample_name'].map(stdevs['Butyric acid'].to_dict())
meta_flux['ppa_dev_meas'] = meta_flux['sample_name'].map(stdevs['Propionic acid'].to_dict())
meta_flux['ac_dev_meas'] = meta_flux['sample_name'].map(stdevs['Acetic acid'].to_dict())

# Scale the deviation columns by 1/sqrt(2); addressed by name rather than by
# position so the result does not depend on column order.
# NOTE(review): sqrt(2) presumably converts the standard deviation to a
# standard error assuming two samples per group -- confirm.
dev_cols = ['but_dev','ppa_dev','ac_dev','but_dev_meas','ppa_dev_meas','ac_dev_meas']
meta_flux[dev_cols] = meta_flux[dev_cols]/np.sqrt(2)

## Finally, we'll plot predicted vs measured fluxes against each other

In [None]:
# Propionate: measured (x) vs predicted (y) flux per sample group, coloured by
# condition, with error bars from the *_dev columns and a linear fit.
# Axis labels are raw strings: '\d' is not a valid escape sequence in a normal
# string literal (SyntaxWarning on Python 3.12+); the label text is unchanged.
plt1=(
    ggplot(
        meta_flux,aes(x='Propionic acid',y='Propionic acid - Predicted'))
        +geom_point(aes(color = 'condition'),size=5)
        # vertical bars: predicted value +/- ppa_dev
        +geom_errorbar(aes(x="Propionic acid", ymin = meta_flux['Propionic acid - Predicted'] - meta_flux['ppa_dev'],
                            ymax=meta_flux['Propionic acid - Predicted'] + meta_flux['ppa_dev']))
        # horizontal bars: measured value +/- ppa_dev_meas
        +geom_errorbarh(aes(y ="Propionic acid - Predicted", xmin = meta_flux['Propionic acid'] - meta_flux['ppa_dev_meas'],
                            xmax=meta_flux['Propionic acid'] + meta_flux['ppa_dev_meas']))
        +geom_smooth(method='lm',linetype='--')
        +scale_color_manual(values = ['deepskyblue','darksalmon'])
        +labs(x=r'Measured($\dfrac{mmol}{L*h}$)',y = r'Predicted($\dfrac{mmol}{gDCW*h}$)',title='Propionate')
        +theme(text = element_text(size=20),panel_background=element_rect(fill = "white",
                                colour = "white",size = 0.5, linetype = "solid"),
                                panel_grid=element_line(size = .2, linetype = "solid",colour = "gray"),
                                axis_line = element_line(size = 2, linetype = "solid",colour = "black"),
                                legend_title=element_blank(),legend_position='right'))
plt1

In [None]:
# Butyrate: measured (x) vs predicted (y) flux per sample group, coloured by
# condition, with error bars from the *_dev columns and a linear fit.
# Axis labels are raw strings: '\d' is not a valid escape sequence in a normal
# string literal (SyntaxWarning on Python 3.12+); the label text is unchanged.
plt2=(
    ggplot(
        meta_flux,aes(x='Butyric acid',y='Butyric acid - Predicted'))
        +geom_point(aes(color = 'condition'),size=5)
        # vertical bars: predicted value +/- but_dev
        +geom_errorbar(aes(x="Butyric acid", ymin = meta_flux['Butyric acid - Predicted'] - meta_flux['but_dev'],
                            ymax=meta_flux['Butyric acid - Predicted'] + meta_flux['but_dev']))
        # horizontal bars: measured value +/- but_dev_meas
        +geom_errorbarh(aes(y ="Butyric acid - Predicted", xmin = meta_flux['Butyric acid'] - meta_flux['but_dev_meas'],
                            xmax=meta_flux['Butyric acid'] + meta_flux['but_dev_meas']))
        +geom_smooth(method='lm',linetype='--')
        +scale_color_manual(values = ['deepskyblue','darksalmon'])
        +labs(x=r'Measured($\dfrac{mmol}{L*h}$)',y = r'Predicted ($\dfrac{mmol}{gDCW*h}$)',title='Butyrate')
        +theme(text = element_text(size=20),panel_background=element_rect(fill = "white",
                                colour = "white",size = 0.5, linetype = "solid"),
                                panel_grid=element_line(size = .2, linetype = "solid",colour = "gray"),
                                axis_line = element_line(size = 2, linetype = "solid",colour = "black"),
                                legend_title=element_blank(),legend_position='right'))
plt2

In [None]:
# Acetate: measured (x) vs predicted (y) flux per sample group, coloured by
# condition, with error bars from the *_dev columns and a linear fit.
# Axis labels are raw strings: '\d' is not a valid escape sequence in a normal
# string literal (SyntaxWarning on Python 3.12+); the label text is unchanged.
plt3=(
    ggplot(
        meta_flux,aes(x='Acetic acid',y='Acetic acid - Predicted'))
        +geom_point(aes(color = 'condition'),size=5)
        # vertical bars: predicted value +/- ac_dev
        +geom_errorbar(aes(x="Acetic acid", ymin = meta_flux['Acetic acid - Predicted'] - meta_flux['ac_dev'],
                            ymax=meta_flux['Acetic acid - Predicted'] + meta_flux['ac_dev']))
        # horizontal bars: measured value +/- ac_dev_meas
        +geom_errorbarh(aes(y ="Acetic acid - Predicted", xmin = meta_flux['Acetic acid'] - meta_flux['ac_dev_meas'],
                            xmax=meta_flux['Acetic acid'] + meta_flux['ac_dev_meas']))
        +geom_smooth(method='lm',linetype='--')
        #+geom_text(aes(label = 'split'),nudge_y = 1)
        +scale_color_manual(values = ['deepskyblue','darksalmon'])
        +labs(x=r'Measured($\dfrac{mmol}{L*h}$)',y = r'Predicted ($\dfrac{mmol}{gDCW*h}$)',title='Acetate')
        +theme(text = element_text(size=20),panel_background=element_rect(fill = "white",
                                colour = "white",size = 0.5, linetype = "solid"),
                                panel_grid=element_line(size = .2, linetype = "solid",colour = "gray"),
                                axis_line = element_line(size = 2, linetype = "solid",colour = "black"),
                                legend_title=element_blank(),legend_position='right'))
plt3

## Save all results

In [None]:
meta_flux.to_csv('/proj/gibbons/nbohmann/exvivo/scfa_paper/2019_exvivos.csv')