### Here we will build and grow microbial community models for participants in the study by Cantu Jungles et al. (2021). Models will be grown on a diluted EU standard diet supplemented with pectin, and the resulting predictions of SCFA production by the microbiome will be compared with experimental data. (Study B)

In [None]:
import pandas as pd
import numpy as np 
import os
import micom as mm
from plotnine import *
#import qiime2 as q2
from scipy import stats

# Turn off pandas' chained-assignment check for this notebook
# (None disables it; the value pandas ships with is 'warn').
pd.options.mode.chained_assignment = None  # default='warn'

%matplotlib inline

### First, we will collect GC-MS data from the _in vitro_ experiments, with targeted SCFA concentrations.

In [None]:
# Load the targeted GC-MS SCFA measurements and reduce the replicate
# cultures to one mean / standard deviation per sample.
os.chdir("/proj/gibbons/nbohmann/exvivo/hamaker_2021/data/")
scfa = pd.read_csv("SCFA_forR_ correlation.csv")  # read in GC-MS data

# Divide SCFA production by the culturing time.
scfa['Acetate'] = scfa['Acetate'] / 24
scfa['Butyrate'] = scfa['Butyrate'] / 24
scfa['Propionate'] = scfa['Propionate'] / 24

# Donor IDs are cast to text so they can be joined with the treatment name
# into "<Treatment>_<Donor>" sample ids.
scfa['Donor'] = scfa['Donor'].astype('str')
scfa['sample_id'] = scfa[['Treatment', 'Donor']].agg('_'.join, axis=1)

# Aggregate the numeric measurement columns only. Text columns such as
# 'Donor' cannot be averaged: older pandas silently dropped them, while
# pandas >= 2.0 raises a TypeError. Selecting the numeric columns up front
# gives the same result on every pandas version.
measurement_cols = scfa.select_dtypes(include='number').columns.tolist()
replicates = scfa.groupby(['sample_id', 'Treatment'])[measurement_cols]
stdevs_meas = replicates.std().reset_index().set_index('sample_id')  # calculate standard deviations
scfa = replicates.mean().reset_index().set_index('sample_id')  # average together triplicate samples

# Isolate samples treated with pectin or controls.
scfa = scfa[(scfa.Treatment.str.contains('Blank')) |
            (scfa.Treatment.str.contains('Pectin'))]
scfa

### Next, we will read in abundance data for all samples. As samples are cultured in triplicate, we will add a numerical suffix to each, indicating the replicate number. 

In [None]:
# Read the abundance table from the study's data directory.
os.chdir('/proj/gibbons/nbohmann/exvivo/hamaker_2021/data/')
abundance = pd.read_csv('hightha1perc_forR.csv')

# Build "<treatment>_<donor>" sample ids; the donor is cast to text first so
# that it can be joined with the treatment name.
abundance['donor'] = abundance['donor'].astype('str')
abundance['sample_id'] = abundance[['treatment_II_B', 'donor']].agg('_'.join, axis=1)

# Replace the two source columns with the combined id and order rows by it.
abundance = (
    abundance.drop(columns=['treatment_II_B', 'donor'])
    .set_index('sample_id')
    .sort_index()
)

# Remove the treatment without SCFA measurements (ids containing 'II_'),
# then move the sample id back into the first column.
abundance = abundance[~abundance.index.str.contains('II_')].reset_index()

# Rows sharing a sample id are replicates: number them 1, 2, 3, ... and
# append that number to the id.
vals = abundance.groupby(abundance['sample_id']).cumcount().values
abundance['sample_id'] = abundance['sample_id'] + '_' + (vals + 1).astype(str)
abundance

### Now we will convert the abundance table to long form.

In [None]:
# Reshape to long form: one row per (sample, taxon) pair. Every column after
# the first ('sample_id') is treated as a taxon.
abundance = abundance.melt(
    id_vars='sample_id',
    value_vars=abundance.columns[1:],
    var_name='id',
    value_name='abundance',
)

# Strip the rank prefixes ('k__' ... 's__') from the taxonomy strings, one
# prefix at a time and in this fixed order.
taxonomy = abundance['id']
for rank_prefix in ('k__', 'p__', 'c__', 'o__', 'f__', 'g__', 's__'):
    taxonomy = taxonomy.str.replace(rank_prefix, '')
abundance['id'] = taxonomy

# The second-to-last ';'-separated field is used as the genus.
abundance['genus'] = abundance['id'].str.split(';').str[-2]
abundance

### We need a model database to use as a resource for genome-scale metabolic models. We will pass in the file path to the AGORA db

In [None]:
# File path of the AGORA model database handed to MICOM's build workflow.
agora = '/proj/gibbons/refs/micom_dbs/agora103_genus.qza'

### Now we'll get to building the models in MICOM, passing in the abundance table, identifying a model folder, passing in the database location, and assigning a relative abundance cutoff

In [None]:
# Build the community models from the long-form abundance table, writing
# them to the models folder and using the AGORA database with a relative
# abundance cutoff of 0.001.
models = mm.workflows.build(
    abundance,
    out_folder='/proj/gibbons/nbohmann/exvivo/hamaker_2021/micom/models/',
    model_db=agora,
    cutoff=0.001,
    threads=20,
)

## We can peek at the resulting models by loading the model manifest

In [None]:
# Switch to the MICOM working directory and display the model manifest,
# using its first column as the row index.
os.chdir('/proj/gibbons/nbohmann/exvivo/hamaker_2021/micom/')
manifest = pd.read_csv(
    'models/manifest.csv',
    index_col=0,
)
manifest

## Now we'll move on to growing the samples. First, we will load in the medium we plan to use. This is an EU standard medium with easily digestible carbon sources removed, to mimic the fecal homogenate used in the experiment. Additionally, we will dilute the medium to 10% of its original content. In this cell, we will grow the control samples. 

In [None]:
# Load the low-carb EU medium and dilute every flux to 10% of its value.
os.chdir('/proj/gibbons/nbohmann/exvivo/diets')
medium = pd.read_csv('european_agora_low_carb.csv')
medium['flux'] = medium['flux'].mul(0.1)

# Grow only the control communities (sample ids starting with 'Blank').
os.chdir("/proj/gibbons/nbohmann/exvivo/hamaker_2021/micom/")
manifest = pd.read_csv('models/manifest.csv')
manifest = manifest[manifest.sample_id.str.startswith('Blank')]
growth = mm.workflows.grow(
    manifest,
    'models',
    medium=medium,
    tradeoff=.7,
    strategy='none',
    threads=20,
)

# Keep export fluxes and, for each sample/metabolite/reaction, add up
# flux * abundance over the contributing rows.
fluxes = growth.exchanges
fluxes = (
    fluxes[fluxes.direction == "export"]
    .groupby(["sample_id", "metabolite", "reaction"])
    .apply(lambda grp: sum(grp.flux * grp.abundance))
    .reset_index()
)

# Store the control predictions next to the experimental data.
os.chdir('/proj/gibbons/nbohmann/exvivo/hamaker_2021/data/')
fluxes.to_csv('ctrl_flux.csv')

### Now we will use the same process to grow the treatment samples. To the medium, we will add 0.75 mmol/gDW*h of pectin. 

In [None]:
# Load the low-carb EU medium and dilute every flux to 10% of its value.
os.chdir('/proj/gibbons/nbohmann/exvivo/diets')
medium = pd.read_csv('european_agora_low_carb.csv')
medium['flux'] = medium['flux'] * 0.1

# Grow only the pectin-treated communities (sample ids starting with 'Pect').
os.chdir("/proj/gibbons/nbohmann/exvivo/hamaker_2021/micom/")
manifest = pd.read_csv('models/manifest.csv')
manifest = manifest[manifest.sample_id.str.startswith('Pect')]

# Add pectin to the medium at a flux of 0.75. DataFrame.append was removed in
# pandas 2.0, so the extra row is attached with pd.concat; columns missing
# from the new row are filled with NaN exactly as append did.
pectin_row = pd.DataFrame([{'reaction': 'EX_pect_m', 'flux': .75}])
treatment_medium = pd.concat([medium, pectin_row], ignore_index=True)

growth = mm.workflows.grow(manifest, 'models', medium=treatment_medium, tradeoff=.7,
                           strategy='none', threads=20)

# Keep export fluxes and, for each sample/metabolite/reaction, add up
# flux * abundance over the contributing rows.
fluxes = growth.exchanges
fluxes = fluxes[fluxes.direction == "export"].groupby(["sample_id", "metabolite", "reaction"]).apply(
    lambda df: sum(df.flux * df.abundance)).reset_index()

# Store the treatment predictions next to the experimental data.
os.chdir('/proj/gibbons/nbohmann/exvivo/hamaker_2021/data/')
fluxes.to_csv('pect_fluxes.csv')

## This function will get the growth results from each sample, and filter down to butyrate, propionate and acetate.

In [None]:
def find_scfas(df, treatment):
    """Tabulate predicted butyrate, acetate and propionate export per sample.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-form export fluxes with ``sample_id``, ``metabolite`` and
        ``reaction`` columns plus one flux column. The flux column may be
        called ``flux``, ``'0'`` (the name produced by a CSV round trip of an
        unnamed ``groupby().apply()`` result) or the integer ``0`` (the name
        of that result before it is written to disk).
    treatment : str
        Label stored in the ``treatment`` column of the result.

    Returns
    -------
    pandas.DataFrame
        One row per ``sample_id`` with one column per SCFA metabolite found,
        plus ``sample_id`` and ``treatment`` columns.
    """
    # Narrow down to the exchange reactions of the SCFAs of interest.
    reaction = df.reaction.str
    is_scfa = (
        reaction.startswith("EX_but(e)")
        | reaction.startswith("EX_ac(e)")
        | reaction.startswith("EX_ppa(e)")
    )
    new_df = df[is_scfa].reset_index(drop=True)

    # Give the flux column a clear name, whether it is labelled '0' (read
    # back from CSV) or 0 (fresh from groupby/apply).
    new_df = new_df.rename(columns={'0': 'flux', 0: 'flux'})

    # Pivot to one row per sample and one column per metabolite; this makes
    # the data easier to manipulate.
    new_df = pd.pivot_table(new_df, index='sample_id', columns='metabolite', values='flux')

    # Add a treatment column so tables from different treatments can be stacked.
    new_df['treatment'] = treatment

    # Move the sample ids back into a column so they survive stacking.
    new_df.reset_index(inplace=True)
    return new_df

## Now we'll use that function to collect all our predicted SCFA fluxes, as well as their standard deviations

In [None]:
# Read the saved control and pectin flux tables and reduce them to the SCFAs.
os.chdir("/proj/gibbons/nbohmann/exvivo/hamaker_2021/data/")
ctrl = pd.read_csv('ctrl_flux.csv', index_col=0)
pect = pd.read_csv('pect_fluxes.csv', index_col=0)

# Stack both treatments. DataFrame.append was removed in pandas 2.0, so the
# tables are combined with pd.concat, which gives the same stacked result.
micom = pd.concat([find_scfas(ctrl, 'Blank'), find_scfas(pect, 'Pectin')],
                  ignore_index=True)

# Keep only the second '_'-separated field of the sample id so that
# replicates of the same sample share one id and can be grouped.
micom['sample_id'] = micom['sample_id'].str.split('_').str[1]

# Standard deviation and mean across replicates, per sample and treatment.
stdevs = micom.groupby(['sample_id', 'treatment']).std().reset_index()
micom = micom.groupby(['sample_id', 'treatment']).mean().reset_index()

# Index the means by "<treatment>_<sample_id>" and expose each SCFA as a
# dict keyed by that name.
micom['sample'] = micom['treatment'] + '_' + micom['sample_id']
micom = micom.set_index('sample').drop(columns=['sample_id', 'treatment'])
ac_micom = micom['ac[e]'].to_dict()
but_micom = micom['but[e]'].to_dict()
ppa_micom = micom['ppa[e]'].to_dict()

# Index the standard deviations the same way.
stdevs['sample_name'] = stdevs['treatment'] + '_' + stdevs['sample_id']
stdevs.set_index('sample_name', inplace=True)

## Now we'll construct a dataframe with all predicted and measured SCFA production fluxes and standard deviations

In [None]:
# Attach the predicted fluxes to the measurement table by sample id.
scfa['Acetate - Predicted'] = scfa.index.map(ac_micom)
scfa['Butyrate - Predicted'] = scfa.index.map(but_micom)
scfa['Propionate - Predicted'] = scfa.index.map(ppa_micom)

# Standard deviations of the predictions ...
scfa['but-devs'] = scfa.index.map(stdevs['but[e]'].to_dict())
scfa['ppa-devs'] = scfa.index.map(stdevs['ppa[e]'].to_dict())
scfa['ac-devs'] = scfa.index.map(stdevs['ac[e]'].to_dict())

# ... and of the measurements.
scfa['but-devs_meas'] = scfa.index.map(stdevs_meas['Butyrate'].to_dict())
scfa['ppa-devs_meas'] = scfa.index.map(stdevs_meas['Propionate'].to_dict())
scfa['ac-devs_meas'] = scfa.index.map(stdevs_meas['Acetate'].to_dict())

# Relabel the blank samples as 'Control'. A single .loc assignment writes to
# the frame itself; the chained form scfa['Treatment'][...] = ... can end up
# modifying a temporary copy and leave the frame unchanged.
scfa.loc[scfa['Treatment'].str.contains('Blank'), 'Treatment'] = 'Control'
scfa

## Now, plot measured against predicted SCFA production fluxes

In [None]:
# Measured vs. predicted butyrate production with a linear fit. Points are
# coloured by treatment; error bars span +/- one standard deviation of the
# predictions (vertical) and of the measurements (horizontal).
plt = (
    ggplot(scfa, aes(x='Butyrate', y='Butyrate - Predicted'))
    + geom_smooth(method='lm', linetype='--')
    + geom_point(aes(color='Treatment'), size=5)
    + geom_errorbar(aes(x="Butyrate",
                        ymin=scfa['Butyrate - Predicted'] - scfa['but-devs'],
                        ymax=scfa['Butyrate - Predicted'] + scfa['but-devs']))
    + geom_errorbarh(aes(y="Butyrate - Predicted",
                         xmin=scfa['Butyrate'] - scfa['but-devs_meas'],
                         xmax=scfa['Butyrate'] + scfa['but-devs_meas']))
    + scale_color_manual(values=['deepskyblue', 'yellowgreen'])
    # Raw strings keep the LaTeX backslash literal; '\d' in a normal string
    # is an invalid escape sequence and triggers a warning on newer Pythons.
    + labs(x=r'Measured ($\dfrac{mmol}{L*h}$)',
           y=r'Predicted ($\dfrac{mmol}{gDW*h}$)',
           title='Butyrate')
    + theme(text=element_text(size=15),
            panel_background=element_rect(fill="white", colour="white",
                                          size=0.5, linetype="solid"),
            panel_grid=element_line(size=.2, linetype="solid", colour="gray"),
            axis_line=element_line(size=2, linetype="solid", colour="black"),
            legend_title=element_blank(),
            legend_position='right')
)

plt

In [None]:
# Measured vs. predicted propionate production with a linear fit. Points are
# coloured by treatment; error bars span +/- one standard deviation of the
# predictions (vertical) and of the measurements (horizontal).
plt = (
    ggplot(scfa, aes(x='Propionate', y='Propionate - Predicted'))
    + geom_smooth(method='lm', linetype='--')
    + geom_point(aes(color='Treatment'), size=5)
    + geom_errorbar(aes(x="Propionate",
                        ymin=scfa['Propionate - Predicted'] - scfa['ppa-devs'],
                        ymax=scfa['Propionate - Predicted'] + scfa['ppa-devs']))
    + geom_errorbarh(aes(y="Propionate - Predicted",
                         xmin=scfa['Propionate'] - scfa['ppa-devs_meas'],
                         xmax=scfa['Propionate'] + scfa['ppa-devs_meas']))
    + scale_color_manual(values=['deepskyblue', 'yellowgreen'])
    # Raw strings keep the LaTeX backslash literal; '\d' in a normal string
    # is an invalid escape sequence and triggers a warning on newer Pythons.
    + labs(x=r'Measured ($\dfrac{mmol}{L*h}$)',
           y=r'Predicted ($\dfrac{mmol}{gDW*h}$)',
           title='Propionate')
    + theme(text=element_text(size=15),
            panel_background=element_rect(fill="white", colour="white",
                                          size=0.5, linetype="solid"),
            panel_grid=element_line(size=.2, linetype="solid", colour="gray"),
            axis_line=element_line(size=2, linetype="solid", colour="black"),
            legend_title=element_blank(),
            legend_position='right')
)

plt

In [None]:
# Measured vs. predicted acetate production with a linear fit. Points are
# coloured by treatment; error bars span +/- one standard deviation of the
# predictions (vertical) and of the measurements (horizontal).
plt = (
    ggplot(scfa, aes(x='Acetate', y='Acetate - Predicted'))
    + geom_smooth(method='lm', linetype='--')
    + geom_point(aes(color='Treatment'), size=5)
    + geom_errorbar(aes(x="Acetate",
                        ymin=scfa['Acetate - Predicted'] - scfa['ac-devs'],
                        ymax=scfa['Acetate - Predicted'] + scfa['ac-devs']))
    + geom_errorbarh(aes(y="Acetate - Predicted",
                         xmin=scfa['Acetate'] - scfa['ac-devs_meas'],
                         xmax=scfa['Acetate'] + scfa['ac-devs_meas']))
    + scale_color_manual(values=['deepskyblue', 'yellowgreen'])
    # Raw strings keep the LaTeX backslash literal; '\d' in a normal string
    # is an invalid escape sequence and triggers a warning on newer Pythons.
    + labs(x=r'Measured ($\dfrac{mmol}{L*h}$)',
           y=r'Predicted ($\dfrac{mmol}{gDW*h}$)',
           title='Acetate')
    + theme(text=element_text(size=15),
            panel_background=element_rect(fill="white", colour="white",
                                          size=0.5, linetype="solid"),
            panel_grid=element_line(size=.2, linetype="solid", colour="gray"),
            axis_line=element_line(size=2, linetype="solid", colour="black"),
            legend_title=element_blank(),
            legend_position='right')
)

plt

## Save all results

In [None]:
# Write the combined measured/predicted table to the paper's results folder.
scfa.to_csv(
    '/proj/gibbons/nbohmann/exvivo/scfa_paper/hamaker.csv',
)