# Study D
This notebook shows the full workflow for building models, simulating growth, and obtaining SCFA predictions from data collected by the Gut Puzzle project in the Gibbons Lab, 2023.

In [None]:
import pandas as pd
import micom
import micom.measures
from plotnine import *
import scipy

%matplotlib inline

## SCFA Flux
Here we will gather the net SCFA production across the culturing period. The "Measured" columns contain the measured flux over the course of the experiment (mM/h, i.e. ([T1] - [T0])/h), while the "Baseline" columns contain the T0 concentrations (mM).

In [None]:
# Load the raw SCFA measurements; the first CSV column becomes the index.
scfa = pd.read_csv('../data/raw_data/studyD_original_scfas.csv', index_col=0)

# Index labels are split on '_': the first field is stored as the donor,
# the second as the treatment.
label_parts = scfa.index.str.split('_')
scfa['donor'] = label_parts.str[0]
scfa['treatment'] = label_parts.str[1]

# Average all rows that share the same donor/treatment pair.
scfa = scfa.groupby(['donor', 'treatment']).mean().reset_index()

# Re-key the averaged table by '<donor>_<treatment>'.
scfa['sample_id'] = scfa['donor'] + '_' + scfa['treatment']
scfa = scfa.set_index('sample_id')

scfa

## Taxonomy Table
We will read in abundance data for all samples and convert it into a taxonomy table to use in MICOM.

In [None]:
# Read the raw abundance table, keep the three columns used below, rename them
# for consistency, force sample ids to strings, and sum duplicate
# (sample, species) rows.
abundance = (
    pd.read_csv('../data/raw_data/studyD_original_abundance.csv')
    .loc[:, ['species', 'reads', 'sample']]
    .rename(columns={'reads': 'abundance', 'sample': 'sample_id'})
    .astype({'sample_id': 'str'})
    .groupby(['sample_id', 'species'])
    .sum()
    .reset_index()
)

# Species x sample abundance matrix; combinations that do not occur are set to 0.
table = abundance.pivot_table(index='species',
                              columns='sample_id',
                              values='abundance').fillna(0)
table.to_csv('../data/studyD.csv')

# Keep only the second space-separated token of each species name and use the
# same value as the taxon `id`.
second_token = abundance['species'].str.split(' ').str[1]
abundance = abundance.assign(species=second_token, id=second_token)

# Restrict to samples whose id appears among the SCFA donors.
in_scfa = abundance['sample_id'].isin(scfa['donor'])
abundance = abundance[in_scfa]
abundance

## Build Models
Now, we'll build our models, using a relative abundance cutoff of 0.001.

In [None]:
# Options passed to the MICOM build workflow.
build_options = {
    'out_folder': '../models/studyD',        # where the built models are written
    'model_db': '../agora103_species.qza',   # model database used for building
    'cutoff': 0.001,
    'threads': 20,
}

# Build the models from the taxonomy table and keep the returned manifest.
manifest = micom.workflows.build(abundance, **build_options)

## Load Medium
Load in the carbon-stripped European Diet, and construct the intervention diets by augmenting with inulin and pectin

In [None]:
def _with_exchange(base, reaction, metabolite, global_id, flux):
    """Return `base` with one extra medium row appended, indexed by `reaction`."""
    extra_row = pd.DataFrame(
        {
            'reaction': [reaction],
            'metabolite': [metabolite],
            'global_id': [global_id],
            'flux': [flux],
        },
        index=[reaction],
    )
    return pd.concat([base, extra_row])


# Base medium; the first CSV column becomes the index.
medium = pd.read_csv('../media/studyDmedium.csv', index_col=0)

# The control diet is the base medium itself (same object, not a copy).
controlMedium = medium

# Treatment diets: base medium plus one fiber exchange each.
pectinMedium = _with_exchange(medium, 'EX_pect_m', 'pect_m', 'EX_pect(e)', 1)
inulinMedium = _with_exchange(medium, 'EX_inulin_m', 'inulin_m', 'EX_inulin(e)', 14)

inulinMedium

## Grow Models
Now we'll grow the samples using the respective media we constructed

In [None]:
# Reload the model manifest and keep only samples that have SCFA measurements.
manifest = pd.read_csv('../models/studyD/manifest.csv')
has_scfa = manifest['sample_id'].astype('str').isin(scfa['donor'])
manifest = manifest[has_scfa]


def _grow_on(diet):
    """Run the MICOM growth workflow on `diet` with the settings shared by all treatments."""
    return micom.workflows.grow(
        manifest,
        '../models/studyD',
        diet,
        tradeoff=0.7,
        strategy='none',
        threads=10,
    )


# One simulation per diet, run in the order control, pectin, inulin.
controlGrowth = _grow_on(controlMedium)
pectinGrowth = _grow_on(pectinMedium)
inulinGrowth = _grow_on(inulinMedium)


## Compare SCFA Fluxes
We can now extract the production fluxes of SCFAs from each growth simulation, and construct a dataframe with both measured and predicted production rates.

In [None]:
def _tagged_production(growth, suffix):
    """Return production rates for `growth` with `suffix` appended to every sample id."""
    rates = micom.measures.production_rates(growth)
    rates['sample_id'] = rates['sample_id'] + suffix
    return rates


# Production rates per treatment, with the treatment encoded in the sample id.
inulinProduction = _tagged_production(inulinGrowth, '_inulin')
pectinProduction = _tagged_production(pectinGrowth, '_pectin')
controlProduction = _tagged_production(controlGrowth, '_control')

production = pd.concat([inulinProduction,
                        pectinProduction,
                        controlProduction])

# Keep only the three SCFAs and reshape to one row per sample, one column per SCFA.
is_scfa = production['name'].isin(['butyrate', 'acetate', 'propionate'])
res = production[is_scfa].pivot(index='sample_id',
                                columns='name',
                                values='flux')
res = res.rename(columns={'acetate': 'acetatePredicted',
                          'butyrate': 'butyratePredicted',
                          'propionate': 'propionatePredicted'})

# Recover donor (as integer) and treatment from the '<donor>_<treatment>' index.
id_fields = res.index.str.split('_')
res['donor'] = id_fields.str[0].astype('int')
res['treatment'] = id_fields.str[1]

# Put the measured values next to the predictions (aligned on the index), then
# drop repeated column names, keeping the first occurrence of each.
res = pd.concat([res, scfa], axis=1)
res = res.loc[:, ~res.columns.duplicated()].copy()

res['treatment'] = res['treatment'].str.capitalize()
res

## Scale by Biomass
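To compare results between studies, we will scale the predicted SCFA fluxes by biomass. Because accurate qPCR data are not available, we instead use the fraction of human reads in each sample as a proxy: each prediction is multiplied by (1 - fraction of human reads).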

In [None]:
# Scale the predicted SCFA fluxes by (1 - fraction of human reads) per sample.
#
# NOTE: this rescales `res` in place, so re-running the cell without rebuilding
# `res` applies the scaling a second time.

# Re-read the raw table: this cell uses its `sample`, `genus` and `reads` columns.
abundance = pd.read_csv('../data/raw_data/studyD_original_abundance.csv')

# Total reads per sample.
sampleReads = abundance.groupby('sample')['reads'].sum()

# Human reads per sample. Rows are summed so that several 'Homo' rows in one
# sample add up (matching how duplicate species rows are summed when the
# taxonomy table is built), and samples without any 'Homo' row get 0 instead
# of being missing, which would otherwise turn their predictions into NaN.
humanReads = (
    abundance[abundance['genus'] == 'Homo']
    .groupby('sample')['reads']
    .sum()
    .reindex(sampleReads.index, fill_value=0)
)

# Fraction of human reads per sample, as a {sample: fraction} lookup.
pctHuman = (humanReads / sampleReads).to_dict()
res['pct_human'] = res['donor'].map(pctHuman)

# Down-weight every predicted flux by the non-human fraction of the sample.
for predicted in ['acetatePredicted', 'butyratePredicted', 'propionatePredicted']:
    res[predicted] = res[predicted] * (1 - res['pct_human'])

## Plot Results
Finally, we'll plot the predicted fluxes against the measured fluxes.

In [None]:
# Manual colour assignment for the treatment legend.
butyrate_colors = scale_color_manual(
    limits=['Control', 'Pectin', 'Inulin', 'FOS'],
    values=['cornflowerblue', 'mediumseagreen', 'coral', 'purple'],
)

# Title, axis and legend labels.
butyrate_labels = labs(
    title='Butyrate',
    x='Measured Butyrate (mmol/L/h)',
    y='Predicted Butyrate (mmol/gDW/h)',
    color='Treatment',
    fill='Treatment',
)

# White background, no grid, solid black axis lines, rotated x tick labels.
butyrate_theme = theme(
    text=element_text(size=20, color='black'),
    panel_background=element_rect(fill="white", colour="white",
                                  size=0.5, linetype="solid"),
    panel_grid=element_blank(),
    axis_line=element_line(size=2, linetype="solid", colour="black"),
    legend_position='right',
    axis_text_x=element_text(rotation=20, hjust=1),
)

# Measured (x) vs. predicted (y) butyrate: dashed linear fit drawn first,
# points coloured by treatment on top.
fig1 = (
    ggplot(res, aes(x='butyrateMeasured', y='butyratePredicted'))
    + geom_smooth(method='lm', linetype='--')
    + geom_point(aes(color='treatment'), size=8)
    + butyrate_colors
    + butyrate_labels
    + butyrate_theme
)
fig1

In [None]:
# Linear regression with predicted butyrate as x and measured butyrate as y.
# The roles are swapped relative to the scatter plot (which draws predicted on
# the y axis): the correlation coefficient and p-value do not depend on that
# choice, but the slope and intercept do.
butyrate_fit = scipy.stats.linregress(x=res['butyratePredicted'],
                                      y=res['butyrateMeasured'])
butyrate_fit

In [None]:
# Manual colour assignment for the treatment legend.
propionate_colors = scale_color_manual(
    limits=['Control', 'Pectin', 'Inulin', 'FOS'],
    values=['cornflowerblue', 'mediumseagreen', 'coral', 'purple'],
)

# Title, axis and legend labels.
propionate_labels = labs(
    title='Propionate',
    x='Measured Propionate (mmol/L/h)',
    y='Predicted Propionate (mmol/gDW/h)',
    color='Treatment',
)

# White background, no grid, solid black axis lines, rotated x tick labels.
propionate_theme = theme(
    text=element_text(size=20, color='black'),
    panel_background=element_rect(fill="white", colour="white",
                                  size=0.5, linetype="solid"),
    panel_grid=element_blank(),
    axis_line=element_line(size=2, linetype="solid", colour="black"),
    legend_position='right',
    axis_text_x=element_text(rotation=20, hjust=1),
)

# Measured (x) vs. predicted (y) propionate: dashed linear fit drawn first,
# points coloured by treatment on top.
fig2 = (
    ggplot(res, aes(x='propionateMeasured', y='propionatePredicted'))
    + geom_smooth(method='lm', linetype='--')
    + geom_point(aes(color='treatment'), size=8)
    + propionate_colors
    + propionate_labels
    + propionate_theme
)
fig2

In [None]:
# Linear regression with predicted propionate as x and measured propionate as y.
# The roles are swapped relative to the scatter plot (which draws predicted on
# the y axis): the correlation coefficient and p-value do not depend on that
# choice, but the slope and intercept do.
propionate_fit = scipy.stats.linregress(x=res['propionatePredicted'],
                                        y=res['propionateMeasured'])
propionate_fit

In [None]:
# Write the combined predicted/measured table (index included) to the results directory.
results_path = '../results/studyD.csv'
res.to_csv(results_path)