In [None]:
import pandas as pd
import micom
import matplotlib.pyplot as plt
import micom.measures
import numpy as np 
from plotnine import *
import scipy.stats
import pyreadr

%matplotlib inline

# SCFA Prediction
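This notebook simulates the growth of MCMMs (microbial community-scale metabolic models), predicts butyrate and propionate production under each condition, and compares the predictions against measured CRP.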

## Collect Metadata

In [None]:
# Read the sample metadata table, then discard the rows at positions 0, 83 and 84.
# NOTE(review): the reason these three rows are excluded is not visible here — confirm.
metadata_raw = pd.read_table('../data/Hiseq_metagenomic_202_190916 metadata_conditions.txt')
metadata = metadata_raw.drop(metadata_raw.index[[0, 83, 84]])

# Sample names grouped by time point and by treatment arm
baseline = metadata.loc[metadata['time_point'] == '00_Baseline', 'Name']
endpoint = metadata.loc[metadata['time_point'] == '12_Week', 'Name']
treatment = metadata.loc[metadata['treatment_group'] == 'wbf11', 'Name']
placebo = metadata.loc[metadata['treatment_group'] == 'placebo', 'Name']

# Lookups from sample name to subject ID and to treatment group
metadata_by_name = metadata.set_index('Name')
subject_dict = metadata_by_name['subject_id'].to_dict()
treatment_dict = metadata_by_name['treatment_group'].to_dict()

metadata

## Prepare MCMM Medium
Load the standard European medium as the assumed background diet.

In [None]:
# Background diet: the medium stored in the QIIME 2 artifact
medium_artifact = '../european_medium.qza'
eu_medium = micom.qiime_formats.load_qiime_medium(medium_artifact)

# Supplemented diet: a copy of the background medium with the inulin
# exchange flux set to 4 (inulin supplement, as in the trial)
inulin_exchange, inulin_flux = 'EX_inulin_m', 4
eu_medium_wbf11 = eu_medium.copy()
eu_medium_wbf11.loc[inulin_exchange, 'flux'] = inulin_flux

## Grow Models
Run every condition: the placebo group, and the treatment group with and without inulin and with and without the probiotic cocktail.

In [None]:
def _grow_condition(manifest, model_folder, medium):
    """Run `micom.workflows.grow` with the settings shared by every condition.

    Only the manifest, the model folder and the medium differ between
    conditions; tradeoff, strategy and thread count are fixed here.
    """
    return micom.workflows.grow(manifest,
                                model_folder = model_folder,
                                medium = medium,
                                tradeoff = 0.99,
                                strategy = 'none',
                                threads = 20)


# --- Probiotic-treated samples ---
manifest_probiotic = pd.read_csv('../WBF011_treated/manifest.csv')
# with inulin
growth_treatment = _grow_condition(manifest_probiotic, '../WBF011_treated/', eu_medium_wbf11)
# without inulin
growth_noPrebiotic = _grow_condition(manifest_probiotic, '../WBF011_treated/', eu_medium)

# --- Untreated samples ---
manifest_noProbiotic = pd.read_csv('../WBF011_noProbiotic/manifest.csv')
# with inulin
growth_noProbiotic = _grow_condition(manifest_noProbiotic, '../WBF011_noProbiotic/', eu_medium_wbf11)
# without inulin
growth_noTreatment = _grow_condition(manifest_noProbiotic, '../WBF011_noProbiotic/', eu_medium)

# --- Placebo-group samples ---
manifest_placebo = pd.read_csv('../Placebo/manifest.csv')
# without inulin
growth_placebo = _grow_condition(manifest_placebo, '../Placebo/', eu_medium)

## Calculate SCFAs
Calculate overall SCFA production from each growth simulation. 

In [None]:
# Production rates per growth simulation, each tagged with its condition
# label in a 'timepoint' column
production_baseline = micom.measures.production_rates(growth_noTreatment).assign(timepoint = 'No Treatment')
production_noPrebiotic = micom.measures.production_rates(growth_noPrebiotic).assign(timepoint = 'WBF-011, no Inulin')
production_treatment = micom.measures.production_rates(growth_treatment).assign(timepoint = 'WBF-011 + Inulin')
production_noProbiotic = micom.measures.production_rates(growth_noProbiotic).assign(timepoint = 'Inulin, no WBF-011')
production_placebo = micom.measures.production_rates(growth_placebo).assign(timepoint = 'Placebo')

# Stack all conditions into one long table
condition_tables = [production_baseline,
                    production_treatment,
                    production_placebo,
                    production_noPrebiotic,
                    production_noProbiotic]
production = pd.concat(condition_tables, axis = 0)

# Subject ID: second '_'-separated field of the subject label mapped from the sample ID
subject_labels = production['sample_id'].map(subject_dict)
production['subject_id'] = subject_labels.str.split('_').str[1]

# Rows for butyrate and for propionate
production_but = production.loc[production['metabolite'] == 'but[e]']
production_ppa = production.loc[production['metabolite'] == 'ppa[e]']

## Plot Predicted Butyrate Production
Display violin plots for butyrate production for each condition

In [None]:
# Conditions in the left-to-right order used on the x axis
but_conditions = ['Placebo','No Treatment','WBF-011, no Inulin','Inulin, no WBF-011','WBF-011 + Inulin']

# Figure size and text sizes
but_theme = theme(figure_size=(12, 8),
                  axis_title=element_text(size=18),
                  axis_text=element_text(size=15),
                  plot_title=element_text(size=20))

# One violin per condition with the individual samples jittered on top
but_plot = (
    ggplot(production_but, aes(x = 'timepoint', y = 'flux'))
    + geom_violin(color = 'gray', fill = 'cadetblue', alpha = 0.3, width = 1)
    + geom_jitter(color = 'navy', width = .1, size = 4)
    + labs(x = 'Timepoint', y = 'Predicted Butyrate (mmol/h/gDW)',  title = 'Butyrate')
    + scale_x_discrete(limits = but_conditions)
    + ylim(0, 70)
    + theme_minimal()
    + but_theme
)

# Write the figure to disk, then show it as the cell output
but_plot.save('../figures/butyratePrediction.svg', dpi = 200)
but_plot

## Test Significance
Use the non-parametric Mann-Whitney U test to determine the significance of the difference in butyrate production between conditions

In [None]:
# Predicted butyrate flux for the two conditions being compared
but_inulin_only = production_but.loc[production_but['timepoint'] == 'Inulin, no WBF-011', 'flux']
but_wbf_inulin = production_but.loc[production_but['timepoint'] == 'WBF-011 + Inulin', 'flux']

# Mann-Whitney U test: inulin alone vs. WBF-011 + inulin
scipy.stats.mannwhitneyu(but_inulin_only, but_wbf_inulin)

## Plot Predicted Propionate Production
Display violin plots for propionate production for each condition

In [None]:
# Conditions in the left-to-right order used on the x axis
ppa_conditions = ['Placebo','No Treatment','WBF-011, no Inulin','Inulin, no WBF-011','WBF-011 + Inulin']

# Figure size and text sizes
ppa_theme = theme(figure_size=(12, 8),
                  axis_title=element_text(size=18),
                  axis_text=element_text(size=15),
                  plot_title=element_text(size=20))

# One violin per condition with the individual samples jittered on top
ppa_plot = (
    ggplot(production_ppa, aes(x = 'timepoint', y = 'flux'))
    + geom_violin(color = 'gray', fill = 'cadetblue', alpha = 0.3, width = 1)
    + geom_jitter(color = 'navy', width = .1, size = 4)
    + labs(x = 'Timepoint', y = 'Predicted Propionate (mmol/h/gDW)',  title = 'Propionate')
    + scale_x_discrete(limits = ppa_conditions)
    + ylim(0, 140)
    + theme_minimal()
    + ppa_theme
)

# Write the figure to disk, then show it as the cell output
ppa_plot.save('../figures/propionatePrediction.svg', dpi = 200)
ppa_plot

## Test Significance
Use the non-parametric Mann-Whitney U test to determine the significance of the difference in propionate production between conditions

In [None]:
# Predicted propionate flux for the two conditions being compared
ppa_wbf_inulin = production_ppa.loc[production_ppa['timepoint'] == 'WBF-011 + Inulin', 'flux']
ppa_inulin_only = production_ppa.loc[production_ppa['timepoint'] == 'Inulin, no WBF-011', 'flux']

# Mann-Whitney U test: WBF-011 + inulin vs. inulin alone
scipy.stats.mannwhitneyu(ppa_wbf_inulin, ppa_inulin_only)

## Pivot SCFAs 
Reformat to calculate Δbutyrate between start and end conditions

In [None]:
# Keep only the two conditions compared per subject: the untreated baseline
# and WBF-011 + inulin. This rebinds `production`, `production_but` and
# `production_ppa`, replacing the all-condition tables of the same names.
production = pd.concat([production_baseline, production_treatment], axis = 0)

# Subject ID: second '_'-separated field of the subject label mapped from the sample ID
production['subject_id'] = production['sample_id'].map(subject_dict).str.split('_').str[1]

# Rows for butyrate and for propionate
production_but = production.loc[production['metabolite'] == 'but[e]']
production_ppa = production.loc[production['metabolite'] == 'ppa[e]']


def _flux_per_subject(rates, scfa):
    """Pivot `rates` to one row per subject with `baseline_<scfa>` and
    `endpoint_<scfa>` flux columns; missing entries are filled with 0.0."""
    wide = pd.pivot_table(rates,
                          index = 'subject_id',
                          columns = 'timepoint',
                          values = 'flux')
    wide = wide.rename(columns = {'No Treatment': 'baseline_' + scfa,
                                  'WBF-011 + Inulin': 'endpoint_' + scfa})
    return wide.fillna(0.0)


# Per-subject tables for butyrate and propionate, placed side by side
but_pvt = _flux_per_subject(production_but, 'but')
ppa_pvt = _flux_per_subject(production_ppa, 'ppa')
scfas_pvt = pd.concat([but_pvt, ppa_pvt], axis = 1)

# Butyrate + propionate at baseline and at endpoint
for stage in ('baseline', 'endpoint'):
    scfas_pvt[stage + '_tot'] = scfas_pvt[stage + '_but'] + scfas_pvt[stage + '_ppa']

# Endpoint minus baseline for butyrate, propionate and their sum
for scfa in ('but', 'ppa', 'tot'):
    scfas_pvt['delta_' + scfa] = scfas_pvt['endpoint_' + scfa] - scfas_pvt['baseline_' + scfa]
scfas_pvt

## Merge with CRP
Load CRP data and merge with predicted butyrate and propionate production

In [None]:
# Read the CRP measurements
crp_raw = pd.read_excel('../data/tabCRPMeasures.xlsx')

# Rename columns for clarity and add the endpoint-minus-baseline change.
# Built as a chain (no inplace edits) so re-running the cell always starts
# from the freshly read table.
CRP = (
    crp_raw
    .rename(columns = {'SUBJECT_ID':'subject_id',
                       'CRP_V1':'baselineCRP',
                       'CRP_V3':'midpointCRP',
                       'CRP_V7':'endpointCRP'})
    .assign(deltaCRP = lambda df: df['endpointCRP'] - df['baselineCRP'])
    [['subject_id','baselineCRP','endpointCRP','deltaCRP']]
    .set_index('subject_id')
)

# Subject IDs in the SCFA table are strings, so cast the CRP index to match
CRP.index = CRP.index.astype('str')

# Keep only subjects that have SCFA predictions. The subject-indexed table
# built by this notebook is `scfas_pvt`; the name used here previously,
# `probiotic_rates_pvt`, is never defined and raised NameError on a fresh run.
CRP = CRP[CRP.index.isin(scfas_pvt.index)]

# Put CRP measures and SCFA predictions side by side, one row per subject.
# NOTE(review): fillna(0.0) turns a missing CRP value into 0 for any subject
# that has predictions but no CRP row — confirm this is intended before
# interpreting the regressions.
comp_df = pd.concat([CRP, scfas_pvt], axis = 1).fillna(0.0)

## Regress CRP Measures against Predictions

In [None]:
# Select the CRP measures and the SCFA prediction columns by name rather than
# by position, so the selection stays correct when this cell is re-run after
# the index has been moved into a column.
crp = [col for col in comp_df.columns if col in CRP.columns]
predictions = [col for col in comp_df.columns if col in scfas_pvt.columns]

# Move the subject index into a column exactly once; a second reset would add
# a spurious 'index' column and a third would raise.
if not isinstance(comp_df.index, pd.RangeIndex):
    comp_df = comp_df.reset_index()

# Regress every prediction on every CRP measure and collect r and p.
# Records are gathered in a list and turned into a frame once, instead of
# growing a DataFrame with pd.concat inside the loop; the result also gets a
# unique 0..n-1 index.
records = []
for measure in crp:
    for prediction in predictions:
        fit = scipy.stats.linregress(comp_df[measure], comp_df[prediction])
        records.append({'measure': measure,
                        'prediction': prediction,
                        'rvalue': fit.rvalue,
                        'pvalue': fit.pvalue})

regressions = pd.DataFrame(records, columns = ['measure', 'prediction', 'rvalue', 'pvalue'])
regressions

## Plot Δbutyrate vs. ΔCRP 

In [None]:
# Axis labels and title for the butyrate vs. CRP scatter
butCRP_labels = labs(x = 'ΔButyrate (mmol/gDW/h)', y = 'ΔCRP', title = 'Butyrate vs. CRP')

# Scatter of per-subject changes with a linear-model smoother
butCRP_plot = (
    ggplot(comp_df, aes(x = 'delta_but', y = 'deltaCRP'))
    + geom_point(size = 3)
    + geom_smooth(method = 'lm')
    + butCRP_labels
    + theme_minimal()
    + theme(figure_size=(8, 6))
)

# Write the figure to disk, then show it as the cell output
butCRP_plot.save('../figures/crp_but.svg', dpi = 300)
butCRP_plot

## Plot Δpropionate vs. ΔCRP 

In [None]:
# Axis labels and title for the propionate vs. CRP scatter
ppaCRP_labels = labs(x = 'ΔPropionate (mmol/gDW/h)', y = 'ΔCRP', title = 'Propionate vs. CRP')

# Scatter of per-subject changes with a linear-model smoother
ppaCRP_plot = (
    ggplot(comp_df, aes(x = 'delta_ppa', y = 'deltaCRP'))
    + geom_point(size = 3)
    + geom_smooth(method = 'lm')
    + ppaCRP_labels
    + theme_minimal()
    + theme(figure_size=(8, 6))
)

# Write the figure to disk, then show it as the cell output
ppaCRP_plot.save('../figures/crp_ppa.svg', dpi = 300)
ppaCRP_plot

In [None]:
# Stray scratch expression removed: `x` is not defined anywhere in this
# notebook, so evaluating it raised NameError on Restart & Run All.