## In this notebook, we'll investigate how to use MICOM microbial community metabolic modeling to design and test pre- and probiotic interventions aimed at augmenting butyrate production from the gut microbiome. We'll utilize the models we built in the arivale.ipynb notebook for this purpose.

In [5]:
import pandas as pd
import micom 
import micom.measures
import numpy as np 
import seaborn as sns
import scipy
import statsmodels
from plotnine import *

import os
%matplotlib inline

## First, we grow all our Arivale models on the standard European diet, which was completed using the medium_construction.ipynb notebook

In [None]:
# Read the manifest listing the Arivale community models
manifest = pd.read_csv('../models/arivale/manifest.csv', index_col=0)
# Read the standard European medium
EU_medium = pd.read_csv('../media/european.csv')
# Grow the models in MICOM on the European medium
growth = micom.workflows.grow(
    manifest,
    model_folder='../models/arivale',
    medium=EU_medium,
    tradeoff=0.7,
    strategy='none',
    threads=20,
    presolve=True,
)
# Calculate production fluxes from the growth results
exchanges = micom.measures.production_rates(growth)
# Keep only the butyrate exchange rows and record which medium produced them
european = (
    exchanges.loc[exchanges['reaction'].str.startswith('EX_but(e)')]
    .reset_index(drop=True)
    .assign(diet='EU')
)

## Next, we grow the models on the high fiber diet

In [1]:
# Read the manifest listing the Arivale community models
manifest = pd.read_csv('../models/arivale/manifest.csv', index_col=0)
# Read the high fiber medium
HF_medium = pd.read_csv('../media/highfiber.csv')
# Grow the models in MICOM on the high fiber medium
growth = micom.workflows.grow(
    manifest,
    model_folder='../models/arivale',
    medium=HF_medium,
    tradeoff=0.7,
    strategy='none',
    threads=20,
    presolve=True,
)
# Calculate production fluxes from the growth results
exchanges = micom.measures.production_rates(growth)
# Keep only the butyrate exchange rows and record which medium produced them
high_fiber = (
    exchanges.loc[exchanges['reaction'].str.startswith('EX_but(e)')]
    .reset_index(drop=True)
    .assign(diet='High Fiber')
)

NameError: name 'pd' is not defined

## Let's merge these dataframes into one

In [None]:
# Stack the per-diet butyrate tables into one long-format frame.
# The European results are held in `european`; `butyrate` does not exist before
# this cell, so it cannot appear on the right-hand side.
# ignore_index gives the combined frame a clean 0..n-1 index instead of two
# overlapping ranges.
butyrate = pd.concat([european, high_fiber], ignore_index=True)

## Before we move forward, let's define a function that will calculate percentage change between two diets

In [None]:
def percentage_change(col1,col2):
    """Return the change from `col1` to `col2`, expressed as a percentage of `col1`.

    Only subtraction, division and multiplication are used, so the arguments may
    be plain numbers or element-wise containers such as pandas Series.
    """
    difference = col2 - col1
    relative = difference / col1
    return relative * 100

## Calculate the percentage change of butyrate production in each sample between dietary contexts, and define individuals who are 'non-responders' or 'regressors'

In [None]:
# Reshape to one row per sample with one butyrate-flux column per diet
butyrate_pvt = butyrate.pivot_table(index='sample_id', columns='diet', values='flux')
# Percentage change in butyrate production from the EU diet to the high fiber diet
butyrate_pvt['change'] = percentage_change(butyrate_pvt['EU'], butyrate_pvt['High Fiber'])
# Non-responders: EU flux below 10 and a percentage change below 20
low_on_eu = butyrate_pvt['EU'] < 10
small_change = butyrate_pvt['change'] < 20
nonresponders = butyrate_pvt[low_on_eu & small_change]
# Regressors: EU flux above 19.9 and a negative percentage change
high_on_eu = butyrate_pvt['EU'] > 19.9
decreased = butyrate_pvt['change'] < 0
regressors = butyrate_pvt[high_on_eu & decreased]
# Lowest and highest butyrate flux seen in each group across both diets
diet_columns = ['EU', 'High Fiber']
box_nonresponders = [
    nonresponders[diet_columns].min().min(),
    nonresponders[diet_columns].max().max(),
]
box_regressors = [
    regressors[diet_columns].min().min(),
    regressors[diet_columns].max().max(),
]

## Make a density plot (a smoothed histogram) showing the butyrate production profiles of the population on each diet

In [None]:
# Density of butyrate production on each diet. The shaded bands mark the flux
# range spanned by the non-responders (green) and the regressors (orange).
hist_plt = (ggplot(
    # The production flux is stored in the 'flux' column (the same column the
    # pivot table uses); the frame has no 'but' column.
    butyrate,aes(x = 'flux'))
    +annotate(geom_rect, xmin=box_nonresponders[0], xmax=box_nonresponders[1], ymin=0, ymax=float('inf'),
              fill = 'darkgreen', alpha=0.3)
    +annotate(geom_rect, xmin=box_regressors[0], xmax=box_regressors[1], ymin=0, ymax=float('inf'),
              fill = 'orange', alpha=0.3)
    +geom_density(aes(fill = 'diet'))
    # Raw string: '\d' is not a valid Python escape and warns in a normal literal
    +labs(x = r'Butyrate Production ($\dfrac{mmol}{gDW*h}$)',y = 'Fraction',
          color = 'Butyrate Production Quantile, EU Diet')
    +scale_fill_discrete(name = 'Diet', labels = ['European','High Fiber'])
    +theme(text = element_text(size=20),
       panel_background=element_rect(fill = "white", colour = "white",size = 0.5, linetype = "solid"),
       panel_grid=element_blank(),
       axis_line = element_line(size = 2, linetype = "solid", colour = "black"),
       legend_title=element_blank(), legend_position='right'))
hist_plt

## Before testing interventions on the subsets, we'll pull in all our dietary interventions. These include an average European diet, a high-fiber diet, and each of those two diets supplemented with inulin or pectin (neither present in the original high-fiber diet)

In [None]:
def _with_supplement(diet, reaction, metabolite, flux):
    """Return `diet` with one extra row, indexed by `reaction`, appended for a supplement."""
    supplement = pd.DataFrame(index = [reaction],
                              data = {'flux': [flux], 'dilution': [1.0], 'metabolite': [metabolite]})
    return pd.concat([diet, supplement])

# Standard European diet
eu_diet = pd.read_csv('/proj/gibbons/nbohmann/exvivo/diets/western_completed.csv').set_index('reaction')
# High Fiber diet
hf_diet = pd.read_csv('/proj/gibbons/nbohmann/exvivo/diets/highfiber_completed.csv').set_index('reaction')
# European diet with pectin supplementation
eu_diet_pect = _with_supplement(eu_diet, 'EX_pect_m', 'pect_m', 0.75)
# European diet with inulin supplementation
eu_diet_inulin = _with_supplement(eu_diet, 'EX_inulin_m', 'inulin_m', 10.5)
# High Fiber diet with pectin supplementation
hf_diet_pect = _with_supplement(hf_diet, 'EX_pect_m', 'pect_m', 0.75)
# High Fiber diet with inulin supplementation
hf_diet_inulin = _with_supplement(hf_diet, 'EX_inulin_m', 'inulin_m', 10.5)

## Let's define implementation of interventions here - we can iterate across these with each intervention

In [None]:
# Input the name of the community model, and the intervention
def diet_intervention(com_name, diet):
    """Grow one community model on a diet and return its total butyrate production.

    Parameters
    ----------
    com_name : str
        Path of the pickled community model without the '.pickle' suffix.
    diet : pandas.DataFrame
        Medium table; its `flux` column is assigned as the model's medium.

    Returns
    -------
    float
        Sum of the abundance-weighted 'EX_but(e)' fluxes over the rows where
        that weighted flux is positive.
    """
    # Load file (this unpickles it, so only use model files you trust)
    com = micom.load_pickle(com_name+'.pickle')
    # Apply intervention medium
    com.medium = diet.flux
    # Grow the model using the MICOM single-model API
    growth = com.cooperative_tradeoff(fraction = 0.7,fluxes = True)
    # Weight every flux row by the abundance of the member it belongs to
    weighted = growth.fluxes.mul(growth.members.abundance, axis = 0)
    # Keep only rows with positive butyrate flux
    producers = weighted[weighted['EX_but(e)']>0]
    # Return the summed production as a single number; callers build one-row
    # result tables from these values, so the per-row frame must not be returned.
    return producers['EX_but(e)'].sum()

In [11]:
# Preview of a one-row taxonomy entry for Faecalibacterium at 10% abundance
pd.DataFrame([{
    'sample_id': 'bakjsbdf',
    'genus': 'Faecalibacterium',
    'abundance': 0.10,
    'taxon': 'Faecalibacterium',
    'id': 'Faecalibacterium',
    'file': '../agora/data/Faecalibacterium.json',
}])

Unnamed: 0,sample_id,genus,abundance,taxon,id,file
0,bakjsbdf,Faecalibacterium,0.1,Faecalibacterium,Faecalibacterium,../agora/data/Faecalibacterium.json


## Let's also define our probiotic intervention, which we can apply to different diets. 

In [13]:
# Input the name of the community model and the diet 
def probiotic_intervention(com_name, diet): 
    """Add a Faecalibacterium probiotic to a community, grow it on a diet and
    return the total butyrate production.

    Parameters
    ----------
    com_name : str
        Path of the pickled community model without the '.pickle' suffix.
    diet : pandas.DataFrame
        Medium table; its `flux` column is assigned as the model's medium.

    Returns
    -------
    float
        Sum of the abundance-weighted 'EX_but(e)' fluxes over the rows where
        that weighted flux is positive.
    """
    # Load file (this unpickles it, so only use model files you trust)
    com = micom.load_pickle(com_name+'.pickle')
    # Remove any Faecalibacterium already in the model. Take an explicit copy so
    # the rescaling below writes to an independent frame rather than to a
    # filtered slice of com.taxonomy (avoids SettingWithCopyWarning).
    taxonomy = com.taxonomy[~com.taxonomy.index.str.contains('Faecalibacterium')].copy()
    # Scale abundance to 90% of original 
    taxonomy['abundance'] = taxonomy['abundance']/(10/9)
    # Row describing the probiotic at 10% abundance ('sample_id' is a placeholder value)
    probiotic = pd.DataFrame({'sample_id':['bakjsbdf'], 
              'genus': ['Faecalibacterium'], 
              'abundance':[0.10], 
              'taxon':['Faecalibacterium'],
              'id':['Faecalibacterium'],
              'file':['../agora/data/Faecalibacterium.json']})
    # Build a new taxonomy table 
    taxonomy = pd.concat([taxonomy, probiotic])
    # Construct a new community with the table
    probiotic_com = micom.Community(taxonomy)
    # Apply a diet 
    probiotic_com.medium = diet.flux
    # Grow the model with the single model API
    growth_probiotic = probiotic_com.cooperative_tradeoff(fraction = 0.7,fluxes = True)
    # Weight every flux row by the abundance of the member it belongs to
    weighted = growth_probiotic.fluxes.mul(growth_probiotic.members.abundance, axis = 0)
    # Keep only rows with positive butyrate flux
    producers = weighted[weighted['EX_but(e)']>0]
    # Return the summed production as a single number; callers build one-row
    # result tables from these values, so the per-row frame must not be returned.
    return producers['EX_but(e)'].sum()

## Now we iterate across all the interventions with our samples. We'll do this twice: once for the regressors and once for the non-responders

In [None]:
def run_all_interventions(sample_ids):
    """Apply every dietary and probiotic intervention to each sample.

    Parameters
    ----------
    sample_ids : iterable of str
        Community model names, passed on as `com_name` to the intervention functions.

    Returns
    -------
    pandas.DataFrame
        One row per sample (indexed by sample name) and one column per
        intervention. Each cell holds whatever the intervention function
        returned for that sample; the functions are expected to return one
        number per call.
    """
    # Column label -> (intervention function, diet); insertion order fixes both
    # the execution order and the column order.
    interventions = {
        'Euro': (diet_intervention, eu_diet),
        'High-Fiber': (diet_intervention, hf_diet),
        'Euro + Pectin': (diet_intervention, eu_diet_pect),
        'HF + Pectin': (diet_intervention, hf_diet_pect),
        'Euro + Inulin': (diet_intervention, eu_diet_inulin),
        'HF + Inulin': (diet_intervention, hf_diet_inulin),
        'Euro + Probiotic': (probiotic_intervention, eu_diet),
        'HF + Probiotic': (probiotic_intervention, hf_diet),
    }
    sample_ids = list(sample_ids)
    # Collect one record per sample and build the frame once, rather than
    # growing a DataFrame with concat inside the loop.
    rows = [
        {label: run(com_name, diet) for label, (run, diet) in interventions.items()}
        for com_name in sample_ids
    ]
    return pd.DataFrame(rows, index = sample_ids, columns = list(interventions))

# Results of the interventions for regressors
intRegressors = run_all_interventions(regressors.index)
# Results of the interventions for non-responders. The accumulator used to be
# initialised as `intNonResponders` but reassigned as `intNonresponders`, which
# kept only the last sample; a single consistently spelled name is used now.
intNonresponders = run_all_interventions(nonresponders.index)

## We'll concatenate our results, making sure to remember which set each sample comes from 


In [14]:
def _label_group(results, group):
    """Return a copy of `results` whose index is `group` repeated on every row."""
    labelled = results.copy()
    labelled.index = [group] * len(labelled)
    return labelled

# Tag every row with its group name. This is done on copies so that the
# per-sample index of intRegressors / intNonresponders is left intact for any
# later use of those frames (for example saving them to disk).
intervention = pd.concat([_label_group(intRegressors, 'regressors'),
                          _label_group(intNonresponders, 'non-responders')])
# Intervention columns, in the order they are shown as heatmap rows
intervention_columns = ['Euro','High-Fiber','Euro + Pectin', 'HF + Pectin',
                        'Euro + Inulin', 'HF + Inulin','Euro + Probiotic', 'HF + Probiotic']
# Sort by intervention result
intervention = intervention.sort_values(by = intervention_columns)
# For each sample, the position (0-7) of the intervention with the maximum value
max_list = intervention.rename(
    columns = {name: position for position, name in enumerate(intervention_columns)}
).idxmax(axis = 'columns')
intervention

NameError: name 'intRegressors' is not defined

## Now we make a heatmap with black rectangles around the optimal intervention for each sample

In [None]:
from matplotlib.patches import Rectangle

# Colour for each group. Keys are lower-case and the lookup lower-cases the
# index label, so the strip is coloured whichever capitalisation the group
# labels in `intervention.index` use (an unmatched label used to map to NaN).
lut = {'non-responders':'#b2d0b1', 'regressors':'#fee3b2'}
row_colors = [lut[str(group).lower()] for group in intervention.index]

sns.set(font_scale= 1.5)# set font
# Heatmap of interventions (rows) by samples (columns); clustering is disabled
# so positions match the order of `intervention` and `max_list`
ax = sns.clustermap(intervention.T,cmap = 'Reds',figsize = (20,10), #make heatmap with annotations
                 fmt='',col_colors = row_colors, col_cluster = False, row_cluster = False,
                    annot_kws={'fontsize': 18, 'color':'white','verticalalignment': 'center'})

# Raw string: '\d' is not a valid Python escape and warns in a normal literal
ax.ax_cbar.set_ylabel(r"Butyrate($\dfrac{mmol}{gDW*h}$)",size=20)
ax.ax_cbar.set_position((.1, .2, .03, .5))
# Outline the best intervention for each sample. Iterate by position because
# max_list is indexed by repeated group labels, not by integers.
for column_position, row_position in enumerate(max_list):
    ax.ax_heatmap.add_patch(Rectangle((column_position, row_position), 1, 1,
                                      fill=False, edgecolor='black', lw=3))

ax

## Save these results

In [None]:
# Write each group's intervention results next to each other in the results folder.
# NOTE(review): the regressors table was previously written to '/o.csv' (the
# filesystem root), which looks like a truncated path. 'intervention1.csv' is
# assumed by analogy with the non-responders file - confirm the intended name.
results_dir = '/proj/gibbons/nbohmann/exvivo/scfa_paper'
intRegressors.to_csv(results_dir + '/intervention1.csv')
intNonresponders.to_csv(results_dir + '/intervention2.csv')