# Okie dokie, here's the deal. After validating _ex vivo_ butyrate predictions and _in vivo_ relevance of these predictions to inflammation, we want to see how we can use MICOM to develop specific diets. To start, we'll do a "diet flip", wherein we will measure the change in butyrate production in models constructed from the Arivale cohort on a western diet versus a high-fiber diet. 

In [None]:
import pandas as pd
import micom 
from tqdm import tqdm
import glob
import numpy as np 
import seaborn as sns
import scipy
from sklearn.cluster import AgglomerativeClustering
import matplotlib.pyplot as plt
import statsmodels

from plotnine import *

import warnings
warnings.simplefilter(action='ignore')
import os
%matplotlib inline

In [None]:
# Work from the Arivale project directory; the relative 'models_reclass' model
# folder passed to micom.workflows.grow in the cells below resolves against it.
os.chdir('/proj/gibbons/nbohmann/arivale/')

## First, we grow all our Arivale models on the western diet: 

In [None]:
# Grow every community listed in the manifest on the western diet.
manifest = pd.read_csv('/proj/gibbons/nbohmann/arivale/models_reclass/manifest.csv', index_col=0)
diet = pd.read_csv('/proj/gibbons/nbohmann/exvivo/diets/western_completed.csv')
growth = micom.workflows.grow(manifest, model_folder='models_reclass',
                              medium=diet, tradeoff=0.7, strategy='none',
                              threads=20, presolve=True)

# Keep export fluxes only and sum flux * abundance within each
# (sample, metabolite, reaction) group. reset_index() leaves the summed
# value in a column named 0.
exchanges = growth.exchanges
exchanges = (exchanges[exchanges.direction == "export"]
             .groupby(["sample_id", "metabolite", "reaction"])
             .apply(lambda df: sum(df.flux * df.abundance))
             .reset_index())

# Select the butyrate exchange rows directly. The previous version appended
# them to an empty frame with DataFrame.append, which was removed in
# pandas 2.0 and added nothing to the result.
butyrate = exchanges[exchanges['reaction'].str.startswith('EX_but(e)')].reset_index(drop=True)
butyrate['diet'] = 'EU'
butyrate

## Next, we grow the models on the high fiber diet

In [None]:
# Grow the same communities on the high-fiber diet.
manifest = pd.read_csv('/proj/gibbons/nbohmann/arivale/models_reclass/manifest.csv', index_col=0)
hf_diet = pd.read_csv('/proj/gibbons/nbohmann/exvivo/diets/highfiber_completed.csv')
# The relative 'models_reclass' folder below is resolved from this directory.
os.chdir('/proj/gibbons/nbohmann/arivale')
growth = micom.workflows.grow(manifest, model_folder='models_reclass',
                              medium=hf_diet, tradeoff=0.7, strategy='none',
                              threads=20, presolve=True)

# Keep export fluxes only and sum flux * abundance within each
# (sample, metabolite, reaction) group. reset_index() leaves the summed
# value in a column named 0.
exchanges = growth.exchanges
exchanges = (exchanges[exchanges.direction == "export"]
             .groupby(["sample_id", "metabolite", "reaction"])
             .apply(lambda df: sum(df.flux * df.abundance))
             .reset_index())

# Select the butyrate exchange rows directly. The previous version appended
# them to an empty frame with DataFrame.append, which was removed in
# pandas 2.0 and added nothing to the result.
high_fiber = exchanges[exchanges['reaction'].str.startswith('EX_but(e)')].reset_index(drop=True)
high_fiber['diet'] = 'high_fiber'

## Let's merge these DataFrames and save our work

In [None]:
# The groupby/apply step left the weighted flux in a column named 0;
# give it a readable name in both tables before stacking them.
butyrate = butyrate.rename(columns={0: 'but'})
high_fiber = high_fiber.rename(columns={0: 'but'})

# Stack the two diets into one long table (each row carries its 'diet' label).
butyrate = pd.concat([butyrate, high_fiber])

# Write the merged table into the paper directory under a relative file name.
os.chdir('/proj/gibbons/nbohmann/exvivo/scfa_paper/')
butyrate.to_csv('diet_flip.csv')

## Here we'll load up the dataframe we just saved. Start here if you want to save some time :)

In [None]:
# Switch to the paper directory so the relative file name below resolves there.
os.chdir('/proj/gibbons/nbohmann/exvivo/scfa_paper/')
# The first CSV column is the index written by to_csv; restore it as the index.
butyrate = pd.read_csv('diet_flip.csv',index_col = 0)
# Display the reloaded table.
butyrate

## Okay, let's make a density plot (a smoothed histogram) to show the distribution of butyrate produced under each diet

In [None]:
def percentage_change(col1, col2):
    """Return the change from ``col1`` to ``col2`` as a percentage of ``col1``.

    Works on anything supporting ``-``, ``/`` and ``*`` (plain numbers or
    pandas columns, which are combined element-wise).
    """
    delta = col2 - col1
    relative = delta / col1
    return relative * 100

In [None]:
# Relabel the high-fiber rows for display. regex=False makes this a literal
# substring replacement regardless of the pandas version's default.
butyrate['diet'] = butyrate['diet'].str.replace('high_fiber', 'High Fiber', regex=False)

# One row per sample, one column per diet, plus the percent change EU -> High Fiber.
butyrate_pvt = pd.pivot_table(butyrate, index='sample_id', columns='diet', values='but')
butyrate_pvt['change'] = percentage_change(butyrate_pvt['EU'], butyrate_pvt['High Fiber'])
# A bare `butyrate_pvt.sort_values(by='change')` used to sit here; its result
# was discarded (sort_values is not in-place), so it has been removed.

# Subgroups: low EU-diet butyrate with less than a 20% increase, and
# high EU-diet butyrate that decreases on the high-fiber diet.
nonresponders = butyrate_pvt[(butyrate_pvt['EU'] < 10) & (butyrate_pvt['change'] < 20)]
regressors = butyrate_pvt[(butyrate_pvt['EU'] > 19.9) & (butyrate_pvt['change'] < 0)]

# [min, max] butyrate value over both diets within each subgroup; these are
# used as the x-limits of the shaded bands in the density plot.
box_nonresponders = [nonresponders[['EU', 'High Fiber']].min().min(),
                     nonresponders[['EU', 'High Fiber']].max().max()]
box_regressors = [regressors[['EU', 'High Fiber']].min().min(),
                  regressors[['EU', 'High Fiber']].max().max()]
butyrate

In [None]:
# Density of butyrate values per diet, with two shaded bands marking the
# butyrate ranges of the non-responder (green) and regressor (orange) subgroups.
hist_plt = (
    ggplot(butyrate, aes(x='but'))
    + annotate(geom_rect, xmin=box_nonresponders[0], xmax=box_nonresponders[1],
               ymin=0, ymax=float('inf'), fill='darkgreen', alpha=0.3)
    + annotate(geom_rect, xmin=box_regressors[0], xmax=box_regressors[1],
               ymin=0, ymax=float('inf'), fill='orange', alpha=0.3)
    + geom_density(aes(fill='diet'))
    # Raw string: '\d' is not a valid escape in a normal literal and triggers
    # a SyntaxWarning on Python 3.12+; the rendered text is unchanged.
    + labs(x=r'Butyrate Production ($\dfrac{mmol}{gDW*h}$)', y='Fraction',
           color='Butyrate Production Quantile, EU Diet')
    + scale_fill_discrete(name='Diet', labels=['European', 'High Fiber'])
    + theme(text=element_text(size=20),
            panel_background=element_rect(fill="white", colour="white",
                                          size=0.5, linetype="solid"),
            panel_grid=element_blank(),
            axis_line=element_line(size=2, linetype="solid", colour="black"),
            legend_title=element_blank(), legend_position='right')
)
hist_plt

### Now we'll pivot the table and calculate individual changes between European and HF diet. Also, we'll make a list of non-responders and regressors to use for further interventions

In [None]:
# Rebuild the per-sample table: one column per diet plus the percent change
# from the EU diet to the High Fiber diet.
butyrate_pvt = butyrate.pivot_table(index='sample_id', columns='diet', values='but')
butyrate_pvt['change'] = percentage_change(butyrate_pvt['EU'], butyrate_pvt['High Fiber'])

# samples: EU butyrate below 10 and a change below 20%.
samples = butyrate_pvt.loc[lambda tbl: (tbl['EU'] < 10) & (tbl['change'] < 20)]
# samples2: EU butyrate above 19.9 and a negative change.
samples2 = butyrate_pvt.loc[lambda tbl: (tbl['EU'] > 19.9) & (tbl['change'] < 0)]

### Before doing interventions, we'll pull in all our dietary interventions. These include an average European diet, a high-fiber diet, and each of those two diets supplemented with inulin or pectin

In [None]:
def _with_supplement(base_diet, reaction, metabolite, flux):
    """Return ``base_diet`` with one extra row (indexed by ``reaction``) appended.

    The extra row carries the given ``flux`` and ``metabolite`` and a
    ``dilution`` of 1.0.
    """
    extra_row = pd.DataFrame(index=[reaction],
                             data={'flux': [flux], 'dilution': [1.0],
                                   'metabolite': [metabolite]})
    return pd.concat([base_diet, extra_row])


# Base diets, indexed by exchange reaction id.
eu_diet = pd.read_csv('/proj/gibbons/nbohmann/exvivo/diets/western_completed.csv').set_index('reaction')
hf_diet = pd.read_csv('/proj/gibbons/nbohmann/exvivo/diets/highfiber_completed.csv').set_index('reaction')

# Each base diet supplemented with pectin (flux 0.75) or inulin (flux 10.5).
eu_diet_pect = _with_supplement(eu_diet, 'EX_pect_m', 'pect_m', 0.75)
eu_diet_inulin = _with_supplement(eu_diet, 'EX_inulin_m', 'inulin_m', 10.5)
hf_diet_pect = _with_supplement(hf_diet, 'EX_pect_m', 'pect_m', 0.75)
hf_diet_inulin = _with_supplement(hf_diet, 'EX_inulin_m', 'inulin_m', 10.5)

## Let's define implementation of interventions here - we can iterate across these with each intervention

In [None]:
def diet_intervention(com, diet):
    """Grow one pickled community on ``diet`` and return its butyrate export fluxes.

    Parameters
    ----------
    com : str
        File stem of the community pickle; ``com + '.pickle'`` is loaded
        relative to the current working directory.
    diet : pandas.DataFrame
        Diet table whose ``flux`` column is assigned as the community medium.

    Returns
    -------
    pandas.Series
        The ``EX_but(e)`` entries of the solution's flux table, restricted
        to rows where that flux is positive.
    """
    # Use the `com` argument. This previously read the notebook-global
    # `com_name`, silently ignoring whatever the caller passed in.
    community = micom.load_pickle(com + '.pickle')
    community.medium = diet.flux
    growth = community.cooperative_tradeoff(fraction=0.7, fluxes=True)
    fluxes = growth.fluxes
    # NOTE(review): these fluxes are not multiplied by member abundance and
    # are not summed to a single value, whereas probiotic_intervention does
    # weight by abundance. Confirm which form the downstream table expects.
    return fluxes.loc[fluxes['EX_but(e)'] > 0, 'EX_but(e)']

## Let's also define our probiotic intervention, which we can apply to different diets. 

In [None]:
def probiotic_intervention(com, diet):
    """Rebuild a community with a 10% 'Faecalibacterium' member and grow it on ``diet``.

    Parameters
    ----------
    com : str
        File stem of the community pickle; ``com + '.pickle'`` is loaded
        relative to the current working directory.
    diet : pandas.DataFrame
        Diet table whose ``flux`` column is assigned as the community medium.

    Returns
    -------
    pandas.DataFrame
        Abundance-weighted fluxes of the rebuilt community, restricted to
        rows whose weighted ``EX_but(e)`` flux is positive.
    """
    # Use the `com` argument. This previously read the notebook-global
    # `com_name`, silently ignoring whatever the caller passed in.
    community = micom.load_pickle(com + '.pickle')

    # Drop any existing Faecalibacterium rows. .copy() gives an independent
    # frame so the column assignments below do not act on a slice.
    keep = ~community.taxonomy.index.str.contains('Faecalibacterium')
    taxonomy = community.taxonomy[keep].copy()
    # Scale the remaining abundances by 9/10.
    taxonomy['abundance'] = taxonomy['abundance'] / (10 / 9)

    # Use the second remaining row as a template for the added member.
    # Selecting with [[1]] keeps it a one-row DataFrame with its column dtypes.
    faecali = taxonomy.iloc[[1]].copy()
    faecali['genus'] = 'Faecalibacterium'
    faecali['abundance'] = 0.10
    faecali['taxon'] = 'Faecalibacterium'
    faecali['id'] = 'Faecalibacterium'
    # NOTE(review): the template's 'file' entry is not overwritten, so the
    # added 'Faecalibacterium' member is built from the template taxon's
    # model file. Confirm whether it should point at a Faecalibacterium model.
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    taxonomy = pd.concat([taxonomy, faecali])

    # Point every member at the genus-level AGORA database, keeping file names.
    taxonomy['file'] = ('/proj/gibbons/nbohmann/exvivo/databases/agora103_genus/data/' +
                        taxonomy['file'].str.split('/').str[-1])

    probiotic_com = micom.Community(taxonomy)
    probiotic_com.medium = diet.flux
    growth_probiotic = probiotic_com.cooperative_tradeoff(fraction=0.7, fluxes=True)
    # Weight each row's fluxes by that member's abundance.
    res = growth_probiotic.fluxes.mul(growth_probiotic.members.abundance, axis=0)
    res = res[res['EX_but(e)'] > 0]
    return res

## Now we iterate across all the interventions with our samples. We'll do this twice, with the list "samples" (non-responders) and "samples2" (regressors)

In [None]:
# Community pickles are loaded by relative name from this directory.
os.chdir('/proj/gibbons/nbohmann/arivale/models_reclass/')

# (column label, intervention function, diet) in the order the columns should
# appear. Replaces two copy-pasted loops that listed these eight calls twice.
intervention_plan = (
    ('Euro', diet_intervention, eu_diet),
    ('High-Fiber', diet_intervention, hf_diet),
    ('Euro + Pectin', diet_intervention, eu_diet_pect),
    ('HF + Pectin', diet_intervention, hf_diet_pect),
    ('Euro + Inulin', diet_intervention, eu_diet_inulin),
    ('HF + Inulin', diet_intervention, hf_diet_inulin),
    ('Euro + Probiotic', probiotic_intervention, eu_diet),
    ('HF + Probiotic', probiotic_intervention, hf_diet),
)

# The loop variable is deliberately named `com_name` and the loop is kept at
# notebook top level (not wrapped in a function) so that `com_name` remains
# visible as a notebook global while the interventions run.
intervention_tables = []
for sample_ids in (samples.index, samples2.index):
    rows = []
    for com_name in tqdm(sample_ids):
        # NOTE(review): pd.DataFrame(index=[...], data={...}) aligns Series
        # values to the single-label index given here; confirm that each
        # intervention yields one value per sample as intended.
        rows.append(pd.DataFrame(
            index=[com_name],
            data={label: run(com_name, diet) for label, run, diet in intervention_plan}))
    # Concatenate once per group instead of growing a frame inside the loop;
    # an empty group still yields an empty DataFrame.
    intervention_tables.append(pd.concat(rows) if rows else pd.DataFrame())

# First table: `samples` (non-responders); second: `samples2` (regressors).
intervention, intervention2 = intervention_tables

## We'll concatenate our results, making sure to remember which set each sample comes from 


In [None]:
# Column order of the intervention table; also defines the 0..7 positions
# used to locate each sample's best intervention.
intervention_order = ['Euro', 'High-Fiber', 'Euro + Pectin', 'HF + Pectin',
                      'Euro + Inulin', 'HF + Inulin', 'Euro + Probiotic', 'HF + Probiotic']

# Load both saved result tables, discard the stored index column, and label
# every row with the group it came from.
intervention = pd.read_csv('/proj/gibbons/nbohmann/exvivo/scfa_paper/intervention.csv')
intervention = intervention.drop(columns='Unnamed: 0')
intervention.index = ['non-responders'] * len(intervention)

intervention2 = pd.read_csv('/proj/gibbons/nbohmann/exvivo/scfa_paper/intervention2.csv')
intervention2 = intervention2.drop(columns='Unnamed: 0')
intervention2.index = ['regressors'] * len(intervention2)

# Stack the groups, order rows by their values column by column, then take
# the natural log of every entry.
intervention = pd.concat([intervention, intervention2]).sort_values(by=intervention_order)
intervention = np.log(intervention)

# For each row, the position (0-7) of the column holding the largest value.
column_position = {label: pos for pos, label in enumerate(intervention_order)}
max_list = intervention.rename(columns=column_position).idxmax(axis='columns')
intervention

## Now we make a heatmap with black rectangles around the optimal intervention for each sample

In [None]:
from matplotlib.patches import Rectangle

# Group -> colour for the bar above the heatmap. The keys must match the index
# labels assigned when the results were loaded ('non-responders' /
# 'regressors'); the previous capitalised keys matched nothing, so every
# sample mapped to a missing colour.
lut = {'non-responders': '#b2d0b1', 'regressors': '#fee3b2'}
row_colors = intervention.index.map(lut)

sns.set(font_scale=1.5)  # set font
# Not used by the clustermap below, which draws with the 'Reds' colormap.
cmap = sns.diverging_palette(230, 200, sep=20, as_cmap=True)

# Interventions as rows, samples as columns; clustering is disabled so the
# existing row/column order is kept.
ax = sns.clustermap(intervention.T, cmap='Reds', figsize=(20, 10),
                    fmt='', col_colors=row_colors, col_cluster=False, row_cluster=False,
                    annot_kws={'fontsize': 18, 'color': 'white', 'verticalalignment': 'center'})

# Raw string: '\d' is not a valid escape in a normal literal and triggers a
# SyntaxWarning on Python 3.12+; the rendered text is unchanged.
# NOTE(review): `intervention` was log-transformed in the cell that built it,
# so the colour scale shows log values; confirm the label's units.
ax.ax_cbar.set_ylabel(r"Butyrate($\dfrac{mmol}{gDW*h}$)", size=20)
ax.ax_cbar.set_position((.1, .2, .03, .5))

# Outline, for each sample (column x), the cell of its best intervention.
# Iterating the values avoids integer lookups on a string-labelled Series,
# whose positional fallback is deprecated in recent pandas.
for x, best_row in enumerate(max_list):
    ax.ax_heatmap.add_patch(Rectangle((x, best_row), 1, 1, fill=False, edgecolor='black', lw=3))

ax

## Save these results

In [None]:
# Write both result tables to the paper directory.
# NOTE(review): these are the same files the "concatenate our results" cell
# reads. If that cell has already run, `intervention` holds the merged,
# log-transformed table with group labels as its index, and saving it here
# overwrites the raw per-sample results. Run this cell right after the
# intervention loops and before the load/concatenate cell; confirm intended order.
intervention.to_csv('/proj/gibbons/nbohmann/exvivo/scfa_paper/intervention.csv')
intervention2.to_csv('/proj/gibbons/nbohmann/exvivo/scfa_paper/intervention2.csv')