In [None]:
import micom
import pandas as pd
import numpy as np
import sklearn.metrics
import matplotlib.pyplot as plt
import seaborn as sns
from plotnine import *

%matplotlib inline

# Model Construction
This notebook constructs sample-specific microbial community-scale metabolic models (MCMMs) for samples in the WBF011 study, in order to compare predicted probiotic engraftment against experimental data.

## Collect Metadata

In [None]:
# Read the tab-delimited sample metadata table
metadata = pd.read_table('../data/Hiseq_metagenomic_202_190916 metadata_conditions.txt')

# Discard the rows at positions 0, 83 and 84 of the table as read
# NOTE(review): the reason these three rows are excluded is not recorded here — confirm
metadata = metadata.drop(index=metadata.index[[0, 83, 84]])

# Sample names ('Name' column) belonging to each time point and study arm
baseline = metadata.loc[metadata['time_point'] == '00_Baseline', 'Name']
endpoint = metadata.loc[metadata['time_point'] == '12_Week', 'Name']
treatment = metadata.loc[metadata['treatment_group'] == 'wbf11', 'Name']
placebo = metadata.loc[metadata['treatment_group'] == 'placebo', 'Name']

# Lookups from sample name to subject id and to treatment group
subject_dict = metadata.set_index('Name')['subject_id'].to_dict()
treatment_dict = metadata.set_index('Name')['treatment_group'].to_dict()

metadata

## Read Taxonomic Assignments from Kraken2
Collect the species-level read counts for the baseline samples, convert them to relative abundances, and split the samples into placebo and treatment groups.

In [None]:
# Load the Kraken2 species-level taxonomic assignment data
counts = pd.read_csv('../data/S_counts.csv', index_col=0)

# Isolate baseline samples: only the text before the first underscore of
# `sample` is kept, and that is what gets compared with the baseline names.
counts['sample'] = counts['sample'].str.split('_').str[0]

# Format for MICOM: one row per (sample_id, species) with summed numeric columns.
# `rename` returns a new frame here instead of being applied with inplace=True
# to a filtered slice of `counts`, which triggers SettingWithCopyWarning.
counts_baseline = (
    counts[counts['sample'].isin(baseline)]
    .rename(columns={'sample': 'sample_id', 's': 'species'})
    .groupby(['sample_id', 'species'])
    .sum(numeric_only=True)
    .reset_index()
)

# Relative abundance = reads of the taxon / total reads of its sample
counts_baseline['tot_reads'] = counts_baseline.groupby('sample_id')['reads'].transform('sum')
counts_baseline['abundance'] = counts_baseline['reads'] / counts_baseline['tot_reads']

# Build underscore-separated ids and strip the "(ex Wegman et al. 2014)" suffix.
# regex=False is required: the patterns contain '(', ')' and '.', which pandas
# versions that default to regex=True would interpret as regex syntax, so the
# literal parenthesised suffix would never match and would be left in place.
# NOTE(review): the suffix is stripped after the groupby above, so a sample that
# contains a taxon both with and without the suffix would end up with two rows
# for the same species here — confirm whether that can occur in this data.
counts_baseline['id'] = (
    counts_baseline['species']
    .str.replace(' ', '_', regex=False)
    .str.replace('_(ex_Wegman_et_al._2014)', '', regex=False)
)
counts_baseline['species'] = counts_baseline['species'].str.replace(
    ' (ex Wegman et al. 2014)', '', regex=False
)

# Split the baseline samples by study arm
counts_treatment = counts_baseline[counts_baseline['sample_id'].isin(treatment)]
counts_placebo = counts_baseline[counts_baseline['sample_id'].isin(placebo)]

## Add Probiotics to Treatment Arm
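Samples in the treatment arm are supplemented with the probiotic cocktail (wbf11): each of the five probiotic species is added at 1% relative abundance, and the existing community is rescaled to the remaining 95%.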

In [None]:
def add_all_probiotic(taxonomy):
    """Add the five-species probiotic cocktail to every sample of a taxonomy table.

    For each distinct ``sample_id`` the existing ``abundance`` values are
    multiplied by 0.95 and five probiotic rows (AMUC, CBEI, CBUT, BINF, EHAL)
    are appended at an abundance of 0.01 each, so the added rows account for
    the 5% removed from the existing community.

    Parameters
    ----------
    taxonomy : pandas.DataFrame
        Table with at least the columns ``sample_id`` and ``abundance``.
        The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        For each sample (in order of first appearance) its rescaled rows
        followed by the five probiotic rows. Probiotic rows only carry
        ``sample_id``, ``species``, ``abundance`` and ``id``; any other input
        column is NaN on those rows. The row index is not reset, so index
        labels may repeat. A probiotic species that is already present in a
        sample is NOT merged with the existing row; the caller is expected to
        combine duplicates. If ``taxonomy`` has no rows, an empty frame with
        the input's columns is returned.

    Notes
    -----
    BINF is entered under the species name 'Bifidobacterium longum'.
    NOTE(review): presumably because the table is species-level — confirm.
    """
    probiotic_species = [
        'Akkermansia muciniphila',
        'Clostridium beijerinckii',
        'Clostridium butyricum',
        'Bifidobacterium longum',
        'Anaerobutyricum hallii',
    ]
    # ids follow the table's convention of species names with underscores
    probiotic_ids = [name.replace(' ', '_') for name in probiotic_species]
    probiotic_abundance = 0.01
    # 5 probiotics x 0.01 = 0.05, so the existing community keeps 0.95
    resident_scale = 0.95

    # Collect the per-sample pieces and concatenate once at the end, rather
    # than repeatedly concatenating onto a growing (initially empty) frame.
    pieces = []
    for sample in taxonomy['sample_id'].unique():
        resident = taxonomy[taxonomy['sample_id'] == sample].copy()
        resident['abundance'] = resident['abundance'] * resident_scale
        supplement = pd.DataFrame({
            'sample_id': [sample] * len(probiotic_species),
            'species': probiotic_species,
            'abundance': [probiotic_abundance] * len(probiotic_species),
            'id': probiotic_ids,
        })
        pieces.extend([resident, supplement])

    if not pieces:
        # No samples: keep the input's columns so downstream groupbys still work
        return taxonomy.iloc[0:0].copy()

    return pd.concat(pieces)
# Supplement the treatment-arm samples with the probiotic cocktail, then
# collapse rows sharing the same (sample_id, id, species) by summing their
# remaining columns, so a probiotic species that was already present in a
# sample ends up as a single row.
counts_probiotic = (
    add_all_probiotic(counts_treatment)
    .groupby(['sample_id', 'id', 'species'])
    .sum()
    .reset_index()
)

## Build Models
Build MCMMs for samples in the treatment arm and the placebo arm