# Model Construction
This notebook constructs sample-specific MCMMs for the samples in Validation Study A, in order to compare predicted probiotic engraftment against experimental data. Data are from Perraudeau et al. 2020 (DOI: 10.1136/bmjdrc-2020-001319).

In [None]:
import micom
import pandas as pd

# Validation Study A

## Collect Metadata

In [None]:
# Load the metadata table
metadata = pd.read_table('../data/studyA_metadata.tsv')

# Drop the QC samples (row positions 0, 83 and 84) and expose the sample
# name under the `sample_id` column used by the rest of the notebook
qc_rows = metadata.index[[0, 83, 84]]
metadata = metadata.drop(qc_rows)
metadata = metadata.rename(columns={'Name': 'sample_id'})

# Per-sample lookup dicts: sample_id -> subject_id / treatment_group
metadata_by_sample = metadata.set_index('sample_id')
subject_dict = metadata_by_sample['subject_id'].to_dict()
treatment_dict = metadata_by_sample['treatment_group'].to_dict()
metadata

## Read Taxonomic Assignments from Kraken2
Collect the species-level read counts for the baseline samples, and split them into placebo and treatment groups.

In [None]:
# Load the Kraken taxonomic assignment data
counts = pd.read_csv('../data/studyA_S_counts.csv', index_col=0)

# Isolate baseline samples.
# The sample id is the part of `sample` before the first underscore; it is
# the key used to attach the metadata columns to every count row.
counts['sample_id'] = counts['sample'].str.split('_').str[0]
counts = counts.merge(metadata, on='sample_id', how='left')
counts_baseline = counts[counts['time_point'] == '00_Baseline']

# Format for MICOM: one row per (subject, species) with summed reads, the
# relative abundance within the subject, and an underscore-separated `id`.
counts_baseline = (
    counts_baseline
    .groupby(['subject_id', 'species', 'treatment_group'])
    .sum(numeric_only=True)
    .reset_index()
)
counts_baseline['tot_reads'] = counts_baseline.groupby('subject_id')['reads'].transform('sum')
counts_baseline['abundance'] = counts_baseline['reads'] / counts_baseline['tot_reads']
counts_baseline['id'] = counts_baseline['species'].str.replace(' ', '_', regex=False)
# From here on the subject id plays the role of the sample id
counts_baseline = counts_baseline.rename(columns={'subject_id': 'sample_id'})

# Rename incorrectly labeled species.
# regex=False is required: the suffix contains '(', ')' and '.', which must
# be matched literally. pandas < 2.0 defaulted to regex=True, in which case
# the parentheses form a group and the literal suffix is never removed.
counts_baseline['id'] = counts_baseline['id'].str.replace(
    '_(ex_Wegman_et_al._2014)', '', regex=False)
counts_baseline['species'] = counts_baseline['species'].str.replace(
    ' (ex Wegman et al. 2014)', '', regex=False)

# Isolate treatment group samples
counts_treatment = counts_baseline[counts_baseline['treatment_group'] == 'wbf11']
counts_placebo = counts_baseline[counts_baseline['treatment_group'] == 'placebo']


## Add Probiotics to Treatment Arm
Samples in the treatment arm are supplemented with a probiotic cocktail, wbf11

In [None]:
def add_all_probiotic(taxonomy, probiotic_abundance=0.04, resident_scale=0.95):
    """Append the five-species probiotic cocktail to every sample of a taxonomy table.

    For each unique ``sample_id`` the existing ``abundance`` values are
    multiplied by ``resident_scale`` and one row per probiotic species
    (AMUC, CBEI, CBUT, BINF -- entered as 'Bifidobacterium longum' -- and
    EHAL) is appended with ``abundance`` set to ``probiotic_abundance``.

    NOTE(review): with the defaults a sample whose abundances sum to 1 ends
    up summing to 0.95 + 5 * 0.04 = 1.15. The previous docstring said
    "1% RA each", which would match ``resident_scale=0.95`` exactly
    (0.95 + 5 * 0.01 = 1). The defaults are left untouched so existing
    results do not change; presumably the table is renormalized downstream
    -- confirm which probiotic abundance was intended.

    Parameters
    ----------
    taxonomy : pandas.DataFrame
        Table with at least the columns ``sample_id``, ``species``,
        ``abundance`` and ``id``. It is not modified.
    probiotic_abundance : float, default 0.04
        Abundance assigned to each appended probiotic row.
    resident_scale : float, default 0.95
        Factor applied to the abundances already present in each sample.

    Returns
    -------
    pandas.DataFrame
        For every sample (in order of first appearance) its rescaled rows
        followed by the five probiotic rows. Columns of ``taxonomy`` that
        the probiotic rows do not define are NaN on those rows. Species
        already present in a sample are NOT merged with the appended rows.
        An empty input yields an empty frame with the input's columns.
    """
    probiotic_species = [
        'Akkermansia muciniphila',
        'Clostridium beijerinckii',
        'Clostridium butyricum',
        'Bifidobacterium longum',
        'Anaerobutyricum hallii',
    ]
    # Same convention as the rest of the table: id is the species name with
    # spaces replaced by underscores.
    probiotic_ids = [name.replace(' ', '_') for name in probiotic_species]
    n_probiotics = len(probiotic_species)

    # Collect the per-sample pieces and concatenate once at the end, rather
    # than re-copying a growing frame on every iteration.
    frames = []
    for sample in taxonomy['sample_id'].unique():
        residents = taxonomy[taxonomy['sample_id'] == sample].copy()
        residents['abundance'] = residents['abundance'] * resident_scale
        cocktail = pd.DataFrame({
            'sample_id': [sample] * n_probiotics,
            'species': probiotic_species,
            'abundance': [probiotic_abundance] * n_probiotics,
            'id': probiotic_ids,
        })
        frames.extend([residents, cocktail])

    if not frames:
        # pd.concat([]) raises; keep the schema so callers can still group
        return taxonomy.iloc[0:0].copy()
    return pd.concat(frames)
# Add probiotics to every treatment-arm sample
counts_probiotic = add_all_probiotic(counts_treatment)

# Combine duplicate taxa: a probiotic species that was already present in a
# sample now appears twice, so sum its rows into one.
# numeric_only=True matches the aggregation used for the baseline counts and
# keeps the non-numeric `treatment_group` column (NaN on the appended rows)
# out of the sum; without it the result depends on the pandas version.
# As a consequence `treatment_group` is not carried into counts_probiotic.
counts_probiotic = (
    counts_probiotic
    .groupby(['sample_id', 'id', 'species'])
    .sum(numeric_only=True)
    .reset_index()
)

## Build Models
Build MCMMs for samples in the treatment arm and the placebo arm

In [None]:
# Build Models
agora = '../agora201_refseq216_species_1.qza'

# Settings shared by all three builds: the same model database, abundance
# cutoff and thread count are passed to every call.
build_options = {'model_db': agora, 'cutoff': 0.001, 'threads': 10}

# Treatment group models
micom.workflows.build(counts_probiotic, out_folder='../WBF011_treated', **build_options)

# Treatment group models w/o probiotics
micom.workflows.build(counts_treatment, out_folder='../WBF011_noProbiotic', **build_options)

# Placebo group models
micom.workflows.build(counts_placebo, out_folder='../Placebo', **build_options)