In [1]:
import os
import pandas as pd
import qiime2 as q2
from qiime2 import Visualization
from seaborn import scatterplot
import seaborn as sns
import numpy as np
import requests
import matplotlib.pyplot as plt

%matplotlib inline

In [13]:
# Directory layout used by the notebook.
data_dir = 'data'                                         # holds phylogeny_filtered_table.qza and reference-tree.qza
or_dir = '../data'                                        # holds the shared metadata.tsv
delivery_dir = 'data/deliverymode_comparison'             # artifacts produced by this notebook
figure_dir = 'data/deliverymode_comparison/figures'       # exported figures and tables

# exist_ok=True keeps the cell re-runnable and avoids the separate
# "check, then create" step, which can race with another process.
for directory in (data_dir, delivery_dir, figure_dir):
    os.makedirs(directory, exist_ok=True)

In [3]:
# Read the tab-separated sample metadata and preview its first rows.
metadata_path = f'{or_dir}/metadata.tsv'
metadata = pd.read_csv(metadata_path, delimiter='\t')
metadata.head()

Unnamed: 0,id,Library Layout,Instrument,collection_date,geo_location_name,geo_latitude,geo_longitude,host_id,age_days,weight_kg,...,birth_length_cm,sex,delivery_mode,zygosity,race,ethnicity,delivery_preterm,diet_milk,diet_weaning,age_months
0,ERR1314182,PAIRED,Illumina MiSeq,2011-11-11 00:00:00,"USA, Missouri, St. Louis",38.63699,-90.263794,42.1,232.0,,...,47.0,male,Cesarean,Monozygotic,Caucasian,Not Hispanic,True,fd,True,8.0
1,ERR1314183,PAIRED,Illumina MiSeq,2010-12-11 00:00:00,"USA, Missouri, St. Louis",38.63699,-90.263794,27.2,192.0,,...,45.0,female,Cesarean,Dizygotic,Caucasian,Hispanic,True,fd,True,6.0
2,ERR1314184,PAIRED,Illumina MiSeq,2011-12-11 00:00:00,"USA, Missouri, St. Louis",38.63699,-90.263794,28.1,536.0,,...,51.0,female,Cesarean,Monozygotic,Caucasian,Not Hispanic,False,,,18.0
3,ERR1314185,PAIRED,Illumina MiSeq,2011-12-11 00:00:00,"USA, Missouri, St. Louis",38.63699,-90.263794,28.2,537.0,,...,50.0,female,Cesarean,Monozygotic,Caucasian,Not Hispanic,False,,,18.0
4,ERR1314186,PAIRED,Illumina MiSeq,2013-01-12 00:00:00,"USA, Missouri, St. Louis",38.63699,-90.263794,39.2,688.0,,...,48.0,male,Cesarean,Monozygotic,African-American,Not Hispanic,True,,,23.0


## Delivery Mode comparison
First, the data is separated into datasets that each contain all samples with the same delivery mode. This is done solely for exploratory purposes, to see the taxa at different levels split by delivery mode. For that, the metadata is filtered, exported as TSV files, and then used to filter the feature tables.

### Data sorting

In [4]:
# Grouped view of the full metadata by host and sampling age
# (not referenced again in the cells visible in this notebook).
meta_delimode_sort = metadata.groupby(['host_id', 'age_days'])

# One metadata subset per delivery mode.
mode_col = metadata['delivery_mode']
meta_cesarean = metadata.loc[mode_col == 'Cesarean']
meta_emcesarean = metadata.loc[mode_col == 'Cesarean_emergency']
meta_vaginal = metadata.loc[mode_col == 'Vaginal']

# Write every subset as TSV without the pandas index; the files are passed to
# `qiime feature-table filter-samples` as --m-metadata-file further down.
subset_exports = {
    f'{delivery_dir}/metadata_cesarean.tsv': meta_cesarean,
    f'{delivery_dir}/metadata_emergency_cesarean.tsv': meta_emcesarean,
    f'{delivery_dir}/metadata_vaginal.tsv': meta_vaginal,
}
for tsv_path, subset in subset_exports.items():
    subset.to_csv(tsv_path, sep='\t', index=False)

In [5]:
# Group each delivery-mode subset by host and sampling age.
group_keys = ['host_id', 'age_days']
meta_cesarean_grouped = meta_cesarean.groupby(group_keys)
meta_emcesarean_grouped = meta_emcesarean.groupby(group_keys)
meta_vaginal_grouped = meta_vaginal.groupby(group_keys)

In [6]:
# Read the three delivery-mode subsets back from their TSV exports.
cesarean_tsv = f'{delivery_dir}/metadata_cesarean.tsv'
emcesarean_tsv = f'{delivery_dir}/metadata_emergency_cesarean.tsv'
vaginal_tsv = f'{delivery_dir}/metadata_vaginal.tsv'

meta_cesarean = pd.read_csv(cesarean_tsv, delimiter='\t')
meta_emcesarean = pd.read_csv(emcesarean_tsv, delimiter='\t')
meta_vaginal = pd.read_csv(vaginal_tsv, delimiter='\t')

### Vaginal delivery

In [7]:
! qiime feature-table filter-samples \
    --i-table $data_dir/phylogeny_filtered_table.qza \
    --m-metadata-file $delivery_dir/metadata_vaginal.tsv \
    --o-filtered-table $delivery_dir/pjnb-phyl-tab-vag.qza

[32mSaved FeatureTable[Frequency] to: data/deliverymode_comparison/pjnb-phyl-tab-vag.qza[0m
[0m

In [8]:
! qiime feature-table summarize \
  --i-table $delivery_dir/pjnb-phyl-tab-vag.qza \
  --m-sample-metadata-file $delivery_dir/metadata_vaginal.tsv \
  --o-visualization $delivery_dir/pjnb-phyl-tab-vag.qzv

[32mSaved Visualization to: data/deliverymode_comparison/pjnb-phyl-tab-vag.qzv[0m
[0m

In [9]:
# Display the feature-table summary of the vaginal-delivery samples inline.
vag_summary_qzv = f'{delivery_dir}/pjnb-phyl-tab-vag.qzv'
Visualization.load(vag_summary_qzv)

In [None]:
! qiime diversity alpha-rarefaction \
    --i-table $delivery_dir/pjnb-phyl-tab-vag.qza \
    --i-phylogeny $data_dir/reference-tree.qza \
    --p-max-depth 10000 \
    --m-metadata-file $delivery_dir/metadata_vaginal.tsv \
    --o-visualization $delivery_dir/alpha-rare-vag.qzv

In [None]:
# Display the alpha-rarefaction curves of the vaginal-delivery samples inline.
vag_rarefaction_qzv = f'{delivery_dir}/alpha-rare-vag.qzv'
Visualization.load(vag_rarefaction_qzv)

In [None]:
! qiime diversity core-metrics-phylogenetic \
  --i-table $delivery_dir/pjnb-phyl-tab-vag.qza \
  --i-phylogeny $data_dir/reference-tree.qza \
  --m-metadata-file $delivery_dir/metadata_vaginal.tsv \
  --p-sampling-depth 9000 \
  --output-dir $delivery_dir/core-metrics-results-vag

In [None]:
! qiime taxa barplot \
    --i-table $delivery_dir/pjnb-phyl-tab-vag.qza \
    --i-taxonomy ../taxonomy/data/taxonomy_classification.qza \
    --m-metadata-file $or_dir/metadata.tsv \
    --o-visualization $delivery_dir/taxa-bar-plots-vag.qzv

In [10]:
# Display the taxonomic bar plot of the vaginal-delivery samples inline.
vag_barplot_qzv = f'{delivery_dir}/taxa-bar-plots-vag.qzv'
Visualization.load(vag_barplot_qzv)

In [None]:
! qiime taxa collapse \
    --i-table $delivery_dir/pjnb-phyl-tab-vag.qza \
    --i-taxonomy ../taxonomy/data/taxonomy_classification.qza \
    --p-level 6 \
    --o-collapsed-table $data_dir/table_abund_l6-vag.qza

In [None]:
! qiime taxa barplot \
    --i-table $delivery_dir/pjnb-phyl-tab-vag.qza \
    --i-taxonomy ../taxonomy/data/taxonomy_classification.qza \
    --m-metadata-file $or_dir/metadata.tsv \
    --o-visualization $delivery_dir/taxa-bar-plots-l6-vag.qzv

In [None]:
# Display the bar plot written by the preceding cell (taxa-bar-plots-l6-vag.qzv).
# The plain taxa-bar-plots-vag.qzv is already displayed further up, so loading it
# again here was a copy-paste slip.
Visualization.load(f'{delivery_dir}/taxa-bar-plots-l6-vag.qzv')

### Cesarean delivery

In [None]:
! qiime feature-table filter-samples \
    --i-table $data_dir/phylogeny_filtered_table.qza \
    --m-metadata-file $delivery_dir/metadata_cesarean.tsv \
    --o-filtered-table $delivery_dir/pjnb-phyl-tab-ces.qza

In [None]:
! qiime feature-table summarize \
  --i-table $delivery_dir/pjnb-phyl-tab-ces.qza \
  --m-sample-metadata-file $delivery_dir/metadata_cesarean.tsv \
  --o-visualization $delivery_dir/pjnb-phyl-tab-ces.qzv

In [None]:
# Display the feature-table summary of the Cesarean-delivery samples inline.
ces_summary_qzv = f'{delivery_dir}/pjnb-phyl-tab-ces.qzv'
Visualization.load(ces_summary_qzv)

In [None]:
! qiime diversity alpha-rarefaction \
    --i-table $delivery_dir/pjnb-phyl-tab-ces.qza \
    --i-phylogeny $data_dir/reference-tree.qza \
    --p-max-depth 10000 \
    --m-metadata-file $delivery_dir/metadata_cesarean.tsv \
    --o-visualization $delivery_dir/alpha-rare-ces.qzv

In [None]:
# Display the alpha-rarefaction curves of the Cesarean-delivery samples inline.
ces_rarefaction_qzv = f'{delivery_dir}/alpha-rare-ces.qzv'
Visualization.load(ces_rarefaction_qzv)

In [None]:
! qiime diversity core-metrics-phylogenetic \
  --i-table $delivery_dir/pjnb-phyl-tab-ces.qza \
  --i-phylogeny $data_dir/reference-tree.qza \
  --m-metadata-file $delivery_dir/metadata_cesarean.tsv \
  --p-sampling-depth 9000 \
  --output-dir $delivery_dir/core-metrics-results-ces

In [None]:
! qiime taxa barplot \
    --i-table $delivery_dir/pjnb-phyl-tab-ces.qza \
    --i-taxonomy ../taxonomy/data/taxonomy_classification.qza \
    --m-metadata-file $or_dir/metadata.tsv \
    --o-visualization $delivery_dir/taxa-bar-plots-ces.qzv

In [11]:
# Display the taxonomic bar plot of the Cesarean-delivery samples inline.
ces_barplot_qzv = f'{delivery_dir}/taxa-bar-plots-ces.qzv'
Visualization.load(ces_barplot_qzv)

### Emergency cesarean delivery

In [None]:
! qiime feature-table filter-samples \
    --i-table $date_dir/phylogeny_filtered_table.qza \
    --m-metadata-file $delivery_dir/metadata_emergency_cesarean.tsv \
    --o-filtered-table $delivery_dir/pjnb-phyl-tab-eces.qza

In [None]:
! qiime feature-table summarize \
  --i-table $delivery_dir/pjnb-phyl-tab-eces.qza \
  --m-sample-metadata-file $delivery_dir/metadata_emergency_cesarean.tsv \
  --o-visualization $delivery_dir/pjnb-phyl-tab-eces.qzv

In [None]:
# Display the feature-table summary of the emergency-Cesarean samples inline.
eces_summary_qzv = f'{delivery_dir}/pjnb-phyl-tab-eces.qzv'
Visualization.load(eces_summary_qzv)

In [None]:
! qiime diversity alpha-rarefaction \
    --i-table $delivery_dir/pjnb-phyl-tab-eces.qza \
    --i-phylogeny $data_dir/reference-tree.qza \
    --p-max-depth 10000 \
    --m-metadata-file $delivery_dir/metadata_emergency_cesarean.tsv \
    --o-visualization $delivery_dir/alpha-rare-eces.qzv

In [None]:
# Display the alpha-rarefaction curves of the emergency-Cesarean samples inline.
eces_rarefaction_qzv = f'{delivery_dir}/alpha-rare-eces.qzv'
Visualization.load(eces_rarefaction_qzv)

In [None]:
! qiime diversity core-metrics-phylogenetic \
  --i-table $delivery_dir/pjnb-phyl-tab-eces.qza \
  --i-phylogeny $data_dir/reference-tree.qza \
  --m-metadata-file $delivery_dir/metadata_emergency_cesarean.tsv \
  --p-sampling-depth 9000 \
  --output-dir $delivery_dir/core-metrics-results-eces

In [None]:
! qiime taxa barplot \
    --i-table $delivery_dir/pjnb-phyl-tab-eces.qza \
    --i-taxonomy ../taxonomy/data/taxonomy_classification.qza \
    --m-metadata-file $or_dir/metadata.tsv \
    --o-visualization $delivery_dir/taxa-bar-plots-eces.qzv

In [None]:
# Display the taxonomic bar plot of the emergency-Cesarean samples inline.
eces_barplot_qzv = f'{delivery_dir}/taxa-bar-plots-eces.qzv'
Visualization.load(eces_barplot_qzv)

## Differential abundance in vaginal vs. cesarean delivery
The differential abundance is tested using an ANCOM test on data filtered for Vaginal and Cesarean delivery mode.

In [None]:
! qiime feature-table filter-samples \
    --i-table $data_dir/phylogeny_filtered_table.qza \
    --m-metadata-file $or_dir/metadata.tsv \
    --p-where "[delivery_mode]='Vaginal' or [delivery_mode]='Cesarean'" \
    --o-filtered-table $delivery_dir/pjnb-phyl-tab-vag-ces.qza

In [None]:
! qiime composition add-pseudocount \
    --i-table $delivery_dir/pjnb-phyl-tab-vag-ces.qza \
    --o-composition-table $delivery_dir/pjnb-phyl-tab-vag-ces-comp.qza

In [None]:
! qiime composition ancom \
    --i-table $delivery_dir/pjnb-phyl-tab-vag-ces-comp.qza \
    --m-metadata-file $or_dir/metadata.tsv \
    --m-metadata-column delivery_mode \
    --p-transform-function log \
    --o-visualization $delivery_dir/ancom_delivery_mode.qzv

In [None]:
# Display the Vaginal vs. Cesarean ANCOM result inline.
ancom_vag_ces_qzv = f'{delivery_dir}/ancom_delivery_mode.qzv'
Visualization.load(ancom_vag_ces_qzv)

### Differential abundance in cesarean vs. emergency cesarean delivery
Out of curiosity, the difference between Cesarean and emergency Cesarean delivery was also explored.

In [None]:
! qiime feature-table filter-samples \
    --i-table $data_dir/phylogeny_filtered_table.qza \
    --m-metadata-file $or_dir/metadata.tsv \
    --p-where "[delivery_mode]='Cesarean' or [delivery_mode]='Cesarean_emergency'" \
    --o-filtered-table $delivery_dir/pjnb-phyl-tab-ces-eces.qza

In [None]:
! qiime composition add-pseudocount \
    --i-table $delivery_dir/pjnb-phyl-tab-ces-eces.qza \
    --o-composition-table $delivery_dir/pjnb-phyl-tab-ces-eces-comp.qza

In [None]:
! qiime composition ancom \
    --i-table $delivery_dir/pjnb-phyl-tab-ces-eces-comp.qza \
    --m-metadata-file $or_dir/metadata.tsv \
    --m-metadata-column delivery_mode \
    --p-transform-function log \
    --o-visualization $delivery_dir/ancom_delivery_mode_ces_eces.qzv

In [None]:
# Display the Cesarean vs. emergency-Cesarean ANCOM result inline.
ancom_ces_eces_qzv = f'{delivery_dir}/ancom_delivery_mode_ces_eces.qzv'
Visualization.load(ancom_ces_eces_qzv)

# Data display
The taxonomic classification is collapsed, filtered in a first step to remove very low-abundance features, and exported to a dataframe. The dataframe is then filtered to contain only the features with an overall abundance of 1 percent or more. These features are then normalized to relative abundance. \
For the volcano plots, the ANCOM tables are extracted, merged, and then plotted to best display the result.

In [None]:
! qiime taxa collapse \
    --i-table ../taxonomy/data/PJNB_dada2_table-filtered.qza \
    --i-taxonomy ../taxonomy/data/taxonomy_classification.qza \
    --p-level 6 \
    --o-collapsed-table $delivery_dir/taxonomyl6.qza

In [None]:
! qiime taxa filter-table \
    --i-table ../taxonomy/data/PJNB_dada2_table-filtered.qza \
    --i-taxonomy ../taxonomy/data/taxonomy_classification.qza \
    --p-mode contains \
    --p-include g__ \
    --p-exclude 'g__;,Chloroplast,Mitochondria' \
    --o-filtered-table $delivery_dir/filtered-table-ex.qza

In [None]:
! qiime feature-table filter-samples \
    --i-table $delivery_dir/filtered-table-ex.qza \
    --p-min-frequency 10000 \
    --o-filtered-table $delivery_dir/filtered-table-freq.qza

In [None]:
! qiime tools extract \
    --input-path $delivery_dir/taxa-bar-plots-filtered-l6.qzv \
    --output-path $delivery_dir/extracted-taxa-bar-filtered-l6

In [None]:
# The extracted visualization sits in a sub-directory named after its UUID. That name
# changes whenever the visualization is regenerated, so the file is looked up below
# the extraction root instead of hard-coding one UUID.
extract_root = f'{delivery_dir}/extracted-taxa-bar-filtered-l6'
level6_candidates = [
    os.path.join(extract_root, entry, 'data', 'level-6.csv')
    for entry in os.listdir(extract_root)
]
level6_candidates = [path for path in level6_candidates if os.path.isfile(path)]
if not level6_candidates:
    raise FileNotFoundError(f'no level-6.csv found below {extract_root}')
# Several extractions may coexist; use the most recently written one.
level6_csv = max(level6_candidates, key=os.path.getmtime)

taxonomyl6 = pd.read_csv(level6_csv, sep=',')

# Taxon columns are those whose name contains the kingdom prefix; the rest are metadata.
taxon_cols = [col for col in taxonomyl6.columns if 'k__Bacteria' in col]
onlytax = taxonomyl6.loc[:, taxon_cols]

frequency = onlytax.sum()   # total count per taxon
total = frequency.sum()     # total count over all taxa

# Keep taxa that make up at least 1 % of all counts. The per-taxon totals computed
# above are reused instead of re-summing every column.
over1percentfreq = onlytax.loc[:, (frequency / total >= 0.01).to_numpy()]
over1percentfreq

## Overall taxonomy
The change in taxonomic composition with age in months is displayed for qualitative analysis.

In [None]:
# Taxa that make up at least 1 % of all counts (same criterion as over1percentfreq).
abundant_mask = [(onlytax[taxon].sum() / total) >= 0.01 for taxon in onlytax.columns]
most_abundant = onlytax.loc[:, abundant_mask]
most_abundant_I = over1percentfreq.columns

# Abundant taxa plus the metadata columns needed for the later splits.
metadatacol = ['delivery_mode', 'diet_weaning', 'age_months', 'zygosity', 'diet_milk']
most_abundant_l = most_abundant_I.tolist() + metadatacol

# Abundant taxa plus the age column only.
tax_age_l = most_abundant_I.tolist() + ['age_months']
age_table_cols = [col for col in taxonomyl6.columns if col in tax_age_l]
tax_age = taxonomyl6[age_table_cols]

# Mean count of every abundant taxon per month of age.
taxperagemean = tax_age.groupby(['age_months'])[list(most_abundant.columns)].mean()

In [None]:
# Short display names: the kingdom and the rank prefixes are dropped and the
# remaining ranks are joined with '|'.
short_taxon_names = {
    'k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Ruminococcaceae;g__Ruminococcus':
        'Firmicutes|Clostridia|Clostridiales|Ruminococcaceae|Ruminococcus',
    'k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae;g__Blautia':
        'Firmicutes|Clostridia|Clostridiales|Lachnospiraceae|Blautia',
    'k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae;g__Clostridium':
        'Firmicutes|Clostridia|Clostridiales|Lachnospiraceae|Clostridium',
    'k__Bacteria;p__Bacteroidetes;c__Bacteroidia;o__Bacteroidales;f__Prevotellaceae;g__Prevotella':
        'Bacteroidetes|Bacteroidia|Bacteroidales|Prevotellaceae|Prevotella',
    'k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae;g__[Ruminococcus]':
        'Firmicutes|Clostridia|Clostridiales|Lachnospiraceae|[Ruminococcus]',
    'k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Clostridiaceae;g__Clostridium':
        'Firmicutes|Clostridia|Clostridiales|Clostridiaceae|Clostridium',
    'k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Ruminococcaceae;g__Faecalibacterium':
        'Firmicutes|Clostridia|Clostridiales|Ruminococcaceae|Faecalibacterium',
    'k__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Streptococcaceae;g__Streptococcus':
        'Firmicutes|Bacilli|Lactobacillales|Streptococcaceae|Streptococcus',
    'k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae;g__Coprococcus':
        'Firmicutes|Clostridia|Clostridiales|Lachnospiraceae|Coprococcus',
    'k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae;g__Roseburia':
        'Firmicutes|Clostridia|Clostridiales|Lachnospiraceae|Roseburia',
    'k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae;g__Lachnospira':
        'Firmicutes|Clostridia|Clostridiales|Lachnospiraceae|Lachnospira',
    'k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae;g__Dorea':
        'Firmicutes|Clostridia|Clostridiales|Lachnospiraceae|Dorea',
    'k__Bacteria;p__Bacteroidetes;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides':
        'Bacteroidetes|Bacteroidia|Bacteroidales|Bacteroidaceae|Bacteroides',
    'k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Bifidobacteriales;f__Bifidobacteriaceae;g__Bifidobacterium':
        'Actinobacteria|Actinobacteria|Bifidobacteriales|Bifidobacteriaceae|Bifidobacterium',
}

# Relabel the columns of the per-month means.
taxperagemean = taxperagemean.rename(columns=short_taxon_names)

In [None]:
# Turn the mean counts per month into row-wise fractions.
month_totals = taxperagemean.sum(axis=1)
taxperagemean_rel = taxperagemean.div(month_totals, axis=0)

sns.set(style='white', palette="tab20c",
        rc={'figure.figsize': (10, 5), 'font.family': ['Helvetica']})
with sns.plotting_context("notebook", font_scale=1):
    # Stacked bars, one bar per month of age.
    ax = taxperagemean_rel.plot.bar(stacked=True, width=0.9)
    ax.set(xlabel='Month of life', ylabel='', xlim=(0.5, 30.5))

    # Legend entries in a fixed, hand-picked order, placed right of the axes.
    # `order` assumes at least 14 plotted taxa.
    handles, labels = ax.get_legend_handles_labels()
    order = [13, 12, 3, 7, 1, 2, 8, 11, 10, 9, 4, 6, 0, 5]
    legend = ax.legend(
        [handles[idx] for idx in order],
        [labels[idx] for idx in order],
        loc='center left',
        bbox_to_anchor=(1, 0.5),
    )

    # Show months as integers ("8" instead of "8.0") and keep the labels horizontal.
    xlabels = [tick.get_text().replace('.0', '') for tick in ax.get_xticklabels()]
    ax.set_xticklabels(xlabels, rotation=0)
plt.savefig(f'{figure_dir}/RelativeAbundAll-c.png', bbox_inches='tight', dpi=600)

## Delivery and milk Diet
The qualitative interplay between delivery mode and milk diet is displayed as the relative abundance of the most abundant taxa per month of age.

In [None]:
# Abundant taxa plus the selected metadata columns.
taxmetfilt = taxonomyl6.loc[:, taxonomyl6.columns.isin(most_abundant_l)]

# Row masks for the two delivery modes and the two diet_milk codes.
born_cesarean = taxmetfilt['delivery_mode'] == 'Cesarean'
born_vaginal = taxmetfilt['delivery_mode'] == 'Vaginal'
milk_bd = taxmetfilt['diet_milk'] == 'bd'
milk_fd = taxmetfilt['diet_milk'] == 'fd'

# One subset per (delivery mode, diet_milk) combination.
tax_ces_bd = taxmetfilt[milk_bd & born_cesarean]
tax_ces_fd = taxmetfilt[milk_fd & born_cesarean]
tax_vag_bd = taxmetfilt[milk_bd & born_vaginal]
tax_vag_fd = taxmetfilt[milk_fd & born_vaginal]

In [None]:
# Inspect the table of abundant taxa plus selected metadata columns.
taxmetfilt

In [None]:
# Names of the abundant taxa followed by the age column.
most_abundant_l2 = list(most_abundant.columns) + ['age_months']

In [None]:
# Short display names: the kingdom and the rank prefixes are dropped and the
# remaining ranks are joined with '|'. Defined once and shared by all four groups.
genus_labels = {
    'k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Ruminococcaceae;g__Ruminococcus':'Firmicutes|Clostridia|Clostridiales|Ruminococcaceae|Ruminococcus',
    'k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae;g__Blautia':'Firmicutes|Clostridia|Clostridiales|Lachnospiraceae|Blautia',
    'k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae;g__Clostridium':'Firmicutes|Clostridia|Clostridiales|Lachnospiraceae|Clostridium',
    'k__Bacteria;p__Bacteroidetes;c__Bacteroidia;o__Bacteroidales;f__Prevotellaceae;g__Prevotella':'Bacteroidetes|Bacteroidia|Bacteroidales|Prevotellaceae|Prevotella',
    'k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae;g__[Ruminococcus]':'Firmicutes|Clostridia|Clostridiales|Lachnospiraceae|[Ruminococcus]',
    'k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Clostridiaceae;g__Clostridium':'Firmicutes|Clostridia|Clostridiales|Clostridiaceae|Clostridium',
    'k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Ruminococcaceae;g__Faecalibacterium':'Firmicutes|Clostridia|Clostridiales|Ruminococcaceae|Faecalibacterium',
    'k__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Streptococcaceae;g__Streptococcus':'Firmicutes|Bacilli|Lactobacillales|Streptococcaceae|Streptococcus',
    'k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae;g__Coprococcus':'Firmicutes|Clostridia|Clostridiales|Lachnospiraceae|Coprococcus',
    'k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae;g__Roseburia':'Firmicutes|Clostridia|Clostridiales|Lachnospiraceae|Roseburia',
    'k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae;g__Lachnospira':'Firmicutes|Clostridia|Clostridiales|Lachnospiraceae|Lachnospira',
    'k__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;f__Lachnospiraceae;g__Dorea':'Firmicutes|Clostridia|Clostridiales|Lachnospiraceae|Dorea',
    'k__Bacteria;p__Bacteroidetes;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides':'Bacteroidetes|Bacteroidia|Bacteroidales|Bacteroidaceae|Bacteroides',
    'k__Bacteria;p__Actinobacteria;c__Actinobacteria;o__Bifidobacteriales;f__Bifidobacteriaceae;g__Bifidobacterium':'Actinobacteria|Actinobacteria|Bifidobacteriales|Bifidobacteriaceae|Bifidobacterium',
}

# `most_abundant_l2` also lists the grouping key 'age_months'. Aggregating it like a
# taxon would add the month number to every row total and draw it as an extra bar
# segment, so only the taxon columns are selected.
taxon_columns = [col for col in most_abundant_l2 if col != 'age_months']


def _monthly_relative_abundance(samples):
    """Return (mean counts, relative abundance) of the abundant taxa per month of age.

    Parameters
    ----------
    samples : pandas.DataFrame
        Per-sample table containing an 'age_months' column and the columns named
        in `taxon_columns`.

    Returns
    -------
    tuple of pandas.DataFrame
        The per-month mean counts with short taxon labels, and the same table with
        every row divided by its row sum.
    """
    monthly_mean = samples.groupby(['age_months'])[taxon_columns].mean()
    monthly_mean = monthly_mean.rename(columns=genus_labels)
    monthly_rel = monthly_mean.div(monthly_mean.sum(axis=1), axis=0)
    return monthly_mean, monthly_rel


tax_ces_bd_mean, tax_ces_bd_mean_rel = _monthly_relative_abundance(tax_ces_bd)
# The Cesarean/'fd' group previously divided the raw per-sample table instead of the
# per-month means; it now goes through the same computation as the other groups.
tax_ces_fd_mean, tax_ces_fd_mean_rel = _monthly_relative_abundance(tax_ces_fd)
tax_vag_bd_mean, tax_vag_bd_mean_rel = _monthly_relative_abundance(tax_vag_bd)
tax_vag_fd_mean, tax_vag_fd_mean_rel = _monthly_relative_abundance(tax_vag_fd)

In [None]:
sns.set(rc={'figure.figsize': (2.5, 2), 'font.family': ['Helvetica']}, style='white', palette="tab20c")
with sns.plotting_context("notebook", font_scale=1):
    # Stacked relative-abundance bars per month for the Cesarean / diet_milk 'bd' group.
    ax = tax_ces_bd_mean_rel.plot(kind='bar', stacked=True, width=0.9, legend=False)

    ax.set_xlabel('')
    ax.set_ylabel('')
    ax.set_xlim(-0.5, 5.5)

    # Build the legend from this figure's own bars instead of the handles and labels
    # left over from the overall-taxonomy figure, so the entries always describe what
    # is drawn here. `order` is the legend ordering defined with that earlier figure.
    panel_handles, panel_labels = ax.get_legend_handles_labels()
    legend = ax.legend(
        [panel_handles[idx] for idx in order],
        [panel_labels[idx] for idx in order],
        loc='center left',
        bbox_to_anchor=(1, 0.5),
    )

    # Show months as integers ("8" instead of "8.0") and keep the labels horizontal.
    xlabels = [tick.get_text().replace('.0', '') for tick in ax.get_xticklabels()]
    ax.set_xticklabels(xlabels, rotation=0)
plt.savefig(f'{figure_dir}/RelativeAbund_ces_bd.png', bbox_inches='tight', dpi=600)

In [None]:
# Inspect the relative-abundance table of the Cesarean / diet_milk 'fd' group.
tax_ces_fd_mean_rel

In [None]:
sns.set(style='white', palette="tab20c",
        rc={'figure.figsize': (2.5, 2), 'font.family': ['Helvetica']})
with sns.plotting_context("notebook", font_scale=1):
    # Stacked relative-abundance bars for the Cesarean / diet_milk 'fd' group, without legend.
    ax = tax_ces_fd_mean_rel.plot.bar(stacked=True, width=0.9, legend=False)

    # NOTE(review): the x-limits (0.5 to 6.5) and the output name ("..._ces_wTrue.eps")
    # differ from the three sibling panels (-0.5 to 5.5, "..._<group>.png") - confirm
    # this is intended.
    ax.set(xlabel='', ylabel='', xlim=(0.5, 6.5))

    # Show months as integers ("8" instead of "8.0") and keep the labels horizontal.
    xlabels = [tick.get_text().replace('.0', '') for tick in ax.get_xticklabels()]
    ax.set_xticklabels(xlabels, rotation=0)
plt.savefig(f'{figure_dir}/RelativeAbund_ces_wTrue.eps', bbox_inches='tight')

In [None]:
sns.set(style='white', palette="tab20c",
        rc={'figure.figsize': (2.5, 2), 'font.family': ['Helvetica']})
with sns.plotting_context("notebook", font_scale=1):
    # Stacked relative-abundance bars for the vaginal / diet_milk 'bd' group, without legend.
    ax = tax_vag_bd_mean_rel.plot.bar(stacked=True, width=0.9, legend=False)
    ax.set(xlabel='', ylabel='', xlim=(-0.5, 5.5))

    # Show months as integers ("8" instead of "8.0") and keep the labels horizontal.
    xlabels = [tick.get_text().replace('.0', '') for tick in ax.get_xticklabels()]
    ax.set_xticklabels(xlabels, rotation=0)
plt.savefig(f'{figure_dir}/RelativeAbund_vag_bd.png', bbox_inches='tight', dpi=600)

In [None]:
sns.set(style='white', palette="tab20c",
        rc={'figure.figsize': (2.5, 2), 'font.family': ['Helvetica']})
with sns.plotting_context("notebook", font_scale=1):
    # Stacked relative-abundance bars for the vaginal / diet_milk 'fd' group, without legend.
    ax = tax_vag_fd_mean_rel.plot.bar(stacked=True, width=0.9, legend=False)
    ax.set(xlabel='', ylabel='', xlim=(-0.5, 5.5))

    # Show months as integers ("8" instead of "8.0") and keep the labels horizontal.
    xlabels = [tick.get_text().replace('.0', '') for tick in ax.get_xticklabels()]
    ax.set_xticklabels(xlabels, rotation=0)
plt.savefig(f'{figure_dir}/RelativeAbund_vag_fd.png', bbox_inches='tight', dpi=600)

## Plotting of ANCOM results as a volcano plot
The ANCOM results filtered to level 6 (genus) and the unfiltered ANCOM results are plotted as a seaborn scatterplot, with some points highlighted, after extracting and merging the ANCOM data.

In [None]:
! qiime tools extract \
    --input-path $delivery_dir/ancom_delivery_mode.qzv \
    --output-path $delivery_dir/extracted-ancom_delivery_mode

In [None]:
# The extracted visualization sits in a sub-directory named after its UUID. That name
# changes whenever the visualization is regenerated, so the data directory is looked
# up below the extraction root instead of hard-coding one UUID.
ancom_root = f'{delivery_dir}/extracted-ancom_delivery_mode'
ancom_data_dirs = [
    os.path.join(ancom_root, entry, 'data') for entry in os.listdir(ancom_root)
]
ancom_data_dirs = [
    path for path in ancom_data_dirs if os.path.isfile(os.path.join(path, 'ancom.tsv'))
]
if not ancom_data_dirs:
    raise FileNotFoundError(f'no ancom.tsv found below {ancom_root}')
# Several extractions may coexist; use the most recently written one.
ancom_data_dir = max(
    ancom_data_dirs, key=lambda path: os.path.getmtime(os.path.join(path, 'ancom.tsv'))
)

ancomData = pd.read_csv(f'{ancom_data_dir}/data.tsv', sep='\t')
ancomSignificance = pd.read_csv(f'{ancom_data_dir}/ancom.tsv', sep='\t')
# The first column of ancom.tsv is read without a header name; it is called 'id'
# here so that the two tables can be merged in the next cell.
ancomSignificance = ancomSignificance.rename(columns={'Unnamed: 0': 'id'})

In [None]:
# Inner join of the two ANCOM tables on the columns they have in common.
ancomDataSig = pd.merge(ancomData, ancomSignificance, how='inner')
ancomDataSig

In [None]:
# Show the merged ANCOM table ordered by ascending 'log' value.
ancomDataSig.sort_values('log')

In [None]:
sns.set(style='white', rc={'figure.figsize': (5, 5)})

# Row masks for the three point layers.
null_rejected = ancomDataSig['Reject null hypothesis'] == True
null_kept = ancomDataSig['Reject null hypothesis'] == False
log_values = ancomDataSig['log']

# (rows, styling) per layer, drawn in this order onto the same axes.
point_layers = [
    (null_kept, {'color': 'grey', 'alpha': .3}),
    (null_rejected & (log_values > -1), {'color': 'lightskyblue', 'alpha': .3}),
    (null_rejected & (log_values < -1), {'color': 'r'}),
]
with sns.plotting_context("notebook", font_scale=1.2):
    for layer_rows, layer_style in point_layers:
        ax = sns.scatterplot(data=ancomDataSig[layer_rows], x='log', y='W', **layer_style)

ax.tick_params(axis='x', which='major', bottom=True)
ax.tick_params(axis='y', which='major', left=True)
ax.set_xlim(-2.5, 2.5)
ax.set_xlabel('log(difference)')

# Dashed guide lines: two horizontal segments at W = 4490 (outer 30 % of the axes width
# on each side) and two vertical segments at log = +1 and -1 (top 14 % of the height).
# NOTE(review): 4490 is presumably the W value above which the null is rejected - confirm.
for x_start, x_stop in ((0, .3), (.7, 1)):
    ax.axhline(y=4490, xmin=x_start, xmax=x_stop, color="grey", dashes=(5, 2))
for x_pos in (1, -1):
    ax.axvline(x=x_pos, ymin=0.86, ymax=1, color="grey", dashes=(5, 2))

ax.set_ylabel('W')
plt.savefig(f'{figure_dir}/Vulcano_taxa_600dpi.png', bbox_inches='tight', dpi=600)

In [None]:
# Keep only the samples whose `delivery_mode` metadata value is 'Vaginal' or
# 'Cesarean'; the filtered table is the input of the pseudocount step below.
# (taxonomyl6.qza is presumably the feature table collapsed to taxonomic
# level 6 -- it is created outside this part of the notebook.)
! qiime feature-table filter-samples \
    --i-table $delivery_dir/taxonomyl6.qza \
    --m-metadata-file $or_dir/metadata.tsv \
    --p-where "[delivery_mode]='Vaginal' OR [delivery_mode]='Cesarean'" \
    --o-filtered-table $delivery_dir/taxonomyl6-filt.qza

In [None]:
# Add a pseudocount to the filtered table, producing the composition table that
# the ANCOM cell below takes as input (presumably needed because the log
# transform used there cannot take zero counts).
! qiime composition add-pseudocount \
    --i-table $delivery_dir/taxonomyl6-filt.qza \
    --o-composition-table $delivery_dir/taxonomyl6-filt-comp.qza

In [None]:
# Run ANCOM on the composition table, comparing samples by the `delivery_mode`
# metadata column with the log transform function, and write the result as a
# visualization.
# NOTE(review): the output name ends in `ces_eces`, but the table was filtered
# to 'Vaginal' and 'Cesarean' samples above -- confirm the name is not a leftover.
! qiime composition ancom \
    --i-table $delivery_dir/taxonomyl6-filt-comp.qza \
    --m-metadata-file $or_dir/metadata.tsv \
    --m-metadata-column delivery_mode \
    --p-transform-function log \
    --o-visualization $delivery_dir/ancom_delivery_mode_l6_ces_eces.qzv

In [None]:
# Load the level-6 ANCOM visualization; as the last expression of the cell it
# becomes the cell output.
ancom_l6_qzv_path = f'{delivery_dir}/ancom_delivery_mode_l6_ces_eces.qzv'
Visualization.load(ancom_l6_qzv_path)

In [None]:
# Unpack the level-6 ANCOM visualization archive into a directory so that the
# tables inside it (data/data.tsv and data/ancom.tsv) can be read with pandas
# in the next cell.
! qiime tools extract \
    --input-path $delivery_dir/ancom_delivery_mode_l6_ces_eces.qzv \
    --output-path $delivery_dir/extracted-ancom_delivery_mode_l6_ces_eces

In [None]:
# Locate the extracted level-6 ANCOM results instead of hard-coding the archive
# UUID. The ANCOM cell above writes a new .qzv (with a new UUID) every time it
# runs, and `qiime tools extract` unpacks into <output-path>/<UUID>/, so a fixed
# path either breaks or silently reads stale files on a fresh run.
ancom_l6_extract_root = f'{delivery_dir}/extracted-ancom_delivery_mode_l6_ces_eces'
ancom_l6_extract_runs = [
    os.path.join(ancom_l6_extract_root, entry)
    for entry in os.listdir(ancom_l6_extract_root)
    if os.path.isfile(os.path.join(ancom_l6_extract_root, entry, 'data', 'data.tsv'))
]
if not ancom_l6_extract_runs:
    raise FileNotFoundError(
        f'No extracted ANCOM results found in {ancom_l6_extract_root}; '
        'run the `qiime tools extract` cell first.'
    )
# If several extractions are present, use the most recently written one.
ancom_l6_extract_dir = max(ancom_l6_extract_runs, key=os.path.getmtime)

# Two tables from the extracted visualization; they are merged in the next cell.
ancomDatafilt = pd.read_csv(f'{ancom_l6_extract_dir}/data/data.tsv', sep='\t')
# The first column of ancom.tsv has no header, so pandas names it 'Unnamed: 0'.
# It is renamed to 'id' -- presumably to line up with the id column of data.tsv
# for the merge (TODO confirm). `rename` returns a new frame; no in-place mutation.
ancomSignificancefilt = (
    pd.read_csv(f'{ancom_l6_extract_dir}/data/ancom.tsv', sep='\t')
    .rename(columns={'Unnamed: 0': 'id'})
)

In [None]:
# Combine the two level-6 ANCOM tables into one frame. No `on=` is given, so
# pandas joins on every column name the tables share; with an inner join, rows
# that lack a match in either table are dropped.
ancomDataSigfilt = pd.merge(ancomDatafilt, ancomSignificancefilt, how='inner')
ancomDataSigfilt

In [None]:
# Write the complete merged level-6 ANCOM table as a tab-separated file
# (the DataFrame index is written too, as in the pandas default).
ancom_table_path = f'{delivery_dir}/figures/TabANCOMtaxaDeliveryMode.tsv'
ancomDataSigfilt.to_csv(ancom_table_path, sep='\t')

In [None]:
# Keep only the rows where ANCOM rejected the null hypothesis and write that
# subset as its own tab-separated file.
hits_mask = ancomDataSigfilt['Reject null hypothesis'].eq(True)
ancomDataSigfiltT = ancomDataSigfilt.loc[hits_mask]
ancomDataSigfiltT.to_csv(f'{delivery_dir}/figures/sigANCOMtaxaHits.tsv', sep='\t')

In [None]:
# (rows, columns) of the significant-hit table; the row count is the number of hits.
n_hits, n_hit_columns = ancomDataSigfiltT.shape
(n_hits, n_hit_columns)

In [None]:
# (rows, columns) of the significant hits whose 'log' value is above zero.
positive_hits = ancomDataSigfiltT.loc[ancomDataSigfiltT['log'].gt(0)]
positive_hits.shape

In [None]:
# Volcano-style scatter of the level-6 ANCOM result: 'log' on x, 'W' on y.
sns.set(rc={'figure.figsize':(5, 5)}, style='white')

null_rejected = ancomDataSigfilt['Reject null hypothesis'].eq(True)
null_kept = ancomDataSigfilt['Reject null hypothesis'].eq(False)
log_diff = ancomDataSigfilt['log']
taxon_id = ancomDataSigfilt['id']

# (row mask, marker styling) per layer. Layers are drawn in list order, so a
# later layer paints over an earlier one wherever their points coincide.
point_layers = [
    # null hypothesis not rejected: faint grey background
    (null_kept, dict(color='grey', alpha=.3)),
    # rejected, 'log' above -0.3: light blue
    (null_rejected & log_diff.gt(-0.3), dict(color='lightskyblue', alpha=.3)),
    # rejected and at/beyond the +-0.3 thresholds: red
    (null_rejected & log_diff.le(-0.3), dict(color='r')),
    (null_rejected & log_diff.ge(0.3), dict(color='r')),
    # rejected taxa whose id contains the given name: own colour
    (null_rejected & taxon_id.str.contains('Bacteroides'), dict(color='b')),
    (null_rejected & taxon_id.str.contains('Clostridium'), dict(color='orange')),
]

with sns.plotting_context("notebook", font_scale=1.2):
    for layer_mask, layer_style in point_layers:
        ax = sns.scatterplot(
            data=ancomDataSigfilt[layer_mask],
            x='log',
            y='W',
            **layer_style,
        )

ax.tick_params(axis='both', which='major', bottom=True, left=True)
ax.set(xlim=(-2.5, 2.5), xlabel='log(difference)', ylabel='W')

# Dashed guide lines. The xmin/xmax and ymin/ymax arguments are fractions of
# the axes, not data values; the horizontal line is drawn in two pieces with a
# gap around the centre of the x axis.
# NOTE(review): y=140 is presumably the W cut-off of this ANCOM run, and the
# 0.45/0.56/0.67 fractions look hand-tuned to the current axis limits --
# confirm them if the input data changes.
for x_start, x_stop in ((0, .45), (.56, 1)):
    ax.axhline(y=140, xmin=x_start, xmax=x_stop, color="grey", dashes=(5, 2))
for x_threshold in (0.3, -0.3):
    ax.axvline(x=x_threshold, ymin=0.67, ymax=1, color="grey", dashes=(5, 2))

plt.savefig(f'{figure_dir}/Vulcano_taxafilt_2fold_600dpi.png', bbox_inches='tight', dpi=600)