In [5]:
import os
import pandas as pd
import qiime2 as q2
from skbio import OrdinationResults
from qiime2 import Visualization
from seaborn import scatterplot

%matplotlib inline

In [6]:
# Directory locations used by the cells below (all relative paths).
data_dir = 'data'  # the qiime cells read the input feature table and reference tree from here
or_dir = '../data'  # metadata.tsv is read from here; presumably the original data folder — TODO confirm
twin_dir = 'data/twin_comparison'  # not referenced by any of the visible cells
delivery_dir = 'data/deliverymode_comparison'  # per-delivery-mode metadata files and qiime outputs

In [4]:
# Read the tab-separated sample metadata (read_table defaults to sep='\t')
# and preview the first rows as the cell output.
metadata = pd.read_table(f'{or_dir}/metadata.tsv')
metadata.head()

Unnamed: 0,id,Library Layout,Instrument,collection_date,geo_location_name,geo_latitude,geo_longitude,host_id,age_days,weight_kg,...,birth_length_cm,sex,delivery_mode,zygosity,race,ethnicity,delivery_preterm,diet_milk,diet_weaning,age_months
0,ERR1314182,PAIRED,Illumina MiSeq,2011-11-11 00:00:00,"USA, Missouri, St. Louis",38.63699,-90.263794,42.1,232.0,,...,47.0,male,Cesarean,Monozygotic,Caucasian,Not Hispanic,True,fd,True,8.0
1,ERR1314183,PAIRED,Illumina MiSeq,2010-12-11 00:00:00,"USA, Missouri, St. Louis",38.63699,-90.263794,27.2,192.0,,...,45.0,female,Cesarean,Dizygotic,Caucasian,Hispanic,True,fd,True,6.0
2,ERR1314184,PAIRED,Illumina MiSeq,2011-12-11 00:00:00,"USA, Missouri, St. Louis",38.63699,-90.263794,28.1,536.0,,...,51.0,female,Cesarean,Monozygotic,Caucasian,Not Hispanic,False,,,18.0
3,ERR1314185,PAIRED,Illumina MiSeq,2011-12-11 00:00:00,"USA, Missouri, St. Louis",38.63699,-90.263794,28.2,537.0,,...,50.0,female,Cesarean,Monozygotic,Caucasian,Not Hispanic,False,,,18.0
4,ERR1314186,PAIRED,Illumina MiSeq,2013-01-12 00:00:00,"USA, Missouri, St. Louis",38.63699,-90.263794,39.2,688.0,,...,48.0,male,Cesarean,Monozygotic,African-American,Not Hispanic,True,,,23.0


**Delivery Mode comparison**\
Here the data is first separated into data sets, each containing all samples with the same delivery mode.

*Data sorting*

In [9]:
# Split the sample metadata by delivery mode and write one TSV per mode.
# These TSV files are the metadata inputs of the qiime cells further down.

# Create the output directory first so to_csv does not fail on a fresh checkout.
os.makedirs(delivery_dir, exist_ok=True)

# Grouping of the full metadata by host and age (not used further in this cell).
meta_delimode_sort = metadata.groupby(['host_id', 'age_days'])

# One subset per value of the `delivery_mode` column.
meta_cesarean = metadata[metadata.delivery_mode == 'Cesarean']
meta_emcesarean = metadata[metadata.delivery_mode == 'Cesarean_emergency']
meta_vaginal = metadata[metadata.delivery_mode == 'Vaginal']

# index=False keeps the pandas row index out of the written files.
meta_cesarean.to_csv(f'{delivery_dir}/metadata_cesarean.tsv', sep='\t', index=False)
meta_emcesarean.to_csv(f'{delivery_dir}/metadata_emergency_cesarean.tsv', sep='\t', index=False)
meta_vaginal.to_csv(f'{delivery_dir}/metadata_vaginal.tsv', sep='\t', index=False)

In [12]:
# Group each delivery-mode subset by host and sampling age.
meta_cesarean_grouped, meta_emcesarean_grouped, meta_vaginal_grouped = (
    subset.groupby(['host_id', 'age_days'])
    for subset in (meta_cesarean, meta_emcesarean, meta_vaginal)
)

In [5]:
# Reload the per-delivery-mode metadata tables from the TSV files on disk.
meta_cesarean, meta_emcesarean, meta_vaginal = (
    pd.read_csv(tsv_path, sep='\t')
    for tsv_path in (
        f'{delivery_dir}/metadata_cesarean.tsv',
        f'{delivery_dir}/metadata_emergency_cesarean.tsv',
        f'{delivery_dir}/metadata_vaginal.tsv',
    )
)

*Vaginal delivery*

In [13]:
# Run `qiime feature-table filter-samples` on the phylogeny-filtered table with the
# vaginal-delivery metadata file; the filtered table is written to pjnb-phyl-tab-vag.qza.
! qiime feature-table filter-samples \
    --i-table $data_dir/phylogeny_filtered_table.qza \
    --m-metadata-file $delivery_dir/metadata_vaginal.tsv \
    --o-filtered-table $delivery_dir/pjnb-phyl-tab-vag.qza

[32mSaved FeatureTable[Frequency] to: data/pjnb-phyl-tab-vag.qza[0m
[0m

In [14]:
# Run `qiime feature-table summarize` on the vaginal-delivery table together with its
# sample metadata; the result is saved as the visualization pjnb-phyl-tab-vag.qzv.
! qiime feature-table summarize \
  --i-table $delivery_dir/pjnb-phyl-tab-vag.qza \
  --m-sample-metadata-file $delivery_dir/metadata_vaginal.tsv \
  --o-visualization $delivery_dir/pjnb-phyl-tab-vag.qzv

[32mSaved Visualization to: data/deliverymode_comparison/pjnb-phyl-tab-vag.qzv[0m
[0m

In [15]:
# Load the saved table summary for the vaginal-delivery samples; as the last
# expression of the cell, the loaded visualization becomes the cell output.
Visualization.load(f'{delivery_dir}/pjnb-phyl-tab-vag.qzv')

In [26]:
# Run `qiime diversity alpha-rarefaction` on the vaginal-delivery table with the
# reference tree, up to a maximum depth of 10000; saved as alpha-rare-vag.qzv.
! qiime diversity alpha-rarefaction \
    --i-table $delivery_dir/pjnb-phyl-tab-vag.qza \
    --i-phylogeny $data_dir/reference-tree.qza \
    --p-max-depth 10000 \
    --m-metadata-file $delivery_dir/metadata_vaginal.tsv \
    --o-visualization $delivery_dir/alpha-rare-vag.qzv

[32mSaved Visualization to: data/alpha-rare-vag.qzv[0m
[0m

In [27]:
# Load the saved alpha-rarefaction visualization for the vaginal-delivery samples;
# as the last expression of the cell, it becomes the cell output.
Visualization.load(f'{delivery_dir}/alpha-rare-vag.qzv')

In [29]:
# Run `qiime diversity core-metrics-phylogenetic` on the vaginal-delivery table with
# the reference tree at a sampling depth of 9000. All results (rarefied table,
# alpha-diversity vectors, distance matrices, ...) go to core-metrics-results-vag.
! qiime diversity core-metrics-phylogenetic \
  --i-table $delivery_dir/pjnb-phyl-tab-vag.qza \
  --i-phylogeny $data_dir/reference-tree.qza \
  --m-metadata-file $delivery_dir/metadata_vaginal.tsv \
  --p-sampling-depth 9000 \
  --output-dir $delivery_dir/core-metrics-results-vag

[32mSaved FeatureTable[Frequency] to: data/deliverymode_comparison/core-metrics-results-vag/rarefied_table.qza[0m
[32mSaved SampleData[AlphaDiversity] to: data/deliverymode_comparison/core-metrics-results-vag/faith_pd_vector.qza[0m
[32mSaved SampleData[AlphaDiversity] to: data/deliverymode_comparison/core-metrics-results-vag/observed_features_vector.qza[0m
[32mSaved SampleData[AlphaDiversity] to: data/deliverymode_comparison/core-metrics-results-vag/shannon_vector.qza[0m
[32mSaved SampleData[AlphaDiversity] to: data/deliverymode_comparison/core-metrics-results-vag/evenness_vector.qza[0m
[32mSaved DistanceMatrix to: data/deliverymode_comparison/core-metrics-results-vag/unweighted_unifrac_distance_matrix.qza[0m
[32mSaved DistanceMatrix to: data/deliverymode_comparison/core-metrics-results-vag/weighted_unifrac_distance_matrix.qza[0m
[32mSaved DistanceMatrix to: data/deliverymode_comparison/core-metrics-results-vag/jaccard_distance_matrix.qza[0m
[32mSaved DistanceMatrix to

*Cesarean delivery*

In [16]:
# Run `qiime feature-table filter-samples` on the phylogeny-filtered table with the
# cesarean metadata file; the filtered table is written to pjnb-phyl-tab-ces.qza.
! qiime feature-table filter-samples \
    --i-table $data_dir/phylogeny_filtered_table.qza \
    --m-metadata-file $delivery_dir/metadata_cesarean.tsv \
    --o-filtered-table $delivery_dir/pjnb-phyl-tab-ces.qza

[32mSaved FeatureTable[Frequency] to: data/pjnb-phyl-tab-ces.qza[0m
[0m

In [19]:
# Run `qiime feature-table summarize` on the cesarean table together with its
# sample metadata; the result is saved as the visualization pjnb-phyl-tab-ces.qzv.
! qiime feature-table summarize \
  --i-table $delivery_dir/pjnb-phyl-tab-ces.qza \
  --m-sample-metadata-file $delivery_dir/metadata_cesarean.tsv \
  --o-visualization $delivery_dir/pjnb-phyl-tab-ces.qzv

[32mSaved Visualization to: data/deliverymode_comparison/pjnb-phyl-tab-ces.qzv[0m
[0m

In [20]:
# Load the saved table summary for the cesarean samples; as the last
# expression of the cell, the loaded visualization becomes the cell output.
Visualization.load(f'{delivery_dir}/pjnb-phyl-tab-ces.qzv')

In [30]:
# Run `qiime diversity alpha-rarefaction` on the cesarean table with the
# reference tree, up to a maximum depth of 10000; saved as alpha-rare-ces.qzv.
! qiime diversity alpha-rarefaction \
    --i-table $delivery_dir/pjnb-phyl-tab-ces.qza \
    --i-phylogeny $data_dir/reference-tree.qza \
    --p-max-depth 10000 \
    --m-metadata-file $delivery_dir/metadata_cesarean.tsv \
    --o-visualization $delivery_dir/alpha-rare-ces.qzv

[32mSaved Visualization to: data/deliverymode_comparison/alpha-rare-ces.qzv[0m
[0m

In [31]:
# Load the saved alpha-rarefaction visualization for the cesarean samples;
# as the last expression of the cell, it becomes the cell output.
Visualization.load(f'{delivery_dir}/alpha-rare-ces.qzv')

In [32]:
# Run `qiime diversity core-metrics-phylogenetic` on the cesarean table with
# the reference tree at a sampling depth of 9000. All results (rarefied table,
# alpha-diversity vectors, distance matrices, ...) go to core-metrics-results-ces.
! qiime diversity core-metrics-phylogenetic \
  --i-table $delivery_dir/pjnb-phyl-tab-ces.qza \
  --i-phylogeny $data_dir/reference-tree.qza \
  --m-metadata-file $delivery_dir/metadata_cesarean.tsv \
  --p-sampling-depth 9000 \
  --output-dir $delivery_dir/core-metrics-results-ces

[32mSaved FeatureTable[Frequency] to: data/deliverymode_comparison/core-metrics-results-ces/rarefied_table.qza[0m
[32mSaved SampleData[AlphaDiversity] to: data/deliverymode_comparison/core-metrics-results-ces/faith_pd_vector.qza[0m
[32mSaved SampleData[AlphaDiversity] to: data/deliverymode_comparison/core-metrics-results-ces/observed_features_vector.qza[0m
[32mSaved SampleData[AlphaDiversity] to: data/deliverymode_comparison/core-metrics-results-ces/shannon_vector.qza[0m
[32mSaved SampleData[AlphaDiversity] to: data/deliverymode_comparison/core-metrics-results-ces/evenness_vector.qza[0m
[32mSaved DistanceMatrix to: data/deliverymode_comparison/core-metrics-results-ces/unweighted_unifrac_distance_matrix.qza[0m
[32mSaved DistanceMatrix to: data/deliverymode_comparison/core-metrics-results-ces/weighted_unifrac_distance_matrix.qza[0m
[32mSaved DistanceMatrix to: data/deliverymode_comparison/core-metrics-results-ces/jaccard_distance_matrix.qza[0m
[32mSaved DistanceMatrix to

*Emergency cesarean delivery*

In [21]:
! qiime feature-table filter-samples \
    --i-table $date_dir/phylogeny_filtered_table.qza \
    --m-metadata-file $delivery_dir/metadata_emergency_cesarean.tsv \
    --o-filtered-table $delivery_dir/pjnb-phyl-tab-eces.qza

[32mSaved FeatureTable[Frequency] to: data/pjnb-phyl-tab-eces.qza[0m
[0m

In [24]:
# Run `qiime feature-table summarize` on the emergency-cesarean table together with its
# sample metadata; the result is saved as the visualization pjnb-phyl-tab-eces.qzv.
! qiime feature-table summarize \
  --i-table $delivery_dir/pjnb-phyl-tab-eces.qza \
  --m-sample-metadata-file $delivery_dir/metadata_emergency_cesarean.tsv \
  --o-visualization $delivery_dir/pjnb-phyl-tab-eces.qzv

[32mSaved Visualization to: data/deliverymode_comparison/pjnb-phyl-tab-eces.qzv[0m
[0m

In [25]:
# Load the saved table summary for the emergency-cesarean samples; as the last
# expression of the cell, the loaded visualization becomes the cell output.
Visualization.load(f'{delivery_dir}/pjnb-phyl-tab-eces.qzv')

In [33]:
# Run `qiime diversity alpha-rarefaction` on the emergency-cesarean table with the
# reference tree, up to a maximum depth of 10000; saved as alpha-rare-eces.qzv.
! qiime diversity alpha-rarefaction \
    --i-table $delivery_dir/pjnb-phyl-tab-eces.qza \
    --i-phylogeny $data_dir/reference-tree.qza \
    --p-max-depth 10000 \
    --m-metadata-file $delivery_dir/metadata_emergency_cesarean.tsv \
    --o-visualization $delivery_dir/alpha-rare-eces.qzv

[32mSaved Visualization to: data/deliverymode_comparison/alpha-rare-eces.qzv[0m
[0m

In [34]:
# Load the saved alpha-rarefaction visualization for the emergency-cesarean samples;
# as the last expression of the cell, it becomes the cell output.
Visualization.load(f'{delivery_dir}/alpha-rare-eces.qzv')

In [35]:
! qiime diversity core-metrics-phylogenetic \
  --i-table $delivery_dir/pjnb-phyl-tab-vag.qza \
  --i-phylogeny $data_dir/reference-tree.qza \
  --m-metadata-file $delivery_dir/metadata_emergency_cesarean.tsv \
  --p-sampling-depth 9000 \
  --output-dir $delivery_dir/core-metrics-results-eces

[31m[1mPlugin error from diversity:

  None of the sample identifiers match between the metadata and the coordinates. Verify that you are using metadata and coordinates corresponding to the same dataset.

Debug info has been saved to /tmp/qiime2-q2cli-err-gqmik77h.log[0m
[0m