In [None]:
##Training a classifier using the backbone length sequence at greengenes2 and extracting reads specific to the v3-v4 region.

In [1]:
!qiime feature-classifier extract-reads \
  --i-sequences 2022.10.backbone.full-length.fna.qza \
  --p-f-primer CCTACGGGNGGCWGCAG \
  --p-r-primer GACTACHVGGGTATCTAATCC \
  --p-min-length 400 \
  --p-max-length 500 \
  --o-reads gg2-ref-seqs.qza
#p-trunc-len was removed based on the suggestion by the tutorial about unequal lengths for the paired end sequences.
#The lengths for 400 and 500 were chosen as per the forum chat and suggestion of a relaxed interval or allowed amplicon size to not miss biologically relevant data:https://forum.qiime2.org/t/picking-values-for-p-min-length-and-p-max-length-in-qiime-feature-classifier-extract-reads/20912.

[32mSaved FeatureData[Sequence] to: gg2-ref-seqs.qza[0m
[0m

In [2]:
!qiime feature-classifier fit-classifier-naive-bayes \
  --i-reference-reads gg2-ref-seqs.qza \
  --i-reference-taxonomy 2022.10.backbone.tax.qza \
  --o-classifier classifier.qza

[32mSaved TaxonomicClassifier to: classifier.qza[0m
[0m

In [3]:
!qiime feature-classifier classify-sklearn \
  --i-classifier classifier.qza \
  --i-reads space_paired.qza \
  --o-classification space_taxonomy.qza

[32mSaved FeatureData[Taxonomy] to: space_taxonomy.qza[0m
[0m

In [None]:
# Taxa analysis

In [6]:
!qiime metadata tabulate \
  --m-input-file space_taxonomy.qza \
  --o-visualization space_taxonomy.qzv

[32mSaved Visualization to: space_taxonomy.qzv[0m
[0m

In [7]:
!qiime taxa barplot \
  --i-table space_table.qza \
  --i-taxonomy space_taxonomy.qza \
  --m-metadata-file Metadata/space_tmp.tsv \
  --o-visualization space_taxa.barplot.qzv

[32mSaved Visualization to: space_taxa.barplot.qzv[0m
[0m

In [None]:
#phylogeny

In [1]:
!qiime phylogeny align-to-tree-mafft-iqtree \
  --i-sequences space_paired.qza \
  --o-alignment aligned-space_paired.qza \
  --o-masked-alignment masked-aligned-space_paired.qza \
  --o-tree space_unrooted-tree.qza \
  --o-rooted-tree space_rooted-tree.qza \
  --p-alrt 1000 \
  --p-seed 42 \
  --p-n-threads 8

[32mSaved FeatureData[AlignedSequence] to: aligned-space_paired.qza[0m
[32mSaved FeatureData[AlignedSequence] to: masked-aligned-space_paired.qza[0m
[32mSaved Phylogeny[Unrooted] to: space_unrooted-tree.qza[0m
[32mSaved Phylogeny[Rooted] to: space_rooted-tree.qza[0m
[0m

In [None]:
#Creating a visualization, rarefile

In [3]:
!qiime feature-table summarize \
  --i-table space_table.qza \
  --o-visualization space_table.qzv \
  --m-sample-metadata-file Metadata/space_tmp.tsv

[32mSaved Visualization to: space_table.qzv[0m
[0m

In [None]:
# Perform rare fraction on the feature table

In [4]:
!qiime feature-table rarefy \
  --i-table space_table.qza \
  --p-sampling-depth 4700 \
  --o-rarefied-table space_rarefied_table.qza

[32mSaved FeatureTable[Frequency] to: space_rarefied_table.qza[0m
[0m

In [5]:
!qiime feature-table summarize \
  --i-table space_rarefied_table.qza \
  --o-visualization space_rarefied_table.qzv \
  --m-sample-metadata-file Metadata/space_tmp.tsv

[32mSaved Visualization to: space_rarefied_table.qzv[0m
[0m

In [None]:
# Alpha diversity

In [6]:
!qiime diversity alpha \
 --i-table space_rarefied_table.qza \
 --p-metric shannon \
 --o-alpha-diversity space_shannon_vector.qza

[32mSaved SampleData[AlphaDiversity] to: space_shannon_vector.qza[0m
[0m

In [7]:
!qiime diversity alpha-phylogenetic \
  --i-table space_rarefied_table.qza \
  --i-phylogeny space_rooted-tree.qza \
  --p-metric faith_pd \
  --o-alpha-diversity space_faith_pd_vector.qza

[32mSaved SampleData[AlphaDiversity] to: space_faith_pd_vector.qza[0m
[0m

In [8]:
!qiime tools export --input-path space_shannon_vector.qza --output-path .
!mv alpha-diversity.tsv space_shannon.tsv

[32mExported space_shannon_vector.qza as AlphaDiversityDirectoryFormat to directory .[0m
[0m

In [1]:
!qiime diversity alpha-rarefaction \
  --i-table space_rarefied_table.qza \
  --i-phylogeny space_rooted-tree.qza \
  --p-max-depth 4700 \
  --m-metadata-file Metadata/space_tmp.tsv \
  --o-visualization space_alpha_rarefaction-1.qzv

[32mSaved Visualization to: space_alpha_rarefaction-1.qzv[0m
[0m

In [None]:
# Alpha group significance by Kruskal-Wallis H test: 
# whether different groups of samples have different alpha diversity 

In [10]:
!qiime diversity alpha-group-significance \
  --i-alpha-diversity space_shannon_vector.qza \
  --m-metadata-file Metadata/space_tmp.tsv \
  --o-visualization space_shannon_group_significance.qzv

[32mSaved Visualization to: space_shannon_group_significance.qzv[0m
[0m

In [None]:
# Beta diversity

In [11]:
!qiime diversity beta-phylogenetic \
  --i-table space_rarefied_table.qza \
  --i-phylogeny space_rooted-tree.qza \
  --p-metric weighted_unifrac \
  --o-distance-matrix space_weighted_unifrac_distance_matrix.qza

[32mSaved DistanceMatrix to: space_weighted_unifrac_distance_matrix.qza[0m
[0m

In [12]:
!qiime tools export --input-path space_weighted_unifrac_distance_matrix.qza --output-path .
!mv distance-matrix.tsv space_weighted_unifrac.tsv

[32mExported space_weighted_unifrac_distance_matrix.qza as DistanceMatrixDirectoryFormat to directory .[0m
[0m

In [None]:
# Beta group significance by Adonis test (analysis of variance using distance matrices:

In [13]:
!cat Metadata/space_tmp.tsv | tr -d '-' > space_metadata_rev.tsv

In [9]:
!qiime diversity adonis \
  --i-distance-matrix space_weighted_unifrac_distance_matrix.qza \
  --m-metadata-file space_metadata_rev.tsv \
  --p-formula "FactorValueTime" \
  --o-visualization space_weighted_unifrac_Time_adonis.qzv

[32mSaved Visualization to: space_weighted_unifrac_Time_adonis.qzv[0m
[0m

In [None]:
# Ordination: to enhance the visualization their relationships

In [None]:
# 1. Principal Coordinates Analysis (PCoA) of beta diversity distance matrices

In [11]:
!qiime diversity pcoa \
  --i-distance-matrix space_weighted_unifrac_distance_matrix.qza \
  --o-pcoa space_weighted_unifrac_pcoa_results.qza

[32mSaved PCoAResults to: space_weighted_unifrac_pcoa_results.qza[0m
[0m

In [None]:
# 2. EMPeror: Interactive 3D scatter plots of PCoA results 

In [17]:
!qiime emperor plot \
  --i-pcoa space_weighted_unifrac_pcoa_results.qza \
  --m-metadata-file space_metadata_rev.tsv \
  --o-visualization space_weighted_unifrac_emperor.qzv

[32mSaved Visualization to: space_weighted_unifrac_emperor.qzv[0m
[0m

In [21]:
!qiime emperor plot \
  --i-pcoa space_weighted_unifrac_pcoa_results.qza \
  --m-metadata-file space_metadata_rev.tsv \
  --p-custom-axes ParameterValue_SampleCollectionTimepoint \
  --o-visualization space_weighted_unifrac_emperor_time.qzv

[31m[1mPlugin error from emperor:

  All values in a custom axis must be numeric, this summary shows the invalid values and the number of times they were found in column 'ParameterValue_SampleCollectionTimepoint':
  90 days on ISS            10
  10 days on ISS             8
  landing day                8
  60 days before launch      8
  180 days after return      7
  90 days before launch      6
  60 days after return       6
  18 days after return       5
  180 days before launch     5
  63 days after return       4
  33 days after return       4
  14 days before return      4
  120 days before launch     3
  30 days after return       3
  240 days before launch     2
  1 day before return        2
  1 day after return         2
  150 days before launch     1
  7 days on ISS              1

Debug info has been saved to /tmp/qiime2-q2cli-err-ycc8d90s.log[0m
[0m

In [None]:
# 3. (dont need to do for microbiome) Beta rarefaction: to assess whether a desired sampling depth can grant stable measurements

In [28]:
!qiime diversity beta-rarefaction \
  --i-table space_table.qza \
  --i-phylogeny space_rooted-tree.qza \
  --p-metric space_weighted_unifrac \
  --p-clustering-method nj \
  --p-sampling-depth 1103 \
  --m-metadata-file Metadata/space_tmp.tsv \
  --o-visualization space_weighted_unifrac_rarefaction.qzv

[31m[1mPlugin error from diversity:

  Parameter 'metric' received 'space_weighted_unifrac' as an argument, which is incompatible with parameter type: Str % Choices('aitchison', 'braycurtis', 'canberra', 'canberra_adkins', 'chebyshev', 'cityblock', 'correlation', 'cosine', 'dice', 'euclidean', 'generalized_unifrac', 'hamming', 'jaccard', 'jensenshannon', 'kulsinski', 'matching', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'unweighted_unifrac', 'weighted_normalized_unifrac', 'weighted_unifrac', 'yule')

Debug info has been saved to /tmp/qiime2-q2cli-err-0rm4ivg6.log[0m
[0m

In [None]:
# 4. PCoA biplot: 
# reveal which features (taxonomic units) are driven the separation of sample clusters on which axes.

In [None]:
# normalize feature counts into relative abundances

In [24]:
!qiime feature-table relative-frequency \
  --i-table space_rarefied_table.qza \
  --o-relative-frequency-table space_relative_table.qza

[32mSaved FeatureTable[RelativeFrequency] to: space_relative_table.qza[0m
[0m

In [None]:
#  add features onto a PCoA

In [25]:
!qiime diversity pcoa-biplot \
  --i-pcoa space_weighted_unifrac_pcoa_results.qza \
  --i-features space_relative_table.qza \
  --o-biplot space_weighted_unifrac_pcoa_biplot.qza

[32mSaved PCoAResults % Properties('biplot') to: space_weighted_unifrac_pcoa_biplot.qza[0m
[0m

In [None]:
# generate an interactive visualization using the biplot command. 
# Note that the feature metadata is the taxonomy

In [27]:
!qiime emperor biplot \
  --i-biplot space_weighted_unifrac_pcoa_biplot.qza \
  --m-sample-metadata-file Metadata/space_tmp.tsv \
  --m-feature-metadata-file space_taxonomy.qza \
  --o-visualization space_weighted_unifrac_emperor_biplot.qzv

[32mSaved Visualization to: space_weighted_unifrac_emperor_biplot.qzv[0m
[0m

In [None]:
# To do Next
# Differential abundance- clr transform, ALDEX2
# PiCRUST: Very end when we have time, PCoA bioplot