In [3]:

import qiime2

In [6]:
# Manifest file

!echo "# paired-end PHRED 33 fastq manifest file for forward and reverse reads" > manifest1.txt
!echo -e "sample-id\tforward-absolute-filepath\treverse-absolute-filepath" >> manifest1.txt
!ls *.fastq | cut -d "_" -f 1 | sort | uniq | parallel -j0 --keep-order 'echo -e "{/}\t"$PWD"/{/}_1.fastq\t"$PWD"/{/}_2.fastq"' | tr -d "'" > manifest2.txt
!mkdir manifest
!cat manifest1.txt manifest2.txt > manifest/manifest.tsv


## 2. Importing Data

In [7]:
!qiime tools import \
--type 'SampleData[PairedEndSequencesWithQuality]' \
--input-path manifest/manifest.tsv \
--output-path demux.qza \
--input-format PairedEndFastqManifestPhred33V2


[32mImported manifest/manifest.tsv as PairedEndFastqManifestPhred33V2 to demux.qza[0m
[0m

In [9]:
#############################################################################
# Summarize Data (failed attempt, kept for reference only)
#!qiime feature-table tabulate-seqs \
#  --i-data demux.qza \
#  --o-visualization initial_demux.qza_summary.qzv

# There was a problem with the command:
# (1/1) Invalid value for '--i-data': Expected an artifact of at least type
#  FeatureData[Sequence | AlignedSequence]. An artifact of type
#  SampleData[PairedEndSequencesWithQuality] was provided.
# In other words, tabulate-seqs does not accept demux.qza, because that
# artifact has type SampleData[PairedEndSequencesWithQuality].
########################################

In [12]:
# summarize and analyse (qiime2 view) before next step
!qiime demux summarize \
	--i-data demux.qza \
	--o-visualization qualities.qzv

[32mSaved Visualization to: qualities.qzv[0m
[0m

In [21]:
#3. Sequence Quality Control and Feature Table Construction

!qiime dada2 denoise-paired \
  --i-demultiplexed-seqs demux.qza \
  --p-trim-left-f 15 \
  --p-trim-left-r 15 \
  --p-trunc-len-f 240 \
  --p-trunc-len-r 240 \
  --o-table table.qza \
  --o-representative-sequences rep-seqs.qza \
  --o-denoising-stats denoising-stats.qza \
  --output-dir dada2_output --verbose


Running external command line application(s). This may print messages to stdout and/or stderr.
The command(s) being run are below. These commands cannot be manually re-run as they will depend on temporary files that no longer exist.

Command: run_dada.R --input_directory /tmp/tmp0oov0k4f/forward --input_directory_reverse /tmp/tmp0oov0k4f/reverse --output_path /tmp/tmp0oov0k4f/output.tsv.biom --output_track /tmp/tmp0oov0k4f/track.tsv --filtered_directory /tmp/tmp0oov0k4f/filt_f --filtered_directory_reverse /tmp/tmp0oov0k4f/filt_r --truncation_length 240 --truncation_length_reverse 240 --trim_left 15 --trim_left_reverse 15 --max_expected_errors 2.0 --max_expected_errors_reverse 2.0 --truncation_quality_score 2 --min_overlap 12 --pooling_method independent --chimera_method consensus --min_parental_fold 1.0 --allow_one_off False --num_threads 1 --learn_min_reads 1000000

package ‘optparse’ was built under R version 4.2.3 
R version 4.2.2 (2022-10-31) 
Loading required package: Rcpp
[?25hD

In [22]:
# 4. Summarice data Quality
!qiime metadata tabulate \
  --m-input-file denoising-stats.qza \
  --o-visualization stats.qzv


[32mSaved Visualization to: stats.qzv[0m
[0m

In [29]:
# meta data can be downloaded from previous stats.qza--> 
# 5. Feature Table and Feature Data Summaries
!qiime feature-table summarize \
  --i-table table.qza \
  --o-visualization table2.qzv \
  --m-sample-metadata-file metadata.tsv

[32mSaved Visualization to: table2.qzv[0m
[0m

In [24]:
# 5. Feature Table and Feature Data Summaries
!qiime feature-table summarize \
  --i-table table.qza \
  --o-visualization table.qzv \
#  --m-sample-metadata-file meta_data_asai6.tsv
!qiime feature-table tabulate-seqs \
  --i-data rep-seqs.qza \
  --o-visualization rep-seqs.qzv


[32mSaved Visualization to: table.qzv[0m
[0m[32mSaved Visualization to: rep-seqs.qzv[0m
[0m

In [26]:
# 6.Generate Phylogenetic Tree
!qiime phylogeny align-to-tree-mafft-fasttree \
  --i-sequences rep-seqs.qza \
  --o-alignment aligned-rep-seqs.qza \
  --o-masked-alignment masked-aligned-rep-seqs.qza \
  --o-tree unrooted-tree.qza \
  --o-rooted-tree rooted-tree.qza


[32mSaved FeatureData[AlignedSequence] to: aligned-rep-seqs.qza[0m
[32mSaved FeatureData[AlignedSequence] to: masked-aligned-rep-seqs.qza[0m
[32mSaved Phylogeny[Unrooted] to: unrooted-tree.qza[0m
[32mSaved Phylogeny[Rooted] to: rooted-tree.qza[0m
[0m

In [1]:
# 7. Alpha and Beta Diversity Analysis
!qiime diversity core-metrics-phylogenetic \
  --i-phylogeny rooted-tree.qza \
  --i-table table.qza \
  --p-sampling-depth 210 \
    # 11843 \
  --m-metadata-file metadata.tsv \
  --output-dir core-metrics-results

[32mSaved FeatureTable[Frequency] to: core-metrics-results/rarefied_table.qza[0m
[32mSaved SampleData[AlphaDiversity] to: core-metrics-results/faith_pd_vector.qza[0m
[32mSaved SampleData[AlphaDiversity] to: core-metrics-results/observed_features_vector.qza[0m
[32mSaved SampleData[AlphaDiversity] to: core-metrics-results/shannon_vector.qza[0m
[32mSaved SampleData[AlphaDiversity] to: core-metrics-results/evenness_vector.qza[0m
[32mSaved DistanceMatrix to: core-metrics-results/unweighted_unifrac_distance_matrix.qza[0m
[32mSaved DistanceMatrix to: core-metrics-results/weighted_unifrac_distance_matrix.qza[0m
[32mSaved DistanceMatrix to: core-metrics-results/jaccard_distance_matrix.qza[0m
[32mSaved DistanceMatrix to: core-metrics-results/bray_curtis_distance_matrix.qza[0m
[32mSaved PCoAResults to: core-metrics-results/unweighted_unifrac_pcoa_results.qza[0m
[32mSaved PCoAResults to: core-metrics-results/weighted_unifrac_pcoa_results.qza[0m
[32mSaved PCoAResults to: core

In [6]:
#14 b. Alpha Group Significance:
!qiime diversity alpha-group-significance \
  --i-alpha-diversity core-metrics-results/faith_pd_vector.qza \
  --m-metadata-file metadata.tsv\
  --o-visualization core-metrics-results/faith-pd-group-significance.qzv


[31m[1mPlugin error from diversity:

  Metadata does not contain any columns that satisfy this visualizer's requirements. There must be at least one metadata column that contains categorical data, isn't empty, doesn't consist of unique values, and doesn't consist of exactly one value.

Debug info has been saved to /tmp/qiime2-q2cli-err-e_hwnu6y.log[0m
[0m

In [9]:
#15. evenness
!qiime diversity alpha-group-significance \
  --i-alpha-diversity core-metrics-results/evenness_vector.qza \
  --m-metadata-file metadata.tsv \
  --o-visualization core-metrics-results/evenness-group-significance.qzv


[31m[1mPlugin error from diversity:

  Metadata does not contain any columns that satisfy this visualizer's requirements. There must be at least one metadata column that contains categorical data, isn't empty, doesn't consist of unique values, and doesn't consist of exactly one value.

Debug info has been saved to /tmp/qiime2-q2cli-err-u29ozui_.log[0m
[0m

In [19]:
#16 
!qiime diversity beta-group-significance \
  --i-distance-matrix core-metrics-results/unweighted_unifrac_distance_matrix.qza \
  --m-metadata-file metadata.tsv \
  --m-metadata-column organism \
  --o-visualization core-metrics-results/unweighted-unifrac-body-site-significance.qzv \
  --p-pairwise


[31m[1mPlugin error from diversity:

  All values in the grouping vector are the same. This method cannot operate on a grouping vector with only a single group of objects (e.g., there are no 'between' distances because there is only a single group).

Debug info has been saved to /tmp/qiime2-q2cli-err-wk8has74.log[0m
[0m

In [11]:
#17 plot
!qiime emperor plot \
  --i-pcoa core-metrics-results/unweighted_unifrac_pcoa_results.qza \
  --m-metadata-file metadata.tsv \
  --o-visualization core-metrics-results/plot1.qzv



[32mSaved Visualization to: core-metrics-results/plot1.qzv[0m
[0m

In [18]:
#Alpha rarefaction plotting
#18
!qiime diversity alpha-rarefaction \
  --i-table table.qza \
  --i-phylogeny rooted-tree.qza \
  --p-max-depth 11843 \
  --m-metadata-file metadata.tsv \
  --o-visualization alpha-rarefaction.qzv



[31m[1mPlugin error from diversity:

  All metadata filtered after dropping columns that contained non-categorical data.

Debug info has been saved to /tmp/qiime2-q2cli-err-3e2a5adc.log[0m
[0m

In [21]:
#Taxonomic analysis
#19 download
!wget \
  -O "gg-13-8-99-515-806-nb-classifier.qza" \
  "https://data.qiime2.org/2023.9/common/gg-13-8-99-515-806-nb-classifier.qza"


--2024-01-06 22:12:13--  https://data.qiime2.org/2023.9/common/gg-13-8-99-515-806-nb-classifier.qza
Resolving data.qiime2.org (data.qiime2.org)... 54.200.1.12
Connecting to data.qiime2.org (data.qiime2.org)|54.200.1.12|:443... connected.
HTTP request sent, awaiting response... 302 FOUND
Location: https://s3-us-west-2.amazonaws.com/qiime2-data/2023.9/common/gg-13-8-99-515-806-nb-classifier.qza [following]
--2024-01-06 22:12:15--  https://s3-us-west-2.amazonaws.com/qiime2-data/2023.9/common/gg-13-8-99-515-806-nb-classifier.qza
Resolving s3-us-west-2.amazonaws.com (s3-us-west-2.amazonaws.com)... 52.218.183.104, 52.92.137.104, 52.92.210.64, ...
Connecting to s3-us-west-2.amazonaws.com (s3-us-west-2.amazonaws.com)|52.218.183.104|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 28289645 (27M) [binary/octet-stream]
Saving to: ‘gg-13-8-99-515-806-nb-classifier.qza’


2024-01-06 22:16:20 (114 KB/s) - ‘gg-13-8-99-515-806-nb-classifier.qza’ saved [28289645/28289645]



In [23]:
#20. Taxanomy analysis
!qiime feature-classifier classify-sklearn \
  --i-classifier gg-13-8-99-515-806-nb-classifier.qza \
  --i-reads rep-seqs.qza \
  --o-classification taxonomy.qza


[32mSaved FeatureData[Taxonomy] to: taxonomy.qza[0m
[0m

In [24]:
#21
!qiime metadata tabulate \
  --m-input-file taxonomy.qza \
  --o-visualization taxonomy.qzv

[32mSaved Visualization to: taxonomy.qzv[0m
[0m

In [1]:
#22 barplot
!qiime taxa barplot \
  --i-table table.qza \
  --i-taxonomy taxonomy.qza \
  --m-metadata-file metadata.tsv \
  --o-visualization taxa-bar-plots.qzv

[32mSaved Visualization to: taxa-bar-plots.qzv[0m
[0m

In [3]:
#Differential abundance testing with ANCOM
#23
!qiime feature-table filter-samples \
  --i-table table.qza \
  --m-metadata-file metadata.tsv \
  --o-filtered-table diff-abundance-table.qza


[32mSaved FeatureTable[Frequency] to: diff-abundance-table.qza[0m
[0m

In [5]:
#24
!qiime composition add-pseudocount \
  --i-table diff-abundance-table.qza \
  --o-composition-table diff-abundance-table.qza


[32mSaved FeatureTable[Composition] to: diff-abundance-table.qza[0m
[0m

In [10]:
#25
!qiime composition ancom \
  --i-table diff-abundance-table.qza \
  --m-metadata-file metadata.tsv \
  --m-metadata-column sample \
  --o-visualization ancom-organism.qzv



[31m[1mPlugin error from composition:

  All values in `grouping` are unique. This method cannot operate on a grouping vector with only unique values (e.g., there are no 'within' variance because each group of samples contains only a single sample).

Debug info has been saved to /tmp/qiime2-q2cli-err-r64glmht.log[0m
[0m

In [13]:
#26
!qiime taxa collapse \
  --i-table /home/frank/Documents/Bioinfromatics/Assignment/assi6/2.Qiime/diff-abundance-table.qza \
  --i-taxonomy taxonomy.qza \
  --p-level 6 \
  --o-collapsed-table gut-table-l6.qza



Usage: [94mqiime taxa collapse[0m [OPTIONS]

  Collapse groups of features that have the same taxonomic assignment through
  the specified level. The frequencies of all features will be summed when
  they are collapsed.

[1mInputs[0m:
  [94m[4m--i-table[0m ARTIFACT [32mFeatureTable[Frequency][0m
                         Feature table to be collapsed.             [35m[required][0m
  [94m[4m--i-taxonomy[0m ARTIFACT [32mFeatureData[Taxonomy][0m
                         Taxonomic annotations for features in the provided
                         feature table. All features in the feature table must
                         have a corresponding taxonomic annotation. Taxonomic
                         annotations that are not present in the feature table
                         will be ignored.                           [35m[required][0m
[1mParameters[0m:
  [94m[4m--p-level[0m INTEGER      The taxonomic level at which the features should be
                         col

In [22]:
# modified above cell
!qiime diversity alpha-group-significance \
  --i-alpha-diversity core-metrics-results/faith_pd_vector.qza \
  --m-metadata-file metadata.tsv \
  --p-group-column "sample" \
  --o-visualization core-metrics-results/faith-pd-group-significance.qzv


Usage: [94mqiime diversity alpha-group-significance[0m [OPTIONS]

  Visually and statistically compare groups of alpha diversity values.

[1mInputs[0m:
  [94m[4m--i-alpha-diversity[0m ARTIFACT [32mSampleData[AlphaDiversity][0m
                       Vector of alpha diversity values by sample.  [35m[required][0m
[1mParameters[0m:
  [94m[4m--m-metadata-file[0m METADATA...
    (multiple          The sample metadata.
     arguments will    
     be merged)                                                     [35m[required][0m
[1mOutputs[0m:
  [94m[4m--o-visualization[0m VISUALIZATION
                                                                    [35m[required][0m
[1mMiscellaneous[0m:
  [94m--output-dir[0m PATH    Output unspecified results to a directory
  [94m--verbose[0m / [94m--quiet[0m  Display verbose output to stdout and/or stderr during
                       execution of this action. Or silence output if
                       execution is succe

In [27]:
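# 7. Alpha and Beta Diversity Analysis (earlier attempt, kept for reference only)
#!qiime diversity core-metrics-phylogenetic \
#  --i-phylogeny rooted-tree.qza \
#  --i-table table.qza \
#  --p-sampling-depth 210 \
#  --m-metadata-file meta_data_asai6.tsv \
#  --output-dir core-metrics-results

# Error reported when this was run (last two of 18 messages):
# (17/18) Missing option '--o-jaccard-emperor'. ("--output-dir" may also be
#  used)
# (18/18) Missing option '--o-bray-curtis-emperor'. ("--output-dir" may also be
#  used)
# NOTE(review): the messages indicate that no output location reached the
# command; presumably --output-dir was cut off from the command line - confirm.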


Usage: [94mqiime diversity core-metrics-phylogenetic[0m [OPTIONS]

  Applies a collection of diversity metrics (both phylogenetic and non-
  phylogenetic) to a feature table.

[1mInputs[0m:
  [94m[4m--i-table[0m ARTIFACT [32mFeatureTable[Frequency][0m
                          The feature table containing the samples over which
                          diversity metrics should be computed.     [35m[required][0m
  [94m[4m--i-phylogeny[0m ARTIFACT  Phylogenetic tree containing tip identifiers that
    [32mPhylogeny[Rooted][0m     correspond to the feature identifiers in the table.
                          This tree can contain tip ids that are not present
                          in the table, but all feature ids in the table must
                          be present in this tree.                  [35m[required][0m
[1mParameters[0m:
  [94m[4m--p-sampling-depth[0m INTEGER
    [32mRange(1, None)[0m        The total frequency that each sample should be
          