## Extract the V4 region from Greengenes and build a BLAST database

In [2]:
%%bash
# Build the reference used by the BLAST steps in this notebook:
#   1. import the Greengenes 13_8 99% OTU representative sequences,
#   2. extract the region between the given forward/reverse primers,
#   3. export the extracted reads as FASTA,
#   4. index that FASTA as a nucleotide BLAST database.
export DATA=~/Data
export PAYCHECK_DATA="$DATA/paycheck"

# The semantic type is quoted: unquoted, the shell treats [Sequence] as a
# glob character class and may rewrite the argument if a matching file exists.
qiime tools import \
    --input-path "$DATA/gg_13_8_otus/rep_set/99_otus.fasta" \
    --output-path "$PAYCHECK_DATA/ref/99_otus.qza" \
    --type 'FeatureData[Sequence]'
qiime feature-classifier extract-reads \
    --i-sequences "$PAYCHECK_DATA/ref/99_otus.qza" \
    --p-f-primer GTGYCAGCMGCCGCGGTAA --p-r-primer GGACTACNVGGGTWTCTAAT \
    --o-reads "$PAYCHECK_DATA/ref/99_otus_v4.qza"

# Export next to the artifact instead of into the current working directory,
# so the result does not depend on (or litter) wherever the notebook runs.
# The exported file is named dna-sequences.fasta; rename it to match the DB.
qiime tools export "$PAYCHECK_DATA/ref/99_otus_v4.qza" \
    --output-dir "$PAYCHECK_DATA/ref"
mv "$PAYCHECK_DATA/ref/dna-sequences.fasta" "$PAYCHECK_DATA/ref/99_otus_v4.fasta"

makeblastdb -in "$PAYCHECK_DATA/ref/99_otus_v4.fasta" -dbtype nucl \
    -out "$PAYCHECK_DATA/ref/99_otus_v4"

Saved FeatureData[Sequence] to: /Users/benkaehler/Data/paycheck/ref/99_otus_v4.qza


Building a new DB, current time: 04/12/2018 16:05:08
New DB name:   /Users/benkaehler/Data/paycheck/ref/99_otus_v4
New DB title:  /Users/benkaehler/Data/paycheck/ref/99_otus_v4.fasta
Sequence type: Nucleotide
Keep MBits: T
Maximum file size: 1000000000B
Adding sequences from FASTA; added 202814 sequences in 4.43287 seconds.


## Download stool data

In [None]:
%%bash
# Fetch the sequence-variant table for stool samples with redbiom.
#
# PAYCHECK_DATA is defined here, like in the other cells of this notebook:
# every %%bash cell runs in its own shell, so a value exported by an earlier
# cell is not visible and RAW_STOOL would otherwise resolve to /raw/stool.
export PAYCHECK_DATA=~/Data/paycheck
export RAW_STOOL="$PAYCHECK_DATA/raw/stool"
export CTX=Deblur-illumina-16S-v4-150nt-10d7e0

# Collect sample IDs for both capitalisations of the sample_type value;
# the second query appends to the list written by the first.
redbiom search metadata 'where sample_type == "stool"' > "$RAW_STOOL/samples"
redbiom search metadata 'where sample_type == "Stool"' >> "$RAW_STOOL/samples"

# Download the table for those samples from the chosen context.
redbiom fetch samples --from "$RAW_STOOL/samples" --context "$CTX" \
    --output "$RAW_STOOL/sv.biom"

In [None]:
%%bash
# Map each stool sequence variant to a reference OTU with BLAST.
# Reads  $RAW_STOOL/sv.biom and the BLAST DB at $BLAST_DB.
# Writes $REF_STOOL/sv.fasta and $REF_STOOL/sv_map.blast.
export PAYCHECK_DATA=~/Data/paycheck
export BLAST_DB="$PAYCHECK_DATA/ref/99_otus_v4"
export REF_STOOL="$PAYCHECK_DATA/ref/stool"
export RAW_STOOL="$PAYCHECK_DATA/raw/stool"

# Each observation ID is written twice: as the FASTA header with a
# "blast_rocks" suffix, and as the sequence line itself.
# NOTE(review): presumably the suffix keeps the header distinct from the raw
# sequence for BLAST's ID handling - confirm why it is needed.
biom table-ids --observations -i "$RAW_STOOL/sv.biom" \
    | awk '{print ">"$1"blast_rocks\n"$1}' > "$REF_STOOL/sv.fasta"

# Tabular output with two columns (query accession, subject accession),
# limited to one target per query.
blastn -num_threads 4 -query "$REF_STOOL/sv.fasta" -outfmt "6 qacc sacc" \
    -db "$BLAST_DB" -max_target_seqs 1 -out "$REF_STOOL/sv_map.blast"

# Strip the suffix again so the first column is the original observation ID.
# `sed -i ''` only works with BSD/macOS sed (GNU sed reads '' as the script),
# so rewrite through a temporary file, which behaves the same everywhere.
sed 's/blast_rocks//' "$REF_STOOL/sv_map.blast" > "$REF_STOOL/sv_map.blast.tmp" \
    && mv "$REF_STOOL/sv_map.blast.tmp" "$REF_STOOL/sv_map.blast"

In [18]:
%%bash
# Classify the stool sequence variants with the pre-trained classifier.
# Reads  $RAW_STOOL/sv.biom and the classifier artifact under $PAYCHECK_DATA/ref.
# Writes $REF_STOOL/sv.qza, $REF_STOOL/sv_seqs.qza and $REF_STOOL/sv_map.qza.
export PAYCHECK_DATA=~/Data/paycheck
export REF_STOOL="$PAYCHECK_DATA/ref/stool"
export RAW_STOOL="$PAYCHECK_DATA/raw/stool"

# The semantic type is quoted: unquoted, the shell treats [Frequency] as a
# glob character class and may rewrite the argument if a matching file exists.
qiime tools import --type 'FeatureTable[Frequency]' \
    --input-path "$RAW_STOOL/sv.biom" --output-path "$REF_STOOL/sv.qza"

# Derive the sequences artifact from the feature table.
qiime clawback sequence-variants-from-feature-table \
    --i-table "$REF_STOOL/sv.qza" --o-sequences "$REF_STOOL/sv_seqs.qza"

# NOTE(review): --p-confidence -1 presumably disables the confidence
# threshold - confirm against the q2-feature-classifier documentation.
qiime feature-classifier classify-sklearn --i-reads "$REF_STOOL/sv_seqs.qza" \
    --i-classifier "$PAYCHECK_DATA/ref/gg-13-8-99-515-806-nb-classifier.qza" \
    --o-classification "$REF_STOOL/sv_map.qza" --p-confidence -1

Saved FeatureData[Sequence] to: /Users/benkaehler/Data/paycheck/ref/stool/sv_seqs.qza
Saved FeatureData[Taxonomy] to: /Users/benkaehler/Data/paycheck/ref/stool/sv_map.qza


## Download soil data

In [2]:
%%bash
# Fetch the sequence-variant table for soil samples with redbiom.
# CTX is the context handed to `redbiom fetch`; RAW_SOIL receives both the
# sample ID list and the downloaded BIOM table.
export CTX=Deblur-illumina-16S-v4-150nt-10d7e0
export RAW_SOIL=~/Data/paycheck/raw/soil

# Step 1: list the samples whose sample_type is "soil" or "Soil".
redbiom search metadata \
    'where sample_type in ("soil", "Soil")' \
    > ${RAW_SOIL}/samples

# Step 2: download the table for exactly those samples.
redbiom fetch samples \
    --from ${RAW_SOIL}/samples \
    --context ${CTX} \
    --output ${RAW_SOIL}/sv.biom

In [7]:
%%bash
# Map each soil sequence variant to a reference OTU with BLAST.
# Reads  $RAW_SOIL/sv.biom and the BLAST DB at $BLAST_DB.
# Writes $REF_SOIL/sv.fasta and $REF_SOIL/sv_map.blast.
export PAYCHECK_DATA=~/Data/paycheck
export BLAST_DB="$PAYCHECK_DATA/ref/99_otus_v4"
export REF_SOIL="$PAYCHECK_DATA/ref/soil"
export RAW_SOIL="$PAYCHECK_DATA/raw/soil"

# Each observation ID is written twice: as the FASTA header with a
# "blast_rocks" suffix, and as the sequence line itself.
# NOTE(review): presumably the suffix keeps the header distinct from the raw
# sequence for BLAST's ID handling - confirm why it is needed.
biom table-ids --observations -i "$RAW_SOIL/sv.biom" \
    | awk '{print ">"$1"blast_rocks\n"$1}' > "$REF_SOIL/sv.fasta"

# Tabular output with two columns (query accession, subject accession),
# limited to one target per query.
blastn -num_threads 4 -query "$REF_SOIL/sv.fasta" -outfmt "6 qacc sacc" \
    -db "$BLAST_DB" -max_target_seqs 1 -out "$REF_SOIL/sv_map.blast"

# Strip the suffix again so the first column is the original observation ID.
# `sed -i ''` only works with BSD/macOS sed (GNU sed reads '' as the script),
# so rewrite through a temporary file, which behaves the same everywhere.
sed 's/blast_rocks//' "$REF_SOIL/sv_map.blast" > "$REF_SOIL/sv_map.blast.tmp" \
    && mv "$REF_SOIL/sv_map.blast.tmp" "$REF_SOIL/sv_map.blast"

In [1]:
%%bash
# Classify the soil sequence variants with the pre-trained classifier.
# Reads  $RAW_SOIL/sv.biom and the classifier artifact under $PAYCHECK_DATA/ref.
# Writes $REF_SOIL/sv.qza, $REF_SOIL/sv_seqs.qza and $REF_SOIL/sv_map.qza.
export PAYCHECK_DATA=~/Data/paycheck
export REF_SOIL="$PAYCHECK_DATA/ref/soil"
export RAW_SOIL="$PAYCHECK_DATA/raw/soil"

# The semantic type is quoted: unquoted, the shell treats [Frequency] as a
# glob character class and may rewrite the argument if a matching file exists.
qiime tools import --type 'FeatureTable[Frequency]' \
    --input-path "$RAW_SOIL/sv.biom" --output-path "$REF_SOIL/sv.qza"

# Derive the sequences artifact from the feature table.
qiime clawback sequence-variants-from-feature-table \
    --i-table "$REF_SOIL/sv.qza" --o-sequences "$REF_SOIL/sv_seqs.qza"

# NOTE(review): --p-confidence -1 presumably disables the confidence
# threshold - confirm against the q2-feature-classifier documentation.
qiime feature-classifier classify-sklearn --i-reads "$REF_SOIL/sv_seqs.qza" \
    --i-classifier "$PAYCHECK_DATA/ref/gg-13-8-99-515-806-nb-classifier.qza" \
    --o-classification "$REF_SOIL/sv_map.qza" --p-confidence -1

Error: no such option: --table
Usage: qiime feature-classifier classify-sklearn [OPTIONS]

Error: Invalid value for "--i-reads": Path "/Users/benkaehler/Data/paycheck/ref/soil/sv_seqs.qza" does not exist.


### Download tear data

In [21]:
%%bash
# Full pipeline for tear samples: download with redbiom, map sequence variants
# to reference OTUs with BLAST, classify them, and generate class weights.
# Raw downloads go to $RAW_TEARS, derived files to $REF_TEARS, and the
# class weights to $TEARS/weights.
export CTX=Deblur-illumina-16S-v4-150nt-10d7e0
export PAYCHECK_DATA=~/Data/paycheck
export REF="$PAYCHECK_DATA/ref"
export BLAST_DB="$PAYCHECK_DATA/ref/99_otus_v4"
export REF_TEARS="$PAYCHECK_DATA/ref/tears"
export RAW_TEARS="$PAYCHECK_DATA/raw/tears"
export TEARS="$PAYCHECK_DATA/tears"

# --- Download -------------------------------------------------------------
redbiom search metadata 'where sample_type in ("Tears",)' > "$RAW_TEARS/samples"
redbiom fetch samples --from "$RAW_TEARS/samples" --context "$CTX" \
    --output "$RAW_TEARS/sv.biom"

# --- BLAST mapping --------------------------------------------------------
# Each observation ID is written twice: as the FASTA header with a
# "blast_rocks" suffix, and as the sequence line itself.
# NOTE(review): presumably the suffix keeps the header distinct from the raw
# sequence for BLAST's ID handling - confirm why it is needed.
biom table-ids --observations -i "$RAW_TEARS/sv.biom" \
    | awk '{print ">"$1"blast_rocks\n"$1}' > "$REF_TEARS/sv.fasta"
blastn -num_threads 4 -query "$REF_TEARS/sv.fasta" -outfmt "6 qacc sacc" \
    -db "$BLAST_DB" -max_target_seqs 1 -out "$REF_TEARS/sv_map.blast"
# Strip the suffix again. `sed -i ''` only works with BSD/macOS sed (GNU sed
# reads '' as the script), so rewrite through a temporary file instead.
sed 's/blast_rocks//' "$REF_TEARS/sv_map.blast" > "$REF_TEARS/sv_map.blast.tmp" \
    && mv "$REF_TEARS/sv_map.blast.tmp" "$REF_TEARS/sv_map.blast"

# --- Classification -------------------------------------------------------
# The semantic type is quoted: unquoted, the shell treats [Frequency] as a
# glob character class and may rewrite the argument if a matching file exists.
qiime tools import --type 'FeatureTable[Frequency]' \
    --input-path "$RAW_TEARS/sv.biom" --output-path "$REF_TEARS/sv.qza"
qiime clawback sequence-variants-from-feature-table \
    --i-table "$REF_TEARS/sv.qza" --o-sequences "$REF_TEARS/sv_seqs.qza"
# NOTE(review): --p-confidence -1 presumably disables the confidence
# threshold - confirm against the q2-feature-classifier documentation.
qiime feature-classifier classify-sklearn --i-reads "$REF_TEARS/sv_seqs.qza" \
    --i-classifier "$PAYCHECK_DATA/ref/gg-13-8-99-515-806-nb-classifier.qza" \
    --o-classification "$REF_TEARS/sv_map.qza" --p-confidence -1

# --- Class weights --------------------------------------------------------
qiime clawback generate-class-weights \
    --i-reference-taxonomy "$REF/99_tax.qza" \
    --i-reference-sequences "$REF/99_otus_v4.qza" \
    --i-samples "$REF_TEARS/sv.qza" \
    --i-taxonomy-classification "$REF_TEARS/sv_map.qza" \
    --o-class-weight "$TEARS/weights/weights-normalise-False-unobserved-weight-1e-06.qza"

Saved FeatureData[Sequence] to: /Users/benkaehler/Data/paycheck/ref/tears/sv_seqs.qza
Saved FeatureData[Taxonomy] to: /Users/benkaehler/Data/paycheck/ref/tears/sv_map.qza


In [31]:
%%bash
# Tear samples again, this time fetched through q2-clawback itself rather
# than redbiom + BLAST. Everything is written under the *_cb directories so
# the results sit alongside, not on top of, the other tears run.
export PAYCHECK_DATA=~/Data/paycheck
export CTX=Deblur-illumina-16S-v4-150nt-10d7e0
export REF=${PAYCHECK_DATA}/ref
export TEARS=${PAYCHECK_DATA}/tears_cb
export RAW_TEARS=${PAYCHECK_DATA}/raw/tears_cb
export REF_TEARS=${PAYCHECK_DATA}/ref/tears_cb

# 1. Fetch the samples for sample type "Tears" from the chosen context.
qiime clawback fetch-QIITA-samples \
    --p-sample-type Tears \
    --p-context ${CTX} \
    --o-samples ${REF_TEARS}/sv.qza

# 2. Derive the sequences artifact from the fetched samples.
qiime clawback sequence-variants-from-samples \
    --i-samples ${REF_TEARS}/sv.qza \
    --o-sequences ${REF_TEARS}/sv_seqs.qza

# 3. Classify those sequences with the pre-trained classifier.
qiime feature-classifier classify-sklearn \
    --i-reads ${REF_TEARS}/sv_seqs.qza \
    --i-classifier ${PAYCHECK_DATA}/ref/gg-13-8-99-515-806-nb-classifier.qza \
    --o-classification ${REF_TEARS}/sv_map.qza \
    --p-confidence -1

# 4. Combine reference, samples and classification into class weights.
qiime clawback generate-class-weights \
    --i-reference-taxonomy ${REF}/99_tax.qza \
    --i-reference-sequences ${REF}/99_otus_v4.qza \
    --i-samples ${REF_TEARS}/sv.qza \
    --i-taxonomy-classification ${REF_TEARS}/sv_map.qza \
    --o-class-weight ${TEARS}/weights/weights-normalise-False-unobserved-weight-1e-06.qza

Saved FeatureTable[Frequency] to: /Users/benkaehler/Data/paycheck/ref/tears_cb/sv.qza
Saved FeatureData[Sequence] to: /Users/benkaehler/Data/paycheck/ref/tears_cb/sv_seqs.qza
Saved FeatureData[Taxonomy] to: /Users/benkaehler/Data/paycheck/ref/tears_cb/sv_map.qza
Saved FeatureTable[RelativeFrequency] to: /Users/benkaehler/Data/paycheck/tears_cb/weights/weights-normalise-False-unobserved-weight-1e-06.qza


### Download vaginal data

In [32]:
%%bash
# Full pipeline for vaginal samples: fetch through q2-clawback, classify,
# generate class weights, then export the table and map its sequence variants
# to reference OTUs with BLAST.
# Derived files go to $REF_VAGINAL and the class weights to $VAGINAL/weights.
export CTX=Deblur-illumina-16S-v4-150nt-10d7e0
export PAYCHECK_DATA=~/Data/paycheck
export BLAST_DB="$PAYCHECK_DATA/ref/99_otus_v4"
export REF="$PAYCHECK_DATA/ref"
export REF_VAGINAL="$PAYCHECK_DATA/ref/vaginal"
# RAW_VAGINAL is still exported for compatibility but no longer read below.
export RAW_VAGINAL="$PAYCHECK_DATA/raw/vaginal"
export VAGINAL="$PAYCHECK_DATA/vaginal"

# --- Fetch, classify, weight ----------------------------------------------
qiime clawback fetch-QIITA-samples --p-sample-type vaginal --p-context "$CTX" \
    --o-samples "$REF_VAGINAL/sv.qza"
qiime clawback sequence-variants-from-samples --i-samples "$REF_VAGINAL/sv.qza" \
    --o-sequences "$REF_VAGINAL/sv_seqs.qza"
# NOTE(review): --p-confidence -1 presumably disables the confidence
# threshold - confirm against the q2-feature-classifier documentation.
qiime feature-classifier classify-sklearn --i-reads "$REF_VAGINAL/sv_seqs.qza" \
    --i-classifier "$PAYCHECK_DATA/ref/gg-13-8-99-515-806-nb-classifier.qza" \
    --o-classification "$REF_VAGINAL/sv_map.qza" --p-confidence -1
qiime clawback generate-class-weights \
    --i-reference-taxonomy "$REF/99_tax.qza" \
    --i-reference-sequences "$REF/99_otus_v4.qza" \
    --i-samples "$REF_VAGINAL/sv.qza" \
    --i-taxonomy-classification "$REF_VAGINAL/sv_map.qza" \
    --o-class-weight "$VAGINAL/weights/weights-normalise-False-unobserved-weight-1e-06.qza"

# --- BLAST mapping --------------------------------------------------------
# Export the fetched table to BIOM; the exported file is named
# feature-table.biom, so rename it to sv.biom.
qiime tools export --output-dir "$REF_VAGINAL" "$REF_VAGINAL/sv.qza"
mv "$REF_VAGINAL/feature-table.biom" "$REF_VAGINAL/sv.biom"

# Read the table that was just exported. The previous version read
# $RAW_VAGINAL/sv.biom, which nothing in this cell creates.
# Each observation ID is written twice: as the FASTA header with a
# "blast_rocks" suffix, and as the sequence line itself.
# NOTE(review): presumably the suffix keeps the header distinct from the raw
# sequence for BLAST's ID handling - confirm why it is needed.
biom table-ids --observations -i "$REF_VAGINAL/sv.biom" \
    | awk '{print ">"$1"blast_rocks\n"$1}' > "$REF_VAGINAL/sv.fasta"
blastn -num_threads 4 -query "$REF_VAGINAL/sv.fasta" -outfmt "6 qacc sacc" \
    -db "$BLAST_DB" -max_target_seqs 1 -out "$REF_VAGINAL/sv_map.blast"
# Strip the suffix again. `sed -i ''` only works with BSD/macOS sed (GNU sed
# reads '' as the script), so rewrite through a temporary file instead.
sed 's/blast_rocks//' "$REF_VAGINAL/sv_map.blast" > "$REF_VAGINAL/sv_map.blast.tmp" \
    && mv "$REF_VAGINAL/sv_map.blast.tmp" "$REF_VAGINAL/sv_map.blast"

Saved FeatureTable[Frequency] to: /Users/benkaehler/Data/paycheck/ref/vaginal/sv.qza
Saved FeatureData[Sequence] to: /Users/benkaehler/Data/paycheck/ref/vaginal/sv_seqs.qza
Saved FeatureData[Taxonomy] to: /Users/benkaehler/Data/paycheck/ref/vaginal/sv_map.qza


Traceback (most recent call last):
  File "/Users/benkaehler/miniconda3/envs/qiime2-2018.2/bin/qiime", line 11, in <module>
    sys.exit(qiime())
  File "/Users/benkaehler/miniconda3/envs/qiime2-2018.2/lib/python3.5/site-packages/click/core.py", line 722, in __call__
    return self.main(*args, **kwargs)
  File "/Users/benkaehler/miniconda3/envs/qiime2-2018.2/lib/python3.5/site-packages/click/core.py", line 697, in main
    rv = self.invoke(ctx)
  File "/Users/benkaehler/miniconda3/envs/qiime2-2018.2/lib/python3.5/site-packages/click/core.py", line 1066, in invoke
    return _process_result(sub_ctx.command.invoke(sub_ctx))
  File "/Users/benkaehler/miniconda3/envs/qiime2-2018.2/lib/python3.5/site-packages/click/core.py", line 1066, in invoke
    return _process_result(sub_ctx.command.invoke(sub_ctx))
  File "/Users/benkaehler/miniconda3/envs/qiime2-2018.2/lib/python3.5/site-packages/click/core.py", line 895, in invoke
    return ctx.invoke(self.callback, **ctx.params)
  File "/Users/be

In [36]:
%%bash
# Re-run only the BLAST mapping for the vaginal sequence variants, using the
# $REF_VAGINAL/sv.fasta written by an earlier cell.
# Writes $REF_VAGINAL/sv_map.blast.
export CTX=Deblur-illumina-16S-v4-150nt-10d7e0
export PAYCHECK_DATA=~/Data/paycheck
export REF="$PAYCHECK_DATA/ref"
export REF_VAGINAL="$PAYCHECK_DATA/ref/vaginal"
export RAW_VAGINAL="$PAYCHECK_DATA/raw/vaginal"
export VAGINAL="$PAYCHECK_DATA/vaginal"
export BLAST_DB="$PAYCHECK_DATA/ref/99_otus_v4"

# Tabular output with two columns (query accession, subject accession),
# limited to one target per query.
blastn -num_threads 4 -query "$REF_VAGINAL/sv.fasta" -outfmt "6 qacc sacc" \
    -db "$BLAST_DB" -max_target_seqs 1 -out "$REF_VAGINAL/sv_map.blast"

# Remove the "blast_rocks" suffix from the query IDs in the BLAST output.
# `sed -i ''` only works with BSD/macOS sed (GNU sed reads '' as the script),
# so rewrite through a temporary file, which behaves the same everywhere.
sed 's/blast_rocks//' "$REF_VAGINAL/sv_map.blast" > "$REF_VAGINAL/sv_map.blast.tmp" \
    && mv "$REF_VAGINAL/sv_map.blast.tmp" "$REF_VAGINAL/sv_map.blast"

### Download empo_3 data

In [None]:
%%bash
# Context name for the empo_3 download.
# NOTE(review): this differs from the Deblur-illumina-16S-v4-150nt-10d7e0
# value used by the other cells in this notebook (the name says 100nt rather
# than 150nt) - confirm the different context is intentional.
export CTX=Deblur-NA-illumina-16S-v4-100nt-fbc5b2
