# Run `q2-feature-classifier` for the Iceland tutorial dataset

### Setup

In [1]:
import json

from qiime import Artifact
from qiime.plugins.feature_classifier.methods import extract_reads, \
    fit_classifier, classify
import numpy

### Extract the reference reads

In [14]:
# Load the reference alignment artifact from disk.
ref_aln = Artifact.load('iceland/99_13_8_ref_pynast_aln.qza')
# Forward (515F) and reverse (806R) primer sequences. Letters such as
# M, H, V and W are IUPAC ambiguity codes.
primer_515F = 'GTGCCAGCMGCCGCGGTAA'
primer_806R = 'GGACTACHVGGGTWTCTAAT'
# Extract the reads delimited by the two primers from the reference alignment.
# NOTE(review): the positional 120 is presumably the read length to keep and
# n_sample presumably caps how many reference sequences are sampled -- confirm
# both against the extract_reads signature.
ref_seq = extract_reads(ref_aln, 120, primer_515F, primer_806R, n_sample=100000).reads
# Persist the extracted reads; the training cell reloads them from this path.
ref_seq.save('iceland/99_13_8_515_806_reads.qza')

### Train the feature hashing naive Bayes classifier

In [3]:
# Reload both training inputs from disk so this cell does not rely on
# in-memory state left behind by the extraction cell.
ref_tax = Artifact.load('iceland/99_13_8_ref_taxonomy.qza')
ref_seq = Artifact.load('iceland/99_13_8_515_806_reads.qza')

# JSON description of a two-step pipeline: a 'hash' step followed by a
# 'classify' step. The top-level 'hash' and 'classify' entries carry the
# parameters for the steps of the same name.
# NOTE(review): the step class paths look like scikit-learn modules
# (FeatureHasher, MultinomialNB) -- confirm how fit_classifier resolves them.
classifier_spec = json.dumps({
    'steps': [
        ['hash', 'feature_extraction.FeatureHasher'],
        ['classify', 'naive_bayes.MultinomialNB'],
    ],
    'classify': {'alpha': 0.01},
    'hash': {'non_negative': True, 'n_features': 8192},
})

# Fit on the extracted reference reads and their taxonomy.
classifier = fit_classifier(
    ref_seq, ref_tax, classifier_specification=classifier_spec
).classifier

# The classification cell loads the trained classifier back from this exact
# path. save() is deliberately the last expression so that its return value
# is shown as the cell output.
classifier.save('iceland/99_13_8_fh_nb_se_classfier.qza')

'iceland/99_13_8_fh_nb_se_classfier.qza'

### Classify the reads

In [5]:
%%time
# Load the representative sequences to classify and the classifier saved by
# the training cell (same path as used there).
reads = Artifact.load('iceland/fmt-tutorial-rep-seqs.qza')
classifier = Artifact.load('iceland/99_13_8_fh_nb_se_classfier.qza')
# Sweep the confidence setting over integer percentages 0, 5, ..., 100
# (21 runs); each is passed to classify() as a fraction in [0.0, 1.0].
for confidence in range(0, 101, 5):
    classification = classify(reads, classifier, confidence=confidence/100).classification
    # One output artifact per setting, named by the zero-padded percentage
    # (c-000 ... c-100).
    # NOTE(review): presumably the 'iceland/fh/' directory must already exist
    # before saving -- confirm.
    classification.save('iceland/fh/fmt-tutorial-classification-c-%03d.qza' % confidence)

CPU times: user 2min 51s, sys: 1min 25s, total: 4min 16s
Wall time: 5min 5s
