# Run `q2-feature-classifier` for the Iceland tutorial dataset

### Setup

In [1]:
import json
import os

from qiime import Artifact
from qiime.plugins.feature_classifier.methods import extract_reads, \
    fit_classifier, classify
from q2_types import DNAIterator

### Extract the reference reads

In [6]:
# Passed as the second argument to extract_reads in the next cell, and used by
# the read-length check cell, which reports a problem if any extracted read
# has len(seq) >= max_len.
# NOTE(review): presumably a cap on extracted read length -- confirm against
# the extract_reads documentation.
max_len = 10000

In [7]:
# Primer sequences handed to extract_reads (named 515F and 806R here).
primer_515F = 'GTGCCAGCMGCCGCGGTAA'
primer_806R = 'GGACTACHVGGGTWTCTAAT'

# Reference alignment artifact that the reads are extracted from.
ref_aln = Artifact.load('iceland/99_13_8_ref_pynast_aln.qza')

# Run the extraction with max_len (set in the previous cell) and
# n_sample=100000, keep the `reads` result, and save it so later cells can
# reload it from disk. The save call stays last so its return value is echoed.
extraction = extract_reads(
    ref_aln, max_len, primer_515F, primer_806R, n_sample=100000)
ref_seq = extraction.reads
ref_seq.save('iceland/99_13_8_515_806_long_reads.qza')

'iceland/99_13_8_515_806_long_reads.qza'

### Check the read lengths

In [8]:
# Reload the saved reads and look for the first sequence whose length reaches
# max_len; the search stops at the first such sequence.
ref_seq = Artifact.load('iceland/99_13_8_515_806_long_reads.qza')
offending_seq = next(
    (candidate for candidate in ref_seq.view(DNAIterator)
     if len(candidate) >= max_len),
    None)

# Report the offending sequence if there is one, otherwise confirm max_len.
if offending_seq is None:
    print('max_len is ok')
else:
    print('max_len is not big enough:')
    print(offending_seq)

max_len is ok


### Train the feature hashing naive Bayes classifier

In [9]:
# Reload the extracted reference reads and load the reference taxonomy.
ref_seq = Artifact.load('iceland/99_13_8_515_806_long_reads.qza')
ref_tax = Artifact.load('iceland/99_13_8_ref_taxonomy.qza')

# Two named steps: 'hash' (FeatureHasher) followed by 'classify'
# (MultinomialNB). The 'classify' and 'hash' entries carry the settings
# stored under those same step names.
# NOTE(review): how fit_classifier interprets this structure is not visible
# here -- confirm against the plugin documentation.
pipeline_steps = [
    ['hash', 'feature_extraction.FeatureHasher'],
    ['classify', 'naive_bayes.MultinomialNB'],
]
classifier_spec = json.dumps({
    'steps': pipeline_steps,
    'classify': {'alpha': 0.01},
    'hash': {'non_negative': True, 'n_features': 8192},
})

# Fit on the reads and taxonomy using the JSON-encoded specification, then
# save the resulting classifier. The save call stays last so its return value
# is echoed.
fit_result = fit_classifier(
    ref_seq, ref_tax, classifier_specification=classifier_spec)
classifier = fit_result.classifier
classifier.save('iceland/99_13_8_fh_nb_se_long_reads_classfier.qza')

'iceland/99_13_8_fh_nb_se_long_reads_classfier.qza'

### Classify the reads

In [None]:
%%time
reads = Artifact.load('iceland/fmt-tutorial-rep-seqs.qza')
classifier = Artifact.load('iceland/99_13_8_fh_nb_se_long_reads_classfier.qza')
for confidence in range(0, 101, 5):
    classification = classify(reads, classifier, confidence=confidence/100).classification
    classification.save('iceland/fh-long/fmt-tutorial-classification-c-%03d.qza' % confidence)