# Simple classifier #1: KNN

In [1]:
# setup
from mlwpy import *
%matplotlib inline

from sklearn import datasets
from sklearn import neighbors
iris = datasets.load_iris() 
(iris_train_ftrs, iris_test_ftrs, iris_train_tgt, iris_test_tgt) = skms.train_test_split(iris.data, iris.target, test_size=.25)

In [2]:
# 3-nearest-neighbours classifier on the iris split
# workflow: fit on training features + targets, predict from the test
# features, then compare those predictions with the test targets
knn = neighbors.KNeighborsClassifier(n_neighbors=3)
fit = knn.fit(X=iris_train_ftrs, y=iris_train_tgt)
preds = fit.predict(X=iris_test_ftrs)
print(
    "3NN accuracy: ",
    metrics.accuracy_score(y_true=iris_test_tgt, y_pred=preds),
)

3NN accuracy:  1.0


# Simple classifier #2: Naive Bayes

In [3]:
# Gaussian Naive Bayes on the same split: fit, predict, then report accuracy
nb = naive_bayes.GaussianNB()
fit = nb.fit(X=iris_train_ftrs, y=iris_train_tgt)
preds = fit.predict(X=iris_test_ftrs)

print(
    'NB accuracy: ',
    metrics.accuracy_score(y_true=iris_test_tgt, y_pred=preds),
)

NB accuracy:  1.0


# Evaluation of Classifiers


In [4]:
from sklearn import datasets
from sklearn import metrics
from sklearn import model_selection as skms
from sklearn import naive_bayes
from sklearn import neighbors

# random_state is fixed so that repeated runs give the same training and
# testing sets; without it the split changes from run to run
iris = datasets.load_iris()
iris_train_ftrs, iris_test_ftrs, iris_train_tgt, iris_test_tgt = skms.train_test_split(
    iris.data,
    iris.target,
    test_size=.90,
    random_state=42,
)

# classifiers to compare, keyed by the label printed next to each score
models = {
    'kNN': neighbors.KNeighborsClassifier(n_neighbors=3),
    'NB': naive_bayes.GaussianNB(),
}

# fit every model on the training split and print its test accuracy
for name, model in models.items():
    fit = model.fit(X=iris_train_ftrs, y=iris_train_tgt)
    predictions = fit.predict(X=iris_test_ftrs)
    score = metrics.accuracy_score(y_true=iris_test_tgt, y_pred=predictions)
    print("{:>3s}:{:0.2f}".format(name, score))

kNN:0.96
 NB:0.81


# Measure of performance

In [5]:
# Time loading the iris dataset; -r1 limits the measurement to a single repeat.
%timeit -r1 datasets.load_iris()

802 µs ± 0 ns per loop (mean ± std. dev. of 1 run, 1,000 loops each)


In [6]:
%%timeit -r1 -n1
(iris_train_ftrs, iris_test_ftrs, iris_train_tgt, iris_test_tgt) = skms.train_test_split(iris.data, iris.target, test_size=.25)

581 µs ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [7]:
%%timeit -r1
nb = naive_bayes.GaussianNB()
fit = nb.fit(iris_train_ftrs, iris_train_tgt)
preds = fit.predict(iris_test_ftrs)
metrics.accuracy_score(iris_test_tgt, preds)

737 µs ± 0 ns per loop (mean ± std. dev. of 1 run, 1,000 loops each)


In [8]:
%%timeit -r1
knn = neighbors.KNeighborsClassifier(n_neighbors=3)
fit = knn.fit(iris_train_ftrs, iris_train_tgt)
preds = fit.predict(iris_test_ftrs)
metrics.accuracy_score(iris_test_tgt, preds)

3.8 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 100 loops each)


In [10]:
%load_ext memory_profiler

%memit
nb = naive_bayes.GaussianNB()
fit = nb.fit(iris_train_ftrs, iris_train_tgt)
preds = fit.predict(iris_test_ftrs)
metrics.accuracy_score(iris_test_tgt, preds)

The memory_profiler extension is already loaded. To reload it, use:
  %reload_ext memory_profiler
peak memory: 176.42 MiB, increment: 0.02 MiB


0.8074074074074075

In [11]:
%memit
knn = neighbors.KNeighborsClassifier(n_neighbors=3)
fit = knn.fit(iris_train_ftrs, iris_train_tgt)
preds = fit.predict(iris_test_ftrs)
metrics.accuracy_score(iris_test_tgt, preds)

peak memory: 176.38 MiB, increment: 0.00 MiB


0.9629629629629629