# Lecture 18 Classification Part II

## Setup

In [1]:
pip install memory-profiler


Note: you may need to restart the kernel to use updated packages.




In [2]:
import numpy as np
import pandas as pd
import seaborn as sns

import matplotlib.pyplot as plt

%matplotlib inline 

In [3]:
from sklearn import (datasets,
                     metrics,
                     model_selection as skms,
                     naive_bayes,
                     neighbors)

In [4]:
import warnings

# Seed NumPy's global random number generator so that anything drawing from
# np.random behaves the same on every run of the notebook.
np.random.seed(42)

# Suppress all warnings for the rest of the session to keep cell output short.
warnings.filterwarnings('ignore')

## Standalone Learning Evaluation

In [5]:
# Self-contained evaluation script: the sklearn imports are repeated on
# purpose so this cell could be moved into its own .py file unchanged.
from sklearn import datasets, metrics, naive_bayes, neighbors
from sklearn import model_selection as skms

# Load the iris data set (features in .data, class labels in .target).
iris = datasets.load_iris()

# Split into train and test portions; random_state pins the shuffle so the
# split is the same on every run.
# NOTE(review): test_size=.90 sends 90% of the rows to the test side, while
# the timing cell further down uses test_size=.25 -- confirm this is intended.
iris_train_ftrs, iris_test_ftrs, iris_train_tgt, iris_test_tgt = (
    skms.train_test_split(iris.data, iris.target,
                          test_size=.90, random_state=42)
)

# Candidate classifiers, keyed by the label printed in the report.
models = {
    '3-NN': neighbors.KNeighborsClassifier(n_neighbors=3),
    '5-NN': neighbors.KNeighborsClassifier(n_neighbors=5),
    'NB': naive_bayes.GaussianNB(),
}

# Fit every model on the training rows, predict the test rows, and print
# its accuracy as one right-aligned line per model.
for name, model in models.items():
    fit = model.fit(iris_train_ftrs, iris_train_tgt)
    predictions = fit.predict(iris_test_ftrs)
    score = metrics.accuracy_score(iris_test_tgt, predictions)
    print("{:>4s}: {:0.2f}".format(name, score))

3-NN: 0.96
5-NN: 0.61
  NB: 0.81


## Timing

In [6]:
# Time loading the iris data set. -r1 limits the measurement to a single
# run; the number of loops inside that run is left for %timeit to choose.
%timeit -r1 datasets.load_iris()

3.74 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 100 loops each)


In [7]:
%%timeit -r1 -n1
(iris_train_ftrs, iris_test_ftrs, 
 iris_train_tgt,  iris_test_tgt) = skms.train_test_split(iris.data,
                                                         iris.target, 
                                                         test_size=.25)

1.43 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [8]:
%%timeit -r1

nb    = naive_bayes.GaussianNB()
fit   = nb.fit(iris_train_ftrs, iris_train_tgt)
preds = fit.predict(iris_test_ftrs)

metrics.accuracy_score(iris_test_tgt, preds)

1.15 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1000 loops each)


In [9]:
%%timeit -r1

knn   = neighbors.KNeighborsClassifier(n_neighbors=3)
fit   = knn.fit(iris_train_ftrs, iris_train_tgt)
preds = fit.predict(iris_test_ftrs)

metrics.accuracy_score(iris_test_tgt, preds)

10.5 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 100 loops each)


In [10]:
nb = naive_bayes.GaussianNB()
%timeit -r1 fit = nb.fit(iris_train_ftrs, iris_train_tgt)

knn   = neighbors.KNeighborsClassifier(n_neighbors=3)
%timeit -r1 fit = knn.fit(iris_train_ftrs, iris_train_tgt)

714 µs ± 0 ns per loop (mean ± std. dev. of 1 run, 1000 loops each)
517 µs ± 0 ns per loop (mean ± std. dev. of 1 run, 1000 loops each)


In [11]:
# Prediction cost alone: each model is fitted before its timed line, so the
# %timeit statement measures only the .predict() call on the test rows.
nb = naive_bayes.GaussianNB()
fit = nb.fit(iris_train_ftrs, iris_train_tgt)
%timeit -r1 preds = fit.predict(iris_test_ftrs)

# Same measurement for 3-nearest-neighbours. `fit` is rebound to the result
# of knn.fit here, so the order of these statements matters.
knn   = neighbors.KNeighborsClassifier(n_neighbors=3)
fit = knn.fit(iris_train_ftrs, iris_train_tgt)
%timeit -r1 preds = fit.predict(iris_test_ftrs)

333 µs ± 0 ns per loop (mean ± std. dev. of 1 run, 1000 loops each)
8.62 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 100 loops each)


## Memory

In [12]:
# Load the memory_profiler IPython extension (installed at the top of the
# notebook); the %%memit cells that follow rely on it.
%load_ext memory_profiler

In [13]:
%%memit
# Report peak memory for one Gaussian naive Bayes pass: construct the model,
# fit it on the training rows, and predict the test rows.
nb = naive_bayes.GaussianNB()
fit = nb.fit(iris_train_ftrs, iris_train_tgt)
preds = fit.predict(iris_test_ftrs)

peak memory: 184.56 MiB, increment: 0.09 MiB


In [14]:
%%memit
# Report peak memory for one k-nearest-neighbours pass: construct the model,
# fit it on the training rows, and predict the test rows.
# NOTE(review): the classifier is built with default arguments here, whereas
# the timing cells pass n_neighbors=3, and predict is called on `knn` rather
# than on `fit` -- confirm both differences are intended.
knn = neighbors.KNeighborsClassifier()
fit = knn.fit(iris_train_ftrs, iris_train_tgt)
preds = knn.predict(iris_test_ftrs)

peak memory: 184.58 MiB, increment: 0.01 MiB
