# Hands-On Machine Learning with Scikit-Learn, Keras & TensorFlow

## Chapter 3: Classification

---
### Imports

In [None]:
from pathlib import Path
from typing import Optional

import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.linear_model import SGDClassifier

from tensorflow_2.exceptions import InputError

### Variables

In [None]:
# Dataset directory for this chapter, expressed relative to the working directory.
DATA_DIR = Path('..', '..', 'data', 'ch3_classification')

---
## Load Data

In [None]:
# Ask for NumPy arrays explicitly. Recent scikit-learn releases return a pandas
# DataFrame for this dataset by default, and the cells below index rows
# positionally (``x[idx]``) and call ``.reshape`` on the result, which only
# works on arrays.
mnist = fetch_openml(
    'mnist_784',
    version=1,
    data_home=DATA_DIR.parent,  # downloaded files are cached under the shared data directory
    as_frame=False,
)
print(mnist['DESCR'])

In [None]:
# Cast the labels to uint8 so they can be compared against integer digits later on.
mnist['target'] = mnist['target'].astype(np.uint8)

# The first 60,000 rows form the training set; everything after is the test set.
x_train, x_test = mnist['data'][:60000], mnist['data'][60000:]
y_train, y_test = mnist['target'][:60000], mnist['target'][60000:]

In [None]:
def plot_example(dset: str='train', idx: int=0, score: Optional[bool]=None):
    """
    Show one digit image with its label in the title.

    Images and labels are read from the notebook-level ``x_train``/``y_train``
    or ``x_test``/``y_test`` variables, depending on ``dset``.

    :param dset: split to draw the example from, either `train` or `test`
    :param idx: position of the example within the chosen split
    :param score: optional model prediction; appended to the title when given
    :raises InputError: if ``dset`` is neither `train` nor `test`
    """
    # Pick the image/label pair for the requested split, rejecting anything else.
    if dset == 'train':
        images, labels = x_train, y_train
    elif dset == 'test':
        images, labels = x_test, y_test
    else:
        raise InputError(
            f'dset={dset}',
            f'Valid inputs for dset are "train" or "test"')

    # The prediction is only mentioned in the title when one was supplied.
    prediction_suffix = f'   Predict: {score}' if score is not None else ''

    # Each example is reshaped to a 28x28 grid for display.
    digit = images[idx].reshape(28, 28)
    plt.imshow(digit, cmap='binary')
    plt.title(f'Label: {labels[idx]}{prediction_suffix}')
    plt.axis('off')
    plt.show()
    
    
# Quick visual check: render the training example at index 10.
plot_example(dset='train', idx=10)

---
## Train Binary Classifier

Stochastic Gradient Descent (SGD) classifier:
- capable of handling very large datasets efficiently
- processes training instances independently, one at a time
    - well suited for online learning
- relies on randomness during training (set `random_state` for reproducible results)

In [None]:
# Binary targets for a "2 vs. not-2" detector: True where the label equals 2.
y_train_2, y_test_2 = (labels == 2 for labels in (y_train, y_test))

# Fixed seed so the stochastic training run is repeatable.
sgd_classifier = SGDClassifier(random_state=42).fit(x_train, y_train_2)
sgd_classifier

In [None]:
# Predict the first ten training digits with a single vectorised call rather
# than invoking the classifier once per image, then plot each digit alongside
# its prediction.
predictions = sgd_classifier.predict(x_train[:10])
for n, score in enumerate(predictions):
    plot_example(dset='train', idx=n, score=score)