# Hands-On Machine Learning with Scikit-Learn, Keras & TensorFlow
## Chapter 10 - Introduction to Artificial Neural Networks with Keras
### Imports

In [None]:
from collections import namedtuple
import concurrent.futures
import hashlib
from pathlib import Path
import time

import cv2
import matplotlib.pyplot as plt
import pandas as pd
import plotly.graph_objects as go
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, Dense, Flatten, InputLayer

from tensorflow_2 import utils

In [None]:
# Show the installed TensorFlow version as the cell output.
tf.__version__

### Configure Notebook

In [None]:
# Load the extension that provides the `%tensorboard` magic used at the end
# of this notebook.
%load_ext tensorboard

### Variables

In [None]:
# Number of images per batch yielded by the data generators.
BATCH_SIZE = 64

# Root folder for every artifact this chapter reads or writes.
DATA_DIR = utils.package_dir() / 'data' / 'ch10'

# One image folder per dataset split.
TRAIN_DIR, VAL_DIR, TEST_DIR = (
    DATA_DIR / split for split in ('train', 'val', 'test')
)

# TensorBoard run directories are created underneath this folder.
LOG_DIR = DATA_DIR / 'logs'

### Functions

In [None]:
def get_run_logdir():
    """Return a new run log directory path under LOG_DIR.

    The directory name is ``run_`` followed by the current local time
    (``YYYY-MM-DD_HH-MM-SS``), so calls made in different seconds yield
    different paths. The directory itself is not created here.
    """
    run_name = time.strftime('run_%Y-%m-%d_%H-%M-%S')
    return LOG_DIR / run_name

---
## Load Data

In [None]:
# Fashion-MNIST as shipped with Keras: a (train, test) pair of
# (images, labels) tuples.
(x_train_full, y_train_full), (x_test, y_test) = \
    tf.keras.datasets.fashion_mnist.load_data()

# Quick sanity check of the training images' shape and dtype.
print(
    f'Train Shape: {x_train_full.shape}',
    f'Train Data Type: {x_train_full.dtype}',
    sep='\n',
)

### Classes

In [None]:
# Class names; the position in the tuple is used as the integer label.
classes = (
    't-shirt_top', 'trouser', 'pullover', 'dress', 'coat',
    'sandal', 'shirt', 'sneaker', 'bag', 'ankle_boot',
)

# Lookup table: integer label -> class name.
classes_idx = dict(enumerate(classes))

### Create Stratified Validation Set

In [None]:
# Hold out 10% of the full training set for validation. Stratifying on the
# labels keeps the class proportions the same in both parts; the fixed
# random_state makes the split repeatable.
x_train, x_val, y_train, y_val = train_test_split(
    x_train_full, y_train_full,
    stratify=y_train_full,
    test_size=0.1,
    random_state=42,
)

### Check Distributions

In [None]:
# One histogram of label counts per dataset split, drawn side by side so the
# class balance of the splits can be compared.
fig = go.Figure(data=[
    go.Histogram(x=labels, name=split_name)
    for labels, split_name in (
        (y_train, 'Train'),
        (y_val, 'Validation'),
        (y_test, 'Test'),
    )
])

fig.update_traces(opacity=0.7)
fig.update_layout(
    title_text='Dataset Distributions',
    # Label the x ticks with class names instead of integer labels.
    xaxis={
        'title': 'Class',
        'tickvals': tuple(classes_idx),
        'ticktext': tuple(classes_idx.values()),
    },
    yaxis_title_text='Count',
    barmode='group',
    bargroupgap=0.1,
)

fig.show()

### Save Data to Files

This more closely mimics a real use case, where the dataset is too large to be held in memory.

The images are saved using the following directory structure:
```
data_dir/
  train_dir/
    class_0/
      #.png
      #.png
    ...
    class_n/
      #.png
      #.png
  val_dir/
    class_0/
      #.png
      #.png
    ...
    class_n/
      #.png
      #.png
  test_dir/
    class_0/
      #.png
      #.png
    ...
    class_n/
      #.png
      #.png
```

In [None]:
# Each entry: (images, labels, destination directory) for one dataset split.
datasets = (
    (x_train, y_train, TRAIN_DIR),
    (x_val, y_val, VAL_DIR),
    (x_test, y_test, TEST_DIR),
    )

for x, y, directory in datasets:
    print(f'Saving Dataset Images: {directory}')
    # create one sub-directory per class present in this split
    for label in np.unique(y):
        (directory / classes_idx[label]).mkdir(parents=True,
                                               exist_ok=True)
    # save images, encoding them in parallel worker processes
    with concurrent.futures.ProcessPoolExecutor() as pool:
        # future -> destination path, so failed writes can be reported
        futures = {}
        for im, label in zip(x, y):
            # The file name is the SHA-256 of the pixel data, so re-running
            # the cell skips images that are already on disk.
            path = (directory / classes_idx[label]
                    / f'{hashlib.sha256(im).hexdigest()}.png')
            if not path.is_file():
                futures[pool.submit(cv2.imwrite, str(path), im)] = path
        failed = []
        for f in concurrent.futures.as_completed(futures):
            # cv2.imwrite signals failure through a falsy return value, not
            # an exception, so the result has to be checked explicitly.
            if not f.result():
                failed.append(futures[f])
    if failed:
        raise IOError(
            f'Failed to write {len(failed)} image(s) to {directory}; '
            f'first failure: {failed[0]}'
        )

### Preprocess Data

In [None]:
# Multiplying by this factor divides pixel values by the largest value
# representable in the image dtype.
scale_factor = 1 / np.iinfo(x_train.dtype).max

# Training images: rescaled and augmented with random horizontal flips.
im_gen_train = tf.keras.preprocessing.image.ImageDataGenerator(
    horizontal_flip=True,
    rescale=scale_factor,
    )

# Validation images: rescaled only, no augmentation.
im_gen_val = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=scale_factor,
    )

train_data_gen = im_gen_train.flow_from_directory(
    TRAIN_DIR,
    batch_size=BATCH_SIZE,
    color_mode='grayscale',
    class_mode='sparse',
    seed=42,
    shuffle=True,
    target_size=(28, 28),
    )

# Built from im_gen_val (not im_gen_train) so validation metrics are computed
# on un-augmented images.
val_data_gen = im_gen_val.flow_from_directory(
    VAL_DIR,
    batch_size=BATCH_SIZE,
    color_mode='grayscale',
    class_mode='sparse',
    seed=42,
    target_size=(28, 28),
    )

### Visualize Preprocessed Data

In [None]:
# Pull one batch from the training generator and preview its first 10 images
# on a 2 x 5 grid, each titled with its class name.
x, y = next(train_data_gen)

# class_indices maps class name -> index; invert it for index -> name lookups.
data_gen_classes = dict(
    (index, name) for name, index in train_data_gen.class_indices.items()
)

for n in range(10):
    image, label = x[n], y[n]
    ax = plt.subplot(2, 5, n + 1)
    ax.imshow(image, cmap='gray')
    ax.set_title(data_gen_classes[label])
    ax.axis('off')

## Keras Model

In [None]:
# Small CNN: two strided 3x3 convolutions followed by a 10-way softmax
# classifier over the flattened feature maps.
model = tf.keras.Sequential()
model.add(InputLayer(input_shape=(28, 28, 1)))
model.add(Conv2D(filters=32, kernel_size=3, strides=(2, 2),
                 activation='relu'))
model.add(Conv2D(filters=64, kernel_size=3, strides=(2, 2),
                 activation='relu'))
model.add(Flatten())
model.add(Dense(10, activation='softmax'))

# The sparse loss matches the integer labels produced by the generators
# (class_mode='sparse').
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Print a summary of the model defined above.
model.summary()

In [None]:
# Draw the model as a diagram; the returned object is shown as the cell output.
# NOTE(review): presumably needs pydot/graphviz installed — confirm.
tf.keras.utils.plot_model(model)

### Train Model

In [None]:
# Log this run to its own timestamped directory so runs can be compared in
# TensorBoard.
tensorboard_cb = tf.keras.callbacks.TensorBoard(get_run_logdir())
history = model.fit(
    train_data_gen,
    epochs=15,
    # len() of the directory iterator is already the number of batches per
    # epoch. Dividing it by BATCH_SIZE again would cut every epoch to a small
    # fraction of the training data.
    steps_per_epoch=len(train_data_gen),
    validation_data=val_data_gen,
    callbacks=[tensorboard_cb],
    )

### TensorBoard

In [None]:
%tensorboard --logdir LOG_DIR --port=6006