# Hands-On Machine Learning with Scikit-Learn, Keras & TensorFlow
## Chapter 10 - Introduction to Artificial Neural Networks with Keras
### Imports

In [None]:
from collections import namedtuple
import concurrent.futures
import hashlib
import io
from pathlib import Path
import time
from typing import Dict, Optional

import cv2
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow.keras.layers

from tensorflow_2 import utils

In [None]:
# Display the installed TensorFlow version as the cell output.
tf.__version__

### Configure Notebook

In [None]:
# Load the TensorBoard notebook extension (needed for the %tensorboard magic below).
%load_ext tensorboard

### Functions

In [None]:
def get_run_logdir(desc: Optional[str] = None):
    """
    Generate path to new run log directory.

    The directory name is the current local time formatted as
    ``%Y_%m_%d_%H_%M_%S``, prefixed with ``desc`` and a dash when a
    description is supplied.

    :param desc: run description; left out of the name when None or empty
    :return: path under ``LOG_DIR`` with timestamp and optional description
    """
    # Format the timestamp on its own: a '%' inside ``desc`` must not be
    # interpreted as a strftime directive.
    timestamp = time.strftime('%Y_%m_%d_%H_%M_%S')
    # Without a description the name is just the timestamp (previously the
    # default produced a literal "None-" prefix).
    name = f'{desc}-{timestamp}' if desc else timestamp
    return LOG_DIR / name

### Variables

In [None]:
# Passed as the prefetch buffer size when the tf.data pipelines are configured.
AUTOTUNE = tf.data.experimental.AUTOTUNE

# Dataset and log locations, all rooted at <package dir>/data/ch10.
DATA_DIR = utils.package_dir() / 'data'/ 'ch10'
LOG_DIR = DATA_DIR / 'logs'
TRAIN_DIR = DATA_DIR / 'train'
VAL_DIR = DATA_DIR / 'val'
TEST_DIR = DATA_DIR / 'test'

# Training hyper-parameters.
BATCH_SIZE = 32
# Only referenced by the commented-out Sequential model in this notebook.
DROPOUT_P = 0.45
# Upper bound on epochs; an EarlyStopping callback is also passed to fit().
EPOCHS = 10000
# Interval (in epochs) used for validation_freq and for the TensorBoard
# image callbacks.
VAL_FREQ = 512 // BATCH_SIZE

# File names for saved models.
# NOTE: `best_model` is rebound to a loaded model object in the
# "Load Model" cell, so re-run this cell before reusing it as a file name.
best_model = 'best_model.h5'
model_ckpt = 'model_ckpt.h5'

# Output locations for the current run (timestamped at cell execution time).
RUN_DIR = get_run_logdir(f'baseline_cnn-batch_{BATCH_SIZE}')
PLOTS_DIR = RUN_DIR / 'plots'
BEST_MODEL = RUN_DIR / best_model
MODEL_CKPT = RUN_DIR / model_ckpt

# Hard-coded earlier run whose saved model/checkpoint is reloaded if present.
PREVIOUS_RUN = LOG_DIR / 'baseline_cnn-batch_32-2020_11_26_17_02_23'
PREVIOUS_BEST_MODEL = PREVIOUS_RUN / best_model
PREVIOUS_MODEL_CKPT = PREVIOUS_RUN / model_ckpt

---
## Load Data
[Fashion MNIST Dataset](https://keras.io/api/datasets/fashion_mnist/)

In [None]:
# Load the Fashion MNIST arrays: a (train images, train labels) pair and a
# (test images, test labels) pair.
(x_train_full, y_train_full), (x_test, y_test) = (
    tf.keras.datasets.fashion_mnist.load_data()
    )
# Report the shape and dtype of the full training image array.
print(f'Train Shape: {x_train_full.shape}')
print(f'Train Data Type: {x_train_full.dtype}')

### Create Stratified Validation Set

In [None]:
# Hold out 10% of the full training set for validation. The split is
# stratified on the labels and uses a fixed seed so it is reproducible.
x_train, x_val, y_train, y_val = train_test_split(
    x_train_full,
    y_train_full,
    test_size=0.1,
    random_state=42,
    stratify=y_train_full
    )

### Classes

In [None]:
# Class names, positioned so that the tuple index is the integer label
# stored in the y_* arrays.
classes = (
    't-shirt_top', 'trouser', 'pullover', 'dress', 'coat',
    'sandal', 'shirt', 'sneaker', 'bag', 'ankle_boot',
)
# Integer label -> class name.
CLASSES_IDX = dict(enumerate(classes))

### Check Distributions

In [None]:
# Grouped histogram of the label counts in each split, to check that the
# class distributions of train / validation / test agree.
fig = go.Figure()
for split_name, split_labels in (
        ('Train', y_train),
        ('Validation', y_val),
        ('Test', y_test),
):
    fig.add_trace(go.Histogram(x=split_labels, name=split_name))

fig.update_traces(opacity=0.7)

# Show class names instead of integer labels on the x axis.
class_axis = dict(
    title='Class',
    tickvals=tuple(CLASSES_IDX.keys()),
    ticktext=tuple(CLASSES_IDX.values()),
)
fig.update_layout(
    title_text='Dataset Distributions',
    xaxis=class_axis,
    yaxis_title_text='Count',
    bargroupgap=0.1,
    barmode='group',
)

fig.show()

### Save Data to Files

This more closely mimics a real use case in which the images are too large to be held in memory.

The images are saved using the following directory structure:
```
data_dir/
  train_dir/
    class_0/
      #.png
      #.png
    ...
    class_n/
      #.png
      #.png
  val_dir/
    class_0/
      #.png
      #.png
    ...
    class_n/
      #.png
      #.png
  test_dir/
    class_0/
      #.png
      #.png
    ...
    class_n/
      #.png
      #.png
```

In [None]:
# (images, labels, destination directory) for each split.
datasets = (
    (x_train, y_train, TRAIN_DIR),
    (x_val, y_val, VAL_DIR),
    (x_test, y_test, TEST_DIR),
    )

for x, y, directory in datasets:
    print(f'Saving Dataset Images: {directory}')
    # create one sub-directory per class present in this split
    for label in np.unique(y):
        (directory / CLASSES_IDX[label]).mkdir(parents=True,
                                               exist_ok=True)
    # save images in parallel; each file is named after the SHA-256 of its
    # pixel data, so re-running the cell skips images already on disk
    with concurrent.futures.ProcessPoolExecutor() as pool:
        futures = {}
        queued = set()
        for im, label in zip(x, y):
            path = (directory / CLASSES_IDX[label]
                    / f'{hashlib.sha256(im).hexdigest()}.png')
            # identical images map to the same path: queue each path once so
            # two workers never write the same file concurrently
            if path in queued or path.is_file():
                continue
            queued.add(path)
            futures[pool.submit(cv2.imwrite, str(path), im)] = path
        # cv2.imwrite reports failure through its return value rather than an
        # exception, so the result has to be checked explicitly
        failed = [
            futures[f]
            for f in concurrent.futures.as_completed(futures)
            if not f.result()
        ]
    if failed:
        raise OSError(
            f'Failed to write {len(failed)} image(s) under {directory}; '
            f'first failure: {failed[0]}'
        )

### Get Image Size

In [None]:
# Read one saved training image to discover the image dimensions.
x = next(TRAIN_DIR.glob('**/*.png'), None)
if x is None:
    raise FileNotFoundError(f'No .png images found under {TRAIN_DIR}')

# IMREAD_UNCHANGED keeps the file's own channel layout; the default flag
# converts everything to 3-channel BGR, which reported IM_CHANNELS == 3 for
# the single-channel images written by the previous cell.
im = cv2.imread(str(x), cv2.IMREAD_UNCHANGED)
if im is None:
    # cv2.imread signals an unreadable file by returning None
    raise OSError(f'Could not read image: {x}')

IM_HEIGHT, IM_WIDTH = im.shape[:2]
# A single-channel image is returned as a 2-D array (no channel axis).
IM_CHANNELS = im.shape[2] if im.ndim == 3 else 1

### Create Datasets

In [None]:
def _load_split(directory):
    """
    Build a batched, shuffled, grayscale dataset from a class-per-folder tree.

    :param directory: root directory containing one sub-directory per class
    :return: dataset yielding (images, integer labels) batches of BATCH_SIZE
    """
    return tf.keras.preprocessing.image_dataset_from_directory(
        directory,
        labels='inferred',
        label_mode='int',
        color_mode='grayscale',
        batch_size=BATCH_SIZE,
        # image_size is not passed, so the library default applies.
        # image_size=(IM_HEIGHT, IM_WIDTH),  # TODO: add Resizing layer to model
        seed=42,
        shuffle=True,
    )


train_data = _load_split(TRAIN_DIR)
train_classes = train_data.class_names

val_data = _load_split(VAL_DIR)
val_classes = val_data.class_names

test_data = _load_split(TEST_DIR)
test_classes = test_data.class_names

### Configure Prefetching

In [None]:
# Cache each dataset and prefetch upcoming batches, using AUTOTUNE as the
# prefetch buffer size.
train_data = train_data.cache().prefetch(buffer_size=AUTOTUNE)
val_data = val_data.cache().prefetch(buffer_size=AUTOTUNE)
test_data = test_data.cache().prefetch(buffer_size=AUTOTUNE)

### Visualize Data

In [None]:
# Take the first batch from the training dataset and show its first ten
# images in a 2 x 5 grid, each titled with its class name.
x, y = next(iter(train_data.take(1)))
for n in range(10):
    ax = plt.subplot(2, 5, n + 1)
    ax.imshow(x[n], cmap='gray')
    # y[n] is the integer label; train_classes maps it to the class name
    ax.set_title(train_classes[y[n]])
    ax.axis('off')

### Create Data Generators

In [None]:
# Rescale pixel values by 1 / (largest value of the training array's integer
# dtype); presumably 1/255 for uint8 images - the dtype is printed in the
# "Load Data" cell.
scale_factor = 1 / np.iinfo(x_train.dtype).max

# Training generator: rescaling plus random horizontal flips as augmentation.
im_gen_train = tf.keras.preprocessing.image.ImageDataGenerator(
    horizontal_flip=True,
    rescale=scale_factor,
    )

# Validation/test generator: rescaling only, no augmentation.
im_gen_val = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=scale_factor,
    )

train_data_gen = im_gen_train.flow_from_directory(
    TRAIN_DIR,
    batch_size=BATCH_SIZE,
    color_mode='grayscale',
    class_mode='sparse',
    seed=42,
    shuffle=True,
    target_size=(28, 28),
    )
# Invert class_indices (name -> index) into index -> name for titles.
train_classes = {v: k for k, v in train_data_gen.class_indices.items()}

# NOTE(review): `shuffle` is not passed for the validation and test
# generators, so the library default applies (documented as True). Code that
# pairs predictions with `val_data_gen.classes` needs a fixed order - confirm.
val_data_gen = im_gen_val.flow_from_directory(
    VAL_DIR,
    batch_size=BATCH_SIZE,
    color_mode='grayscale',
    class_mode='sparse',
    seed=42,
    target_size=(28, 28),
    )
val_classes = {v: k for k, v in val_data_gen.class_indices.items()}

test_data_gen = im_gen_val.flow_from_directory(
    TEST_DIR,
    batch_size=BATCH_SIZE,
    color_mode='grayscale',
    class_mode='sparse',
    seed=42,
    target_size=(28, 28),
    )

### Visualize Preprocessed Data

In [None]:
# Pull one batch from the training generator (after rescaling/augmentation)
# and show its first ten images in a 2 x 5 grid with their class names.
x, y = next(train_data_gen)
for n in range(10):
    ax = plt.subplot(2, 5, n + 1)
    ax.imshow(x[n], cmap='gray')
    # train_classes is the index -> name dict built from class_indices
    ax.set_title(train_classes[y[n]])
    ax.axis('off')

## Keras Model

### Sequential Model

In [None]:
# model = tf.keras.Sequential([
#     InputLayer(input_shape=(28, 28, 1)),
#     Conv2D(filters=32, kernel_size=3, strides=(2, 2), activation='relu'),
#     Dropout(DROPOUT_P),
#     Conv2D(filters=64, kernel_size=3, strides=(2, 2), activation='relu'),
#     Dropout(DROPOUT_P),
#     Flatten(),
#     Dense(10, activation='softmax'),
#     ])

### Functional Model
Allows multiple inputs and outputs

In [None]:
# Functional-API CNN: two strided convolutions followed by a softmax
# classifier over the 10 classes.
# The layer classes are referenced through tf.keras because the notebook
# never imports Input / Conv2D / Flatten / Dense as bare names.
layers = tf.keras.layers

input_0 = tf.keras.Input(shape=(28, 28, 1), name='input_0')
hidden_0 = layers.Conv2D(
    filters=32, kernel_size=3, strides=(2, 2), activation='relu')(input_0)
hidden_1 = layers.Conv2D(
    filters=64, kernel_size=3, strides=(2, 2), activation='relu')(hidden_0)
flatten_1 = layers.Flatten()(hidden_1)
output_0 = layers.Dense(10, activation='softmax', name='output_0')(flatten_1)
model = tf.keras.Model(inputs=[input_0], outputs=[output_0])

### Compile Model

<br>
<font color='red'>
    WARNING:<br>
    With the current version of Keras, if a saved model is loaded but not compiled, inference scores are random.
</font>

In [None]:
def compile_model(m):
    """
    Compile a model with the notebook's training configuration.

    :param m: object exposing a Keras-style ``compile`` method
    :return: whatever ``m.compile`` returns
    """
    settings = {
        'loss': 'sparse_categorical_crossentropy',
        'optimizer': 'adam',
        'metrics': ['accuracy'],
    }
    return m.compile(**settings)

In [None]:
# Resume from an earlier run when one is available: prefer its best model,
# fall back to its last checkpoint, otherwise keep the freshly built model.
for saved_model_path in (PREVIOUS_BEST_MODEL, PREVIOUS_MODEL_CKPT):
    if saved_model_path.exists():
        model = tf.keras.models.load_model(saved_model_path)
        break

# Always (re)compile, including after loading a saved model.
compile_model(model)

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

### TensorBoard

<br>
<font color=red>
    Start TensorBoard before fitting the model.
</font>

#### Create Confusion Matrix

In [None]:
def log_confusion_matrix(epoch, logs=None):
    """
    Save confusion matrix to be displayed in TensorBoard.

    Runs only on epochs that are a positive multiple of ``VAL_FREQ``.

    :param epoch: epoch index supplied by the Keras callback
    :param logs: metrics dictionary supplied by the Keras callback (unused)
    """
    if epoch <= 0 or epoch % VAL_FREQ != 0:
        return

    # Label index -> class name exactly as the generator assigned them. The
    # generator derives its indices from the directory names, which is not
    # the same order as CLASSES_IDX.
    class_names = {
        idx: name for name, idx in val_data_gen.class_indices.items()
    }

    # Predict batch by batch and keep the labels that arrive with each batch.
    # `val_data_gen.classes` is in directory order, so pairing it with
    # predictions from a shuffling generator would misalign rows.
    actual, predicted = [], []
    for step in range(len(val_data_gen)):
        images, labels = val_data_gen[step]
        probabilities = np.asarray(model.predict_on_batch(images))
        actual.append(np.asarray(labels).astype(int))
        predicted.append(np.argmax(probabilities, axis=1))
    if not actual:
        return

    cm = tf.math.confusion_matrix(
        np.concatenate(actual),
        np.concatenate(predicted),
        # fix the matrix size even if a class is absent from the predictions
        num_classes=len(class_names),
    )
    fig = confusion_matrix_fig(cm, class_names)
    cm_image = plotly_static_image(fig)

    file_writer = tf.summary.create_file_writer(str(PLOTS_DIR / 'cm'))
    with file_writer.as_default():
        tf.summary.image("Confusion Matrix", cm_image, epoch)
    # push the image to disk now instead of waiting for the periodic flush
    file_writer.flush()

        
def confusion_matrix_fig(cm: tf.Tensor, classes: Dict[int, str]):
    """
    Generate confusion matrix figure.

    Each row is normalized by its total so cells hold per-class rates, and
    the diagonal (correct predictions) is zeroed so only errors are shown.

    :param cm: square confusion matrix (r, c = actual, predicted)
    :param classes: dictionary with class index as key and class name as value
    :return: confusion matrix figure
    """
    counts = np.asarray(cm, dtype=np.float64)
    n_classes = counts.shape[0]

    # Rows with no samples stay at 0 instead of becoming NaN (0 / 0).
    row_totals = counts.sum(axis=1, keepdims=True)
    normalized = np.divide(
        counts,
        row_totals,
        out=np.zeros_like(counts),
        where=row_totals > 0,
    )
    np.fill_diagonal(normalized, 0.0)

    # Axis coordinates follow the matrix size rather than a fixed 10.
    ticks = list(range(n_classes))
    fig = px.imshow(
        normalized,
        color_continuous_scale='gray',
        labels=dict(x='Predicted', y='Actual', color='Error Rate'),
        title='Confusion Matrix Error Rates',
        x=ticks,
        y=ticks,
        )

    fig.update_layout(
        title_text='Confusion Matrix',
        xaxis=dict(
            title='Predicted Class',
            tickvals=tuple(classes.keys()),
            ticktext=tuple(classes.values()),
        ),
        yaxis=dict(
            title='Actual Class',
            tickvals=tuple(classes.keys()),
            ticktext=tuple(classes.values()),
        ),
        )

    return fig


def plotly_static_image(fig):
    """
    Convert Plotly figure to a static image.

    :param fig: Plotly figure
    :return: the figure's PNG rendering decoded with 3 channels, with a
        leading axis of size 1 added
    """
    png_bytes = fig.to_image(format='png')
    decoded = tf.image.decode_png(png_bytes, channels=3)
    return tf.expand_dims(decoded, axis=0)

#### Create Sample Images for TensorBoard

In [None]:
def log_prediction_examples(epoch, logs=None):
    """
    Save prediction examples to be displayed in TensorBoard.

    Runs only on epochs that are a positive multiple of ``VAL_FREQ``.

    :param epoch: epoch index supplied by the Keras callback
    :param logs: metrics dictionary supplied by the Keras callback (unused)
    """
    due = epoch > 0 and epoch % VAL_FREQ == 0
    if not due:
        return

    im = pyplot_static_image(prediction_examples_fig())

    log_dir = str(PLOTS_DIR / 'predictions')
    file_writer = tf.summary.create_file_writer(log_dir)
    with file_writer.as_default():
        tf.summary.image("Predictions", im, epoch)
    

def prediction_examples_fig(n_examples: int = BATCH_SIZE):
    """
    Image and prediction percentages.

    Draws one validation batch and, for each example, shows the image next to
    a bar chart of the predicted class probabilities. Titles are blue for
    correct predictions and red (with the actual class) for wrong ones.

    :param n_examples: number of images to evaluate; capped at the size of
        the batch actually returned by the validation generator
    :return: Matplotlib figure with one image/bar-chart pair per example
    """
    x, y = next(val_data_gen)
    # Only the images are model input; the labels are kept for comparison.
    predict = model.predict(x)
    y_hat = np.argmax(predict, axis=1)
    n_classes = predict.shape[1]

    # The generator's last batch can be shorter than BATCH_SIZE.
    n_examples = min(n_examples, BATCH_SIZE, len(x))
    cols = 2
    # keep at least one row so the figure/grid stay valid
    rows = max(1, int(np.ceil(n_examples / cols)))
    fig = plt.figure(figsize=(8 * cols, 4 * rows))
    outer = gridspec.GridSpec(rows, cols, wspace=0.4, hspace=0.4)

    for n in range(n_examples):
        inner = gridspec.GridSpecFromSubplotSpec(
            1, 2, subplot_spec=outer[n], wspace=0.1, hspace=0.1)
        # labels arrive as floats from the sparse generator
        actual = int(y[n])

        # Image
        ax = plt.Subplot(fig, inner[0])
        ax.imshow(x[n], cmap='gray')
        title = f'{val_classes[y_hat[n]]} {predict[n].max():.0%}'
        if y_hat[n] == actual:
            color = 'blue'
        else:
            color = 'red'
            title = title + f'\nActual: {val_classes[actual]}'
        ax.set_title(title, fontsize=20, color=color)
        ax.axis('off')
        fig.add_subplot(ax)

        # Predictions
        ax = plt.Subplot(fig, inner[1])
        bar = ax.bar(range(n_classes), predict[n], color='grey')
        # predicted class in red, actual class in blue (blue wins when equal)
        bar[y_hat[n]].set_color('red')
        bar[actual].set_color('blue')
        ax.set_title(f'Example: {n}', fontsize=20, color=color)
        ax.axis('off')
        ax.set_ylim([0, 1])
        fig.add_subplot(ax)

    return fig


def pyplot_static_image(fig):
    """
    Convert Matplotlib pyplot figure to a static image.

    The figure is closed once it has been rendered.

    :param fig: pyplot figure
    :return: the figure's PNG rendering decoded with 3 channels, with a
        leading axis of size 1 added
    """
    with io.BytesIO() as buffer:
        fig.savefig(buffer, format='png')
        png_bytes = buffer.getvalue()
    plt.close(fig)
    decoded = tf.image.decode_png(png_bytes, channels=3)
    return tf.expand_dims(decoded, axis=0)

#### Start TensorBoard

In [None]:
# Launch TensorBoard inline, reading runs from LOG_DIR, on port 6006.
%tensorboard --logdir $LOG_DIR --port=6006

### Callbacks

In [None]:
# Periodically save the model to MODEL_CKPT.
# NOTE(review): an integer save_freq is presumably counted in batches, not
# epochs - confirm against the Keras ModelCheckpoint documentation.
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    MODEL_CKPT,
    save_freq=32,
    )

# Log a confusion-matrix image to TensorBoard at the end of selected epochs.
confusion_matrix_cb = tf.keras.callbacks.LambdaCallback(
    on_epoch_end=log_confusion_matrix
    )

# Stop when the training loss has not improved for 16 epochs and restore the
# best weights. The training loss is monitored; validation only runs every
# VAL_FREQ epochs (see validation_freq in fit()).
early_stopping_cb = tf.keras.callbacks.EarlyStopping(
    monitor='loss',
    patience=16,
    restore_best_weights=True,
    )

# Log example predictions to TensorBoard at the end of selected epochs.
prediction_examples_cb = tf.keras.callbacks.LambdaCallback(
    on_epoch_end=log_prediction_examples
    )

# Standard TensorBoard logging into this run's directory.
tensorboard_cb = tf.keras.callbacks.TensorBoard(RUN_DIR)

### Train Model

`fit()` method
- if the dataset is skewed, add the `class_weight` argument
- use the `sample_weight` argument if the reliability of the labels differs per instance (e.g. experts evaluated some labels, while others were labeled by an algorithm)

NOTE:
Accuracy is not calculated at the same point in an epoch for the training dataset as for the validation dataset.
- Validation: calculated at the ***end*** of each epoch
- Training: running mean ***during*** each epoch

To compensate, the training metrics should be shifted by $\frac{1}{2}$ an epoch to the left.

In [None]:
# Train on the augmented training generator, validating every VAL_FREQ
# epochs, with checkpointing, early stopping and TensorBoard logging.
history = model.fit(
    train_data_gen,
    epochs=EPOCHS,
    # NOTE(review): Keras documents verbose values 0, 1 and 2; confirm what
    # 4 is intended to do.
    verbose=4,
    validation_data=val_data_gen,
    validation_freq=VAL_FREQ,
    callbacks=[
        checkpoint_cb,
        confusion_matrix_cb,
        early_stopping_cb,
        prediction_examples_cb,
        tensorboard_cb,
    ],
    # NOTE(review): len(train_data_gen) is presumably already a batch count,
    # so dividing by BATCH_SIZE again makes each "epoch" cover only part of
    # the training set - confirm this is intended.
    steps_per_epoch=2 * len(train_data_gen) // BATCH_SIZE,
    workers=2,
    use_multiprocessing=False,
    )

## Evaluate Test Set
- Estimate generalization error

In [None]:
# Evaluate the trained model on the held-out test generator.
model.evaluate(test_data_gen)

## Save Model

In [None]:
# Save the trained model to this run's directory in HDF5 format.
model.save(BEST_MODEL, save_format='h5')

### Save Model Diagram

In [None]:
# Write a diagram of the model architecture into this run's directory.
tf.keras.utils.plot_model(model, to_file=RUN_DIR / 'model_plot.png')

## Load Model

In [None]:
# Reload the model that was just saved and compile it before use.
# NOTE: this rebinds `best_model`, which the "Variables" cell defined as the
# file name 'best_model.h5'.
best_model = tf.keras.models.load_model(BEST_MODEL)
compile_model(best_model)

### Verify Results

In [None]:
# Evaluate the reloaded model on the test generator to compare with the
# in-memory model's result above.
best_model.evaluate(test_data_gen)