In [None]:
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

<a target="_blank" href="https://colab.research.google.com/github/GoogleCloudPlatform/keras-idiomatic-programmer/blob/master/workshops/Idiomatic%20Programmer%20-%20handbook%203%20-%20Codelab%202.ipynb">
<img src="https://www.tensorflow.org/images/colab_logo_32px.png" />Run in Google Colab</a>

# Idiomatic Programmer Code Labs

## Code Labs #2 - Get Familiar with Training

## Prerequisites:

    1. Familiar with Python
    2. Completed Handbook 3/Part 11: Training & Deployment

## Objectives:

    1. Pretraining for Weight Initialization
    2. Early Stopping during Training
    3. Model Saving and Restoring

## Pretraining

We are going to do some pre-training runs to find a good weight initialization. Each time the weights are initialized, they are randomly chosen from the selected distribution (i.e., kernel_initializer).

We will do the following:

    1. Make three instances of the same model, each with their own weight initialization.
    2. Take a subset of the training data (20%)
    3. Train each model instance for a few epochs.
    4. Pick the instance with the highest validation accuracy.
    5. Use this instance to train the model with the entire training data.

Fill in the blanks (replace each ??) and make sure the code passes the Python interpreter.

In [None]:
from keras import Sequential, optimizers
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras.utils import to_categorical
from keras.datasets import cifar10
import numpy as np

# Fetch CIFAR-10 as (images, labels) pairs for the training and test splits.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Divide the pixel values by 255 and store them as 32-bit floats.
x_train, x_test = [(images / 255.0).astype(np.float32) for images in (x_train, x_test)]

# Convert the class labels into one-hot vectors.
y_train, y_test = [to_categorical(labels) for labels in (y_train, y_test)]

# Carve out the first 10,000 training examples (20%) for the short weight-initialization
# trials. Generally all of the training data would be used for this purpose; the subset
# simply keeps the lab brief.
x_tmp, y_tmp = x_train[:10000], y_train[:10000]

# We will use this function to build a simple CNN, using He-Normal initialization for the weights.
def convNet(input_shape, nclasses):
    """Build a small, uncompiled sequential CNN classifier.

    The stack is: two 3x3 ReLU convolutions (32 then 64 filters), dropout at
    0.25, 2x2 max pooling, flatten, a 128-unit ReLU dense layer, and a softmax
    output layer. The convolution and hidden dense layers use the 'he_normal'
    kernel initializer; the output layer does not set one explicitly.

    Args:
        input_shape: Forwarded to the first convolution as `input_shape`.
        nclasses: Number of units in the final softmax layer.

    Returns:
        The assembled Sequential model (the caller is expected to compile it).
    """
    # Options shared by every ReLU layer in the network.
    he_relu = dict(activation='relu', kernel_initializer='he_normal')

    layers = [
        Conv2D(32, kernel_size=(3, 3), input_shape=input_shape, **he_relu),
        Conv2D(64, (3, 3), **he_relu),
        Dropout(0.25),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        Dense(128, **he_relu),
        Dense(nclasses, activation='softmax'),
    ]
    return Sequential(layers)

# Let's make 3 versions of the model, each with their own weight initialization.
# Every pass through the loop calls convNet() again, so each trial gets a newly built
# model instance rather than reusing the previous one.
models = []
for _ in range(3):
    model = convNet((32, 32, 3), 10)
    # We will use (assume best) learning rate of 0.001
    # NOTE(review): newer Keras releases spell this argument `learning_rate` -- confirm
    # against the installed version before changing it.
    model.compile(loss='categorical_crossentropy', optimizer=optimizers.Adam(lr=0.001), metrics=['accuracy'])

    # Let's do the short training on the 20% subset (x_tmp, y_tmp) for 5 epochs,
    # holding out 10% of that subset for validation.
    model.fit(x_tmp, y_tmp, epochs=5, batch_size=32, validation_split=0.1, verbose=1)
    
    # Save a copy of the model
    # HINT: We are saving the in-memory partially trained model
    # (The ?? below is the intentional exercise blank -- replace it with your answer.)
    models.append(??)

In [None]:
# Now let's pick the model instance with the highest val_acc and train it with the full training data
# HINT: Index will be 0, 1 or 2
# The chosen instance is bound to a single name so that the model evaluated below is
# guaranteed to be the same one that was just trained; change only the index here.
best_model = models[1]
best_model.fit(x_train, y_train, epochs=10, batch_size=32, validation_split=0.1, verbose=1)

# Evaluate the fully trained instance on the test data. Evaluating `model` here would
# score whichever instance the pretraining loop built last, not the one selected above.
score = best_model.evaluate(x_test, y_test)
print(score)

## EarlyStopping

Note that the training accuracy in the above example keeps going up, but at some point the validation loss swings back up and the validation accuracy goes down. That means you are overfitting -- even with the dropout.

Let's now look at how to decide how many epochs we should run. We can use the early stopping technique. In this case, we set the number of epochs larger than we anticipate, and then set an objective to reach. When the objective is reached, we stop training.

In [None]:
from keras.callbacks import EarlyStopping

# Let's try this with a fresh model, and not care about the weight initialization this time.
model = convNet((32, 32, 3), 10)
model.compile(loss='categorical_crossentropy', optimizer=optimizers.Adam(lr=0.001), metrics=['accuracy'])

# Set an early stop (termination of training) when the validation loss has stopped
# reducing (default setting).
earlystop = EarlyStopping(monitor='val_loss')

# Train the model and use early stop to stop training early if the validation loss
# stops decreasing. Up to 10 epochs are requested, and 10% of the training data is
# held out for validation (validation_split=0.1).
# HINT: what goes in the callbacks list is the instance (variable) of the EarlyStopping object
# (The ?? below is the intentional exercise blank -- replace it with your answer.)
model.fit(x_train, y_train, epochs=10, batch_size=32, validation_split=0.1, verbose=1, callbacks=[??])

## Model Saving and Restoring

Let's do a basic store of the model and weights to disk, and then mimic restoring the model from disk to in memory.

In [None]:
from keras.models import load_model

# Write the model, together with its trained weights and biases, to disk.
saved_path = 'mymodel.h5'
model.save(saved_path)

# Read the file back into a separate model instance (mymodel instead of model).
mymodel = load_model(saved_path)

# Check that the restored model really works by evaluating it on the test data.
score = mymodel.evaluate(x_test, y_test)
print(score)


## End of Code Lab