In [None]:
import os
import time

import numpy as np
import tensorflow as tf

In [None]:
# Training hyper-parameters, kept together so they can be tuned in one place.
TRAIN_EPOCHS = 10 # Number of epochs to train; handed to model.fit() by train_model()
OUTPUTS_MIDDLE_LAYER = 1024 # Number of units for the hidden Dense layer built in create_model()

In [None]:
def mnist_datasets(path='/mnt/shared/ezua-tutorials/Data-Science/Kubeflow-GPU/mnist.npz'):
    """Load the MNIST arrays and prepare them for training.

    Args:
        path: Location of the ``mnist.npz`` archive, forwarded unchanged to
            ``tf.keras.datasets.mnist.load_data``. Defaults to the shared
            tutorial volume so existing zero-argument calls keep working.

    Returns:
        A 4-tuple ``(x_train, x_test, y_train, y_test)``. Note the order:
        both image arrays come first, then both label arrays. Images are
        divided by a float32 ``255``; labels are cast to ``int64``.
    """
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data(path=path)
    # Dividing by a float32 scalar (rather than a Python float) scales the
    # pixel values without promoting the arrays to float64.
    x_train, x_test = x_train / np.float32(255), x_test / np.float32(255)
    y_train, y_test = y_train.astype(np.int64), y_test.astype(np.int64)
    return x_train, x_test, y_train, y_test

In [None]:
def create_model():
    """Build the (uncompiled) classifier used for both timing runs.

    The network is a three-layer ``Sequential`` stack: a ``Flatten`` layer
    declared with ``input_shape=(28, 28)``, a ReLU ``Dense`` layer with
    ``OUTPUTS_MIDDLE_LAYER`` units, and a 10-unit softmax ``Dense`` layer.

    Returns:
        The freshly constructed ``tf.keras.models.Sequential`` instance.
    """
    # Layers are created in stack order, exactly as they will be chained.
    flatten_input = tf.keras.layers.Flatten(input_shape=(28, 28))
    hidden = tf.keras.layers.Dense(OUTPUTS_MIDDLE_LAYER, activation='relu')
    class_scores = tf.keras.layers.Dense(10, activation='softmax')

    return tf.keras.models.Sequential([flatten_input, hidden, class_scores])

In [None]:
def compile_model(model):
    """Configure ``model`` for training by calling its ``compile`` method.

    The optimizer is ``'adam'``, the loss is
    ``'sparse_categorical_crossentropy'`` and ``'accuracy'`` is the only
    tracked metric. Nothing is returned; the object passed in is the one
    that gets configured.

    Args:
        model: Object exposing a Keras-style ``compile(**kwargs)`` method.
    """
    training_setup = {
        'optimizer': 'adam',
        'loss': 'sparse_categorical_crossentropy',
        'metrics': ['accuracy'],
    }
    model.compile(**training_setup)

In [None]:
def train_model(model, x_train, x_test, y_test, y_train, epochs=None):
    """Fit ``model`` and report how long the fit took.

    Note the positional order of the data arguments: it is
    ``x_train, x_test, y_test, y_train`` (not the order returned by
    ``mnist_datasets``), so callers must pass the arrays accordingly.

    Args:
        model: Compiled model exposing a Keras-style ``fit`` method.
        x_train: Training inputs.
        x_test: Inputs used as validation data during fitting.
        y_test: Labels paired with ``x_test``.
        y_train: Labels paired with ``x_train``.
        epochs: Number of epochs to train. ``None`` (the default) falls back
            to the notebook-level ``TRAIN_EPOCHS`` constant.

    Returns:
        ``(history, duration_total)``: whatever ``model.fit`` returned and
        the elapsed time of the fit in seconds.
    """
    if epochs is None:
        epochs = TRAIN_EPOCHS

    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way a time.time() difference can.
    start = time.perf_counter()
    history = model.fit(
        x_train, y_train, epochs=epochs,
        validation_data=(x_test, y_test)
    )
    duration_total = time.perf_counter() - start
    print()
    print('Total time %f sec' % (duration_total))
    return history, duration_total

In [None]:
# Clean previously saved models: if an earlier run left an export tree at
# model_dir, delete it recursively so the save cell further down writes into
# a fresh directory. model_dir is reused by that cell to build export paths.
model_dir = '/mnt/user/mnist-gpu-test'
if tf.io.gfile.exists(model_dir):
    tf.io.gfile.rmtree(model_dir)

In [None]:
# Load dataset once; the same arrays feed both the CPU and the GPU training cells.
# Return order is images first (train, test), then labels (train, test).
x_train, x_test, y_train, y_test = mnist_datasets()

In [None]:
# Create dir for Kale: only when a /marshal directory is present (presumably
# the Kale marshalling volume — confirm), create the model_mnist.tfkeras
# sub-directory inside it. Without /marshal this cell does nothing.
if tf.io.gfile.exists('/marshal'):
    tf.io.gfile.mkdir('/marshal/model_mnist.tfkeras')

In [None]:
# CPU run: build, compile and fit a fresh model with ops requested on the CPU.
# train_model() also returns the elapsed fit time, kept as cpu_duration for the
# timing comparison at the end of the notebook.
with tf.device('/CPU:0'):
    model_mnist_cpu = create_model()
    compile_model(model_mnist_cpu)
    history_cpu, cpu_duration = train_model(
        model_mnist_cpu, x_train, x_test, y_test, y_train
    )

# Reporting only reads the history dict, so it does not need the device scope.
# cpu_acc is the last-epoch value of the 'accuracy' history entry.
cpu_acc = history_cpu.history['accuracy'][-1]
print(f"Accuracy: {cpu_acc * 100}%")

In [None]:
# GPU run: same build/compile/fit sequence as the CPU cell, but with ops
# requested on the first GPU so the two elapsed times can be compared.
# The device listing is a configuration query, so it is issued before the
# device scope is entered; the order of the printed output is unchanged.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
print()
with tf.device('/GPU:0'):
    model_mnist_gpu = create_model()
    compile_model(model_mnist_gpu)
    history_gpu, gpu_duration = train_model(
        model_mnist_gpu, x_train, x_test, y_test, y_train
    )

# Reporting only reads the history dict, so it does not need the device scope.
# gpu_acc is the last-epoch value of the 'accuracy' history entry.
gpu_acc = history_gpu.history['accuracy'][-1]
print(f"Accuracy: {gpu_acc * 100}%")

In [None]:
# Save model: each trained network is exported with tf.saved_model.save into
# its own <model_dir>/<device>/export directory.
export_path_cpu = os.path.join(model_dir, 'cpu', 'export')
tf.saved_model.save(model_mnist_cpu, export_path_cpu)

export_path_gpu = os.path.join(model_dir, 'gpu', 'export')
tf.saved_model.save(model_mnist_gpu, export_path_gpu)

In [None]:
# Keep the better of the two final-epoch accuracies as the single headline figure.
# NOTE(review): both inputs are read from the 'accuracy' history key, which looks
# like the training-set metric rather than 'val_accuracy' — confirm this is intended.
model_accuracy = max(cpu_acc, gpu_acc)

In [None]:
# Side-by-side elapsed fit times (seconds) from the CPU and GPU training cells,
# emitted as three lines by a single print call.
print(
    "======= Timing =======",
    f"CPU time: {cpu_duration} sec",
    f"GPU time: {gpu_duration} sec",
    sep="\n",
)

In [None]:
# Print the headline accuracy (the larger of the CPU and GPU values) as a raw
# fraction, not a percentage.
print(model_accuracy)