In [None]:
import os
import time
import urllib3
import requests

import yaml
import mlflow
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Silence urllib3's InsecureRequestWarning for the rest of the kernel session.
# NOTE(review): the request issued later in this notebook uses plain http, so this
# suppression may be unnecessary — confirm before keeping it.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

In [None]:
TRAIN_EPOCHS = 10 # Number of epochs passed to model.fit() by train_model()
OUTPUTS_MIDDLE_LAYER = 1024 # Number of units in the hidden Dense layer built by create_model()

In [None]:
def mnist_datasets():
    """Load the MNIST archive from the shared tutorial volume and prepare it.

    Image arrays are divided by ``np.float32(255)`` and label arrays are cast
    to ``np.int64``.

    Returns:
        tuple: ``(x_train, x_test, y_train, y_test)`` — both image arrays
        come first, followed by both label arrays.
    """
    train_split, test_split = tf.keras.datasets.mnist.load_data(
        path='/mnt/shared/ezua-tutorials/Data-Science/Kubeflow-GPU/mnist.npz'
    )
    pixel_divisor = np.float32(255)

    x_train = train_split[0] / pixel_divisor
    x_test = test_split[0] / pixel_divisor
    y_train = train_split[1].astype(np.int64)
    y_test = test_split[1].astype(np.int64)
    return x_train, x_test, y_train, y_test

In [None]:
def create_model():
    """Assemble the (uncompiled) Keras classifier used by this notebook.

    The stack is: ``Flatten`` over a 28x28 input, a ReLU ``Dense`` layer with
    ``OUTPUTS_MIDDLE_LAYER`` units, and a 10-unit softmax ``Dense`` layer.

    Returns:
        tf.keras.models.Sequential: the assembled model.
    """
    keras_layers = tf.keras.layers
    layer_stack = [
        keras_layers.Flatten(input_shape=(28, 28)),
        keras_layers.Dense(OUTPUTS_MIDDLE_LAYER, activation='relu'),
        keras_layers.Dense(10, activation='softmax'),
    ]
    return tf.keras.models.Sequential(layer_stack)

In [None]:
def compile_model(model):
    """Compile ``model`` in place; nothing is returned.

    The model is configured with the ``'adam'`` optimizer, the
    ``'sparse_categorical_crossentropy'`` loss and the ``'accuracy'`` metric.

    Args:
        model: object exposing a Keras-style ``compile(optimizer=, loss=, metrics=)``.
    """
    compile_options = {
        'optimizer': 'adam',
        'loss': 'sparse_categorical_crossentropy',
        'metrics': ['accuracy'],
    }
    model.compile(**compile_options)

In [None]:
def train_model(model, x_train, x_test, y_test, y_train):
    """Fit ``model`` for ``TRAIN_EPOCHS`` epochs and measure how long it takes.

    Note the unusual positional order of the data arguments
    (``x_train, x_test, y_test, y_train``); it is kept for existing callers.

    Args:
        model: compiled model exposing a Keras-style ``fit``.
        x_train: training inputs.
        x_test: inputs used as validation data during fitting.
        y_test: targets used as validation data during fitting.
        y_train: training targets.

    Returns:
        tuple: ``(history, duration_total)`` where ``history`` is whatever
        ``model.fit`` returns and ``duration_total`` is the elapsed fitting
        time in seconds.
    """
    # Use the stable name rather than the tf.config.experimental alias.
    print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
    print()
    # model fitting
    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # by wall-clock adjustments the way time.time() differences can.
    start = time.perf_counter()
    history = model.fit(
        x_train, y_train, epochs=TRAIN_EPOCHS,
        validation_data=(x_test, y_test)
    )
    duration_total = time.perf_counter() - start
    print()
    print('Total time %f sec' % (duration_total))
    return history, duration_total

In [None]:
# Load dataset; mnist_datasets() returns the two image arrays first, then the two label arrays
x_train, x_test, y_train, y_test = mnist_datasets()

In [None]:
# Create the (not yet compiled) model; it is compiled in the training cell
model_mnist = create_model()

In [None]:
# Train and evaluate for a set number of epochs.
compile_model(model_mnist)
history, duration = train_model(model_mnist, x_train, x_test, y_test, y_train)
# 'accuracy' is measured on the training data. train_model() passes
# (x_test, y_test) as validation_data, so 'val_accuracy' is the held-out figure.
# Report both so the training number is not mistaken for an evaluation result.
acc = history.history['accuracy'][-1]
val_acc = history.history['val_accuracy'][-1]
print(f"Training accuracy: {acc * 100:.2f}%")
print(f"Validation accuracy: {val_acc * 100:.2f}%")

In [None]:
# Save model in s3 via mlflow
mlflow.set_experiment("trained_mnist_model")
# NOTE(review): the signature output is inferred from the integer labels (y_test),
# not from the model's softmax predictions — confirm this is intended.
signature = mlflow.models.signature.infer_signature(x_test, y_test)
# Temporarily append "/1" to MLflow's private save-path constant while logging.
# NOTE(review): presumably this creates a numbered version directory for the
# serving runtime — confirm; this relies on a private MLflow attribute.
mlflow_tf_model_save_path_backup = mlflow.tensorflow._MODEL_SAVE_PATH
mlflow.tensorflow._MODEL_SAVE_PATH += "/1"
try:
    model_info = mlflow.tensorflow.log_model(model=model_mnist, artifact_path="model", signature=signature, registered_model_name="tf-mnist-model")
finally:
    # Always restore the original value: otherwise a failed log_model() would
    # leave the global patched and a re-run of this cell would append "/1" again.
    mlflow.tensorflow._MODEL_SAVE_PATH = mlflow_tf_model_save_path_backup

In [None]:
# Build the URI of the logged model inside the run's artifact store:
# <run artifact URI>/<artifact path>/data/<MLflow TensorFlow save path>
run_artifact_uri = mlflow.get_run(model_info.run_id).info.artifact_uri
saved_model_path = os.path.join(
    run_artifact_uri,
    model_info.artifact_path,
    "data",
    mlflow.tensorflow._MODEL_SAVE_PATH,
)

In [None]:
# Set current JWT token as access and secret key for local minio:
# sed replaces the literal `$AUTH_TOKEN` placeholder in the secret template with the
# value that `$AUTH_TOKEN` expands to, and writes the result to /tmp/object_store_sa_secret.yaml.
# NOTE(review): the token is spliced into the sed expression unescaped; a value containing
# `/` or `&` would break or corrupt the substitution — confirm the token format rules this out.
!sed -e "s/\$AUTH_TOKEN/$AUTH_TOKEN/" /mnt/shared/ezua-tutorials/Data-Science/Kubeflow-GPU/object_store_sa_secret.yaml.tpl > /tmp/object_store_sa_secret.yaml

In [None]:
# Create service account and secret in order to allow kserve access object store.
# Applies the manifest rendered to /tmp/object_store_sa_secret.yaml by the previous cell.
!kubectl apply -f /tmp/object_store_sa_secret.yaml

In [None]:
# Create inferenceservice for trained mnist model
with open('/mnt/shared/ezua-tutorials/Data-Science/Kubeflow-GPU/gpu_mnist_inferenceservice.yaml', 'r') as file:
    inferenceservice_yaml_data = yaml.safe_load(file)

inferenceservice_yaml_data["spec"]["predictor"]["tensorflow"]["storageUri"] = saved_model_path

with open('/tmp/gpu_mnist_inferenceservice.yaml', 'w') as yaml_file:
    yaml.dump(inferenceservice_yaml_data, yaml_file)

!kubectl apply -f /tmp/gpu_mnist_inferenceservice.yaml

In [None]:
# Poll every 10 s until `kubectl get pods` for the inferenceservice label prints a line containing "Running".
# NOTE(review): this loop has no timeout, so it waits forever if no pod ever reaches Running — confirm that is acceptable.
!until kubectl get pods -l serving.kserve.io/inferenceservice=gpu-mnist-inferenceservice | grep -q "Running"; do sleep 10; done
!echo "Inferenceservice is running"
# Then block (for at most 600 s) until the matching pod(s) report the Ready condition.
!kubectl wait --for=condition=ready pod -l serving.kserve.io/inferenceservice=gpu-mnist-inferenceservice --timeout=600s

In [None]:
# Make a call to inferenceservice
jupyterhub_user = os.getenv('JUPYTERHUB_USER')
if not jupyterhub_user:
    # Without this check the f-string below would silently build a host containing "None".
    raise RuntimeError("JUPYTERHUB_USER is not set; cannot build the inferenceservice URL")
URL = f"http://gpu-mnist-inferenceservice-predictor-default.{jupyterhub_user}.svc.cluster.local/v1/models/gpu-mnist-inferenceservice:predict"
count_input_images = 10
test_images_data = x_test[0:count_input_images]
# NOTE(review): np.newaxis adds a leading axis in front of the image batch before
# serialising — confirm the served model really expects that extra dimension.
inputs = {"inputs": test_images_data[np.newaxis, ...].tolist()}
headers = {"Authorization": f"Bearer {os.getenv('AUTH_TOKEN')}"}
# A timeout keeps the cell from hanging indefinitely if the service never answers.
response = requests.post(URL, json=inputs, headers=headers, timeout=60)

print(f"Status: {response.reason}")
if not response.ok:
    # Show the raw body first: error responses are not guaranteed to be JSON.
    print(f"Response body: {response.text}")
# Fail here with a clear HTTP error instead of later with a KeyError/JSON decode error.
response.raise_for_status()
print(f"JSON data: {response.json()}")

In [None]:
# Parse predicted numbers from inferenceservice output
outputs = response.json()["outputs"]
# np.argmax returns a NumPy integer scalar; convert to plain int so the printed
# list reads "[7, 2, ...]" instead of "[np.int64(7), ...]" on NumPy >= 2.
predicted_numbers = [int(np.argmax(x)) for x in outputs]
print(f"Numbers predicted by inferenceservice from input: {predicted_numbers}")

In [None]:
# Show input images with corresponding predicted numbers
num_col = 5
num_row = int(np.ceil(count_input_images / num_col))

# plot images
# squeeze=False keeps `axes` two-dimensional even when there is only one row,
# so the [row, col] indexing below works for any count_input_images >= 1.
fig, axes = plt.subplots(num_row, num_col, figsize=(1.5 * num_col, 2 * num_row), squeeze=False)
for i in range(count_input_images):
    ax = axes[i // num_col, i % num_col]
    ax.imshow(test_images_data[i], cmap='gray')
    ax.set_title('Predicted: {}'.format(predicted_numbers[i]))
# Hide the unused trailing panels when the image count is not a multiple of num_col.
for unused_ax in axes.flat[count_input_images:]:
    unused_ax.axis('off')
plt.tight_layout()
plt.show()