# In [9]:
@component(base_image="public.ecr.aws/j1r0q0g6/notebooks/notebook-servers/jupyter-tensorflow-full:v1.5.0")
def model_building(no_epochs: int = 1, optimizer: str = "adam"):
    """Train, evaluate and publish a small CNN classifier.

    The function builds a 10-class convolutional network for 28x28x1 inputs,
    trains it on the ``x_train``/``y_train`` arrays fetched from the
    ``mlpipeline`` MinIO bucket, evaluates it on ``x_test``/``y_test``,
    writes a confusion matrix to ``/tmp/confusion_matrix.csv``, saves the
    model under ``/tmp/detect-digits`` and uploads that directory to
    ``models/detect-digits/1/`` in the same bucket.

    NOTE(review): no return annotation is declared. Depending on the KFP SDK
    version behind ``component``, a ``NamedTuple`` return annotation may be
    required for the returned values to be exposed as outputs -- confirm.

    Args:
        no_epochs: Number of epochs passed to ``model.fit``.
        optimizer: Optimizer identifier passed to ``model.compile``.

    Returns:
        An ``output`` namedtuple with two JSON strings:
        ``mlpipeline_ui_metadata`` (confusion-matrix visualisation spec) and
        ``mlpipeline_metrics`` (test accuracy and loss).

    Raises:
        NotADirectoryError: If the saved-model directory does not exist when
            the upload starts.
    """
    # Imports are kept inside the function body, as in the original code;
    # presumably the component decorator packages only this function -- confirm.
    import glob
    import json
    import os
    import posixpath
    from collections import namedtuple

    import numpy as np
    import pandas as pd
    import tensorflow as tf
    from minio import Minio
    from tensorflow import keras

    # NOTE(review): endpoint and credentials are hard-coded; consider
    # supplying them through a secret or environment variables.
    minio_client = Minio(
        "100.65.11.110:9000",
        access_key="minio",
        secret_key="minio123",
        secure=False
    )
    minio_bucket = "mlpipeline"
    model_dir = "/tmp/detect-digits"
    # Single source of truth for the output layer width and the
    # confusion-matrix size.
    num_classes = 10

    def load_array(object_name):
        """Download ``object_name`` from the bucket and load it with numpy."""
        local_file = "/tmp/" + object_name + ".npy"
        minio_client.fget_object(minio_bucket, object_name, local_file)
        return np.load(local_file)

    def upload_local_directory_to_minio(local_path, bucket_name, minio_path):
        """Upload every regular file below ``local_path`` to ``minio_path``."""
        # Raise explicitly: an ``assert`` would be stripped under ``python -O``.
        if not os.path.isdir(local_path):
            raise NotADirectoryError(local_path)
        for local_file in glob.glob(os.path.join(local_path, "**"), recursive=True):
            if not os.path.isfile(local_file):
                continue
            # relpath is robust to a trailing separator on local_path, unlike
            # slicing by len(local_path); object keys always use "/".
            relative = os.path.relpath(local_file, local_path).replace(os.sep, "/")
            remote_path = posixpath.join(minio_path, relative)
            minio_client.fput_object(bucket_name, remote_path, local_file)

    model = keras.models.Sequential([
        keras.layers.Conv2D(64, (3, 3), activation='relu', input_shape=(28, 28, 1)),
        keras.layers.MaxPool2D(2, 2),
        keras.layers.Flatten(),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(32, activation='relu'),
        keras.layers.Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer=optimizer,
                  loss="sparse_categorical_crossentropy",
                  metrics=['accuracy'])

    x_train = load_array("x_train")
    y_train = load_array("y_train")
    model.fit(x_train, y_train, epochs=no_epochs, batch_size=20)

    x_test = load_array("x_test")
    y_test = load_array("y_test")
    model_loss, model_accuracy = model.evaluate(x_test, y_test)

    predicted_classes = np.argmax(model.predict(x_test), axis=1)

    # Pin num_classes so the matrix is always num_classes x num_classes, and
    # use the row/column index itself as the label: the index *is* the class
    # id, so this stays correct even when some class is absent from y_test.
    confusion = tf.math.confusion_matrix(
        labels=y_test,
        predictions=predicted_classes,
        num_classes=num_classes,
    ).numpy()
    rows = [(target, predicted, int(count))
            for target, target_row in enumerate(confusion)
            for predicted, count in enumerate(target_row)]

    df_cm = pd.DataFrame(rows, columns=['target', 'predicted', 'count'])
    df_cm.to_csv("/tmp/confusion_matrix.csv", header=True, index=False)

    keras.models.save_model(model, model_dir)
    upload_local_directory_to_minio(model_dir, minio_bucket, "models/detect-digits/1/")

    print("Saved model to minIO")

    metadata = {
        'outputs': [{
            'type': 'confusion_matrix',
            'format': 'csv',
            'source': '/tmp/confusion_matrix.csv',
            'schema': [
                {'name': 'target', 'type': 'CATEGORY'},
                {'name': 'predicted', 'type': 'CATEGORY'},
                {'name': 'count', 'type': 'NUMBER'}
            ],
            'storage': 'inline'
        }]
    }

    # float() guarantees JSON-serialisable values even if evaluate() hands
    # back numpy scalars.
    metrics = {
        'metrics': [
            {'name': 'accuracy', 'numberValue': float(model_accuracy), 'format': 'PERCENTAGE'},
            {'name': 'loss', 'numberValue': float(model_loss), 'format': 'RAW'}
        ]
    }

    output = namedtuple('output', ['mlpipeline_ui_metadata', 'mlpipeline_metrics'])
    return output(json.dumps(metadata), json.dumps(metrics))
