In [None]:
!pip install matplotlib numpy scikit-learn tensorflow boto3 botocore

In [None]:
from IPython.core.display import HTML

# Emit a <script> that calls `Jupyter.notebook.kernel.restart()` in the browser, i.e. asks
# the notebook front end to restart the kernel (presumably so the packages installed by the
# previous cell are picked up — TODO confirm).
# NOTE(review): this relies on the `Jupyter` JavaScript global; it may do nothing in front
# ends that do not expose it — confirm for JupyterLab / VS Code.
HTML("<script>Jupyter.notebook.kernel.restart()</script>")

## Download Datasets

In [None]:
from os import environ, path
from urllib.parse import urlparse
from pathlib import Path
from typing import List
import os

import boto3


def get_s3_client():
    """Build a boto3 S3 client.

    When ``AWS_ACCESS_KEY_ID`` is present in the environment a regular client is
    returned. Otherwise the client is configured with ``signature_version=UNSIGNED``
    so that S3 can be reached without credentials.
    """
    if "AWS_ACCESS_KEY_ID" in environ:
        return boto3.client("s3")

    # No access key configured: fall back to an unsigned client.
    from botocore import UNSIGNED
    from botocore.client import Config

    unsigned_config = Config(signature_version=UNSIGNED)
    return boto3.client("s3", config=unsigned_config)


def download_s3(bucket, key, outdir):
    """Download one S3 object to ``<outdir>/<key>``.

    :param bucket: Name of the bucket that holds the object
    :param key: Object key; it is also used as the path below ``outdir``
    :param outdir: Local directory the object is written under
    """
    import shutil

    s3 = get_s3_client()
    stream = s3.get_object(Bucket=bucket, Key=key)["Body"]
    try:
        outfile = path.join(outdir, key)
        # The key may contain "/" separators, so make sure its parent directory exists.
        Path(path.dirname(outfile)).mkdir(parents=True, exist_ok=True)
        with open(outfile, "wb") as f:
            # Copy in chunks instead of loading the whole object into memory.
            shutil.copyfileobj(stream, f)
    finally:
        # Always release the underlying HTTP connection.
        stream.close()
    print(f"file saved to: {outfile}")


def parse_s3_url(url):
    """Split an S3 HTTP URL into ``(bucket, key)``.

    The bucket is the first dot-separated label of the host name and the key is
    the URL path with leading and trailing slashes removed. The URL is also
    printed as a progress message.
    """
    print(f"downloading: {url}")
    parsed = urlparse(url)
    bucket, _, _ = parsed.netloc.partition(".")
    return bucket, parsed.path.strip("/")


def download_s3_dir(data_urls: List[str], data_dir: str):
    """Download every S3 object in ``data_urls`` into ``data_dir``.

    :param data_urls: HTTP URLs of the S3 objects to fetch
    :param data_dir: Local directory to download into; created if missing
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists() test.
    os.makedirs(data_dir, exist_ok=True)

    for data_url in data_urls:
        bucket, key = parse_s3_url(data_url)
        download_s3(bucket, key, data_dir)


def download_data(data_dir: str, data_urls: str):
    """Download a comma-separated list of S3 URLs into ``data_dir``.

    :param data_dir: Local directory the files are downloaded into
    :param data_urls: Comma-separated URLs. This is a single string because kubeflow
        has no registered serializers for type "typing.List[str]"
    """
    # ``data_dir`` was annotated with ``OutputPath(str)``, a name that is never imported in
    # this notebook, so defining the function raised NameError. A plain ``str`` annotation
    # keeps the function usable here.
    # Drop empty entries (e.g. from a trailing comma) so no bogus download is attempted.
    urls = [url.strip() for url in data_urls.split(",") if url.strip()]
    download_s3_dir(urls, data_dir)
    print("downloads complete")

## Train Model

In [None]:
from os import path

import tensorflow as tf
from tensorflow import keras


def gen_log_dirname(log_dir) -> str:
    """Return the TensorBoard log directory, ``<log_dir>/tensorboard/fit``."""
    subdirs = ("tensorboard", "fit")
    return path.join(log_dir, *subdirs)


def load_mnist(filepath, kind, normalize=True):
    """Load MNIST data from `filepath`.

    Reads ``<kind>-labels-idx1-ubyte.gz`` and ``<kind>-images-idx3-ubyte.gz``.

    :param filepath: Directory containing the gzip-compressed files
    :param kind: File name prefix, e.g. ``"train"`` or ``"t10k"``
    :param normalize: If true, divide every pixel value by 255.0
    :return: ``(images, labels)``; images are reshaped to ``(len(labels), 28, 28)``
    """
    # The docstring must be the first statement; it used to follow the imports and was
    # therefore just a discarded string expression.
    import gzip
    import numpy as np

    labels_path = path.join(filepath, f"{kind}-labels-idx1-ubyte.gz")
    images_path = path.join(filepath, f"{kind}-images-idx3-ubyte.gz")

    # Skip the 8-byte header that precedes the label bytes.
    with gzip.open(labels_path, "rb") as lbpath:
        labels = np.frombuffer(lbpath.read(), dtype=np.uint8, offset=8)

    # Skip the 16-byte header that precedes the pixel bytes.
    with gzip.open(images_path, "rb") as imgpath:
        images = np.frombuffer(imgpath.read(), dtype=np.uint8, offset=16).reshape(
            len(labels), 28, 28
        )

    # normalize by dividing each pixel value by 255.0. This places the pixel value within the range 0 and 1.
    if normalize:
        images = images / 255.0

    return images, labels


def learning_rate(batch_size):
    """Return the learning-rate schedule passed to the optimizer.

    Builds an ``InverseTimeDecay`` schedule that starts at 0.001 and uses
    ``batch_size * 1000`` decay steps, so the rate is gradually reduced during
    training.
    """
    schedule = keras.optimizers.schedules.InverseTimeDecay(
        initial_learning_rate=0.001,
        decay_steps=batch_size * 1000,
        decay_rate=1,
        staircase=False,
    )
    return schedule


def create_model(batch_size):
    """Build and compile the classifier, print its summary and return it.

    :param batch_size: Forwarded to ``learning_rate`` to size the decay schedule
    :return: Compiled ``keras.Sequential`` model with 10 output units
    """
    layers = [
        keras.layers.Flatten(input_shape=(28, 28)),
        keras.layers.Dense(128, activation="relu"),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(128, activation="relu"),
        # No activation here: the loss below is configured with from_logits=True.
        keras.layers.Dense(10),
    ]
    model = keras.Sequential(layers)

    optimizer = keras.optimizers.Adam(learning_rate=learning_rate(batch_size))
    # https://keras.io/api/metrics/probabilistic_metrics/#sparsecategoricalcrossentropy-class
    loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    model.compile(optimizer=optimizer, loss=loss_fn, metrics=["accuracy"])

    model.summary()
    return model


def train_model(
    model_dir: str,
    data_dir: str,
    log_dir: str,
    epochs: int = 5,
) -> str:
    """Train the classifier, save it to ``model_dir`` and return the TensorBoard log path.

    :param model_dir: Directory the trained model is saved to
    :param data_dir: Directory holding the gzip-compressed MNIST-format files
    :param log_dir: Base directory for logs; TensorBoard data goes below it
    :param epochs: Number of training epochs
    :return: Directory the TensorBoard callback writes to
    """
    x_train, y_train = load_mnist(data_dir, kind="train", normalize=True)
    x_val, y_val = load_mnist(data_dir, kind="t10k", normalize=True)

    tensorboard_log_dir = gen_log_dirname(log_dir)

    # This is the number of training samples. It is what create_model receives as its
    # ``batch_size`` argument; ``fit`` below is not given an explicit batch size.
    num_train_samples = len(x_train)
    model = create_model(num_train_samples)

    tensorboard_callback = tf.keras.callbacks.TensorBoard(
        log_dir=tensorboard_log_dir, histogram_freq=1
    )
    model.fit(
        x=x_train,
        y=y_train,
        epochs=epochs,
        shuffle=True,
        # The validation data also feeds the TensorBoard callback.
        validation_data=(x_val, y_val),
        callbacks=[tensorboard_callback],
    )

    model.save(model_dir, include_optimizer=True)
    return tensorboard_log_dir

## Evaluate Model

In [None]:
import json

import tensorflow as tf
from tensorflow import keras
from tensorflow.python.lib.io import file_io
import numpy as np
from sklearn.metrics import confusion_matrix, accuracy_score

from typing import NamedTuple
from collections import namedtuple


def write_cm_to_csv(cm, class_labels, cm_path):
    """Write a confusion matrix to ``cm_path`` as headerless CSV rows.

    Each row is ``target,predicted,count``: the label of the matrix row, the label
    of the matrix column, and the cell value.

    :param cm: Confusion matrix, iterated row by row
    :param class_labels: Label for each row/column index of ``cm``
    :param cm_path: Destination path, opened through ``file_io.FileIO``
    """
    # ``pd`` was referenced without pandas ever being imported (NameError); import it here.
    import pandas as pd

    columns = ["target", "predicted", "count"]
    data = [
        (class_labels[target_index], class_labels[predicted_index], count)
        for target_index, target_row in enumerate(cm)
        for predicted_index, count in enumerate(target_row)
    ]

    df_cm = pd.DataFrame(data, columns=columns)
    with file_io.FileIO(cm_path, "w") as f:
        df_cm.to_csv(f, columns=columns, header=False, index=False)


def predict(model, test_images):
    """Return the predicted class index for every image.

    :param model: Trained model whose outputs are passed through a Softmax layer
    :param test_images: Batch of images to classify
    :return: 1-D array with the index of the highest-probability class per image
    """
    # Define a Softmax layer to define outputs as probabilities
    probability_model = tf.keras.Sequential([model, tf.keras.layers.Softmax()])
    predictions = probability_model.predict(test_images)
    # argmax along the class axis; replaces the detour through the discouraged np.matrix.
    return np.argmax(predictions, axis=1)


def evaluate_model(
    metrics_path: str,
    data_dir: str,
    model_dir: str,
) -> NamedTuple(
    "output",
    # https://www.kubeflow.org/docs/pipelines/sdk/pipelines-metrics/
    # The output name must be MLPipeline Metrics or MLPipeline_Metrics (case does not matter).
    [("mlpipeline_ui_metadata", "UI_metadata"), ("mlpipeline_metrics", "Metrics")],
):
    """Evaluate the saved model on the ``t10k`` split and write the metrics as JSON.

    :param metrics_path: File the metrics JSON is written to
    :param data_dir: Directory holding the gzip-compressed MNIST-format files
    :param model_dir: Directory the trained model is loaded from
    :return: Named tuple with one field, ``pipeline_metrics``, holding the metrics JSON string
    """
    # The model is trained on pixels scaled to [0, 1] (train_model loads with
    # normalize=True), so evaluation must use the same scaling. Evaluating on raw
    # 0-255 pixels produced meaningless loss and accuracy values.
    test_images, test_labels = load_mnist(data_dir, kind="t10k", normalize=True)
    model = keras.models.load_model(model_dir)

    loss, accuracy = model.evaluate(test_images, test_labels)
    # NOTE(review): loss is reported with format "PERCENTAGE" although it is not a
    # ratio — confirm whether "RAW" is intended.
    metrics = {
        "metrics": [
            {"name": "loss", "numberValue": str(loss), "format": "PERCENTAGE"},
            {"name": "accuracy", "numberValue": str(accuracy), "format": "PERCENTAGE"},
        ]
    }
    with open(metrics_path, "w+") as f:
        json.dump(metrics, f)

    # NOTE(review): the returned tuple has a single "pipeline_metrics" field, while the
    # return annotation declares two outputs — confirm which one the pipeline expects.
    print_output = namedtuple(
        # "pipeline_metrics" is hardcoded value that could be anything
        "output",
        ["pipeline_metrics"],
    )
    return print_output(json.dumps(metrics))

## Export Model to S3

In [None]:
from os import environ, path
import boto3
from botocore.exceptions import ClientError


def get_s3_client():
    """Return a boto3 S3 client, unsigned when no credentials are configured.

    If ``AWS_ACCESS_KEY_ID`` is set in the environment a default client is created.
    Otherwise the client uses ``signature_version=UNSIGNED`` so S3 can be reached
    without credentials.
    """
    has_access_key = "AWS_ACCESS_KEY_ID" in environ
    if has_access_key:
        return boto3.client("s3")

    # allow s3 connection without creds
    from botocore import UNSIGNED
    from botocore.client import Config

    return boto3.client("s3", config=Config(signature_version=UNSIGNED))


def upload_file(s3_client, file_name, bucket, object_name=None):
    """Upload a local file to an S3 bucket.

    :param s3_client: Client object whose ``upload_file`` method performs the upload
    :param file_name: File to upload
    :param bucket: Bucket to upload to
    :param object_name: S3 object name. If ``None``, ``file_name`` is used
    :return: Whatever ``s3_client.upload_file`` returns
    """
    # Only ``None`` triggers the fallback; an explicit empty string is passed through.
    target_key = file_name if object_name is None else object_name
    return s3_client.upload_file(file_name, bucket, target_key)


def bucket_exists(s3_client, bucket_name):
    """Report whether ``bucket_name`` exists, based on a ``head_bucket`` call.

    :param s3_client: S3 client used for the request
    :param bucket_name: Name of the bucket to check
    :return: ``False`` if the request fails with error code "404", otherwise ``True``
        (any other client error is treated as "bucket exists")
    """
    try:
        s3_client.head_bucket(Bucket=bucket_name)
    # Only ``ClientError`` is imported in this cell. The previous
    # ``botocore.exceptions.ClientError`` raised NameError the moment an error had to be
    # handled, because the name ``botocore`` itself is never bound.
    except ClientError as e:
        # If a client error is thrown, then check that it was a 404 error.
        # If it was a 404 error, then the bucket does not exist.
        if e.response["Error"]["Code"] == "404":
            return False
    return True


def s3_upload_dir(
    src_dir: str,
    bucket_name: str,
    bucket_dir: str,
):
    """Upload every file below ``src_dir`` to ``bucket_name`` under ``bucket_dir``.

    After uploading, the objects returned by one ``list_objects`` call on the bucket
    are printed.

    :param src_dir: Local directory to upload (walked recursively)
    :param bucket_name: Destination bucket; it must already exist
    :param bucket_dir: Key prefix the files are stored under
    :raises Exception: If the bucket does not exist
    """
    # Local import so the function does not depend on another cell having imported ``os``.
    import os

    s3_client = get_s3_client()

    if not bucket_exists(s3_client, bucket_name):
        raise Exception(f"Bucket: {bucket_name} does not exist")

    # Walk the directory that was passed in. The previous code walked the global
    # ``model_dir`` and silently ignored ``src_dir``.
    for root, _dirs, files in os.walk(src_dir):
        for name in files:
            local_path = os.path.join(root, name)
            # S3 keys use "/" regardless of the local path separator.
            relative_key = os.path.relpath(local_path, src_dir).replace(os.sep, "/")
            upload_file(
                s3_client,
                local_path,
                bucket_name,
                f"{bucket_dir}/{relative_key}",
            )

    response = s3_client.list_objects(Bucket=bucket_name)
    print(f"All objects in {bucket_name}:")

    # "Contents" may be missing from the response, e.g. when nothing is listed.
    for obj in response.get("Contents", []):
        print(f"{bucket_name}/{obj['Key']}")

## Run entire pipeline

In [None]:
!pip install python-dotenv
from dotenv import load_dotenv

# %load_ext dotenv
# !dotenv --help
# !dotenv load -f '../.env' set
load_dotenv(dotenv_path="../.env")

In [None]:
model_version = 1

# download_data component
data_dir = os.path.join("tmp", "datasets")
mnist_data_s3_urls = [
    "https://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz",
    "https://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz",
    "https://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz",
    "https://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz",
]

# train_model component
model_dir = os.path.join("tmp", "model", str(model_version))
log_dir = os.path.join("tmp", "logs")
metadata_file = os.path.join("tmp", "metadata.json")

# evaluate_model component
metrics_path = os.path.join("tmp", "metrics.json")

# export model component
bucket_name = "kfaas-demo-data-sandbox"
# bucket_name = "kfaas-demo-data-prod"

client = "demo"
model_name = "fashion_model"
bucket_dir_tensorboard = f"{client}/tensorboard/{model_name}/{model_version}"
bucket_dir_model = f"{client}/{model_name}/{model_version}"

# run pipeline
download_data(data_dir, ",".join(mnist_data_s3_urls))
tensorboard_log_dir = train_model(
    model_dir=model_dir,
    data_dir=data_dir,
    log_dir=log_dir,
    epochs=10,
)
print(f"tensorboard_log_dir={tensorboard_log_dir}")
evaluate_model(metrics_path, data_dir, model_dir)
!saved_model_cli show --dir "tmp/model/1" --all
# s3_upload_dir(model_dir, bucket_name, bucket_dir_model)
s3_upload_dir(tensorboard_log_dir, bucket_name, bucket_dir_model)

## Run Tensorboard on model training logs


In [None]:
# Print the scikit-learn version report from a separate Python process.
!python -c "import sklearn; sklearn.show_versions()"
# Load the TensorBoard notebook extension
%reload_ext tensorboard
# Start TensorBoard on the directory the training step logs to
# (gen_log_dirname("tmp/logs") -> tmp/logs/tensorboard/fit).
%tensorboard --logdir tmp/logs/tensorboard/fit
# NOTE(review): the URL below assumes TensorBoard serves on port 6006 — confirm.
!echo "go to http://localhost:6006/"

In [None]:
from tensorboard import notebook

# Show the TensorBoard instances known to this notebook session.
notebook.list()  # View open TensorBoard instances

## Tensorflow Serving
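- Do not include the model version in the source path (i.e. NOT `${PWD}/tmp/model/1`); mount the parent directory that contains the numbered version folders.
- Do not use environment variables for the model name; `model_name` must be hardcoded.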
### Run with docker
```console
cd notebooks
docker run -p 8501:8501 \
--mount type=bind,source=$PWD/tmp/model,target=/models/fashion_model \
-e MODEL_NAME=fashion_model -t tensorflow/serving
```

### Query
```shell
http get http://0.0.0.0:8501/v1/models/fashion_model
http get http://0.0.0.0:8501/v1/models/fashion_model/metadata
```

### Install TFServing with APT
```bash
sudo bash
echo "deb http://storage.googleapis.com/tensorflow-serving-apt stable tensorflow-model-server tensorflow-model-server-universal" | tee /etc/apt/sources.list.d/tensorflow-serving.list && \
curl https://storage.googleapis.com/tensorflow-serving-apt/tensorflow-serving.release.pub.gpg | apt-key add -
apt update
apt-get install tensorflow-model-server
```

### Run with APT installed
```bash
export MODEL_NAME=fashion_model
tensorflow_model_server \
  --rest_api_port=8501 \
  --model_name=fashion_model \
  --model_base_path="$PWD/tmp/model"
```

In [None]:
!pip install requests

### Query Tensorflow Server API

In [None]:
# Human-readable class names; the list index is the numeric class id used as a label.
class_labels = [
    "T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",
    "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot",
]

In [None]:
import matplotlib.pyplot as plt
import requests
import json


def show(idx, title):
    """Plot ``test_images[idx]`` as a 28x28 image with ``title`` above it.

    Reads the module-level ``test_images`` array, so that must be loaded first.
    """
    plt.figure()
    image = test_images[idx].reshape(28, 28)
    plt.imshow(image)
    plt.axis("off")
    # The two leading newlines push the title text down, as in the original layout.
    plt.title(f"\n\n{title}", fontdict={"size": 16})


def query_api(model_name, data, version=1, port=8501, timeout=30):
    """POST ``data`` to the model server's predict endpoint and plot each prediction.

    :param model_name: Name of the served model (part of the URL)
    :param data: Request payload. It is JSON-encoded as is; ``data["labels"]`` is also
        read here to look up the true class of each instance
    :param version: Model version to query
    :param port: Port of the REST API
    :param timeout: Seconds to wait for the server before giving up, so an unresponsive
        server cannot block the notebook indefinitely
    :raises Exception: If the response JSON contains an "error" entry
    """
    headers = {"content-type": "application/json"}
    r = requests.post(
        f"http://0.0.0.0:{port}/v1/models/{model_name}/versions/{version}:predict",
        # f"http://0.0.0.0:{port}/v1/models/{model_name}:predict",
        data=json.dumps(data),
        headers=headers,
        timeout=timeout,
    )
    res = r.json()
    if "error" in res:
        raise Exception(res["error"])

    # show() plots test_images[i], so this assumes the i-th instance sent is
    # test_images[i] — verify against the caller.
    for i, pred in enumerate(res["predictions"]):
        predicted = np.argmax(pred)
        actual = data["labels"][i]
        show(
            i,
            "The model thought this was a {} (class {}), and it was actually a {} (class {})".format(
                class_labels[predicted],
                predicted,
                class_labels[actual],
                actual,
            ),
        )

In [None]:
import json

# Send the first three normalized test images to the model server, together with their
# labels so query_api can show the true class next to each prediction.
test_images, test_labels = load_mnist(data_dir, kind="t10k", normalize=True)
data = dict(
    signature_name="serving_default",
    instances=test_images[:3].tolist(),
    labels=test_labels[:3].tolist(),
)
query_api(model_name, data, model_version, port=8501)

In [None]:
import random

# Pick one test image at random and display it with its class name as the title.
test_images, test_labels = load_mnist(data_dir, kind="t10k", normalize=True)
rando = random.randrange(len(test_images))
show(rando, f"An Example Image: {class_labels[test_labels[rando]]}")