# ACME Model Artifacts Example

>__NOTE:__ This Notebook uses the _Python 3 (Data Science)_ Kernel.

## Configuring the Model Artifact

In [None]:
!mkdir model

### Setup

In [None]:
%%writefile model/model.py
import os
import sys
import json
import re
import traceback
import tensorflow as tf
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from sklearn import preprocessing
# Limit the TensorFlow logger to records at ERROR level.
tf.get_logger().setLevel("ERROR")


# Filesystem layout used by train(); every path is rooted at /opt/ml.
# NOTE(review): presumably the directory contract of the hosting training
# service -- confirm against the platform documentation.
prefix = "/opt/ml"
# Parent directory of the input channels; train() appends the channel name.
training_input_path = os.path.join(prefix, "input/data")
# Directory where train() writes the "failure" file when training raises.
output_path = os.path.join(prefix, "output")
# Directory where train() saves model.h5.
model_path = os.path.join(prefix, "model")
# JSON file holding the hyperparameters read by train().
param_path = os.path.join(prefix, "input/config/hyperparameters.json")

### Training Function

In [None]:
%%writefile -a model/model.py


def _coerce_hyperparameter(value):
    """Return ``value`` as ``int`` or ``float`` when it is a numeric string.

    Values that are not strings (already decoded by the JSON parser) and
    strings that do not look like a number are returned unchanged.
    """
    if not isinstance(value, str):
        return value
    text = value.strip()
    if re.fullmatch(r"[+-]?\d+", text):
        return int(text)
    # Accepts "0.5", ".5", "5.", "-0.5" and exponent forms such as "1e-3".
    if re.fullmatch(r"[+-]?(?:\d+\.\d*|\.\d+|\d+)(?:[eE][+-]?\d+)?", text):
        return float(text)
    return value


def train():
    """Train the regression network and save it as ``model.h5`` in ``model_path``.

    Hyperparameters are read from the JSON file at ``param_path``; numeric
    strings are converted to numbers. ``batch_size`` and ``epochs`` are the
    only hyperparameters used. ``training.csv`` and ``validation.csv`` are
    read (without header row) from the "training" channel directory, the
    "rings" column is the regression target and the remaining ten columns
    are the features, which are normalised row-wise before fitting.

    On any exception the message and traceback are printed to stderr and
    written to ``<output_path>/failure``, and the process exits with
    status 255.
    """
    print("Training mode")
    try:
        channel_name = "training"
        training_path = os.path.join(training_input_path, channel_name)

        with open(param_path, "r") as f:
            params = {key: _coerce_hyperparameter(value) for key, value in json.load(f).items()}

        if not os.listdir(training_path):
            raise ValueError(
                f"There are no files in {training_path}.\n"
                f"This usually indicates that the channel ({channel_name}) was incorrectly specified,\n"
                "the data specification in S3 was incorrectly specified or the role specified\n"
                "does not have permission to access the data."
            )

        column_names = ["rings", "length", "diameter", "height", "whole weight", "shucked_weight", "viscera_weight", "shell_weight", "sex_F", "sex_I", "sex_M"]
        train_data = pd.read_csv(os.path.join(training_path, "training.csv"), sep=',', names=column_names)
        val_data = pd.read_csv(os.path.join(training_path, "validation.csv"), sep=',', names=column_names)

        train_y = train_data["rings"].to_numpy()
        train_X = preprocessing.normalize(train_data.drop(["rings"], axis=1).to_numpy())
        val_y = val_data["rings"].to_numpy()
        val_X = preprocessing.normalize(val_data.drop(["rings"], axis=1).to_numpy())

        network_layers = [
            Dense(64, activation="relu", kernel_initializer="normal", input_dim=train_X.shape[1]),
            Dense(64, activation="relu"),
            Dense(1, activation="linear")
        ]
        model = Sequential(network_layers)
        # NOTE(review): "accuracy" is reported alongside "mae" although the
        # output is a linear regression unit; kept so the logged metrics stay
        # the same -- confirm whether anything consumes it.
        model.compile(optimizer="adam", loss="mse", metrics=["mae", "accuracy"])
        model.summary()

        # A missing batch_size stays None so Keras applies its own default;
        # a missing epochs falls back to a single epoch instead of None.
        batch_size = params.get("batch_size")
        if batch_size is not None:
            batch_size = int(batch_size)
        epochs = int(params.get("epochs", 1))

        model.fit(train_X, train_y, validation_data=(val_X, val_y),
                  batch_size=batch_size, epochs=epochs,
                  shuffle=True, verbose=1
        )
        print("Saving Model")
        os.makedirs(model_path, exist_ok=True)
        model.save(filepath=os.path.join(model_path, "model.h5"), overwrite=True, include_optimizer=False, save_format="h5")

    except Exception as e:
        trc = traceback.format_exc()
        message = "Exception during training: {}".format(str(e) + "\n" + trc)
        # Report on stderr first so the error is visible even if the failure
        # file cannot be written.
        print(message, file=sys.stderr)
        os.makedirs(output_path, exist_ok=True)
        with open(os.path.join(output_path, "failure"), "w") as f:
            f.write(message)
        sys.exit(255)

---

## Create the Application

### Container entrypoint

In [None]:
%%writefile model/app.py
#!/usr/bin/env python

import json
import io
import sys
import os
import signal
import traceback
import flask
import multiprocessing
import subprocess
import tarfile
import model
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from sklearn import preprocessing


# Root working directory; load_model() reads model.h5 from <prefix>/model.
prefix = "/opt/ml"
model_path = os.path.join(prefix, "model")
# Put the model directory at the front of the module search path.
sys.path.insert(0,model_path)
# NOTE(review): not referenced anywhere else in the visible part of this file.
model_cache = {}

class PredictionService(object):
    """Class-level holder for the model, loaded on first use and then reused."""

    # Populated by the first get_model() call; None until then.
    tf_model = None

    @classmethod
    def get_model(cls):
        """Return the cached model, calling ``load_model()`` if none is cached yet."""
        cached = cls.tf_model
        if cached is not None:
            return cached
        cls.tf_model = load_model()
        return cls.tf_model

    @classmethod
    def predict(cls, input):
        """Return the result of the cached model's ``predict`` for ``input``."""
        return cls.get_model().predict(input)

def load_model():
    """Load ``model.h5`` from ``model_path`` and compile it with adam / mse.

    Returns:
        The compiled Keras model.
    """
    saved_model_file = os.path.join(model_path, "model.h5")
    restored = tf.keras.models.load_model(saved_model_file)
    restored.compile(optimizer="adam", loss="mse")
    return restored

def sigterm_handler(nginx_pid, gunicorn_pid):
    """Signal both server processes to stop, then exit this process with status 0.

    Args:
        nginx_pid: Process id that receives SIGQUIT.
        gunicorn_pid: Process id that receives SIGTERM.

    An ``OSError`` from either ``os.kill`` call (for example, the process is
    already gone) is ignored.
    """
    shutdown_plan = (
        (nginx_pid, signal.SIGQUIT),
        (gunicorn_pid, signal.SIGTERM),
    )
    for target_pid, sig in shutdown_plan:
        try:
            os.kill(target_pid, sig)
        except OSError:
            continue

    sys.exit(0)

def start_server(timeout, workers):
    """Launch nginx and gunicorn, wait until one of them exits, then shut down.

    Args:
        timeout: Value passed to gunicorn's ``--timeout`` option.
        workers: Number of gunicorn workers (``-w``).

    The nginx access and error logs are symlinked to /dev/stdout and
    /dev/stderr. A SIGTERM received by this process is forwarded to both
    children through ``sigterm_handler``. This function does not return:
    ``sigterm_handler`` ends the process with ``sys.exit(0)``.

    Raises:
        subprocess.CalledProcessError: If creating a log symlink fails.
    """
    # Use the parameter, not a module-level global, so the function also works
    # when it is called from outside the __main__ block.
    print(f"Starting the inference server with {workers} workers")
    subprocess.check_call(["ln", "-sf", "/dev/stdout", "/var/log/nginx/access.log"])
    subprocess.check_call(["ln", "-sf", "/dev/stderr", "/var/log/nginx/error.log"])
    nginx = subprocess.Popen(["nginx", "-c", "/opt/program/nginx.conf"])
    gunicorn = subprocess.Popen(["gunicorn",
                                 "--timeout", str(timeout),
                                 "-k", "gevent",
                                 "-b", "unix:/tmp/gunicorn.sock",
                                 "-w", str(workers),
                                 "wsgi:app"])

    signal.signal(signal.SIGTERM, lambda a, b: sigterm_handler(nginx.pid, gunicorn.pid))
    pids = {nginx.pid, gunicorn.pid}
    # Block until either server process terminates; other children are ignored.
    while True:
        pid, _ = os.wait()
        if pid in pids:
            break
    # Log before the handler runs: it exits the process, so nothing placed
    # after the call would execute.
    print("Inference server exiting")
    sigterm_handler(nginx.pid, gunicorn.pid)


# Flask application object; wsgi.py re-exports it for gunicorn ("wsgi:app").
app = flask.Flask(__name__)


@app.route("/ping", methods=["GET"])
def ping():
    """Health check: answer 200 when a model object is available, 404 otherwise."""
    if PredictionService.get_model() is None:
        status = 404
    else:
        status = 200
    return flask.Response(response="\n", status=status, mimetype="application/json")


@app.route("/invocations", methods=["POST"])
def invoke():
    """Run inference on a CSV request body and return the predictions as CSV.

    The body holds one comma-separated row of feature values per line.
    The response contains one prediction per line, without header or index.

    Status codes:
        200: predictions returned as ``text/csv``.
        400: the body is empty or cannot be parsed as numeric CSV.
        415: the request media type is not ``text/csv``.
    """
    # Compare the media type without parameters so that
    # "text/csv; charset=utf-8" is accepted as well.
    if flask.request.mimetype != "text/csv":
        return flask.Response(response="Invalid request data type, only 'text/csv' is supported.", status=415, mimetype="text/plain")

    try:
        body = flask.request.data.decode('utf-8')
        if not body.strip():
            return flask.Response(response="Request body is empty.", status=400, mimetype="text/plain")
        # ndmin=2 keeps a single row as shape (1, n_features).
        data = np.loadtxt(io.StringIO(body), delimiter=",", ndmin=2)
    except ValueError as err:
        # Covers undecodable bytes, non-numeric fields and ragged rows.
        return flask.Response(response=f"Could not parse CSV payload: {err}", status=400, mimetype="text/plain")

    # The training code fits the network on row-normalised features, so apply
    # the same transform here. Rows that are already normalised are left
    # unchanged apart from floating-point rounding.
    data = preprocessing.normalize(data)

    predictions = PredictionService.predict(data)
    out = io.StringIO()
    pd.DataFrame({"results": predictions.flatten()}).to_csv(out, header=False, index=False)
    result = out.getvalue()
    print(f"Prediction Result: {result}")
    return flask.Response(response=result, status=200, mimetype="text/csv")


if __name__ == "__main__":
    # Entry point: the first command-line argument selects the mode.
    #   train -> run model.train()
    #   serve -> start nginx + gunicorn through start_server()
    print(f"Tensorflow Version: {tf.__version__}")
    if len(sys.argv) < 2 or sys.argv[1] not in ("serve", "train"):
        # Only the two modes accepted by the check above are listed.
        raise Exception("Invalid argument: you must specify 'train' for training mode or 'serve' for predicting mode.")
    train = sys.argv[1] == "train"
    if train:
        model.train()
    else:
        # Worker count defaults to one per CPU and the timeout to 60; both
        # can be overridden through environment variables.
        cpu_count = multiprocessing.cpu_count()
        model_server_timeout = os.environ.get('MODEL_SERVER_TIMEOUT', 60)
        model_server_workers = int(os.environ.get('MODEL_SERVER_WORKERS', cpu_count))
        start_server(model_server_timeout, model_server_workers)

### Nginx Configuration

In [None]:
%%writefile model/nginx.conf
# Reverse proxy in front of the gunicorn application server.
# A single worker, kept in the foreground (the Python launcher starts nginx
# as a child process and waits on its pid).
worker_processes 1;
daemon off;

pid /tmp/nginx.pid;
# The launcher symlinks this file to /dev/stderr before starting nginx.
error_log /var/log/nginx/error.log;

events {

}

http {
  include /etc/nginx/mime.types;
  default_type application/octet-stream;
  # The launcher symlinks this file to /dev/stdout before starting nginx.
  access_log /var/log/nginx/access.log combined;
  
  # Same unix socket that gunicorn is bound to ("-b unix:/tmp/gunicorn.sock").
  upstream gunicorn {
    server unix:/tmp/gunicorn.sock;
  }

  server {

    # Port published by the container image (EXPOSE 8080).
    listen 8080 deferred;
    client_max_body_size 5m;

    keepalive_timeout 5;

    # Only paths starting with /ping or /invocations are forwarded to gunicorn.
    location ~ ^/(ping|invocations) {
      proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
      proxy_set_header Host $http_host;
      proxy_redirect off;
      proxy_pass http://gunicorn;
    }

    # Every other path is answered directly with 404 and an empty JSON object.
    location / {
      return 404 "{}";
    }
  }
}

### Web Server Gateway Application

In [None]:
%%writefile model/wsgi.py
# WSGI entry module: re-export the Flask application defined in app.py under
# the name gunicorn is pointed at ("wsgi:app").
import app as myapp
app = myapp.app

---

## Create the Dockerfile

In [None]:
%%writefile model/Dockerfile
# Base image: Ubuntu 20.04 from the public ECR gallery.
FROM public.ecr.aws/ubuntu/ubuntu:20.04

# Keep package installation from prompting during the build.
ENV DEBIAN_FRONTEND noninteractive
ENV DEBCONF_NONINTERACTIVE_SEEN true
# NOTE(review): KMP_* look like OpenMP runtime tuning knobs for the CPU
# TensorFlow build -- confirm they are still wanted.
ENV KMP_AFFINITY=granularity=fine,compact,1,0
ENV KMP_BLOCKTIME=1
ENV KMP_SETTINGS=0

# Python runtime behaviour and locale/encoding settings.
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
ENV PYTHONIOENCODING=UTF-8
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8

# Build arguments used by the steps below: interpreter and pip command names,
# the CPython version built from source, and the TensorFlow wheel to install.
# NOTE(review): PYTHON_PIP is not referenced by any visible instruction.
ARG PYTHON=python3.7
ARG PYTHON_PIP=python3-pip
ARG PIP=pip3
ARG PYTHON_VERSION=3.7.10
ARG TF_URL=https://aws-tensorflow-binaries.s3-us-west-2.amazonaws.com/tensorflow/r2.5_aws/20210820_112241/cpu/py37/tensorflow_cpu-2.5.1-cp37-cp37m-manylinux2010_x86_64.whl

# Compiler toolchain, download tools and general utilities; the apt package
# lists are removed in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    openssh-client \
    openssh-server \
    ca-certificates \
    curl \
    emacs \
    git \
    libssl1.1 \
    libtemplate-perl \
    openssl \
    wget \
    vim \
    zlib1g-dev \
    libgtk2.0-dev \
 && rm -rf /var/lib/apt/lists/*

# NOTE(review): no visible step installs anything under /usr/local/openmpi;
# confirm whether these two settings are still needed.
ENV LD_LIBRARY_PATH=/usr/local/openmpi/lib:$LD_LIBRARY_PATH
ENV PATH /usr/local/openmpi/bin/:$PATH

WORKDIR /

# Development headers installed ahead of the CPython source build, plus nginx,
# which the serving entrypoint launches.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
    libbz2-dev \
    libc6-dev \
    libffi-dev \
    libgdbm-dev \
    liblzma-dev \
    libncursesw5-dev \
    libreadline-gplv2-dev \
    libsqlite3-dev \
    libssl-dev \
    tk-dev \
    nginx \
 && rm -rf /var/lib/apt/lists/* \
 && apt-get clean

# Download, build and install CPython ${PYTHON_VERSION} from source, then
# remove the tarball and the source tree. The build and install run once.
RUN wget https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSION.tgz \
 && tar -xvf Python-$PYTHON_VERSION.tgz \
 && cd Python-$PYTHON_VERSION \
 && ./configure \
 && make \
 && make install \
 && rm -rf ../Python-$PYTHON_VERSION*

# Upgrade the packaging tools that came with the source-built interpreter.
RUN ${PIP} --no-cache-dir install --upgrade \
    pip \
    setuptools

# Expose the interpreter and pip under the unversioned names "python" and
# "pip"; the ENTRYPOINT below invokes "python".
RUN ln -s $(which ${PYTHON}) /usr/local/bin/python \
 && ln -s $(which ${PIP}) /usr/bin/pip

# Python dependencies, installed in two pip invocations within one layer.
# flask, gevent, gunicorn and awswrangler are not version-pinned.
RUN ${PIP} install --no-cache-dir -U \
    numpy==1.19.1 \
    scipy==1.5.2 \
    scikit-learn==0.23 \
    pandas==1.1 \
    Pillow==7.2.0 \
    python-dateutil==2.8.1 \
    "pyYAML>=5.4.1,<5.5" \
    requests==2.24.0 \
    "awscli<2" \
    opencv-python==4.3.0.36 \
    && ${PIP} install --no-cache-dir -U \
    h5py==3.1.0 \
    "absl-py>=0.9,<0.11" \
    werkzeug==1.0.1 \
    psutil==5.7.2 \
    flask \
    gevent \
    gunicorn \
    pyarrow==2 \
    awswrangler \
    "markupsafe==2.0.1" \
    protobuf==3.19.0

# TensorFlow CPU wheel referenced by the TF_URL build argument.
RUN ${PIP} install --no-cache-dir -U \
    ${TF_URL}

# NOTE(review): the license path says tensorflow-2.6 while TF_URL points at a
# 2.5.1 wheel -- confirm which license file is intended.
RUN curl https://aws-dlc-licenses.s3.amazonaws.com/tensorflow-2.6/license.txt -o /license.txt

# /opt/program holds the application code; /opt/ml is the root of the paths
# that model.py and app.py read from and write to.
RUN mkdir -p /opt/program
RUN mkdir -p /opt/ml

# nginx.conf must live at /opt/program/nginx.conf, the path app.py passes to
# "nginx -c".
COPY app.py /opt/program
COPY model.py /opt/program
COPY nginx.conf /opt/program
COPY wsgi.py /opt/program

WORKDIR /opt/program

# Port nginx listens on.
EXPOSE 8080

# app.py expects "train" or "serve" as its first argument, supplied as the
# container command.
ENTRYPOINT ["python", "app.py"]