## Inference Recommender Pre-Trained TensorFlow ResNet50

### Local Inference Test

In [None]:
import os
import tensorflow as tf
from tensorflow.keras.applications import resnet50
from tensorflow.keras import backend
import numpy as np
from tensorflow.keras.preprocessing import image

In [None]:
# Put Keras into inference mode before the network is built.
# `set_learning_phase` is deprecated in newer TensorFlow releases, so it is only
# called where the backend still provides it rather than raising AttributeError.
if hasattr(tf.keras.backend, "set_learning_phase"):
    tf.keras.backend.set_learning_phase(0)
model = resnet50.ResNet50()

# Load the image file, resizing it to 224x224 pixels (required by this model)
img = image.load_img("dog.jpg", target_size=(224, 224))
# Convert the image to a numpy array
x = image.img_to_array(img)
# Add a fourth dimension since Keras expects a list of images
x = np.expand_dims(x, axis=0)

# Scale the input image to the range used in the trained network.
# `x` is kept at module level because a later cell serializes it as the sample payload.
x = resnet50.preprocess_input(x)

print("predicting model")
predictions = model.predict(x)
# Keep only the nine highest-ranked classes for display.
predicted_classes = resnet50.decode_predictions(predictions, top=9)
print(predicted_classes)

### Create Model and Payload Tarball

In [None]:
# Directory the SavedModel is written to.
# NOTE(review): presumably the numeric name is the model version folder the
# serving container looks for — confirm before renaming it.
export_dir = "00001"

# `set_learning_phase` is deprecated in newer TensorFlow releases, so it is only
# called where the backend still provides it rather than raising AttributeError.
if hasattr(tf.keras.backend, "set_learning_phase"):
    tf.keras.backend.set_learning_phase(0)
model = tf.keras.applications.ResNet50()

if not os.path.exists(export_dir):
    os.makedirs(export_dir)
    print("Directory ", export_dir, " Created ")
else:
    print("Directory ", export_dir, " already exists")
# Save to SavedModel; the optimizer state is left out of the export.
model.save(export_dir, save_format="tf", include_optimizer=False)

In [None]:
!tar -cvpzf model.tar.gz ./00001

In [None]:
import json

# Serialize the array `x` (converted to nested Python lists) into a JSON string
# that is later written to payload.json.
payload = json.dumps(x.tolist())

In [None]:
# Write the JSON request body to disk so it can be archived as the sample payload.
with open("payload.json", mode="w") as payload_file:
    payload_file.write(payload)

In [None]:
# File name of the tarball that bundles payload.json; reused by the tar and upload cells.
payload_archive_name = "payload.tar.gz"

In [None]:
# Bundle payload.json into the gzip-compressed archive named by payload_archive_name.
!tar -cvzf {payload_archive_name} payload.json

### Upload Tarballs to S3

In [None]:
from sagemaker import get_execution_role, Session, image_uris
import boto3
import time

# Execution role used for the SageMaker calls in the following cells.
role = get_execution_role()

# Region comes from the default boto3 session; the low-level client is pinned to it.
region = boto3.Session().region_name
sm_client = boto3.client("sagemaker", region_name=region)

# High-level SDK session used for the S3 uploads and the model definition.
sagemaker_session = Session()

print(region)

In [None]:
# Upload the payload archive under the "resnet-payload" prefix and keep the returned location.
sample_payload_url = sagemaker_session.upload_data(
    key_prefix="resnet-payload",
    path=payload_archive_name,
)

In [None]:
# Upload the model tarball under the "resnet-model-data" prefix and keep the returned location.
model_url = sagemaker_session.upload_data(
    key_prefix="resnet-model-data",
    path="model.tar.gz",
)

In [None]:
# Report both upload locations, one per line.
print(
    "Your model artifacts are stored at: {}".format(model_url),
    "Your sample payload is stored at: {}".format(sample_payload_url),
    sep="\n",
)

### Model Registry

In [None]:
# Suffix the group name with the current Unix time (rounded to whole seconds)
# so each run gets a distinct name.
model_package_group_name = f"tf-resnet{round(time.time())}"

In [None]:
import sagemaker
from sagemaker.model import Model
from sagemaker import image_uris

# Look up the TensorFlow 2.1 inference image URI for the current region.
serving_image_uri = sagemaker.image_uris.retrieve(
    framework="tensorflow",
    region=region,
    version="2.1",
    py_version="py3",
    image_scope="inference",
    instance_type="ml.m5.xlarge",
)

# SageMaker model definition pairing the uploaded artifact with that image.
# Note: this rebinds `model`, which earlier cells used for the Keras network.
model = Model(
    image_uri=serving_image_uri,
    model_data=model_url,
    role=role,
    sagemaker_session=sagemaker_session,
)

In [None]:
# Register the model in the package group created above, already approved,
# declaring JSON for both request and response bodies.
model_package = model.register(
    model_package_group_name=model_package_group_name,
    image_uri=model.image_uri,
    framework="TENSORFLOW",
    content_types=["application/json"],
    response_types=["application/json"],
    approval_status="Approved",
)

### Kick off Inference Recommender Job

In [None]:
# Start an Inference Recommender job over a fixed list of candidate instance
# types, using the uploaded sample payload.
model_package.right_size(
    framework="TENSORFLOW",
    sample_payload_url=sample_payload_url,
    supported_content_types=["application/json"],
    supported_instance_types=[
        "ml.c5.xlarge",
        "ml.c5.9xlarge",
        "ml.c5.18xlarge",
        "ml.m5d.24xlarge",
    ],
)

### Advanced Inference Recommender Job

In [None]:
from sagemaker.parameter import CategoricalParameter 
from sagemaker.inference_recommender.inference_recommender_mixin import (  
    Phase,  
    ModelLatencyThreshold 
) 

# Search space for the advanced job: instance types crossed with OMP_NUM_THREADS values.
hyperparameter_ranges = [
    {
        "instance_types": CategoricalParameter(
            ["ml.c5.9xlarge", "ml.c5.18xlarge", "ml.g4dn.xlarge"]
        ),
        "OMP_NUM_THREADS": CategoricalParameter(["1", "2", "3"]),
    }
]

# Three 120-second traffic phases that differ only in the initial number of users.
phases = [
    Phase(duration_in_seconds=120, initial_number_of_users=user_count, spawn_rate=2)
    for user_count in (2, 4, 8)
]

# Latency threshold: 300 ms at the 95th percentile.
model_latency_thresholds = [
    ModelLatencyThreshold(percentile="P95", value_in_milliseconds=300),
]

In [None]:
# Start the advanced job with the search space, traffic phases and latency
# threshold defined in the previous cell, allowing up to an hour of runtime.
model_package.right_size(
    framework="TENSORFLOW",
    sample_payload_url=sample_payload_url,
    supported_content_types=["application/json"],
    hyperparameter_ranges=hyperparameter_ranges,
    job_duration_in_seconds=3600,
    phases=phases,  # TrafficPattern
    max_invocations=100,  # StoppingConditions
    model_latency_thresholds=model_latency_thresholds,
)

### Analyze Results

In [None]:
import boto3
# SageMaker client built from the default configuration (no explicit region).
sm_client = boto3.client(service_name="sagemaker")

# Enter default or advanced job name here, this value is emitted at the top of the right size API call
job_name = "Enter your job name here"
inference_recommendation_res = sm_client.describe_inference_recommendations_job(JobName=job_name)

# Fail with a descriptive message instead of a bare KeyError when the response
# carries no recommendations (presumably because the job has not completed yet —
# confirm against the reported status).
if "InferenceRecommendations" not in inference_recommendation_res:
    raise RuntimeError(
        "Inference Recommender job {!r} returned no recommendations (status: {}).".format(
            job_name, inference_recommendation_res.get("Status", "unknown")
        )
    )

# Flatten each recommendation's endpoint, model and metric fields into a single
# dict, giving one row per recommendation. The loop variable is deliberately not
# called `x`, which this notebook already uses for the image tensor.
data = [
    {
        **recommendation["EndpointConfiguration"],
        **recommendation["ModelConfiguration"],
        **recommendation["Metrics"],
    }
    for recommendation in inference_recommendation_res["InferenceRecommendations"]
]

In [None]:
import pandas as pd
# Build the results table in one non-mutating step, dropping the VariantName
# column when it is present (errors="ignore" keeps this a no-op otherwise).
df = pd.DataFrame(data).drop(columns=["VariantName"], errors="ignore")

# Widen cell rendering so long values are not truncated; the full option name
# is used so the lookup does not rely on partial-name matching.
pd.set_option("display.max_colwidth", 400)
df.head()