In [6]:
import sagemaker, boto3
from sagemaker import image_uris, model_uris, script_uris
from sagemaker.model import Model
from sagemaker.predictor import Predictor
from sagemaker.utils import name_from_base
from sagemaker.session import Session



# model_version="*" fetches the latest version of the model.
infer_model_id, infer_model_version = "tensorflow-ic-imagenet-mobilenet-v2-100-224-classification-4", "*"
endpoint_name = name_from_base(f"slf-serverless")

inference_instance_type = "ml.m5.xlarge"

sagemaker_session = Session()
aws_role = sagemaker_session.get_caller_identity_arn()
aws_region = boto3.Session().region_name
bucket = sagemaker_session.default_bucket()

In [7]:
# Retrieve the inference docker container uri.
deploy_image_uri = image_uris.retrieve(
    region=None,
    framework=None,
    image_scope="inference",
    model_id=infer_model_id,
    model_version=infer_model_version,
    instance_type=inference_instance_type,
)
# Retrieve the inference script uri.
deploy_source_uri = script_uris.retrieve(
    model_id=infer_model_id, model_version=infer_model_version, script_scope="inference"
)

# Assemble the URI to the model we have trained for slf
input_bucket = bucket
input_prefix = "slf-classifier-training/output/slf-tensorflow-ic-imagenet-mobilenet-v2-2023-01-25-03-06-14-286/output" # Not sure how to autogen
input_model_name = "model.tar.gz"
base_model_uri = f"s3://{input_bucket}/{input_prefix}/{input_model_name}"


# Create the SageMaker model instance. Note that we need to pass Predictor class when we deploy model through Model class,
# for being able to run inference through the sagemaker API.
model = Model(
    image_uri=deploy_image_uri,
    source_dir=deploy_source_uri,
    model_data=base_model_uri,
    entry_point="inference.py",
    role=aws_role,
    predictor_cls=Predictor,
    name=endpoint_name + "-model",
)

In [8]:
from sagemaker.serverless import ServerlessInferenceConfig
serverless_config = ServerlessInferenceConfig(memory_size_in_mb=1024, max_concurrency=3)

In [9]:
# deploy the Model.
model_predictor = model.deploy(
    initial_instance_count=1,
    instance_type=inference_instance_type,
    endpoint_name=endpoint_name,
    serverless_inference_config=serverless_config
)

ClientError: An error occurred (ValidationException) when calling the CreateModel operation: 1 validation error detected: Value 'slf-serverless-mobilenet-v2-100-224-2023-01-25-17-08-19-381-model' at 'modelName' failed to satisfy constraint: Member must have length less than or equal to 63