# Create Endpoint

#### Make sure everything is up to date to use the new Serverless Endpoint

In [None]:
# Shell escape (IPython `!`): upgrade the SageMaker SDK, botocore, boto3 and
# the AWS CLI so the serverless endpoint features used below are available.
! pip install sagemaker botocore boto3 awscli --upgrade

In [None]:
import boto3
import sagemaker
from sagemaker import ModelPackage
from sagemaker import image_uris
from time import gmtime, strftime

#### 1. Create Model

In [None]:
# Region of the default boto3 session, and a SageMaker client bound to it.
region = boto3.Session().region_name
client = boto3.client("sagemaker", region_name=region)

# Execution role that SageMaker uses to access other AWS services.
sagemaker_role = sagemaker.get_execution_role()

# S3 location of the trained model artifact.
# Example path - "s3://sagemaker-<region>-XXXXXXXXXXXX/<folder>/model/pipelines-XXXXXXXXXXXXXXX/output/model.tar.gz"
model_url = '<s3 path for created model>'

# Prebuilt TensorFlow inference image used to serve the model.
# NOTE(review): this URI is pinned to us-east-2 while `region` comes from the
# boto3 session - confirm the two match before running.
container_uri = "763104351884.dkr.ecr.us-east-2.amazonaws.com/tensorflow-inference:2.5-cpu"

# Name under which the model is created in SageMaker.
model_name = "retrieval-endpoint-test"

# Single container: the serving image plus the model artifact it loads.
model_container = {
    "Image": container_uri,
    "Mode": "SingleModel",
    "ModelDataUrl": model_url,
}

response = client.create_model(
    ModelName=model_name,
    ExecutionRoleArn=sagemaker_role,
    Containers=[model_container],
)

#### 2. Create Endpoint Config

In [None]:
# Endpoint-config name carries a UTC timestamp suffix.
epc_timestamp = strftime("%Y-%m-%d-%H-%M-%S", gmtime())
retrieval_epc_name = f"retrieval-serverless-epc{epc_timestamp}"

# The "ServerlessConfig" entry on the variant is what requests a serverless endpoint.
serverless_variant = {
    "VariantName": "AllTraffic",
    "ModelName": model_name,
    "ServerlessConfig": {
        "MemorySizeInMB": 2048,
        "MaxConcurrency": 1,
    },
}

endpoint_config_response = client.create_endpoint_config(
    EndpointConfigName=retrieval_epc_name,
    ProductionVariants=[serverless_variant],
)

print(f"Endpoint Configuration Arn: {endpoint_config_response['EndpointConfigArn']}")

#### 3. Create Serverless Endpoint

In [None]:
# Endpoint name carries a UTC timestamp suffix.
endpoint_timestamp = strftime("%Y-%m-%d-%H-%M-%S", gmtime())
endpoint_name = f"retrieval-serverless-ep{endpoint_timestamp}"

# Create the endpoint from the endpoint configuration built in step 2.
create_endpoint_response = client.create_endpoint(
    EndpointConfigName=retrieval_epc_name,
    EndpointName=endpoint_name,
)

print(f"Endpoint Arn: {create_endpoint_response['EndpointArn']}")

#### 4. Test Endpoint

In [None]:
# Invocations go through the SageMaker *runtime* API, which is a separate
# client from the "sagemaker" client used above to create resources.
runtime = boto3.client("sagemaker-runtime")

# Replace the three placeholders below before running this cell.
endpoint_name = "<your-endpoint-name>"
content_type = "<request-mime-type>"
payload = "<your-request-body>"

# invoke_endpoint is an operation of the runtime client, not of the
# "sagemaker" client held in `client`.
response = runtime.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType=content_type,
    Body=payload,
)

#### 5. Test Endpoint (After Setting Up API Gateway)

In [None]:
import requests

In [None]:
# Request body sent to the API as JSON.
payload = "pblackburn"
# Example URL path - https://XXXXXXXX.execute-api.<region>.amazonaws.com/<specified path>
api_url = '<url path to API>'
r = requests.post(api_url, json=payload)


In [None]:
# Display the first 20 entries of "output_2" for the first prediction.
first_prediction = r.json()['predictions'][0]
first_prediction['output_2'][:20]

# Update Endpoint
- Check deployed model
- Check newly approved model
- If the latest approved model is not deployed, then deploy the latest model  
I use the ARN as the model name, so it is easy to see if the latest model is deployed.

In [None]:
# Same setup as in the "Create Endpoint" section: region of the default
# boto3 session and a SageMaker client bound to it.
region = boto3.Session().region_name
client = boto3.client(
    "sagemaker",
    region_name=region,
)

# Execution role that SageMaker uses to access other AWS services.
sagemaker_role = sagemaker.get_execution_role()

In [None]:
# Endpoint to keep in sync with the model registry, and the registry group to
# read from. Each is defined once so the calls below cannot drift apart.
ENDPOINT_NAME = 'retrieval-serverless-ep2021-12-10-15-22-49'
MODEL_PACKAGE_GROUP = 'PipelineModelPackageGroup'

# Get approved models in the model registry, newest first.
updated_model_request = client.list_model_packages(
    ModelApprovalStatus='Approved',
    ModelPackageGroupName=MODEL_PACKAGE_GROUP,
    ModelPackageType='Both',
    SortBy='CreationTime',
    SortOrder='Descending',
)
approved_packages = updated_model_request['ModelPackageSummaryList']
if not approved_packages:
    # Fail with a clear message instead of an IndexError on the [0] lookup.
    raise RuntimeError(
        f"No approved model packages found in group '{MODEL_PACKAGE_GROUP}'"
    )
most_recent_model_arn = approved_packages[0]['ModelPackageArn']

# The model name is the tail of the package ARN with "/" and ":" removed, so
# the deployed model name identifies the package it was built from.
# NOTE(review): the fixed 54-character slice depends on the ARN's length
# (group name, version digits) - verify it still yields the intended name.
model_name = most_recent_model_arn[-54:].replace("/", "").replace(":", "")

# Get endpoint data: endpoint -> its endpoint config -> the model it serves.
endpoint_response = client.describe_endpoint(
    EndpointName=ENDPOINT_NAME,
)

most_recent_endpoint_config = endpoint_response['EndpointConfigName']

endpoint_config_response = client.describe_endpoint_config(
    EndpointConfigName=most_recent_endpoint_config,
)

current_endpoint_model = endpoint_config_response['ProductionVariants'][0]['ModelName']

if model_name == current_endpoint_model:
    print("Model is already up to date")
else:
    print("Updating Endpoint Model....")
    try:
        # Pull the S3 path of the most recent model.
        s3_response = client.describe_model_package(
            ModelPackageName=most_recent_model_arn,
        )
        most_recent_s3_model_bucket = (
            s3_response['InferenceSpecification']['Containers'][0]['ModelDataUrl']
        )
        # Get container image (prebuilt example).
        # NOTE(review): this URI is pinned to us-east-2 while `region` comes
        # from the boto3 session - confirm the two match.
        container_uri = "763104351884.dkr.ecr.us-east-2.amazonaws.com/tensorflow-inference:2.5-cpu"

        try:
            # Create model
            response = client.create_model(
                ModelName=model_name,
                ExecutionRoleArn=sagemaker_role,
                Containers=[{
                    "Image": container_uri,
                    "Mode": "SingleModel",
                    "ModelDataUrl": most_recent_s3_model_bucket,
                }],
            )
        except client.exceptions.ClientError as err:
            # Best-effort, as before: report the service error and carry on.
            # If the model really is missing, the endpoint-config call below
            # fails and is reported by the outer handler.
            print(f"Error creating model: {err}")

        # Create endpoint config (name carries a UTC timestamp suffix).
        retrieval_epc_name = "retrieval-serverless-epc" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())

        endpoint_config_response = client.create_endpoint_config(
            EndpointConfigName=retrieval_epc_name,
            ProductionVariants=[
                {
                    "VariantName": "AllTraffic",
                    "ModelName": model_name,
                    "ServerlessConfig": {
                        "MemorySizeInMB": 2048,
                        "MaxConcurrency": 1,
                    },
                },
            ],
        )

        # Point the existing endpoint at the new endpoint config.
        # NOTE(review): update_endpoint is presumably asynchronous, so the
        # success message below means the request was accepted - confirm
        # against the boto3 documentation.
        response = client.update_endpoint(
            EndpointName=ENDPOINT_NAME,
            EndpointConfigName=retrieval_epc_name,
        )
    except Exception as err:
        # Cell-level boundary: keep the original "report and continue"
        # behaviour, but no longer trap KeyboardInterrupt/SystemExit and
        # include the error so failures can be diagnosed.
        print(f"There was an issue updating the endpoint: {type(err).__name__}: {err}")
    else:
        print("Endpoint updated successfully")