# SageMaker Async Endpoint Deployment

Use this notebook to:

- Create SageMaker model
- Create async endpoint configuration
- Deploy async endpoint

In [1]:
# Uncomment to install the two libraries this notebook imports (boto3, sagemaker).
# !pip install boto3
# !pip install sagemaker



In [4]:
import boto3
import sagemaker
from sagemaker import get_execution_role

# SageMaker context for this notebook
session = sagemaker.Session()
region = session.boto_region_name
role = get_execution_role()  # Or manually set your role ARN
sts_client = boto3.client("sts")
account_id = sts_client.get_caller_identity()["Account"]

# Resource names
endpoint_name = "gencast-async-endpoint"
endpoint_config_name = "gencast-async-config"
model_name = "gencast-container"

# Container image (an ECR registry URI) and the S3 prefix for async results
image_uri = "193871648423.dkr.ecr.eu-west-2.amazonaws.com/gencast-container:latest"
output_s3_path = "s3://gencast-async/async-output/"

In [5]:
import boto3

sagemaker_client = boto3.client("sagemaker", region_name="eu-west-2")

# The endpoint and its configuration are addressed by the same name here.
# NOTE(review): presumably an earlier deploy() named the config after the
# endpoint -- confirm before relying on it.
endpoint_name = "gencast-async-endpoint"
endpoint_config_name = "gencast-async-endpoint"  # original note: "This matches the error message"


def _delete_and_report(delete_call, success_message, failure_label):
    """Run one SageMaker delete call and print the outcome instead of raising.

    delete_call     -- zero-argument callable performing the delete request
    success_message -- text printed when the request succeeds
    failure_label   -- resource description used in the error line
    """
    try:
        delete_call()
    except sagemaker_client.exceptions.ClientError as err:
        print(f"Error deleting {failure_label}: {err}")
    else:
        print(success_message)


# Endpoint first, then the endpoint configuration
_delete_and_report(
    lambda: sagemaker_client.delete_endpoint(EndpointName=endpoint_name),
    f"Endpoint '{endpoint_name}' deleted successfully.",
    "endpoint",
)
_delete_and_report(
    lambda: sagemaker_client.delete_endpoint_config(EndpointConfigName=endpoint_config_name),
    f"Endpoint config '{endpoint_config_name}' deleted successfully.",
    "endpoint config",
)


Error deleting endpoint: An error occurred (ValidationException) when calling the DeleteEndpoint operation: Could not find endpoint "gencast-async-endpoint".
Endpoint config 'gencast-async-endpoint' deleted successfully.


In [6]:
from sagemaker.async_inference import AsyncInferenceConfig
from sagemaker.model import Model
from sagemaker.predictor import Predictor

# Create the model.
# predictor_cls is passed so that deploy() returns a usable predictor object;
# a Model without predictor_cls makes deploy() return None.
model = Model(
    image_uri=image_uri,
    role=role,
    sagemaker_session=session,
    name=model_name,
    predictor_cls=Predictor,
)

# Create async inference config: where results and failures are written in S3,
# and how many requests one instance processes at a time.
async_config = AsyncInferenceConfig(
    output_path=output_s3_path,
    failure_path="s3://gencast-async/async-failures/",
    max_concurrent_invocations_per_instance=1
)

# Deploy the model as an async endpoint.
# With predictor_cls set and an async config supplied, deploy() returns an
# AsyncPredictor wrapping a Predictor for `endpoint_name`.
predictor = model.deploy(
    initial_instance_count=1,
    instance_type="ml.t2.medium",
    endpoint_name=endpoint_name,
    async_inference_config=async_config
)


Using already existing model: gencast-container


-----------!

In [7]:
import json
import boto3
import uuid
from sagemaker import Session
from sagemaker.predictor_async import AsyncPredictor
from sagemaker.predictor import Predictor

# Setup: fresh SDK session, S3 client, and a unique key for this request's payload
session = Session()
s3_client = boto3.client("s3")
bucket = "gencast-async"
input_key = f"async-inputs/input-{uuid.uuid4()}.json"
input_data = {
    "currentDate": "2019-03-29",
    "targetDate": "2019-04-01"
}

# Upload the JSON-serialized payload to S3
upload_request = {
    "Bucket": bucket,
    "Key": input_key,
    "Body": json.dumps(input_data),
}
s3_client.put_object(**upload_request)

# S3 URI of the uploaded payload
input_path = f"s3://{bucket}/{input_key}"

In [8]:
# Echo where the request payload was uploaded.
for caption, value in (("Bucket:", bucket), ("Key:", input_key)):
    print(caption, value)


Bucket: gencast-async
Key: async-inputs/input-2ecd47e5-f0a7-4bc1-97d8-4211a8e98c13.json


In [None]:
from sagemaker.predictor import Predictor
from sagemaker.predictor_async import AsyncPredictor

# Create predictor and async wrapper for the deployed endpoint
predictor = Predictor(endpoint_name="gencast-async-endpoint", sagemaker_session=session)
async_predictor = AsyncPredictor(predictor)

# Submit async inference request without waiting for it to finish.
# predict_async() returns an AsyncInferenceResponse, which carries output_path,
# failure_path and get_result(). predict() would instead block until the job
# completes and return the inference result itself, which has none of those.
async_response = async_predictor.predict_async(
    input_path=input_path
)

print("Async inference request submitted.")
print("Output will be saved to:", async_response.output_path)
print("Failure path (if any):", async_response.failure_path)


In [None]:
# Show where SageMaker would write the error record for this request.
failure_location = async_response.failure_path
print("Failure path:", failure_location)


In [None]:
from sagemaker.async_inference.waiter_config import WaiterConfig

# get_result() with no waiter_config looks at the S3 output location once and
# raises if the object is not there yet, so a job that is still running would be
# reported as a failure. Poll instead: up to 60 attempts, 15 seconds apart.
try:
    result = async_response.get_result(
        waiter_config=WaiterConfig(max_attempts=60, delay=15)
    )
    print("Result:", result)
except Exception as e:
    # Reached for a failed inference as well as for running out of polling attempts.
    print("Inference failed:", e)


In [None]:
# Fetch the inference output (unguarded: any error propagates) and show it.
result = async_response.get_result()
print("Result downloaded:", result, sep="\n")
