 ## Initialize Your Environment

In [None]:
import time
from urllib.parse import urlparse
import re
import boto3
import sagemaker as sage
from sagemaker import get_execution_role, ModelPackage
from sagemaker.async_inference import AsyncInferenceConfig
import requests

# --- SageMaker SDK session and the values read from it ---
session = sage.Session()
region = session.boto_region_name
s3_bucket = session.default_bucket()
role = get_execution_role()

# --- Plain boto3 session and the account ID of the calling identity ---
boto_session = boto3.Session()
account_id = boto3.client("sts").get_caller_identity().get("Account")

# --- Service clients ---
# The S3 client is built from the SageMaker session's underlying boto session;
# the others come from boto3's default session.
s3_client = session.boto_session.client("s3")
sagemaker = boto3.client("sagemaker")
sm_runtime = boto3.client("sagemaker-runtime")
ecr = boto3.client("ecr")


# Deploy the model as an endpoint

In [None]:
# ARN of the model package to deploy; replace the placeholder with your own.
model_package_arn = 'arn:aws:sagemaker:YOUR ARN'

# Bind the model package to the notebook's execution role and SageMaker session.
model = ModelPackage(role=role, model_package_arn=model_package_arn, sagemaker_session=session)

# Results of asynchronous invocations are written under this S3 prefix.
async_inference_config = AsyncInferenceConfig(output_path="s3://svs-sage-output/outputs/")

# Create the endpoint: a single ml.m5.2xlarge instance configured for async inference.
model.deploy(
    endpoint_name='svsdeid',
    instance_type='ml.m5.2xlarge',
    initial_instance_count=1,
    async_inference_config=async_inference_config,
)

# Last expression of the cell: show the name of the endpoint that was created.
model.endpoint_name


# List endpoints and their status

In [None]:
# Print a two-column table (name | status) of every SageMaker endpoint
# visible to the boto session, following pagination.
sagemaker = boto_session.client("sagemaker")
paginator = sagemaker.get_paginator("list_endpoints")

# Becomes True once the table header has been printed, i.e. as soon as the
# first endpoint is seen.
endpoints_found = False

for page in paginator.paginate():
    for ep in page["Endpoints"]:
        if not endpoints_found:
            print(f"{'EndpointName':<60} | {'Status'}")
            print("-" * 80)
            endpoints_found = True
        name, status = ep["EndpointName"], ep["EndpointStatus"]
        print(f"{name:<60} | {status}")

if not endpoints_found:
    print("No endpoints found.")


# 1. Upload an SVS file to S3 if not already done


In [None]:
# Step 1: Download the SVS file from GitHub
svs_url = "https://github.com/JohnSnowLabs/visual-nlp-workshop/raw/refs/heads/master/jupyter/data/svs/62893.svs"
local_path = "input.svs"

# Stream the body to disk in chunks so the whole slide is never held in memory.
# raise_for_status() aborts on HTTP errors; without it an error page would be
# saved as input.svs and then uploaded to S3 as if it were a slide.
# The timeout keeps the cell from hanging forever on a stalled connection.
with requests.get(svs_url, stream=True, timeout=60) as response:
    response.raise_for_status()
    with open(local_path, "wb") as f:
        for chunk in response.iter_content(chunk_size=1024 * 1024):
            f.write(chunk)
print(f"Downloaded to {local_path}")

# Step 2: Upload to S3
bucket_name = "svs-sage-input"
s3_key = "input.svs"

s3_client.upload_file(local_path, bucket_name, s3_key)
print(f"Uploaded {local_path} to s3://{bucket_name}/{s3_key}")


## 2. Test Deployed Endpoint
Do **not** use `sm_runtime.invoke_endpoint()`; it is not supported.  
You must use `sm_runtime.invoke_endpoint_async()` for async inference.

In [None]:
# ASYNC
# NOTE(review): the region is hard-coded; it must be the region in which the
# endpoint was deployed.
sm_runtime = boto_session.client("sagemaker-runtime", region_name='us-east-1')

response = sm_runtime.invoke_endpoint_async(
    # Name given to the endpoint in the deployment cell of this notebook.
    EndpointName='svsdeid',
    ContentType='application/octet-stream',
    # Object uploaded in step 1 (bucket "svs-sage-input", key "input.svs").
    InputLocation='s3://svs-sage-input/input.svs',
    Accept='application/octet-stream',
)
# Using `invoke_endpoint_async` returns a response with an `InferenceId` and `OutputLocation` where the results will be stored.
response

Let's poll S3 until the output file is ready, then download it.

In [None]:
# Get output S3 location
s3 = boto_session.client('s3')
output_s3_url = response['OutputLocation']
parsed_url = urlparse(output_s3_url)
bucket = parsed_url.netloc
key = parsed_url.path.lstrip('/')

# Polling settings. The progress message is built from poll_interval_s so the
# text can no longer disagree with the actual sleep duration.
poll_interval_s = 20   # seconds between two checks
max_wait_s = 1000      # give up after this many seconds

print(f"Waiting for output file to be ready at: {output_s3_url}")

# Wait until file exists in S3. The loop is bounded: if the output object is
# never written (for example because the request did not succeed), waiting
# forever would just hang the notebook.
deadline = time.monotonic() + max_wait_s
while True:
    try:
        s3.head_object(Bucket=bucket, Key=key)
    except s3.exceptions.ClientError as e:
        # Only "not there yet" is retried; any other error is a real failure.
        if e.response['Error']['Code'] != '404':
            raise
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Output file did not appear at {output_s3_url} within {max_wait_s} seconds."
            ) from e
        print(f"Still processing... checking again in {poll_interval_s} seconds.")
        time.sleep(poll_interval_s)
    else:
        print("Output file is ready!")
        break

# Download the file
output_path = 'downloaded_result.out'
s3.download_file(bucket, key, output_path)
print(f"Downloaded output to {output_path}")

## Put everything in re-usable functions
The helper functions defined below will:
1. Upload a local file to S3
2. Submit an async inference job to the SageMaker endpoint
3. Poll the SageMaker endpoint logs until the inference ID is completed, then download the file (`wait_until_file_available_and_download()`)
4. Download the output file from S3 to a local path

Feel free to adjust them to your needs.

In [None]:
# Latency summary that accompanies the "Inference request succeeded" log line.
_LATENCY_PATTERN = re.compile(
    r"ModelLatency: (\d+) us, "
    r"RequestDownloadLatency: (\d+) us, "
    r"ResponseUploadLatency: (\d+) us, "
    r"TimeInBacklog: (\d+) ms, "
    r"TotalProcessingTime: (\d+) ms"
)

# How far back (in milliseconds) each log query looks: one hour.
_LOG_LOOKBACK_MS = 60 * 60 * 1000


def _find_success_latency(logs_client, log_group, inference_id, deadline):
    """Query the log group once for the success line of *inference_id*.

    All result pages are followed via ``nextToken``: ``filter_log_events`` may
    return an empty page together with a token while matching events exist
    further on, so looking at the first page only can miss the event forever.

    Returns the regex match holding the five latency figures, or ``None`` when
    no matching line is available yet (or *deadline*, a ``time.time()`` value,
    passed while paging).
    """
    now_ms = int(time.time() * 1000)
    request = {
        "logGroupName": log_group,
        "filterPattern": f'"Inference request succeeded" "{inference_id}"',
        "startTime": now_ms - _LOG_LOOKBACK_MS,
        "endTime": now_ms,
        "limit": 10,
    }
    while True:
        page = logs_client.filter_log_events(**request)
        for event in page.get("events", []):
            match = _LATENCY_PATTERN.search(event["message"])
            if match:
                return match
        next_token = page.get("nextToken")
        if not next_token or time.time() > deadline:
            return None
        request["nextToken"] = next_token


def _print_latency_breakdown(match):
    """Print the five latency figures captured by ``_LATENCY_PATTERN`` in seconds."""
    model_latency_us = int(match.group(1))
    request_download_latency_us = int(match.group(2))
    response_upload_latency_us = int(match.group(3))
    time_in_backlog_ms = int(match.group(4))
    total_processing_ms = int(match.group(5))

    print("📦 Inference Latency Breakdown (in seconds):")
    print(f"🧠 ModelLatency:           {model_latency_us / 1e6:.3f} s")
    print(f"⬇️ RequestDownloadLatency: {request_download_latency_us / 1e6:.3f} s")
    print(f"⬆️ ResponseUploadLatency:  {response_upload_latency_us / 1e6:.3f} s")
    print(f"⏳ TimeInBacklog:          {time_in_backlog_ms / 1e3:.3f} s")
    print(f"📈 TotalProcessingTime:    {total_processing_ms / 1e3:.3f} s")


def wait_until_file_available_and_download(s3_client, logs_client, s3_url, inference_id, log_group, download_path,
                                           poll_interval=20, timeout=1000):
    """Wait for an async inference to finish, then download its output.

    First polls the CloudWatch log group until a line containing
    "Inference request succeeded" and *inference_id* (with a parsable latency
    summary) shows up, printing that summary. Then polls S3 until the output
    object exists and downloads it.

    Args:
        s3_client: S3 client offering ``head_object`` and ``download_file``.
        logs_client: CloudWatch Logs client offering ``filter_log_events``.
        s3_url: ``s3://bucket/key`` URL of the expected output object.
        inference_id: Inference ID to look for in the logs.
        log_group: Name of the log group to search.
        download_path: Local path the output is written to.
        poll_interval: Seconds to sleep between two polls.
        timeout: Overall budget in seconds, shared by both polling phases.

    Returns:
        *download_path*.

    Raises:
        TimeoutError: If the success log line or the output object does not
            appear within *timeout* seconds.
    """
    parsed_url = urlparse(s3_url)
    bucket = parsed_url.netloc
    key = parsed_url.path.lstrip('/')

    print(f"Waiting for output file at: {s3_url}")
    start_time = time.time()
    deadline = start_time + timeout

    # Phase 1: wait for the success line in the endpoint logs.
    while True:
        match = _find_success_latency(logs_client, log_group, inference_id, deadline)
        if match:
            _print_latency_breakdown(match)
            break

        if time.time() - start_time > timeout:
            raise TimeoutError("Timeout waiting for inference logs.")

        print("Still processing... checking logs again shortly.")
        time.sleep(poll_interval)

    # Phase 2: now poll S3 for the result object.
    while True:
        try:
            s3_client.head_object(Bucket=bucket, Key=key)
            print("Output file is ready!")
            break
        except s3_client.exceptions.ClientError as e:
            # Only a missing object is retried; other errors propagate.
            if e.response['Error']['Code'] == '404':
                if time.time() - start_time > timeout:
                    raise TimeoutError("Timeout waiting for output file.")
                print("Still processing... checking S3 again shortly.")
                time.sleep(poll_interval)
            else:
                raise

    s3_client.download_file(bucket, key, download_path)
    print(f"Downloaded output to {download_path}")
    return download_path


def upload_file_to_s3(s3_client, local_path, s3_path):
    """Upload a local file to the location given by an ``s3://bucket/key`` URL.

    Args:
        s3_client: S3 client offering ``upload_file``.
        local_path: Path of the local file to upload.
        s3_path: Destination as ``s3://bucket/key``.

    Raises:
        ValueError: If *s3_path* is not an ``s3://`` URL with both a bucket
            and a key. Without this check a path such as ``bucket/key`` would
            be parsed into an empty bucket name and fail later with a far less
            helpful error.
    """
    parsed = urlparse(s3_path)
    bucket = parsed.netloc
    key = parsed.path.lstrip('/')

    if parsed.scheme != "s3" or not bucket or not key:
        raise ValueError(
            f"Expected an S3 URL of the form s3://bucket/key, got: {s3_path!r}"
        )

    print(f"Uploading {local_path} to {s3_path}...")
    s3_client.upload_file(local_path, bucket, key)
    print("Upload complete.")


def process_and_download(
        input_s3_path,
        endpoint_name,
        download_path,
        region='us-east-1',
        poll_interval=10,
        timeout=600,
        local_svs_to_upload=None,
        deid_tags=None
):
    """Run one async inference end to end and download its result.

    Optionally uploads a local file to *input_s3_path*, submits an async
    invocation to *endpoint_name*, then waits for the result via
    ``wait_until_file_available_and_download`` and returns its return value.

    Args:
        input_s3_path: ``s3://bucket/key`` URL of the input object.
        endpoint_name: Name of the SageMaker endpoint to invoke.
        download_path: Local path the output is written to.
        region: Region used for the S3, SageMaker runtime and Logs clients.
        poll_interval: Seconds between two polls while waiting.
        timeout: Overall waiting budget in seconds.
        local_svs_to_upload: If set, this local file is first uploaded to
            *input_s3_path*.
        deid_tags: Optional list of tag names, sent to the endpoint as the
            custom attribute ``svs_tags=<comma-separated tags>``.

    Returns:
        The local path of the downloaded output.
    """
    s3 = boto_session.client("s3", region_name=region)
    sm_runtime = boto_session.client("sagemaker-runtime", region_name=region)
    logs = boto_session.client("logs", region_name=region)

    if local_svs_to_upload:
        upload_file_to_s3(s3, local_svs_to_upload, input_s3_path)

    request = {
        "EndpointName": endpoint_name,
        "ContentType": 'application/octet-stream',
        "InputLocation": input_s3_path,
        "Accept": 'application/octet-stream',
    }

    # CustomAttributes must be a string: boto3 rejects None, so the key is
    # only added when there is something to send.
    if deid_tags:
        custom_attributes = f"svs_tags={','.join(deid_tags)}"
        print(f"Sending custom attributes: {custom_attributes}")
        request["CustomAttributes"] = custom_attributes

    response = sm_runtime.invoke_endpoint_async(**request)

    print(f'Got response: {response}')

    inference_id = response['InferenceId']
    output_s3_url = response['OutputLocation']
    # Log group that is searched for this endpoint's success line.
    log_group = f"/aws/sagemaker/Endpoints/{endpoint_name}"

    return wait_until_file_available_and_download(
        s3_client=s3,
        logs_client=logs,
        s3_url=output_s3_url,
        inference_id=inference_id,
        log_group=log_group,
        download_path=download_path,
        poll_interval=poll_interval,
        timeout=timeout
    )


# Example usage

process_and_download(
    # Specify the Sagemaker endpoint name
    endpoint_name='svsdeid',

    # Specify the S3 path where the input SVS file is located or will be uploaded.
    # This is the object uploaded in step 1 of this notebook; since
    # local_svs_to_upload is None below, the object must already exist.
    input_s3_path='s3://svs-sage-input/input.svs',

    # Specify the local SVS file to upload, if needed
    local_svs_to_upload=None,

    # Specify the local path where you want to download the output
    download_path='output3.svs',

    # Specify the tags you want to de-identify from SVS metadata
    deid_tags=[
        'ImageDescription.ScanScope ID',
        'ImageDescription.Time Zone',
        'ImageDescription.ScannerType',
    ]
)


# References

1. SVS Sagemaker Endpoint Documentation
2. SVS DEID Model Card
3. SVS Medium Article Part 1
4. SVS Medium Article Part 2
5. SVS Medium Article Part 3
6. John Snow Labs Visual NLP