In [None]:
from sagemaker import ModelPackage
from sagemaker import get_execution_role
import sagemaker
import boto3
import json

# Execution role returned by the SageMaker SDK; handed to ModelPackage further down.
role = get_execution_role()
# SageMaker SDK session built with default settings; the ModelPackage is bound to it.
sagemaker_session = sagemaker.Session()
# Low-level boto3 runtime client, used later to call invoke_endpoint on the deployed endpoint.
smr_client = boto3.client("sagemaker-runtime")
# Model-package identifier; it is the final path segment of every regional ARN.
model_package = "exaone-3-0-7-8b-instruct-2024--8bf096b27ae43d43aed8f8f6521f0392"

# Account id embedded in the model-package ARN for each supported region.
# Ids stay as strings so that leading zeros are preserved.
package_account_by_region = {
    "us-east-1": "865070037744",
    "us-east-2": "057799348421",
    "us-west-1": "382657785993",
    "us-west-2": "594846645681",
    "ca-central-1": "470592106596",
    "eu-central-1": "446921602837",
    "eu-west-1": "985815980388",
    "eu-west-2": "856760150666",
    "eu-west-3": "843114510376",
    "eu-north-1": "136758871317",
    "ap-southeast-1": "192199979996",
    "ap-southeast-2": "666831318237",
    "ap-northeast-2": "745090734665",
    "ap-northeast-1": "977537786026",
    "ap-south-1": "077584701553",
    "sa-east-1": "270155090741",
}

# Region name -> full model-package ARN, in the same region order as the table above.
model_package_map = {
    region_name: f"arn:aws:sagemaker:{region_name}:{account_id}:model-package/{model_package}"
    for region_name, account_id in package_account_by_region.items()
}

# Pick the ARN that matches the region of the default boto3 session;
# stop early when that region has no entry in the ARN table.
region = boto3.Session().region_name
if region not in model_package_map:
    raise Exception(f"Current boto3 session region {region} is not supported.")
model_pack_arn = model_package_map[region]

# Model handle for the selected package; deployment happens in a later cell.
model = ModelPackage(
    model_package_arn=model_pack_arn,
    role=role,
    sagemaker_session=sagemaker_session,
)

In [None]:
import time

# The endpoint name carries a UTC timestamp (second resolution) as its suffix.
ts = time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())
sm_model_name = "exaone-3-0-7-8b-instruct-" + ts

# Create the endpoint on a single ml.g5.4xlarge instance.
# NOTE(review): the two timeout values are presumably in seconds — confirm against the SageMaker SDK docs.
model.deploy(
    endpoint_name=sm_model_name,
    instance_type="ml.g5.4xlarge",
    initial_instance_count=1,
    container_startup_health_check_timeout=300,
    model_data_download_timeout=600,
)
# Display the endpoint name recorded on the model object.
model.endpoint_name

In [None]:
input_text = "When did the first world war end?"

# One (tensor name, datatype, value) row per request input, in the order they are sent.
tensor_specs = [
    ("text_input", "BYTES", input_text),
    ("max_tokens", "INT32", 256),
    ("top_p", "FP32", 0.9),
    ("temperature", "FP32", 0.0),
    ("pad_id", "INT32", 0),
    ("end_id", "INT32", 2),
]

# Every input is a single-value tensor of shape [1, 1].
payload = {
    "inputs": [
        {"name": name, "shape": [1, 1], "datatype": datatype, "data": [value]}
        for name, datatype, value in tensor_specs
    ]
}

In [None]:
%%time
# Serialise the request once, then send it to the deployed endpoint as JSON.
request_body = json.dumps(payload)
response = smr_client.invoke_endpoint(
    EndpointName=sm_model_name,
    ContentType="application/json",
    Accept="application/json",
    Body=request_body,
)
# The response body is a stream: read it fully, then decode the JSON document.
data = response["Body"].read()
output = json.loads(data)

In [None]:
# Collect the data of every output tensor named "text_output" and display it.
result = [
    tensor["data"]
    for tensor in output["outputs"]
    if tensor["name"] == "text_output"
]
result

In [None]:
# Tear down what the deploy step created: the endpoint, its configuration, and the model.
# The endpoint name is also passed as the endpoint-config name.
session = model.sagemaker_session
deployed_endpoint = model.endpoint_name
session.delete_endpoint(deployed_endpoint)
session.delete_endpoint_config(deployed_endpoint)
model.delete_model()