In [None]:
from sagemaker import ModelPackage
from sagemaker import get_execution_role
import sagemaker
import boto3
import json

# What to deploy and under which role. The model package ARN is pinned to
# us-east-1 (see the region segment of the ARN).
model_pack_arn = "arn:aws:sagemaker:us-east-1:989792134670:model-package/exaone-3-0-7-8b-instruct-2024-09-03-23-46-14"
role = get_execution_role()

# SDK session handed to the ModelPackage below, plus a separate runtime client
# that later cells use to invoke the endpoint.
sagemaker_session = sagemaker.Session()
smr_client = boto3.client("sagemaker-runtime")

# Handle for the model package; later cells call deploy() and the cleanup
# methods on this object.
model = ModelPackage(
    model_package_arn=model_pack_arn,
    role=role,
    sagemaker_session=sagemaker_session,
)

In [None]:
import time

# Build the endpoint name from a fixed prefix and a second-resolution UTC timestamp.
ts = time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())
sm_model_name = f"exaone-3-0-7-8b-instruct-{ts}"

# Deployment settings gathered in one place so they are easy to tune.
deploy_options = dict(
    initial_instance_count=1,
    instance_type="ml.g5.4xlarge",
    endpoint_name=sm_model_name,
    model_data_download_timeout=600,
    container_startup_health_check_timeout=300,
)
model.deploy(**deploy_options)

# Display the endpoint name recorded on the model object after deployment.
model.endpoint_name

In [None]:
input_text = "When did the first world war end?"

# (tensor name, datatype, single value) for each request input.
# NOTE(review): the name/shape/datatype/data layout looks like the
# KServe v2 / Triton inference protocol -- confirm against the model package docs.
tensor_specs = [
    ("text_input", "BYTES", input_text),
    ("max_tokens", "INT32", 256),
    ("top_p", "FP32", 0.9),
    ("temperature", "FP32", 0.0),
    ("pad_id", "INT32", 0),
    ("end_id", "INT32", 2),
]

# Every input carries exactly one value, declared with shape [1, 1].
payload = {
    "inputs": [
        {"name": tensor_name, "shape": [1, 1], "datatype": dtype, "data": [value]}
        for tensor_name, dtype, value in tensor_specs
    ]
}

In [None]:
%%time
# Serialize the request once, then send it to the endpoint as JSON and ask
# for a JSON reply.
request_body = json.dumps(payload)
response = smr_client.invoke_endpoint(
    EndpointName=sm_model_name,
    ContentType="application/json",
    Accept="application/json",
    Body=request_body,
)

# Read the full response body and parse it into Python objects.
data = response["Body"].read()
output = json.loads(data)

In [None]:
# Keep only the data of the output tensors named "text_output".
result = [
    tensor["data"]
    for tensor in output["outputs"]
    if tensor["name"] == "text_output"
]
result

In [None]:
# Tear down what the deploy cell created, in order: endpoint, endpoint
# config, model. The endpoint config is addressed by the endpoint's name.
endpoint_name = model.endpoint_name
session = model.sagemaker_session

session.delete_endpoint(endpoint_name)
session.delete_endpoint_config(endpoint_name)
model.delete_model()