In [None]:
import sagemaker, boto3
from sagemaker.model import Model
from sagemaker.session import Session
from sagemaker.predictor import Predictor
from sagemaker import image_uris, model_uris

In [None]:
# Deployment configuration: edit these values to change what gets deployed.
MODEL_ID = 'huggingface-text2text-flan-t5-base'  # model id used for the image/model URI lookups
INSTANCE_TYPE = 'ml.g4dn.2xlarge'                # instance type used for image lookup and hosting
INSTANCE_COUNT = 1                               # initial number of instances behind the endpoint
STACK_NAME = 'LLMStackKendra'                    # CloudFormation stack name

In [None]:
# Get endpoint name from cloudformation output
cf_client = boto3.client('cloudformation')
# Reuse the configured stack name rather than a second hard-coded copy, so that
# editing STACK_NAME in the configuration cell is enough to target another stack.
stackname = STACK_NAME

response = cf_client.describe_stacks(StackName=stackname)
# Use an empty list when the stack description carries no "Outputs" entry, so the
# failure below reports the missing output instead of a bare KeyError('Outputs').
outputs = response["Stacks"][0].get("Outputs", [])

# Flatten the list of {OutputKey, OutputValue} records into a plain lookup dict.
cf_outputs = {output['OutputKey']: output['OutputValue'] for output in outputs}

if 'SageMakerEndpoint' not in cf_outputs:
    # Still a KeyError (as before), but with enough context to diagnose the stack.
    raise KeyError(
        f"Stack '{stackname}' has no 'SageMakerEndpoint' output; "
        f"available outputs: {sorted(cf_outputs)}"
    )

ENDPOINT_NAME = cf_outputs['SageMakerEndpoint']

In [None]:
# Create a single SageMaker session and derive the role and region from it.
# The original cell built two SageMaker sessions plus a separate boto3 session;
# reusing one keeps the role and the region tied to the same session.
sagemaker_session = Session()
sess = sagemaker_session  # alias kept for any code that refers to `sess`

# ARN of the identity the notebook is running as; used as the model's role below.
aws_role = sagemaker_session.get_caller_identity_arn()

# Region the SageMaker session is bound to.
aws_region = sagemaker_session.boto_region_name

print(f'aws_role={aws_role}')
print(f'aws_region={aws_region}')

In [None]:
# Both lookups target the same model id and use the '*' version specifier.
jumpstart_selector = {"model_id": MODEL_ID, "model_version": '*'}

# Inference container image URI for the chosen instance type.
# region/framework are passed as None, exactly as before.
deploy_image_uri = image_uris.retrieve(
    framework=None,
    region=None,
    image_scope="inference",
    instance_type=INSTANCE_TYPE,
    **jumpstart_selector,
)

# URI of the model artifact for the inference scope.
model_uri = model_uris.retrieve(model_scope="inference", **jumpstart_selector)

print(f'deploy_image_uri: {deploy_image_uri} \n')
print(f'model_uri: {model_uri}')

In [None]:
# Describe the model: container image + model artifact + execution role.
# The model is given the same name as the endpoint read from the stack outputs.
model_inference = Model(
    name=ENDPOINT_NAME,
    role=aws_role,
    image_uri=deploy_image_uri,
    model_data=model_uri,
    # Environment passed to the container; presumably limits the model server
    # to one worker per model — TODO confirm against the container docs.
    env={"TS_DEFAULT_WORKERS_PER_MODEL": "1"},
    predictor_cls=Predictor,
)

# Hosting capacity comes straight from the configuration cell.
hosting_options = {
    "initial_instance_count": INSTANCE_COUNT,
    "instance_type": INSTANCE_TYPE,
}

# Deploy the model behind the endpoint name taken from the stack outputs.
model_predictor_inference = model_inference.deploy(
    endpoint_name=ENDPOINT_NAME,
    predictor_cls=Predictor,
    **hosting_options,
)

In [None]:
# Report the endpoint name the deployment cell used.
deployed_message = f'Deployed model with endpoint: {ENDPOINT_NAME}'
print(deployed_message)