# Create SageMaker endpoint

In [None]:
import sagemaker
from sagemaker.huggingface import HuggingFaceModel

# SageMaker session plus the execution role returned by the SDK for this environment.
sess = sagemaker.Session()
role = sagemaker.get_execution_role()

# NOTE: the message below says "training", but in this notebook `role` is
# passed to HuggingFaceModel for deployment.
print(f"IAM role arn used for running training: {role}")

# Resolve the session's default bucket once, then report it.
artifact_bucket = sess.default_bucket()
print(f"S3 bucket used for storing artifacts: {artifact_bucket}")

In [3]:
# Repository URL and branch to use.
# NOTE(review): an earlier comment tied this to "v4.4.2" as the
# `transformers_version` used in the estimator, but the branch here is
# 'master' and no cell visible in this notebook reads `git_config` --
# confirm whether it is still needed.
git_config = {
    'repo': 'https://github.com/philschmid/transformers.git',
    'branch': 'master',
}

In [15]:

# Hub model configuration (model id + task), handed to the model as `env`.
# Models are listed at https://huggingface.co/models
hub = dict(
    HF_MODEL_ID='facebook/blenderbot-400M-distill',
    HF_TASK='conversational',
)

# Hugging Face model object: framework versions, Hub settings and IAM role.
huggingface_model = HuggingFaceModel(
    role=role,
    env=hub,
    transformers_version='4.17.0',
    pytorch_version='1.10.2',
    py_version='py38',
)

# Deploy to SageMaker Inference on a single ml.m5.xlarge instance.
predictor = huggingface_model.deploy(
    instance_type='ml.m5.xlarge',
    initial_instance_count=1,
)

# Sample request: one new user message together with the earlier turn.
sample_request = {
    'inputs': {
        "past_user_inputs": ["Which movie is the best ?"],
        "generated_responses": ["It's Die Hard for sure."],
        "text": "Can you explain why ?",
    }
}
predictor.predict(sample_request)

INFO:sagemaker:Creating model with name: huggingface-pytorch-inference-2023-03-11-13-14-19-656
INFO:sagemaker:Creating endpoint-config with name huggingface-pytorch-inference-2023-03-11-13-14-20-423
INFO:sagemaker:Creating endpoint with name huggingface-pytorch-inference-2023-03-11-13-14-20-423


------!

{'generated_text': " It's based on the book of the same name by James Fenimore Cooper.",
 'conversation': {'past_user_inputs': ['Which movie is the best ?',
   'Can you explain why ?'],
  'generated_responses': ["It's Die Hard for sure.",
   " It's based on the book of the same name by James Fenimore Cooper."]}}

In [17]:
# Send the sample conversation to `predictor` once more.
repeat_request = dict(
    inputs=dict(
        past_user_inputs=["Which movie is the best ?"],
        generated_responses=["It's Die Hard for sure."],
        text="Can you explain why ?",
    )
)
predictor.predict(repeat_request)

{'generated_text': " It's based on the book of the same name by James Fenimore Cooper.",
 'conversation': {'past_user_inputs': ['Which movie is the best ?',
   'Can you explain why ?'],
  'generated_responses': ["It's Die Hard for sure.",
   " It's based on the book of the same name by James Fenimore Cooper."]}}

# Lambda Function

In [None]:
import os
import io
import boto3
import json

# grab environment variables
ENDPOINT_NAME = os.environ['ENDPOINT_NAME']
runtime= boto3.client("sagemaker-runtime", region_name='ap-southeast-1')

def _encode_inputs(message):
    """Return ``message`` as the JSON text to send as the request body.

    * Non-string values (dict, list, ...) are serialized with ``json.dumps``,
      so apostrophes, embedded double quotes, booleans and ``None`` are
      encoded correctly.
    * Strings that already parse as JSON are sent unchanged.
    * Any other string goes through the legacy quote-swapping conversion so
      callers that pass a Python-repr style string keep working.
    """
    if not isinstance(message, str):
        # ensure_ascii=False keeps non-ASCII text as raw UTF-8.
        return json.dumps(message, ensure_ascii=False)
    try:
        json.loads(message)
    except ValueError:
        # Legacy best-effort path: swap single quotes for double quotes, then
        # restore possessive/contracted 's that the swap damaged.
        return message.replace("'", "\"").replace("\"s", "'s")
    return message


def lambda_handler(event, context):
    """Forward ``event['inputs']`` to the SageMaker endpoint and return its reply.

    Args:
        event: Invocation payload; must contain an ``'inputs'`` key whose
            value is sent to the endpoint as a JSON request body.
        context: Lambda context object (unused).

    Returns:
        The endpoint response body, decoded and parsed from JSON.

    Raises:
        KeyError: if ``event`` has no ``'inputs'`` key.
        TypeError: if ``event['inputs']`` is not JSON-serializable.
        ValueError: if the endpoint response body is not valid JSON.
    """
    message = event['inputs']
    messageStr = _encode_inputs(message)
    print("String message: ", messageStr)

    response = runtime.invoke_endpoint(EndpointName=ENDPOINT_NAME,
                                       Body=messageStr.encode('utf-8'),
                                       ContentType='application/json')
    print(response)

    # The response body is a stream: read it once, then decode and parse.
    result = json.loads(response['Body'].read().decode())
    return result