In [1]:
import sagemaker
import boto3
from sagemaker.huggingface import HuggingFaceModel

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [2]:
# Resolve the IAM role ARN that is later handed to HuggingFaceModel.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    # Fallback when the SDK cannot determine an execution role
    # (presumably when running outside a SageMaker-managed environment —
    # TODO confirm): look the role up by its fixed name through IAM.
    iam = boto3.client('iam')
    role_record = iam.get_role(RoleName='sagemaker_execution_role')['Role']
    role = role_record['Arn']

In [3]:
# Environment settings passed to the model container (see `env=hub` below):
# which Hugging Face Hub model to load and which task to serve it for.
# Model catalogue: https://huggingface.co/models
hub = dict(
    HF_MODEL_ID='facebook/bart-large-mnli',
    HF_TASK='zero-shot-classification',
)

In [4]:
# Build the Hugging Face model object that the deploy step uses.
# `role` and `hub` come from the cells above; the three version strings are
# passed through unchanged to HuggingFaceModel.
huggingface_model = HuggingFaceModel(
    role=role,
    env=hub,
    transformers_version='4.37.0',
    pytorch_version='2.1.0',
    py_version='py310',
)

In [5]:
# Deploy the model to SageMaker Inference and keep the returned predictor.
#   initial_instance_count: number of instances
#   instance_type: author's note — the cheapest instance with GPU, VRAM 16 GB
predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type='ml.g4dn.xlarge',
)

------------!

In [7]:
# Smoke-test the deployed endpoint with a single zero-shot classification
# request: one input sentence plus the candidate labels to rank.
zero_shot_labels = ["other", "sport", "technology", "movies", "music"]
zero_shot_request = {
    "inputs": "I do not even know how to play the piano to be honest!",
    "parameters": {"candidate_labels": zero_shot_labels},
}
# NOTE(review): none of the visible cells deletes the endpoint created by
# deploy(); presumably it keeps running until removed (e.g. via
# predictor.delete_endpoint()) — confirm a cleanup cell exists.
# Bare last expression so the notebook renders the response.
predictor.predict(zero_shot_request)

{'sequence': 'I do not even know how to play the piano to be honest!',
 'labels': ['music', 'other', 'sport', 'technology', 'movies'],
 'scores': [0.8662603497505188,
  0.12306392937898636,
  0.004872139077633619,
  0.0033865300938487053,
  0.0024169941898435354]}