In [1]:
import boto3
import sagemaker



sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [2]:
from sagemaker.huggingface import HuggingFaceModel


In [3]:
# Resolve the IAM role ARN that is later passed to the model as `role`.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    # Fallback when get_execution_role() raises ValueError: look the role up
    # by its name through the IAM API and use the ARN from the response.
    iam = boto3.client('iam')
    role_info = iam.get_role(RoleName='sagemaker_execution_role')
    role = role_info['Role']['Arn']


In [4]:
# Environment settings selecting which Hugging Face Hub model to load and the
# task it serves. Browse available models at https://huggingface.co/models
hub = dict(
    HF_MODEL_ID='facebook/roberta-hate-speech-dynabench-r4-target',
    HF_TASK='text-classification',
)

In [5]:
# Build the Hugging Face model object: the Hub settings are passed as
# environment variables, together with the execution role and the pinned
# framework / Python versions.
huggingface_model = HuggingFaceModel(
    role=role,
    env=hub,
    py_version='py312',
    pytorch_version='2.6.0',
    transformers_version='4.49.0',
)

In [9]:

from sagemaker.serverless import ServerlessInferenceConfig

def serverless_inference_config(memory_size_in_mb: int = 2048, max_concurrency: int = 1):
    """Build the serverless inference configuration used for deployment.

    Args:
        memory_size_in_mb: Memory size for the serverless endpoint, in MB
            (e.g. 1024, 2048, 3072...). Defaults to 2048.
        max_concurrency: Max number of concurrent requests. Defaults to 1.

    Returns:
        A ``ServerlessInferenceConfig`` created with the given settings.
        Calling the function with no arguments yields the same configuration
        as before (2048 MB, concurrency 1).
    """
    return ServerlessInferenceConfig(
        memory_size_in_mb=memory_size_in_mb,
        max_concurrency=max_concurrency,
    )

In [None]:
# Deploy the model under a fixed endpoint name, using the configuration
# returned by serverless_inference_config().
predictor = huggingface_model.deploy(
    endpoint_name='fb-hatespeech-reddit',
    serverless_inference_config=serverless_inference_config(),
)


In [6]:
# Alternative (disabled): deploy the model to an instance-backed SageMaker
# Inference live endpoint instead of the serverless configuration used above.
#predictor = huggingface_model.deploy(
#    initial_instance_count=1, # number of instances
#    instance_type='ml.m5.xlarge' # ec2 instance type
#)

------!

In [16]:
# Request payload: the text to classify goes under the "inputs" key.
data = {"inputs": "Go back to your country"}

# Ask the deployed endpoint for a prediction and show the response.
prediction = predictor.predict(data)
print(prediction)

[{'label': 'hate', 'score': 0.9991140961647034}]
