In [None]:
!pip install sagemaker -U

In [2]:
import sagemaker
import boto3

sess = sagemaker.Session()

# SageMaker session bucket -> used for uploading data, models and logs.
# SageMaker will automatically create this bucket if it does not exist.
sagemaker_session_bucket = None
if sagemaker_session_bucket is None and sess is not None:
    # No bucket was set explicitly above: fall back to the session's default bucket.
    sagemaker_session_bucket = sess.default_bucket()


# Role management: resolve the ARN of the IAM role used for deployment.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    # get_execution_role() raised ValueError, so look the role up by name via IAM.
    # boto3 keyword arguments are case-sensitive: the parameter is `RoleName`
    # (the previous spelling `Rolename` is rejected by parameter validation).
    iam = boto3.client("iam")
    role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

# Session bound to the bucket resolved above.
session = sagemaker.Session(default_bucket=sagemaker_session_bucket)

print(f'sagemaker role arn:{role}')
# Report the region from the bucket-configured session rather than the bootstrap one.
print(f'sagemaker session region:{session.boto_region_name}')

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
sagemaker role arn:arn:aws:iam::593793026122:role/service-role/AmazonSageMaker-ExecutionRole-20240831T173993
sagemaker session region:us-east-1


In [3]:
from sagemaker.huggingface.model import HuggingFaceModel

# Settings passed to the model as its environment: which model to load from the
# Hugging Face Hub (https://huggingface.co/models) and which NLP task to perform.
hub = dict(
    HF_MODEL_ID="distilbert-base-uncased-distilled-squad",
    HF_TASK="question-answering",
)

# Describe the model: the IAM role with permissions to create an endpoint, the Hub
# configuration above, and the Python / PyTorch / Transformers versions to use.
huggingface_model = HuggingFaceModel(
    role=role,
    env=hub,
    py_version="py39",
    pytorch_version="1.13",
    transformers_version="4.26",
)

# Deploy the model to SageMaker Inference on a single ml.m5.xlarge instance.
predictor = huggingface_model.deploy(
    instance_type="ml.m5.xlarge",
    initial_instance_count=1,
)


# Example question-answering request: a question plus the context passage to answer it from.
question = "For which team does Virat Kohli play for?"
context = "Virat Kohli is an Indian international cricketer who plays Test and One Day International cricket for the Indian national team."

# Payload layout: both fields nested under the "inputs" key.
data = {"inputs": {"question": question, "context": context}}

# Send the request to the deployed endpoint; the result is shown as the cell output.
predictor.predict(data)

--------!

{'score': 0.7421595454216003,
 'start': 106,
 'end': 126,
 'answer': 'Indian national team'}