## Install the SageMaker SDK and Transformers (Python 3):

In [1]:
# %pip (rather than !pip) installs into the environment of the running kernel.
# The extra is quoted so the shell cannot interpret the square brackets as a glob pattern.
%pip install --upgrade --no-cache-dir torch-neuron "neuron-cc[tensorflow]" torchvision torch --extra-index-url=https://pip.repos.neuron.amazonaws.com

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Collecting torch-neuron
  Downloading https://pip.repos.neuron.amazonaws.com/torch-neuron/torch_neuron-1.11.0.2.3.0.0-py3-none-linux_x86_64.whl (32.6 MB)
     |████████████████████████████████| 32.6 MB 94.1 MB/s            
Collecting torchvision
  Downloading torchvision-0.11.2-cp36-cp36m-manylinux1_x86_64.whl (23.3 MB)
     |████████████████████████████████| 23.3 MB 5.8 MB/s            
  Downloading torchvision-0.11.1-cp36-cp36m-manylinux1_x86_64.whl (23.3 MB)
     |████████████████████████████████| 23.3 MB 37.2 MB/s            
[?25h  Downloading torchvision-0.10.1-cp36-cp36m-manylinux1_x86_64.whl (22.1 MB)
     |████████████████████████████████| 22.1 MB 23.0 MB/s            
[?25h  Downloading torchvision-0.10.0-cp36-cp36m-manylinux1_x86_64.whl (22.1 MB)
     |████████████████████████████████| 22.1 MB 26.7 MB/s            
[?25h  Downloading torchvision-0.9.1-cp36-cp36m-manylinux1_x86_64.whl (17

In [2]:
# Pinned to the transformers version this notebook reports when run (4.12.3),
# so a fresh environment does not silently pick up a newer release.
%pip install "transformers==4.12.3"



In [3]:
# Install/upgrade the SageMaker Python SDK (minimum version 2.48.0) into the kernel's environment.
%pip install "sagemaker>=2.48.0" --upgrade



In [4]:
# Explicit %pip magic: a bare `pip` line only works while IPython automagic is enabled.
%pip install "transformers==4.12.3" --upgrade --no-cache-dir

Note: you may need to restart the kernel to use updated packages.


In [5]:
# %pip targets the running kernel's environment; version is pinned for reproducibility.
%pip install "datasets[s3]==1.18.3" --upgrade



## Import Libraries:

In [7]:
import torch
import torch_neuron
import transformers
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig


# Report the transformers version that was actually imported into this kernel.
print(f"transformers.__version__: {transformers.__version__}")




transformers.__version__: 4.12.3


## Create SageMaker endpoint with the chosen model:

In [17]:
import sagemaker
from sagemaker import get_execution_role
import json
import boto3


# Bucket used by the SageMaker session; leave as None to use the session's default bucket.
sagemaker_session_bucket = None  # "sagemaker-us-west-2-893195739154"

sess = sagemaker.Session()
if sagemaker_session_bucket is None:
    # No bucket name given: fall back to the session's default bucket.
    sagemaker_session_bucket = sess.default_bucket()
else:
    # A bucket name was given: rebuild the session around it so that
    # sess.default_bucket() and sagemaker_session_bucket always agree.
    sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

print("sagemaker_session_bucket:", sagemaker_session_bucket)

# IAM role the notebook executes under.
role = get_execution_role()
print("role:", role)

print(f"sagemaker role arn: {role}")
print(f"sagemaker bucket: {sess.default_bucket()}")
print(f"sagemaker session region: {sess.boto_region_name}")

# Key prefix for this notebook's artifacts.
# NOTE(review): not referenced by the cells visible here; presumably an S3 prefix. Confirm.
prefix = "mymodel/supervised"

sagemaker_session_bucket: sagemaker-us-west-2-893195739154
role: arn:aws:iam::893195739154:role/service-role/AmazonSageMaker-ExecutionRole-20220422T170187
sagemaker role arn: arn:aws:iam::893195739154:role/service-role/AmazonSageMaker-ExecutionRole-20220422T170187
sagemaker bucket: sagemaker-us-west-2-893195739154
sagemaker session region: us-west-2


In [9]:
# Take the region from the active SageMaker session (defined in the session-setup
# cell above) instead of hard-coding it, so the two cannot drift apart.
aws_region = sess.boto_region_name  # e.g. "us-west-2"

In [19]:
# Look up the PyTorch 1.10 / py38 inference image URI for the selected region.
# NOTE(review): img_uri is not referenced by the other cells visible here, and the
# instance type used for the lookup (ml.p3.16xlarge) differs from the one the
# endpoint is deployed on. Confirm whether this lookup is still needed.
img_uri = sagemaker.image_uris.retrieve(
    framework="pytorch",
    region=aws_region,
    image_scope="inference",
    version="1.10",
    instance_type="ml.p3.16xlarge",
    py_version="py38",
)
print(f"img_uri: {img_uri}")

img_uri: 763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-inference:1.10-gpu-py38


In [21]:


from sagemaker.huggingface.model import HuggingFaceModel

# Hub model configuration, handed to the model through its `env` argument.
hub = {
    "HF_MODEL_ID": "distilbert-base-uncased-distilled-squad",  # model_id from hf.co/models
    "HF_TASK": "question-answering",
}

# CPU inference image; per its tag: PyTorch 1.10.2, Transformers 4.17.0, Python 3.8.
pytorch_cpu_image_uri = "763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-inference:1.10.2-transformers4.17.0-cpu-py38-ubuntu20.04"

# The container image is given explicitly, so transformers_version / pytorch_version
# are intentionally omitted: stating versions that differ from the image tag would
# only mislead readers about what runs in the endpoint.
huggingface_model = HuggingFaceModel(
    env=hub,
    role=sagemaker.get_execution_role(),
    image_uri=pytorch_cpu_image_uri,
)




## Deploy model to SageMaker Inference:

In [None]:

# Deploy the model on a single ml.m5.xlarge instance; the returned predictor
# is what the prediction cell below calls.
predictor = huggingface_model.deploy(initial_instance_count=1, instance_type="ml.m5.xlarge")

## Sample Input: 

In [23]:
# Question-answering request payload: a question plus the context passage
# in which the answer should be found.
data = {
    "inputs": dict(
        question="Why did Saifur stay at home?",
        context="Anna friend forgot to bring Saifur's umbrella. There was rain in the morning. Saifur though that he better stay at home.",
    )
}

# data = {
# "inputs": {
#     "question": "What is used for inference?",
#     "context": "My Name is Philipp and I live in Nuremberg. This model is used with sagemaker for inference."
#     }
# }



## Predictor:

In [24]:
# Send the sample payload to the deployed predictor and keep its response.
preds = predictor.predict(data)

In [25]:
# Bare expression: the notebook renders the response as the cell output.
preds


{'score': 0.16734573245048523, 'start': 57, 'end': 61, 'answer': 'rain'}

In [15]:
# Reuse the bucket resolved in the session-setup cell instead of hard-coding an
# account-specific bucket name, so the notebook also runs in other accounts/regions.
bucket = sagemaker_session_bucket
input_prefix = "sagemaker/input"
output_prefix = "sagemaker/output"