# TorchServe on SageMaker

## Create Inference Script

In [None]:
# Print the inference script with syntax highlighting (pygmentize is the Pygments CLI).
! pygmentize 2_src/pipeline-predictor.py

## Model Packaging

In [2]:
# Download artifacts for Q&A model

! mkdir distilbert-base-uncased-distilled-squad
! wget https://huggingface.co/distilbert-base-uncased-distilled-squad/resolve/main/pytorch_model.bin -P distilbert-base-uncased-distilled-squad
! wget https://huggingface.co/distilbert-base-uncased-distilled-squad/resolve/main/tokenizer.json -P distilbert-base-uncased-distilled-squad
! wget https://huggingface.co/distilbert-base-uncased-distilled-squad/resolve/main/tokenizer_config.json -P distilbert-base-uncased-distilled-squad
! wget https://huggingface.co/distilbert-base-uncased-distilled-squad/resolve/main/vocab.txt -P distilbert-base-uncased-distilled-squad
! wget https://huggingface.co/distilbert-base-uncased-distilled-squad/resolve/main/config.json -P distilbert-base-uncased-distilled-squad

--2022-08-17 08:23:21--  https://huggingface.co/distilbert-base-uncased-distilled-squad/resolve/main/pytorch_model.bin
Resolving huggingface.co (huggingface.co)... 2600:1f18:147f:e850:db35:e0c7:187b:c770, 2600:1f18:147f:e800:afa4:a769:1b42:e343, 52.202.207.64, ...
Connecting to huggingface.co (huggingface.co)|2600:1f18:147f:e850:db35:e0c7:187b:c770|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://cdn-lfs.huggingface.co/distilbert-base-uncased-distilled-squad/22cbcd1c2d2e3190cdb7658f0fd330e4c2bc18056a1e6612a4430197b7368372?response-content-disposition=attachment%3B%20filename%3D%22pytorch_model.bin%22 [following]
--2022-08-17 08:23:21--  https://cdn-lfs.huggingface.co/distilbert-base-uncased-distilled-squad/22cbcd1c2d2e3190cdb7658f0fd330e4c2bc18056a1e6612a4430197b7368372?response-content-disposition=attachment%3B%20filename%3D%22pytorch_model.bin%22
Resolving cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)... 2600:9000:23ca:f000:11:f807:5180:93a1, 2

In [3]:
# Pack the downloaded model directory into a gzip-compressed tarball in the
# current working directory.
!tar -C "$PWD" -czf distilbert-base-uncased-distilled-squad.tar.gz  distilbert-base-uncased-distilled-squad/

In [7]:
import sagemaker
from sagemaker import get_execution_role

sagemaker_session = sagemaker.Session()
#role = get_execution_role()  # TODO: replace it
# TODO: this account-specific role ARN has to be replaced with your own.
role = "arn:aws:iam::941656036254:role/service-role/AmazonSageMaker-ExecutionRole-20210904T193230"

bucket = sagemaker_session.default_bucket()
prefix = 'torchserve'
s3_path = 's3://{}/{}'.format(bucket, prefix)

# Upload the model tarball and keep the returned S3 URI for the model definitions.
# The key prefix is joined with "/" explicitly: S3 keys are not filesystem paths,
# and `os` has not been imported at this point of the notebook, so the previous
# os.path.join(...) call raised NameError on a fresh kernel.
model_data = sagemaker_session.upload_data(
    'distilbert-base-uncased-distilled-squad.tar.gz',
    bucket,
    f'{prefix}/model-artifacts',
)

In [8]:
# Show the value returned by upload_data (the location of the uploaded tarball).
print(model_data)

s3://sagemaker-us-east-1-941656036254/torchserve/model-artifacts/distilbert-base-uncased-distilled-squad.tar.gz


## Local Mode

In [9]:
import boto3
from sagemaker.local import LocalSession

# Session used for SageMaker local mode, configured with `local_code` enabled.
sagemaker_local_session = LocalSession()
sagemaker_local_session.config = dict(local=dict(local_code=True))

# Account id of the credentials in use, looked up through STS.
caller_identity = boto3.client("sts").get_caller_identity()
account = caller_identity.get("Account")

# Execution role handed to the model definition (account-specific ARN).
role = "arn:aws:iam::941656036254:role/service-role/AmazonSageMaker-ExecutionRole-20210904T193230"

  self.config = yaml.load(open(sagemaker_config_file, "r"))


In [10]:
import os
import subprocess

# Default to CPU local mode; switch to the GPU variant only when `nvidia-smi`
# can be executed and exits successfully.
instance_type = "local"

try:
    if subprocess.call("nvidia-smi") == 0:
        ## Set type to GPU if one is present
        instance_type = "local_gpu"
except OSError:
    # `nvidia-smi` is missing or not executable: treat that as "no GPU" and keep
    # the CPU default. Only OSError is caught so that KeyboardInterrupt and
    # genuine programming errors are no longer silently swallowed.
    pass

print("Instance type = " + instance_type)

Instance type = local


In [20]:
from sagemaker.pytorch import PyTorchModel

# Batching-related environment variables passed to the model via `env`.
env = {
    "SAGEMAKER_TS_BATCH_SIZE": "3",
    "SAGEMAKER_TS_MAX_BATCH_DELAY": "100000",
}

# Note: the entry point script ('pipeline-predictor.py' in '2_src') can be
# updated as needed for the model you want to use (e.g. BERT).
# This model is bound to the local-mode session.
model = PyTorchModel(
    entry_point="pipeline-predictor.py",
    source_dir="2_src",
    model_data=model_data,
    role=role,
    framework_version="1.9.0",
    py_version="py38",
    env=env,
    sagemaker_session=sagemaker_local_session,
)

In [21]:
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer

# Deploy the model on the detected local instance type; the predictor sends and
# receives JSON.
local_predictor = model.deploy(
    initial_instance_count=1,
    instance_type=instance_type,
    serializer=JSONSerializer(),
    deserializer=JSONDeserializer(),
)

Attaching to zu7r4nfrsk-algo-1-9oixb
[36mzu7r4nfrsk-algo-1-9oixb |[0m Sagemaker TS environment variables have been set and will be used for single model endpoint.
[36mzu7r4nfrsk-algo-1-9oixb |[0m Collecting transformers
[36mzu7r4nfrsk-algo-1-9oixb |[0m   Downloading transformers-4.21.1-py3-none-any.whl (4.7 MB)
     |████████████████████████████████| 4.7 MB 908 kB/s            
[36mzu7r4nfrsk-algo-1-9oixb |[0m Collecting regex!=2019.12.17
[36mzu7r4nfrsk-algo-1-9oixb |[0m   Downloading regex-2022.7.25-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (768 kB)
     |████████████████████████████████| 768 kB 571 kB/s            
[36mzu7r4nfrsk-algo-1-9oixb |[0m Collecting filelock
[36mzu7r4nfrsk-algo-1-9oixb |[0m   Downloading filelock-3.8.0-py3-none-any.whl (10 kB)
[36mzu7r4nfrsk-algo-1-9oixb |[0m Collecting huggingface-hub<1.0,>=0.1.0
[36mzu7r4nfrsk-algo-1-9oixb |[0m   Downloading huggingface_hub-0.8.1-py3-none-any.whl (101 kB)
     |█████████████████████████████

In [22]:
import json

# Passage the question-answering model extracts its answer from.
context = r"""
The Amazon rainforest (Portuguese: Floresta Amazônica or Amazônia; Spanish: Selva Amazónica, Amazonía or usually Amazonia; French: Forêt amazonienne; Dutch: Amazoneregenwoud), also known in English as Amazonia or the Amazon Jungle, is a moist broadleaf forest that covers most of the Amazon basin of South America. This basin encompasses 7,000,000 square kilometres (2,700,000 sq mi), of which 5,500,000 square kilometres (2,100,000 sq mi) are covered by the rainforest. This region includes territory belonging to nine nations. The majority of the forest is contained within Brazil, with 60% of the rainforest, followed by Peru with 13%, Colombia with 10%, and with minor amounts in Venezuela, Ecuador, Bolivia, Guyana, Suriname and French Guiana. States or departments in four nations contain "Amazonas" in their names. The Amazon represents over half of the planet's remaining rainforests, and comprises the largest and most biodiverse tract of tropical rainforest in the world, with an estimated 390 billion individual trees divided into 16,000 species.
"""

question = "What kind of forest is Amazon?"

# Request payload: the question plus the passage to search for the answer.
data = dict(question=question, context=context)
print(data)



{'question': 'What kind of forest is Amazon?', 'context': '\nThe Amazon rainforest (Portuguese: Floresta Amazônica or Amazônia; Spanish: Selva Amazónica, Amazonía or usually Amazonia; French: Forêt amazonienne; Dutch: Amazoneregenwoud), also known in English as Amazonia or the Amazon Jungle, is a moist broadleaf forest that covers most of the Amazon basin of South America. This basin encompasses 7,000,000 square kilometres (2,700,000 sq mi), of which 5,500,000 square kilometres (2,100,000 sq mi) are covered by the rainforest. This region includes territory belonging to nine nations. The majority of the forest is contained within Brazil, with 60% of the rainforest, followed by Peru with 13%, Colombia with 10%, and with minor amounts in Venezuela, Ecuador, Bolivia, Guyana, Suriname and French Guiana. States or departments in four nations contain "Amazonas" in their names. The Amazon represents over half of the planet\'s remaining rainforests, and comprises the largest and most biodiverse

In [None]:
# Send the question/context payload to the locally deployed predictor.
local_predictor.predict(data)

## Remote Endpoint

In [14]:
from sagemaker.pytorch import PyTorchModel

# Batching-related environment variables passed to the model via `env`.
env = {
    "SAGEMAKER_TS_BATCH_SIZE": "3",
    "SAGEMAKER_TS_MAX_BATCH_DELAY": "100000",
}

# Note: the entry point script ('pipeline-predictor.py' in '2_src') can be
# updated as needed for the model you want to use (e.g. BERT).
# This model is bound to the regular (non-local) SageMaker session.
model = PyTorchModel(
    entry_point="pipeline-predictor.py",
    source_dir="2_src",
    model_data=model_data,
    role=role,
    framework_version="1.9.0",
    py_version="py38",
    env=env,
    sagemaker_session=sagemaker_session,
)

In [None]:
# Deploy the model to one ml.g4dn.4xlarge instance; the predictor sends and
# receives JSON.
remote_predictor = model.deploy(
    initial_instance_count=1,
    instance_type="ml.g4dn.4xlarge",
    serializer=JSONSerializer(),
    deserializer=JSONDeserializer(),
)

In [19]:
# Send the same question/context payload to the remote endpoint's predictor.
remote_predictor.predict(data)

{'score': 0.4394714832305908,
 'start': 238,
 'end': 253,
 'answer': 'moist broadleaf'}