### Allennlp-Sagemaker

#### Pre-requisites

In [None]:
This notebook shows how to train an AllenNLP model in the notebook environment and then use the SageMaker Python SDK to deploy it to SageMaker's managed hosting environment.

In [1]:
#### Set up the environment 

In [2]:
# Install the notebook's dependencies. `%pip` (rather than `!pip`) installs into
# the environment of the running kernel, so the packages are importable below.
%pip install -r requirements.txt

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Collecting en-core-web-sm@ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.3.0/en_core_web_sm-3.3.0-py3-none-any.whl
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.3.0/en_core_web_sm-3.3.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m27.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting allennlp
  Downloading allennlp-2.10.1-py3-none-any.whl (730 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m730.2/730.2 KB[0m [31m51.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting jsonlines
  Downloading jsonlines-3.1.0-py3-none-any.whl (8.6 kB)
Collecting allennlp-models
  Downloading allennlp_models-2.10.1-py3-none-any.whl (464 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m464.5/464.5 KB[0m [31m8.4 MB/s[0m eta [36m0:0

#### Get sample data

In [5]:
# Copy the sample file all_chapter_2.jsonl from S3 into the working directory.
# Left commented out; uncomment to fetch the file again.
#!aws s3 cp s3://sagemaker-us-east-1-551641581032/sagemaker/DEMO-Allennlp/all_chapter_2.jsonl ./

download: s3://sagemaker-us-east-1-551641581032/sagemaker/DEMO-Allennlp/all_chapter_2.jsonl to ./all_chapter_2.jsonl


In [6]:
# Copy the sample file 2375715.jsonl from S3 into the working directory.
# Left commented out; uncomment to fetch the file again.
#!aws s3 cp s3://sagemaker-us-east-1-551641581032/sagemaker/DEMO-Allennlp/2375715.jsonl ./

download: s3://sagemaker-us-east-1-551641581032/sagemaker/DEMO-Allennlp/2375715.jsonl to ./2375715.jsonl


#### Model training

In [2]:
# List the working directory. The files used for training and inference are
# described in the comments below.
!ls

# my.jsonnet: training configuration script
# stary_dataloader.py: dataloader based on the novel data
# staryPredictor.py: predictor based on the novel data

2375715.jsonl		   infer.py    requirements.txt
all_chapter_2.jsonl	   lost+found  stary_dataloader.py
allennlp-test-stary.ipynb  my.jsonnet  staryPredictor.py


In [3]:
# Train the model with the allennlp command-line tool.
# my.jsonnet is the training configuration, ./model_save receives the results
# (model.tar.gz is copied from there when packaging for deployment), and
# --include-package names the custom stary_dataloader module.
# NOTE(review): -f presumably overwrites an existing ./model_save — confirm with `allennlp train --help`.
!allennlp train my.jsonnet -s ./model_save -f --include-package stary_dataloader

[nltk_data] Downloading package punkt to /home/ec2-user/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /home/ec2-user/nltk_data...
2022-10-28 01:32:39,712 - INFO - allennlp.common.plugins - Plugin allennlp_models available
2022-10-28 01:32:39,774 - INFO - allennlp.common.params - evaluation = None
2022-10-28 01:32:39,774 - INFO - allennlp.common.params - include_in_archive = None
2022-10-28 01:32:39,775 - INFO - allennlp.common.params - random_seed = 13370
2022-10-28 01:32:39,775 - INFO - allennlp.common.params - numpy_seed = 1337
2022-10-28 01:32:39,775 - INFO - allennlp.common.params - pytorch_seed = 133
2022-10-28 01:32:39,776 - INFO - allennlp.common.checks - Pytorch version: 1.12.1+cu102
2022-10-28 01:32:39,777 - INFO - allennlp.common.params - type = default
2022-10-28 01:32:39,777 - INFO - allennlp.common.params - dataset_reader.type = stary
2022-10-28 01:32:39,778 - INFO - allennlp.common.params - dataset_reader.max_instanc

#### Deploy the trained model to prepare for predictions

In [4]:
import os
import sagemaker
# SageMaker session for this notebook; used here to look up the default bucket.
sagemaker_session = sagemaker.Session()

# Default S3 bucket of the session; the packaged model is uploaded to it later.
bucket = sagemaker_session.default_bucket()
# NOTE(review): `prefix` is not referenced by any later cell in this notebook
# (the upload goes under output/) — confirm whether it is still needed.
prefix = 'sagemaker/DEMO-Allennlp'

# Execution role; passed to PyTorchModel as `role` when the model is deployed.
role = sagemaker.get_execution_role()

In [11]:
# Stage the files for the inference archive: the trained model at the top level
# of tmp/, and the inference code plus its requirements under tmp/code/.
# `mkdir -p` creates tmp/ and tmp/code/ when they are missing and does nothing
# otherwise, so this cell works on a fresh checkout and when it is re-run.
!mkdir -p tmp/code
!cp ./model_save/model.tar.gz ./tmp/
!cp requirements.txt ./tmp/code/
!cp stary_dataloader.py ./tmp/code/
!cp staryPredictor.py ./tmp/code/
!cp infer.py ./tmp/code/

In [12]:
# Pack model.tar.gz and code/ from tmp/ into the archive that is uploaded to S3.
# Jupyter's .ipynb_checkpoints directories are excluded so that editor
# checkpoint files are not shipped inside the model artifact.
!cd tmp && tar --exclude='.ipynb_checkpoints' -czvf ../model-inference.tar.gz *

code/
code/stary_dataloader.py
code/requirements.txt
code/staryPredictor.py
code/infer.py
code/.ipynb_checkpoints/
model.tar.gz


In [13]:
# Upload the packaged archive to the session's default bucket under output/.
# {bucket} is expanded by IPython from the Python variable of the same name.
!aws s3 cp model-inference.tar.gz s3://{bucket}/output/model-inference.tar.gz

upload: ./model-inference.tar.gz to s3://sagemaker-us-east-1-551641581032/output/model-inference.tar.gz


In [14]:
from sagemaker.pytorch.model import PyTorchModel

# The archive location is built from `bucket` (the bucket the archive was
# uploaded to) instead of a hard-coded, account-specific URI, so the notebook
# also works in other accounts and regions.
# NOTE(review): the training log reports PyTorch 1.12.1 while the serving
# container is pinned to framework_version 1.9.0 — confirm they are compatible.
pytorch_model = PyTorchModel(model_data=f's3://{bucket}/output/model-inference.tar.gz',
                             role=role,
                             entry_point='infer.py',
                             framework_version='1.9.0', py_version='py38',
                             model_server_workers=1)

instance_type = 'ml.m5.2xlarge'

# Deploy to a single-instance endpoint; `predictor` is used below to invoke it.
predictor = pytorch_model.deploy(instance_type=instance_type, initial_instance_count=1)

-------------!

In [15]:
# List the endpoints whose status is InService to check that the deployment is up.
!aws sagemaker list-endpoints --status-equals InService

{
    "Endpoints": [
        {
            "EndpointName": "pytorch-inference-2022-10-28-03-03-15-084",
            "EndpointArn": "arn:aws:sagemaker:us-east-1:551641581032:endpoint/pytorch-inference-2022-10-28-03-03-15-084",
            "CreationTime": 1666926195.367,
            "LastModifiedTime": 1666926578.258,
            "EndpointStatus": "InService"
        },
        {
            "EndpointName": "pytorch-inference-2022-10-28-01-11-59-594",
            "EndpointArn": "arn:aws:sagemaker:us-east-1:551641581032:endpoint/pytorch-inference-2022-10-28-01-11-59-594",
            "CreationTime": 1666919519.871,
            "LastModifiedTime": 1666919965.846,
            "EndpointStatus": "InService"
        }
    ]
}


#### Invoking the endpoint

In [None]:
# Optional (left commented out): attach a Predictor to an existing endpoint by
# name instead of using the `predictor` returned by deploy().
#endpoint_name = 'pytorch-inference-2022-10-28-03-03-15-084'  # 'ml.m5.2xlarge'
#predictor = sagemaker.predictor.Predictor(endpoint_name=endpoint_name)

In [16]:
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer

# Use the JSON serializer for request payloads and the JSON deserializer for
# responses on this predictor.
predictor.serializer = JSONSerializer()
predictor.deserializer = JSONDeserializer()

In [21]:
# Sample passage sent to the endpoint as a single string.
texts = "After a long discussion about it. Selene's brother, Helios, came up with a compromise. 'Alright, Selene, they shall have a chance for change. For now, you will pair them with their own races, but when the time comes you will choose a pure-hearted female to be your Moon Princess. She will have three mates, one of her own kind and two of different races. If she can bring three races together with her mates, then we will not destroy them.' Selene was happy that her children were given a chance."

import time
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() is wall-clock time and can jump if the system clock
# is adjusted while the request is in flight.
start = time.perf_counter()
outputs = predictor.predict(texts)
end = time.perf_counter()
print('outputs: ', outputs)
# Elapsed time of the predict() call, in seconds.
print('time:', end-start)

outputs:  {'doc': '" After a long discussion about it . Selene \'s brother , Helios , came up with a compromise . \' Alright , Selene , they shall have a chance for change . For now , you will pair them with their own races , but when the time comes you will choose a pure - hearted female to be your Moon Princess . She will have three mates , one of her own kind and two of different races . If she can bring three races together with her mates , then we will not destroy them . \' Selene was happy that her children were given a chance . "', 'clusters': [[[8, 8], [23, 23], [36, 36], [50, 50], [60, 60], [100, 100], [104, 104]], [[64, 64], [72, 72], [82, 82], [89, 89]]]}
time: 0.6020877361297607


#### Clean-up
Delete the endpoint when you're finished: it is hosted on a SageMaker instance (`ml.m5.2xlarge`) that keeps running, and incurring charges, until the endpoint is deleted.

In [None]:
# Delete the endpoint behind `predictor` once you are done with it.
predictor.delete_endpoint()