# Host a Pretrained Model on SageMaker
---

In [5]:
!pip install transformers==3.3.1 sagemaker==2.15.0 --quiet
!pip install tensorflow
!pip install torch

[0mCollecting torch
  Downloading torch-1.13.1-cp37-cp37m-manylinux1_x86_64.whl (887.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m887.5/887.5 MB[0m [31m995.2 kB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting nvidia-cuda-runtime-cu11==11.7.99 (from torch)
  Downloading nvidia_cuda_runtime_cu11-11.7.99-py3-none-manylinux1_x86_64.whl (849 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m849.3/849.3 kB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hCollecting nvidia-cudnn-cu11==8.5.0.96 (from torch)
  Downloading nvidia_cudnn_cu11-8.5.0.96-2-py3-none-manylinux1_x86_64.whl (557.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m557.1/557.1 MB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting nvidia-cublas-cu11==11.10.3.66 (from torch)
  Downloading nvidia_cublas_cu11-11.10.3.66-py3-none-manylinux1_x86_64.whl (317.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31

In [3]:
# Optional step, intentionally left disabled: upgrade pip itself.
# !pip install --upgrade pip

In [8]:
import os
# Import both classes from the top-level package, the same way the inference
# script does, instead of reaching into the internal `modeling_bert` module.
from transformers import BertModel, BertTokenizer


# Load the pretrained "bert-base-uncased" tokenizer and model by name.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertModel.from_pretrained("bert-base-uncased")

# Local directories: saved model artifacts and the inference code.
model_path = "model/"
code_path = "code/"

# Create the output directory if needed; a no-op when it already exists,
# so the cell can be re-run safely.
os.makedirs(model_path, exist_ok=True)

# Write the model and the tokenizer files into the same directory.
# The tokenizer call is the last expression so the saved file paths are
# shown as the cell output.
model.save_pretrained(save_directory=model_path)
tokenizer.save_pretrained(save_directory=model_path)

('model/vocab.txt', 'model/special_tokens_map.json', 'model/added_tokens.json')

In [9]:
# Print the inference script with syntax highlighting so it can be read inline.
!pygmentize code/inference_code.py

[34mimport[39;49;00m [04m[36mos[39;49;00m[37m[39;49;00m
[34mimport[39;49;00m [04m[36mjson[39;49;00m[37m[39;49;00m
[34mfrom[39;49;00m [04m[36mtransformers[39;49;00m [34mimport[39;49;00m BertTokenizer, BertModel[37m[39;49;00m
[37m[39;49;00m
[34mdef[39;49;00m [32mmodel_fn[39;49;00m(model_dir):[37m[39;49;00m
[37m    [39;49;00m[33m"""[39;49;00m
[33m    Load the model for inference[39;49;00m
[33m    """[39;49;00m[37m[39;49;00m
[37m[39;49;00m
    model_path = os.path.join(model_dir, [33m'[39;49;00m[33mmodel/[39;49;00m[33m'[39;49;00m)[37m[39;49;00m
    [37m[39;49;00m
    [37m# Load BERT tokenizer from disk.[39;49;00m[37m[39;49;00m
    tokenizer = BertTokenizer.from_pretrained(model_path)[37m[39;49;00m
[37m[39;49;00m
    [37m# Load BERT model from disk.[39;49;00m[37m[39;49;00m
    model = BertModel.from_pretrained(model_path)[37m[39;49;00m
[37m[39;49;00m
    model_dict = {[33m'[39;49;00m[33mmodel[39;49;00m[33m'[39;49;00m

In [11]:
import tarfile

# The archive file is created inside the model directory.
zipped_model_path = os.path.join(model_path, "model.tar.gz")

# Bundle the saved model artifacts and the inference code into a single
# gzip-compressed tarball, model directory first, then the code directory.
with tarfile.open(zipped_model_path, "w:gz") as archive:
    for source_dir in (model_path, code_path):
        archive.add(source_dir)

In [12]:
from sagemaker.pytorch import PyTorchModel
from sagemaker import get_execution_role
import time

# A per-second UTC timestamp suffix gives each run its own endpoint name.
endpoint_name = "bert-base-" + time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())

# Bound to `pytorch_model` rather than `model`, so the BertModel object that
# the earlier cell stored in `model` is not overwritten by a different kind
# of object.
pytorch_model = PyTorchModel(
    entry_point="inference_code.py",
    model_data=zipped_model_path,
    role=get_execution_role(),
    framework_version="1.5",
    py_version="py3",
)

# Deploy to a single ml.m5.xlarge instance; `predictor` is used by the
# cleanup cell to delete the model and endpoint afterwards.
predictor = pytorch_model.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.xlarge",
    endpoint_name=endpoint_name,
)

-----!

In [14]:
import sagemaker

# Runtime client taken from a SageMaker session; used to call the endpoint.
sm = sagemaker.Session().sagemaker_runtime_client

prompt = "The best part of Amazon SageMaker is that it makes machine learning easy."

# The request body is the prompt encoded as UTF-8 bytes.
payload = prompt.encode("UTF-8")
response = sm.invoke_endpoint(EndpointName=endpoint_name, ContentType="text/csv", Body=payload)

# Read the response body; as the last expression, the raw bytes become the
# cell output.
response["Body"].read()

b'(tensor([[[-0.2462, -0.0988,  0.1747,  ..., -0.4059,  0.0966,  0.6564],\n         [-0.1352, -0.5824, -0.0728,  ..., -0.1726,  0.5765,  0.1273],\n         [-0.1491, -0.4218,  0.2821,  ...,  0.1332,  0.5053, -0.2813],\n         ...,\n         [-0.8054, -0.3126,  0.6776,  ..., -0.0572,  0.0806, -0.0318],\n         [ 0.7608,  0.1367, -0.2650,  ...,  0.1246, -0.5977, -0.2397],\n         [ 0.4660,  0.2762,  0.0636,  ...,  0.1112, -0.5502, -0.2997]]],\n       grad_fn=<NativeLayerNormBackward>), tensor([[-7.0429e-01, -4.2229e-01, -9.7203e-01,  6.3414e-01,  8.6010e-01,\n         -3.5008e-01,  3.8001e-02,  2.2652e-01, -8.5239e-01, -9.9980e-01,\n         -6.4649e-01,  8.4232e-01,  8.9319e-01,  6.2476e-01,  4.8914e-01,\n         -3.7195e-01,  1.0597e-01, -5.0569e-01,  3.3702e-01,  7.3767e-01,\n          6.0322e-01,  1.0000e+00, -3.2281e-01,  4.7648e-01,  4.4296e-01,\n          9.4813e-01, -6.6813e-01,  7.4915e-01,  8.2229e-01,  6.3062e-01,\n         -1.4025e-01,  2.2783e-01, -9.6329e-01, -2.0670

In [15]:
# Clean up the resources behind `predictor`: the model first, then the endpoint.
predictor.delete_model()
predictor.delete_endpoint()