# Triton Model Server

!!!!!!!!! Note, that this will only work on Linux (requirement of Triton Client libs)


## Install dependencies

In [3]:
! pip install torch-tensorrt -f https://github.com/pytorch/TensorRT/releases

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Looking in links: https://github.com/pytorch/TensorRT/releases
Collecting torch-tensorrt
  Downloading https://github.com/pytorch/TensorRT/releases/download/v1.1.0/torch_tensorrt-1.1.0-cp38-cp38-linux_x86_64.whl (11.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.7/11.7 MB[0m [31m46.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting torch>=1.11.0+cu113<1.12.0
  Downloading torch-1.12.1-cp38-cp38-manylinux1_x86_64.whl (776.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m776.3/776.3 MB[0m [31m1.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: torch, torch-tensorrt
  Attempting uninstall: torch
    Found existing installation: torch 1.10.0
    Uninstalling torch-1.10.0:
      Successfully uninstalled torch-1.10.0
Successfully installed torch-1.12.1 torch-tensorrt-1.1.0
You should consider upgrading via the '/h

In [None]:
! source activate pytorch_p38

In [4]:
! pwd

/home/ec2-user/SageMaker/Accelerate-Deep-Learning-Workloads-with-Amazon-SageMaker/chapter9


## Run TensorRT compiler runtime

In [None]:
! docker run --gpus all --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 -it --rm -v /home/ec2-user/SageMaker/Accelerate-Deep-Learning-Workloads-with-Amazon-SageMaker/chapter9/3_src:/workspace/3_src nvcr.io/nvidia/pytorch:22.06-py3

In [None]:
# inside container 

python 3_src/compile_tensorrt.py

# Packaging model

In [8]:
! pwd


/home/ec2-user/SageMaker/Accelerate-Deep-Learning-Workloads-with-Amazon-SageMaker/chapter9


In [14]:
%%writefile ./3_src/resnet50/config.pbtxt
name: "resnet50"
platform: "pytorch_libtorch"
max_batch_size : 0
input [
  {
    name: "input__0"
    data_type: TYPE_FP32
    dims: [ 3, 224, 224 ]
    reshape { shape: [ 1, 3, 224, 224 ] }
  }
]
output [
  {
    name: "output__0"
    data_type: TYPE_FP32
    dims: [ 1, 1000 ,1, 1]
    reshape { shape: [ 1, 1000 ] }
  }
]
dynamic_batching {
   preferred_batch_size: 16
   max_queue_delay_microseconds: 1000
 }
instance_group {
  count: 1
  kind: KIND_GPU
}

Overwriting ./3_src/resnet50/config.pbtxt


In [15]:
# here we need to cd first inside 3_src and then archive
!tar -czvf resnet50.tar.gz resnet50

3_src/resnet50/
3_src/resnet50/1/
3_src/resnet50/1/model.pt
3_src/resnet50/.ipynb_checkpoints/
3_src/resnet50/.ipynb_checkpoints/config-checkpoint.pbtxt
3_src/resnet50/config.pbtxt


In [3]:
import sagemaker
from sagemaker import get_execution_role

sagemaker_session = sagemaker.Session()
role="arn:aws:iam::941656036254:role/service-role/AmazonSageMaker-ExecutionRole-20210904T193230" # TODO: this has to be replaced

bucket = sagemaker_session.default_bucket()
prefix = 'triton'
s3_path = 's3://{}/{}'.format(bucket, prefix)

In [19]:

model_data = sagemaker_session.upload_data("resnet50.tar.gz",
                                           bucket,
                                           prefix)

In [2]:
model_data = "s3://sagemaker-us-east-1-941656036254/triton/resnet50.tar.gz"

# Deploy inference endpoint

Triton containers are as of now not supported by SageMaker Python SDK. Hence, we will use `boto3` for deployment.

In [4]:
import boto3, json,  time
from sagemaker import get_execution_role

sm_client = boto3.client(service_name="sagemaker")
runtime_sm_client = boto3.client("sagemaker-runtime")


In [5]:
account_id_map = {
    'us-east-1': '785573368785',
    'us-east-2': '007439368137',
    'us-west-1': '710691900526',
    'us-west-2': '301217895009',
    'eu-west-1': '802834080501',
    'eu-west-2': '205493899709',
    'eu-west-3': '254080097072',
    'eu-north-1': '601324751636',
    'eu-south-1': '966458181534',
    'eu-central-1': '746233611703',
    'ap-east-1': '110948597952',
    'ap-south-1': '763008648453',
    'ap-northeast-1': '941853720454',
    'ap-northeast-2': '151534178276',
    'ap-southeast-1': '324986816169',
    'ap-southeast-2': '355873309152',
    'cn-northwest-1': '474822919863',
    'cn-north-1': '472730292857',
    'sa-east-1': '756306329178',
    'ca-central-1': '464438896020',
    'me-south-1': '836785723513',
    'af-south-1': '774647643957'
}

region = boto3.Session().region_name
if region not in account_id_map.keys():
    raise("UNSUPPORTED REGION")

base = "amazonaws.com.cn" if region.startswith("cn-") else "amazonaws.com"
triton_image_uri = "{account_id}.dkr.ecr.{region}.{base}/sagemaker-tritonserver:21.08-py3".format(
    account_id=account_id_map[region], region=region, base=base
)

In [6]:
triton_image_uri

'785573368785.dkr.ecr.us-east-1.amazonaws.com/sagemaker-tritonserver:21.08-py3'

In [12]:
sm_model_name = "triton-resnet50-" + time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())

container = {
    "Image": triton_image_uri,
    "ModelDataUrl": model_data,
    "Environment": {"SAGEMAKER_TRITON_DEFAULT_MODEL_NAME": "model"},
}

create_model_response = sm_client.create_model(
    ModelName=sm_model_name, ExecutionRoleArn=role, PrimaryContainer=container
)

print("Model Arn: " + create_model_response["ModelArn"])

Model Arn: arn:aws:sagemaker:us-east-1:941656036254:model/triton-resnet50-2022-08-09-19-46-04


In [13]:
endpoint_config_name = "triton-resnet50-" + time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())

create_endpoint_config_response = sm_client.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[
        {
            "InstanceType": "ml.g4dn.4xlarge",
            "InitialVariantWeight": 1,
            "InitialInstanceCount": 1,
            "ModelName": sm_model_name,
            "VariantName": "AllTraffic",
        }
    ],
)

print("Endpoint Config Arn: " + create_endpoint_config_response["EndpointConfigArn"])

Endpoint Config Arn: arn:aws:sagemaker:us-east-1:941656036254:endpoint-config/triton-resnet50-2022-08-09-19-46-05


In [14]:
endpoint_name = "triton-resnet50-" + time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())

create_endpoint_response = sm_client.create_endpoint(
    EndpointName=endpoint_name, EndpointConfigName=endpoint_config_name
)

print("Endpoint Arn: " + create_endpoint_response["EndpointArn"])

Endpoint Arn: arn:aws:sagemaker:us-east-1:941656036254:endpoint/triton-resnet50-2022-08-09-19-46-06


In [15]:
resp = sm_client.describe_endpoint(EndpointName=endpoint_name)
status = resp["EndpointStatus"]
print("Status: " + status)

while status == "Creating":
    time.sleep(60)
    resp = sm_client.describe_endpoint(EndpointName=endpoint_name)
    status = resp["EndpointStatus"]
    print("Status: " + status)

print("Arn: " + resp["EndpointArn"])
print("Status: " + status)

Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Creating
Status: Failed
Arn: arn:aws:sagemaker:us-east-1:941656036254:endpoint/triton-resnet50-2022-08-09-19-46-06
Status: Failed


# Creating Inference Client

In [None]:
! wget  -O img1.jpg "https://www.hakaimagazine.com/wp-content/uploads/header-gulf-birds.jpg"