In [1]:
!cat container/Dockerfile

# Build an image that can do training and inference in SageMaker
# This is a Python 2 image that uses the nginx, gunicorn, flask stack
# for serving inferences in a stable way.

FROM ubuntu:20.04

MAINTAINER Amazon AI <sage-learner@amazon.com>

ARG PYTHON_VERSION_TAG=3.8.3
ARG LINK_PYTHON_TO_PYTHON3=1

RUN apt-get -y update && apt-get install -y --no-install-recommends \
         wget \
         nginx \
         ca-certificates \
    && rm -rf /var/lib/apt/lists/*
    
RUN apt-get -qq -y update && \
    DEBIAN_FRONTEND=noninteractive apt-get -qq -y install \
        gcc \
        g++ \
        zlibc \
        zlib1g-dev \
        libssl-dev \
        libbz2-dev \
        libsqlite3-dev \
        libncurses5-dev \
        libgdbm-dev \
        libgdbm-compat-dev \
        liblzma-dev \
        libreadline-dev \
        uuid-dev \
        libffi-dev \
        tk-dev \
        curl \
        git \
        make \
        sudo \
        bash-completion 

In [1]:
%cd container
!docker build -t port-cnn-15 .
%cd ../

/home/ec2-user/SageMaker/protein-annotation/inference-container/container
Sending build context to Docker daemon  377.9kB
Step 1/17 : FROM ubuntu:20.04
 ---> f643c72bc252
Step 2/17 : MAINTAINER Amazon AI <sage-learner@amazon.com>
 ---> Using cache
 ---> 326b3d86eee1
Step 3/17 : ARG PYTHON_VERSION_TAG=3.8.3
 ---> Using cache
 ---> 439310e2c915
Step 4/17 : ARG LINK_PYTHON_TO_PYTHON3=1
 ---> Using cache
 ---> dbb5d06e3d11
Step 5/17 : RUN apt-get -y update && apt-get install -y --no-install-recommends          wget          nginx          ca-certificates     && rm -rf /var/lib/apt/lists/*
 ---> Using cache
 ---> 5f43ad46e6dd
Step 6/17 : RUN apt-get -qq -y update &&     DEBIAN_FRONTEND=noninteractive apt-get -qq -y install         gcc         g++         zlibc         zlib1g-dev         libssl-dev         libbz2-dev         libsqlite3-dev         libncurses5-dev         libgdbm-dev         libgdbm-compat-dev         liblzma-dev         libreadline-dev         uuid-dev         libffi-dev    

In [2]:
import boto3

account_id = boto3.client('sts').get_caller_identity().get('Account')
region = boto3.session.Session().region_name

ecr_repository = 'port-cnn-15'
tag = ':latest'
uri_suffix = 'amazonaws.com'
port_cnn_uri = '{}.dkr.ecr.{}.{}/{}'.format(account_id, region, uri_suffix, ecr_repository + tag)

# Create ECR repository and push docker image
!$(aws ecr get-login --region $region --registry-ids $account_id --no-include-email)
!aws ecr create-repository --repository-name $ecr_repository
!docker tag {ecr_repository + tag} $port_cnn_uri
!docker push $port_cnn_uri

https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
{
    "repository": {
        "repositoryArn": "arn:aws:ecr:us-east-1:877465308896:repository/port-cnn-15",
        "registryId": "877465308896",
        "repositoryName": "port-cnn-15",
        "repositoryUri": "877465308896.dkr.ecr.us-east-1.amazonaws.com/port-cnn-15",
        "createdAt": 1607546024.0,
        "imageTagMutability": "MUTABLE",
        "imageScanningConfiguration": {
            "scanOnPush": false
        },
        "encryptionConfiguration": {
            "encryptionType": "AES256"
        }
    }
}
The push refers to repository [877465308896.dkr.ecr.us-east-1.amazonaws.com/port-cnn-15]

[1Bc53ef381: Preparing 
[1Bc0f5558d: Preparing 
[1Bec89fa3b: Preparing 
[1Ba32e9379: Preparing 
[1B4d331419: Preparing 
[1B62557a9d: Preparing 
[1Bb99c5c09: Preparing 
[1B3634dc78: Preparing 
[1Bf84dbbe9: Preparing 


[9Bc0f5558d: Pushed   1.637GB/1.619GB[6A[2K[9A[2K[7A[2K[10A[2K[9A[2K[9A[2K[3A[2K[9A[2K[7A[2K[9A[2K[7A[2K[5A[2K[7A[2K[5A[2K[9A[2K[5A[2K[2A[2K[4A[2K[5A[2K[4A[2K[9A[2K[7A[2K[4A[2K[7A[2K[4A[2K[7A[2K[5A[2K[7A[2K[4A[2K[9A[2K[7A[2K[9A[2K[7A[2K[1A[2K[7A[2K[4A[2K[5A[2K[7A[2K[1A[2K[7A[2K[9A[2K[7A[2K[5A[2K[7A[2K[5A[2K[7A[2K[1A[2K[7A[2K[1A[2K[7A[2K[4A[2K[1A[2K[4A[2K[1A[2K[9A[2K[5A[2K[1A[2K[5A[2K[1A[2K[1A[2K[7A[2K[1A[2K[7A[2K[1A[2K[7A[2K[9A[2K[5A[2K[9A[2K[5A[2K[4A[2K[9A[2K[4A[2K[9A[2K[7A[2K[9A[2K[7A[2K[4A[2K[1A[2K[9A[2K[5A[2K[9A[2K[5A[2K[7A[2K[1A[2K[7A[2K[9A[2K[1A[2K[9A[2K[4A[2K[9A[2K[5A[2K[7A[2K[5A[2K[7A[2K[5A[2K[9A[2K[1A[2K[9A[2K[7A[2K[9A[2K[1A[2K[9A[2K[5A[2K[9A[2K[5A[2K[7A[2K[9A[2K[5A[2K[9A[2K[5A[2K[1A[2K[9A[2K[9A[2K[5A[2K[9A[2K[7A[2K[7A[2K[7A[2K[7A[2K[1A[2K

In [3]:
# S3 prefix
prefix = 'ProtCNN-Endpoint'

# Define IAM role
import boto3
import re

import os
import numpy as np
import pandas as pd
from sagemaker import get_execution_role

role = get_execution_role()

In [4]:
import sagemaker as sage
from time import gmtime, strftime

sess = sage.Session()

In [20]:
account = sess.boto_session.client('sts').get_caller_identity()['Account']
region = sess.boto_session.region_name
#image = '{}.dkr.ecr.{}.amazonaws.com/port-cnn:latest'.format(account, region)
model_name = "PortCNN-prediction-10"

In [21]:
image = '{}.dkr.ecr.{}.amazonaws.com/port-cnn-15:latest'.format(account, region)

In [22]:
image

'877465308896.dkr.ecr.us-east-1.amazonaws.com/port-cnn-15:latest'

In [23]:
sage = boto3.Session().client(service_name='sagemaker')

In [24]:
primary_container = {
    'Image': image,
    'ModelDataUrl': "s3://sagemaker-us-east-1-877465308896/tensorflow-training-201202-1559-002-4534ac0c/output/model.tar.gz",
}

create_model_response = sage.create_model(
    ModelName = model_name,
    ExecutionRoleArn = role,
    PrimaryContainer = primary_container)

In [25]:
job_name_prefix = 'PortCNN-inference'

In [26]:
import time
from time import gmtime, strftime

timestamp = time.strftime('-%Y-%m-%d-%H-%M-%S', time.gmtime())
endpoint_config_name = job_name_prefix + '-epc-' + timestamp
endpoint_config_response = sage.create_endpoint_config(
    EndpointConfigName = endpoint_config_name,
    ProductionVariants=[{
        'InstanceType':'ml.m5.4xlarge', #'ml.m5.4xlarge'
        'InitialInstanceCount':1,
        'ModelName':model_name,
        'VariantName':'AllTraffic'}])

print('Endpoint configuration name: {}'.format(endpoint_config_name))
print('Endpoint configuration arn:  {}'.format(endpoint_config_response['EndpointConfigArn']))

Endpoint configuration name: PortCNN-inference-epc--2020-12-09-21-22-50
Endpoint configuration arn:  arn:aws:sagemaker:us-east-1:877465308896:endpoint-config/portcnn-inference-epc--2020-12-09-21-22-50


In [27]:
sagemaker = boto3.client(service_name='sagemaker')

In [28]:
## Here

In [29]:
%%time
import time

timestamp = time.strftime('-%Y-%m-%d-%H-%M-%S', time.gmtime())
endpoint_name = job_name_prefix + '-ep-' + timestamp
print('Endpoint name: {}'.format(endpoint_name))

endpoint_params = {
    'EndpointName': endpoint_name,
    'EndpointConfigName': endpoint_config_name,
}
endpoint_response = sagemaker.create_endpoint(**endpoint_params)
print('EndpointArn = {}'.format(endpoint_response['EndpointArn']))

Endpoint name: PortCNN-inference-ep--2020-12-09-21-22-55
EndpointArn = arn:aws:sagemaker:us-east-1:877465308896:endpoint/portcnn-inference-ep--2020-12-09-21-22-55
CPU times: user 7.76 ms, sys: 4.05 ms, total: 11.8 ms
Wall time: 218 ms


In [30]:
# get the status of the endpoint
response = sagemaker.describe_endpoint(EndpointName=endpoint_name)
status = response['EndpointStatus']
print('EndpointStatus = {}'.format(status))


# wait until the status has changed
sagemaker.get_waiter('endpoint_in_service').wait(EndpointName=endpoint_name)


# print the status of the endpoint
endpoint_response = sagemaker.describe_endpoint(EndpointName=endpoint_name)
status = endpoint_response['EndpointStatus']
print('Endpoint creation ended with EndpointStatus = {}'.format(status))

if status != 'InService':
    raise Exception('Endpoint creation failed.')

EndpointStatus = Creating
Endpoint creation ended with EndpointStatus = InService


In [31]:
s3 = boto3.resource('s3')

In [32]:
obj = s3.Object("sagemaker-us-east-1-877465308896","test-2.csv")

In [33]:
payloads = obj.get()['Body'].read().decode('utf-8')

In [34]:
for element in payloads.split("\r\n")[:-1]:
    print(element)

AFLFSGRREVMADACLQGMMGCVYGTAGGMDSAAAVLGDFCFLAGKPEERLIAWDYGRQYLLLAPPDAAWRELIKKVLGDRAREHTRYAIKKEGDCFDPGRLRTLAETLPAGITLSRIHGELYGKCLKEEWSRDLVSCFPSCEAYEAMGLGVAALRGNELLAGASSYARSRDAIEIEIDTREDMRNRGLASACGAALILECLERGLYPSWDAHTEISAALAEKLGYHVSHPYVVY
MVDVGGKPVSRRTAAASATVLLGEKAFWLVKENQLAKGDALAVAQIAGIMAAKQTSALIPLCHPIPLDRVAVSLELVEPGWRVVVTATCVASGRTGVEMEALTAASLAALALYDMCKAVTRDIVIQDVRLLSKTGG
VLDVACGTCDVAMEARNQTGDAAFIIGTDFSPGMLTLGLQKLKKNRRFATIPLVCANALALPFQSTHFDAVLIAFGIRNIMDRKGALKQFHDALKPGG
VVLERASLESVKVGKEYQLLNCDRHKGIAKKFKRDISTCRPDITHQCLLMLMDSPLNRAGLLQVFIRTEKNILIEINPQTRIPRTFDRFCGLMVQLLQKFSIHALDGNVKLLKVIKNPITDHFPNGCMKIGTSFSAEVVQDPTSVMTSTTNNDDDDAPIVFVVGAISRGSIDVDYVEKTISLSSYPLSAALTCAKLCGAFE
MVDVGGKPVSRRTAAASATVLLGEKAFWLVKENQLAKGDALAVAQIAGIMAAKQTSALIPLCHPIPLDRVAVSLELVEPGWRVVVTATCVASGRTGVEMEALTAASLAALALYDMCKAVTRDIVIQDVRLLSKTGG


In [35]:
payloads = payloads.split("\r\n")

In [36]:
import boto3
runtime = boto3.Session().client(service_name='runtime.sagemaker') 

In [37]:
import re

In [38]:
response = runtime.invoke_endpoint(EndpointName=endpoint_name, 
                                           ContentType= 'text/csv', 
                                           Body=payloads[0] + "\r\n")
print(response)

{'ResponseMetadata': {'RequestId': 'ac0e7f8d-d99c-45a6-b8ba-371894aa6f0d', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'ac0e7f8d-d99c-45a6-b8ba-371894aa6f0d', 'x-amzn-invoked-production-variant': 'AllTraffic', 'date': 'Wed, 9 Dec 2020 21:37:04 GMT', 'content-type': 'text/csv; charset=utf-8', 'content-length': '46'}, 'RetryAttempts': 0}, 'ContentType': 'text/csv; charset=utf-8', 'InvokedProductionVariant': 'AllTraffic', 'Body': <botocore.response.StreamingBody object at 0x7fb5b36b53c8>}


In [39]:
r = response['Body'].read().decode()

In [40]:
r

'"(\'PF12746.7\', \'GNAT_acetyltran\', 0.9372136)"\n'

In [112]:
# Match ' OR " OR ( OR ) OR \n
re.sub("\'|\"|\(|\)|\n", '', r).split(", ")

['PF12746.7', 'GNAT_acetyltran', '0.9372136']

In [111]:
type(re.sub("\'|\"|\(|\)|\n", '', r).split(", ")[1])

str

In [41]:
import json

In [42]:
responses = []
for payload in payloads:
    if (payload != ''):
        response = runtime.invoke_endpoint(EndpointName=endpoint_name, 
                                           ContentType= 'text/csv', 
                                           Body=payload + "\r\n")
        response = json.loads(response['Body'].read().decode())
        response = re.sub("\'|\"|\(|\)|\n", '', response).split(", ")
        response.append(payload)
        
        responses.append(tuple(response))

In [43]:
responses

[('PF12746.7',
  'GNAT_acetyltran',
  '0.9372136',
  'AFLFSGRREVMADACLQGMMGCVYGTAGGMDSAAAVLGDFCFLAGKPEERLIAWDYGRQYLLLAPPDAAWRELIKKVLGDRAREHTRYAIKKEGDCFDPGRLRTLAETLPAGITLSRIHGELYGKCLKEEWSRDLVSCFPSCEAYEAMGLGVAALRGNELLAGASSYARSRDAIEIEIDTREDMRNRGLASACGAALILECLERGLYPSWDAHTEISAALAEKLGYHVSHPYVVY'),
 ('PF01967.21',
  'MoaC',
  '0.99998176',
  'MVDVGGKPVSRRTAAASATVLLGEKAFWLVKENQLAKGDALAVAQIAGIMAAKQTSALIPLCHPIPLDRVAVSLELVEPGWRVVVTATCVASGRTGVEMEALTAASLAALALYDMCKAVTRDIVIQDVRLLSKTGG'),
 ('PF13649.6',
  'Methyltransf_25',
  '0.99475324',
  'VLDVACGTCDVAMEARNQTGDAAFIIGTDFSPGMLTLGLQKLKKNRRFATIPLVCANALALPFQSTHFDAVLIAFGIRNIMDRKGALKQFHDALKPGG'),
 ('PF03587.14',
  'EMG1',
  '0.99998105',
  'VVLERASLESVKVGKEYQLLNCDRHKGIAKKFKRDISTCRPDITHQCLLMLMDSPLNRAGLLQVFIRTEKNILIEINPQTRIPRTFDRFCGLMVQLLQKFSIHALDGNVKLLKVIKNPITDHFPNGCMKIGTSFSAEVVQDPTSVMTSTTNNDDDDAPIVFVVGAISRGSIDVDYVEKTISLSSYPLSAALTCAKLCGAFE'),
 ('PF01967.21',
  'MoaC',
  '0.99998176',
  'MVDVGGKPVSRRTAAASATVLLGEKAFWLVKENQLAKGDALAVAQIAGIMAAKQTSALIPLCHPIPLDRV

In [44]:
for a, b, c, d in responses:
    print(a)
    print(b)
    print(c)
    print(d)
    print("sssssssssssssssssssssss")

PF12746.7
GNAT_acetyltran
0.9372136
AFLFSGRREVMADACLQGMMGCVYGTAGGMDSAAAVLGDFCFLAGKPEERLIAWDYGRQYLLLAPPDAAWRELIKKVLGDRAREHTRYAIKKEGDCFDPGRLRTLAETLPAGITLSRIHGELYGKCLKEEWSRDLVSCFPSCEAYEAMGLGVAALRGNELLAGASSYARSRDAIEIEIDTREDMRNRGLASACGAALILECLERGLYPSWDAHTEISAALAEKLGYHVSHPYVVY
sssssssssssssssssssssss
PF01967.21
MoaC
0.99998176
MVDVGGKPVSRRTAAASATVLLGEKAFWLVKENQLAKGDALAVAQIAGIMAAKQTSALIPLCHPIPLDRVAVSLELVEPGWRVVVTATCVASGRTGVEMEALTAASLAALALYDMCKAVTRDIVIQDVRLLSKTGG
sssssssssssssssssssssss
PF13649.6
Methyltransf_25
0.99475324
VLDVACGTCDVAMEARNQTGDAAFIIGTDFSPGMLTLGLQKLKKNRRFATIPLVCANALALPFQSTHFDAVLIAFGIRNIMDRKGALKQFHDALKPGG
sssssssssssssssssssssss
PF03587.14
EMG1
0.99998105
VVLERASLESVKVGKEYQLLNCDRHKGIAKKFKRDISTCRPDITHQCLLMLMDSPLNRAGLLQVFIRTEKNILIEINPQTRIPRTFDRFCGLMVQLLQKFSIHALDGNVKLLKVIKNPITDHFPNGCMKIGTSFSAEVVQDPTSVMTSTTNNDDDDAPIVFVVGAISRGSIDVDYVEKTISLSSYPLSAALTCAKLCGAFE
sssssssssssssssssssssss
PF01967.21
MoaC
0.99998176
MVDVGGKPVSRRTAAASATVLLGEKAFWLVKENQLAKGDALAVAQIAGIMAAKQTSALIPLCHPIPLDRVAVSLEL

In [27]:
client = boto3.client("s3")
import codecs
import csv

In [28]:
data = client.get_object(Bucket="sagemaker-us-east-1-877465308896", Key="dict_class.csv")
protein_classes = {}
    
for protein_classes in csv.DictReader(codecs.getreader("utf-8")(data["Body"])):
    pass



In [45]:
import sagemaker

In [46]:
input_data_path = 's3://{}/{}/'.format("sagemaker-us-east-1-877465308896", 'batch-data')
output_data_path = 's3://{}/{}/'.format("sagemaker-us-east-1-877465308896", 'batch-results')

In [47]:
input_data_path, output_data_path

('s3://sagemaker-us-east-1-877465308896/batch-data/',
 's3://sagemaker-us-east-1-877465308896/batch-results/')

In [49]:
timestamp_prefix = "7"
job_name = 'serial-inference-batch-' + timestamp_prefix
transformer = sagemaker.transformer.Transformer(
    # This was the model created using PipelineModel and it contains feature processing and XGBoost
    model_name = model_name,
    instance_count = 1,
    instance_type = 'ml.m5.4xlarge',
    max_payload = 1,
    strategy = 'SingleRecord',
    assemble_with = 'Line',
    output_path = output_data_path,
    base_transform_job_name='serial-inference-batch',
    sagemaker_session=sess,
    accept = "text/csv"
)
transformer.transform(data = input_data_path,
                      job_name = job_name,
                      content_type = "text/csv", 
                      split_type = 'Line')
transformer.wait()

...................................................
[34mStarting the inference server with 16 workers.[0m
[34m2020/12/09 21:46:17 [crit] 10#10: *1 connect() to unix:/tmp/gunicorn.sock failed (2: No such file or directory) while connecting to upstream, client: 169.254.255.130, server: , request: "GET /ping HTTP/1.1", upstream: "http://unix:/tmp/gunicorn.sock:/ping", host: "169.254.255.131:8080"[0m
[34m169.254.255.130 - - [09/Dec/2020:21:46:17 +0000] "GET /ping HTTP/1.1" 502 166 "-" "Go-http-client/1.1"[0m
[34m2020/12/09 21:46:18 [crit] 10#10: *3 connect() to unix:/tmp/gunicorn.sock failed (2: No such file or directory) while connecting to upstream, client: 169.254.255.130, server: , request: "GET /ping HTTP/1.1", upstream: "http://unix:/tmp/gunicorn.sock:/ping", host: "169.254.255.131:8080"[0m
[34m169.254.255.130 - - [09/Dec/2020:21:46:18 +0000] "GET /ping HTTP/1.1" 502 166 "-" "Go-http-client/1.1"[0m
[34m[2020-12-09 21:46:18 +0000] [9] [INFO] Starting gunicorn 20.0.4[0m
[3

[32m2020-12-09T21:46:27.411:[sagemaker logs]: MaxConcurrentTransforms=1, MaxPayloadInMB=1, BatchStrategy=SINGLE_RECORD[0m
[34mInvoked with 1 records[0m
[34mMaking predictions...[0m
[34m169.254.255.130 - - [09/Dec/2020:21:46:28 +0000] "POST /invocations HTTP/1.1" 200 37 "-" "Go-http-client/1.1"[0m
[34mInvoked with 1 records[0m
[34mMaking predictions...[0m
[34m169.254.255.130 - - [09/Dec/2020:21:46:28 +0000] "POST /invocations HTTP/1.1" 200 43 "-" "Go-http-client/1.1"[0m
[34mInvoked with 1 records[0m
[34mMaking predictions...[0m
[34m169.254.255.130 - - [09/Dec/2020:21:46:29 +0000] "POST /invocations HTTP/1.1" 200 37 "-" "Go-http-client/1.1"[0m
[34mInvoked with 1 records[0m
[34mMaking predictions...[0m
[34m169.254.255.130 - - [09/Dec/2020:21:46:29 +0000] "POST /invocations HTTP/1.1" 200 44 "-" "Go-http-client/1.1"[0m
[34mInvoked with 1 records[0m
[34mMaking predictions...[0m
[34m169.254.255.130 - - [09/Dec/2020:21:46:29 +0000] "POST /invocations HTTP/1.1" 20

[34m2020-12-09 21:46:18.873633: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.[0m
[34m2020-12-09 21:46:18.915190: W tensorflow/stream_executor/platform/default/dso_loader.cc:59] Could not load dynamic library 'libcudart.so.10.1'; dlerror: libcudart.so.10.1: cannot open shared object file: No such file or directory[0m
[34m2020-12-09 21:46:18.915463: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.[0m
[34m2020-12-09 21:46:18.987821: W tensorflow/stream_executor/platform/default/dso_loader.cc:59] Could not load dynamic library 'libcudart.so.10.1'; dlerror: libcudart.so.10.1: cannot open shared object file: No such file or directory[0m
[34m2020-12-09 21:46:18.987870: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.[0m
[34m2020-12-09 21:46

In [74]:
r = "this test\n"

In [79]:
r[-1]

'\n'

In [78]:
r.strip()[-1]

't'