In [38]:
%%sh

# The name of our algorithm
algorithm_name=sagemaker-word2vec

cd container

chmod +x decision_trees/train
chmod +x decision_trees/serve

account=$(aws sts get-caller-identity --query Account --output text)

# Get the region defined in the current configuration (default to us-west-1 if none defined)
region=$(aws configure get region)
region=${region:-us-east-1}

fullname="${account}.dkr.ecr.${region}.amazonaws.com/${algorithm_name}:latest"

# If the repository doesn't exist in ECR, create it.
aws ecr describe-repositories --repository-names "${algorithm_name}" > /dev/null 2>&1

if [ $? -ne 0 ]
then
    aws ecr create-repository --repository-name "${algorithm_name}" > /dev/null
fi

# Get the login command from ECR and execute it directly
$(aws ecr get-login --region ${region} --no-include-email)

# Build the docker image locally with the image name and then push it to ECR
# with the full name.

docker build  -t ${algorithm_name} .
docker tag ${algorithm_name} ${fullname}

docker push ${fullname}

Login Succeeded
Sending build context to Docker daemon  45.31MB
Step 1/9 : FROM ubuntu:16.04
 ---> c6a43cd4801e
Step 2/9 : MAINTAINER Amazon AI <sage-learner@amazon.com>
 ---> Using cache
 ---> f734f6968025
Step 3/9 : RUN apt-get -y update && apt-get install -y --no-install-recommends          wget          python          nginx          ca-certificates     && rm -rf /var/lib/apt/lists/*
 ---> Using cache
 ---> 5e2e40379c41
Step 4/9 : RUN wget https://bootstrap.pypa.io/get-pip.py && python get-pip.py &&     pip install numpy==1.16.2 scipy==1.2.1 scikit-learn==0.20.2 pandas flask gevent gunicorn gensim &&         (cd /usr/local/lib/python2.7/dist-packages/scipy/.libs; rm *; ln ../../numpy/.libs/* .) &&         rm -rf /root/.cache
 ---> Using cache
 ---> 5d673228e635
Step 5/9 : ENV PYTHONUNBUFFERED=TRUE
 ---> Using cache
 ---> 225ae1690031
Step 6/9 : ENV PYTHONDONTWRITEBYTECODE=TRUE
 ---> Using cache
 ---> 4a55ba822175
Step 7/9 : ENV PATH="/opt/program:${PATH}"
 ---> Using cache
 ---> b6

https://docs.docker.com/engine/reference/commandline/login/#credentials-store



In [39]:
# S3 prefix
prefix = 'word2vec'

# Define IAM role
import boto3
import re

import os
import numpy as np
import pandas as pd
from sagemaker import get_execution_role

role = get_execution_role()

In [25]:
from sagemaker.session import Session

model_data = Session().upload_data(path='model.tar.gz', key_prefix='model')
print(model_data)

s3://sagemaker-us-east-1-639634733305/model/model.tar.gz


In [40]:
import sagemaker as sage
from time import gmtime, strftime

sess = sage.Session()

In [41]:
# generating the image path
account = sess.boto_session.client('sts').get_caller_identity()['Account']
region = sess.boto_session.region_name
image = '{}.dkr.ecr.{}.amazonaws.com/sagemaker-word2vec:latest'.format(account, region)

In [42]:
# to deploy the model, you need to have the model created based on your model artifacts, 
# create an endpoint configuration and then create the endpoint based on the two.
# first let's create the model

client = boto3.client('sagemaker')
ModelName='TestCx-BYOA' + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
first_model = client.create_model(
    ModelName=ModelName,
    PrimaryContainer={
        'Image': image,
        'ModelDataUrl': 's3://sagemaker-us-east-1-639634733305/model/model.tar.gz'    # note that the model.tar.gz file is a tarball of our word2vec_2.model file
        },
    ExecutionRoleArn=role)

In [43]:
# create the endpoint configuration
endpoint_config_name = 'TestCx-BYOA-' + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
print(endpoint_config_name)
create_endpoint_config_response = client.create_endpoint_config(
    EndpointConfigName = endpoint_config_name,
    ProductionVariants=[{
        'InstanceType':'ml.m4.xlarge',
        'InitialVariantWeight':1,
        'InitialInstanceCount':1,
        'ModelName':ModelName,
        'VariantName':'AllTraffic'}])

print("Endpoint Config Arn: " + create_endpoint_config_response['EndpointConfigArn'])

TestCx-BYOA-2020-01-13-08-50-58
Endpoint Config Arn: arn:aws:sagemaker:us-east-1:639634733305:endpoint-config/testcx-byoa-2020-01-13-08-50-58


In [44]:
# create the endpoint
endpoint_name = 'TestCx-BYOA-' + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
print(endpoint_name)
create_endpoint_response = client.create_endpoint(
    EndpointName=endpoint_name,
    EndpointConfigName=endpoint_config_name)
print(create_endpoint_response['EndpointArn'])

TestCx-BYOA-2020-01-13-08-51-00
arn:aws:sagemaker:us-east-1:639634733305:endpoint/testcx-byoa-2020-01-13-08-51-00


In [None]:
from sagemaker.predictor import csv_serializer

In [1]:
from sagemaker.tensorflow.serving import Predictor

predictor = Predictor(endpoint_name = "TestCx-BYOA-2020-01-13-08-51-00")

In [2]:
dir(predictor)

['__class__',
 '__delattr__',
 '__dict__',
 '__doc__',
 '__format__',
 '__getattribute__',
 '__hash__',
 '__init__',
 '__module__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_classify_or_regress',
 '_create_request_args',
 '_delete_endpoint_config',
 '_endpoint_config_name',
 '_get_endpoint_config_name',
 '_get_model_names',
 '_handle_response',
 '_model_attributes',
 '_model_names',
 'accept',
 'classify',
 'content_type',
 'delete_endpoint',
 'delete_model',
 'deserializer',
 'disable_data_capture',
 'enable_data_capture',
 'endpoint',
 'list_monitors',
 'predict',
 'regress',
 'sagemaker_session',
 'serializer',
 'update_data_capture_config']

In [22]:
import pandas as pd
df = pd.read_csv('./data/word2vec_test_data.csv')
df

Unnamed: 0,movieid,userid,users,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres
0,72777,nmadhuk3,nmadhuk3,tt0072777,movie,Chhoti Si Baat,Chhoti Si Baat,0,1976,2005,123,"Comedy,Romance"
1,72777,mishu_mausam,mishu_mausam,tt0072777,movie,Chhoti Si Baat,Chhoti Si Baat,0,1976,2005,123,"Comedy,Romance"
2,72777,Kaaliyaa,Kaaliyaa,tt0072777,movie,Chhoti Si Baat,Chhoti Si Baat,0,1976,2005,123,"Comedy,Romance"
3,72777,Peter_Young,Peter_Young,tt0072777,movie,Chhoti Si Baat,Chhoti Si Baat,0,1976,2005,123,"Comedy,Romance"
4,72777,bluegreensalad,bluegreensalad,tt0072777,movie,Chhoti Si Baat,Chhoti Si Baat,0,1976,2005,123,"Comedy,Romance"


In [4]:
data = data['movieid']
data

0    72777
1    72777
2    72777
3    72777
4    72777
Name: movieid, dtype: int64

In [52]:
data

0    72777
1    72777
2    72777
3    72777
4    72777
Name: movieid, dtype: int64

In [5]:
map(int, data)

[72777, 72777, 72777, 72777, 72777]

In [23]:
df = df.to_json()
df

'{"movieid":{"0":72777,"1":72777,"2":72777,"3":72777,"4":72777},"userid":{"0":"nmadhuk3","1":"mishu_mausam","2":"Kaaliyaa","3":"Peter_Young","4":"bluegreensalad"},"users":{"0":"nmadhuk3","1":"mishu_mausam","2":"Kaaliyaa","3":"Peter_Young","4":"bluegreensalad"},"tconst":{"0":"tt0072777","1":"tt0072777","2":"tt0072777","3":"tt0072777","4":"tt0072777"},"titleType":{"0":"movie","1":"movie","2":"movie","3":"movie","4":"movie"},"primaryTitle":{"0":"Chhoti Si Baat","1":"Chhoti Si Baat","2":"Chhoti Si Baat","3":"Chhoti Si Baat","4":"Chhoti Si Baat"},"originalTitle":{"0":"Chhoti Si Baat","1":"Chhoti Si Baat","2":"Chhoti Si Baat","3":"Chhoti Si Baat","4":"Chhoti Si Baat"},"isAdult":{"0":0,"1":0,"2":0,"3":0,"4":0},"startYear":{"0":1976,"1":1976,"2":1976,"3":1976,"4":1976},"endYear":{"0":2005,"1":2005,"2":2005,"3":2005,"4":2005},"runtimeMinutes":{"0":123,"1":123,"2":123,"3":123,"4":123},"genres":{"0":"Comedy,Romance","1":"Comedy,Romance","2":"Comedy,Romance","3":"Comedy,Romance","4":"Comedy,Romanc

In [25]:
predictor.predict(df)

ModelError: An error occurred (ModelError) when calling the InvokeEndpoint operation: Received client error (415) from model with message "This predictor only supports CSV data". See https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#logEventViewer:group=/aws/sagemaker/Endpoints/TestCx-BYOA-2020-01-13-08-51-00 in account 639634733305 for more information.