In [None]:
# Invoke SageMaker Endpoint
# This example shows how to invoke a SageMaker endpoint from outside the AWS environment using the SageMaker SDK.
# Reference:
#  https://github.com/awslabs/amazon-sagemaker-examples


# NOTE: SageMaker SDK behavior has changed since this example was originally released.
#   The SDK now requires additional permissions: DescribeEndpoint and DescribeEndpointConfig.
#   Please update the SageMakerInvokeEndpoint policy to match the policy document below:
#   log on with the my_admin account and edit the policy
#   (IAM -> Policies -> SageMakerInvokeEndpoint -> Edit Policy).
#
# The dict literal below is that policy document. It is a bare expression: it is
# not assigned to a variable or passed to any call in this cell.
# NOTE(review): "Resource": "*" allows these actions on every endpoint in the
#   account; consider scoping it to the specific endpoint ARN -- confirm with the account owner.
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Sid": "VisualEditor0",
            "Effect": "Allow",
            "Action": [
                "sagemaker:DescribeEndpointConfig",
                "sagemaker:DescribeEndpoint",
                "sagemaker:InvokeEndpoint"
            ],
            "Resource": "*"
        }
    ]
}

In [None]:
import numpy as np
import pandas as pd
import os

# Define IAM role
import boto3
import re

import sagemaker

In [None]:
# Open an AWS session from a named credentials profile.
# The ml_user_predict profile is used because it has limited privileges.
AWS_PROFILE = 'ml_user_predict'
AWS_REGION = 'us-east-1'

boto_session = boto3.Session(
    profile_name=AWS_PROFILE,
    region_name=AWS_REGION,
)

In [None]:
# Wrap the boto3 session in a SageMaker SDK session so SDK calls use the same
# profile and region configured above.
sess = sagemaker.Session(boto_session=boto_session)

In [None]:
# Build a RealTimePredictor for an existing endpoint using the SageMaker SDK.
# Set endpoint_name to the name of your own deployed endpoint.
# NOTE(review): RealTimePredictor / endpoint= is the SageMaker Python SDK v1 API;
#   SDK v2 renamed it to Predictor(endpoint_name=...) -- confirm the installed SDK version.
endpoint_name = 'xgboost-biketrain-v1'

predictor = sagemaker.predictor.RealTimePredictor(
    sagemaker_session=sess,
    endpoint=endpoint_name,
)

In [None]:
# We are sending data for inference in CSV format
# NOTE(review): json_deserializer is imported here but is not used in the visible cells.
from sagemaker.predictor import csv_serializer, json_deserializer

# Label the request payload as CSV and serialize inputs with csv_serializer
predictor.content_type = 'text/csv'
predictor.serializer = csv_serializer
# No deserializer is configured; run_predictions decodes and parses the
# response text itself.
predictor.deserializer = None

In [None]:
# Run predictions invokes the predict method of the real-time predictor.
# The input is sent to the endpoint in chunks rather than in a single request.
def run_predictions(arr_input, batch_size):
    """Send ``arr_input`` to the endpoint in chunks and collect the predictions.

    The endpoint is called through the module-level ``predictor`` object.

    Args:
        arr_input: Array-like of records; converted to ``np.ndarray`` and
            split along its first axis.
        batch_size: Number of chunks passed to ``np.array_split``. Despite the
            name, this is NOT the number of rows per chunk. Chunks that end
            up empty are skipped.

    Returns:
        list: ``np.expm1`` of every value returned by the endpoint, in the
        order the chunks were sent.

    Raises:
        ValueError: If a response contains no values, or contains a token
            that cannot be parsed as a float.
    """
    predictions = []

    # No-op for an ndarray; converts lists and other sequences
    arr_input = np.asarray(arr_input)

    for chunk in np.array_split(arr_input, batch_size):
        # array_split yields empty sections when there are more sections than rows
        if chunk.size == 0:
            continue

        print("Shape:{0}".format(chunk.shape))
        response = predictor.predict(chunk)

        # The raw response is bytes; tolerate an already-decoded string too
        if isinstance(response, bytes):
            response = response.decode("utf-8")

        # Accept comma-separated as well as newline/whitespace-separated values
        values = response.replace(",", " ").split()
        if not values:
            raise ValueError(
                "Endpoint returned no predictions for chunk of shape {0}".format(chunk.shape)
            )

        # NOTE(review): expm1 presumably undoes a log1p transform applied to
        #   the target during training -- confirm against the training notebook.
        predictions.extend(np.expm1(float(value)) for value in values)

    return predictions

In [None]:
# Use the test file that was created in previous labs.
# If needed, download the test file from the SageMaker notebook instance.
# The path is relative, so bike_test.csv must be in the current working directory.
df_test = pd.read_csv('bike_test.csv')

In [None]:
# Preview the first rows of the test data
df_test.head()

In [None]:
# Pick the columns sent to the endpoint and convert them to a NumPy array.
# The array keeps the column order listed here.
feature_columns = [
    'season', 'holiday', 'workingday', 'weather',
    'temp', 'atemp', 'humidity', 'windspeed',
    'year', 'month', 'day', 'dayofweek', 'hour',
]

tmp_df = df_test[feature_columns].values

In [None]:
# Check the dimensions of the feature array
tmp_df.shape

In [None]:
# Inspect the first record of the feature array
tmp_df[0]

In [None]:
# Score the whole test set. batch_size is forwarded to np.array_split inside
# run_predictions, so 10 here means the data is split into 10 chunks.
predicted_count = run_predictions(arr_input=tmp_df, batch_size=10)

In [None]:
# Look at the first five predictions
predicted_count[:5]

In [None]:
# Store the predictions in a 'count' column. This modifies df_test in place.
df_test['count'] = predicted_count

In [None]:
# Preview the test data again, now including the 'count' column
df_test.head()

In [None]:
# Summary statistics of the predicted counts
df_test['count'].describe()