In [None]:
import boto3
from sagemaker import get_execution_role

# Execution role reported by the SageMaker SDK for the current environment
role = get_execution_role()

# Region of the default boto3 session (None when no default region is configured)
region = boto3.Session().region_name

# Enter the s3 bucket and path where you want to store the training and test data
bucket = 'bucket'
prefix = 'prefix'
# Use the dot-style regional endpoint (s3.<region>.amazonaws.com): the legacy
# dash-style form (s3-<region>) is only supported by some older regions.
bucket_path = 'https://s3.{}.amazonaws.com/{}'.format(region, bucket)

In [None]:
import urllib.request

# Download the test data set into the working directory as "adult.test".
# The data originally comes from https://archive.ics.uci.edu/ml/datasets/Census+Income
test_data_url = (
    "https://raw.githubusercontent.com/FINRAOS/CodeSamples/master/"
    "machine-learning-samples/src/main/resources/adult.test"
)
urllib.request.urlretrieve(test_data_url, "adult.test")

In [None]:
# Feature column names of the data set, in the order they appear in each row
fields = [
    'age', 'workclass', 'fnlwgt', 'education', 'education-num',
    'marital-status', 'occupation', 'relationship', 'race', 'sex',
    'capital-gain', 'capital-loss', 'hours-per-week', 'native-country',
]

# Names of the categorical fields; every other field is treated as continuous
_categorical_field_names = {
    'workclass', 'education', 'marital-status', 'occupation',
    'relationship', 'race', 'sex', 'native-country',
}

# Positions (within `fields`) of the categorical and continuous field types
categoricalFieldIndexes = [
    position for position, name in enumerate(fields)
    if name in _categorical_field_names
]
continuousFieldIndexes = [
    position for position, name in enumerate(fields)
    if name not in _categorical_field_names
]

In [None]:
import pandas as pd

# Load the data set into a dataframe and format it
def load_and_format_data(csv_name, field_names=None):
    """Read a header-less CSV file and return it as a cleaned DataFrame.

    Args:
        csv_name: Path of the CSV file (passed straight to ``pandas.read_csv``).
            Each row holds the feature columns followed by the label column.
        field_names: Optional list of feature column names, in file order.
            When omitted, the notebook-level ``fields`` list is used.

    Returns:
        A DataFrame whose first column is ``label``, followed by the feature
        columns. One leading space is removed from every string value, and a
        trailing period is removed from the ``label`` values.
    """
    columns = list(fields if field_names is None else field_names)

    df = pd.read_csv(csv_name, header=None, names=columns + ['label'])

    # Move the label to the front, keeping the feature columns in order
    df = df.reindex(columns=['label'] + columns)

    # Drop a single leading space from string values
    # (presumably the file separates values with ", " -- verify against the data)
    df = df.replace(regex=r'^ ', value='')

    # Raw string: '\.' in a normal string literal is an invalid escape sequence
    df = df.replace({'label': r'\.$'}, {'label': ''}, regex=True)

    return df

# Formatted test set, read from the local "adult.test" file
test_data = load_and_format_data(csv_name='adult.test')

In [None]:
from sagemaker.predictor import RealTimePredictor

# Name of the endpoint to query -- replace the placeholder with your own
endpoint = 'endpoint'

# Predictor bound to that endpoint; requests are sent with a text/csv content type
predictor = RealTimePredictor(
    endpoint,
    content_type='text/csv',
)

In [None]:
# Expected labels, in the same row order as the CSV lines built below
labels = test_data['label'].to_list()

# One header-less CSV line per row, with the label column removed
rows = test_data.drop(columns=['label']).to_csv(header=None, index=False).splitlines()

# Each CSV line must pair with exactly one label; otherwise rows and labels
# would be compared out of alignment.
if len(rows) != len(labels):
    raise ValueError(
        'Expected one CSV line per label, got {} lines for {} labels'.format(
            len(rows), len(labels)
        )
    )

# Number of rows whose endpoint result differs from the expected label
count = 0

# Check the endpoint result against the test data label for each row
for label, row in zip(labels, rows):
    # Strip trailing whitespace from the response, then decode it as UTF-8
    result = str(predictor.predict(row).rstrip(), 'utf-8')

    if label != result:
        count += 1

if labels:
    print('Test error rate: {}'.format(count / len(labels)))
else:
    # An empty test set has no error rate; avoid dividing by zero
    print('Test error rate: undefined (no test rows)')