In [94]:
import numpy as np
from numpy import array
import matplotlib.pyplot as plt
import pandas as pd
import boto3
import re
from sagemaker import get_execution_role
import os
import io
import time
import json
import sagemaker.amazon.common as smac
from sklearn.model_selection import train_test_split

bucket = 'qs-demo-bgf'
prefix = 'transformed-data-no-header'

role = get_execution_role()

In [95]:
#load data 
s3 = boto3.resource('s3')

KEY = prefix+'/transformed_data_no_header.csv'
print(KEY)

s3.Bucket(bucket).download_file(KEY, 'transformed_data_no_header.csv')


transformed-data-no-header/transformed_data_no_header.csv


In [96]:
covtype = pd.read_csv('transformed_data_no_header.csv',dtype='float32').values
print(covtype.shape)
covtype_features, covtype_labels = covtype[:, :2], covtype[:, 2]
np.random.seed(0)
train_features, test_features, train_labels, test_labels = train_test_split(
    covtype_features, covtype_labels, test_size=0.2)
# further split the test set into validation and test sets
val_features, test_features, val_labels, test_labels = train_test_split(
    test_features, test_labels, test_size=0.5)

(21817, 3)


In [97]:
print(train_features[0])
print(train_labels[0])


[ 5. 19.]
0.0


In [98]:
import sagemaker
from sagemaker.amazon.amazon_estimator import RecordSet
import boto3

# instantiate the LinearLearner estimator object
multiclass_estimator = sagemaker.LinearLearner(role=sagemaker.get_execution_role(),
                                               train_instance_count=1,
                                               train_instance_type='ml.m4.xlarge',
                                               predictor_type='multiclass_classifier',
                                               num_classes=3)

In [99]:
# wrap data in RecordSet objects
train_records = multiclass_estimator.record_set(train_features, train_labels, channel='train')
val_records = multiclass_estimator.record_set(val_features, val_labels, channel='validation')
test_records = multiclass_estimator.record_set(test_features, test_labels, channel='test')

In [100]:
# start a training job
multiclass_estimator.fit([train_records, val_records, test_records])

2019-11-04 01:57:20 Starting - Starting the training job...
2019-11-04 01:57:22 Starting - Launching requested ML instances......
2019-11-04 01:58:21 Starting - Preparing the instances for training...
2019-11-04 01:59:19 Downloading - Downloading input data......
2019-11-04 02:00:17 Training - Training image download completed. Training in progress.
2019-11-04 02:00:17 Uploading - Uploading generated training model[31mDocker entrypoint called with argument(s): train[0m
[31m[11/04/2019 02:00:14 INFO 140226076981056] Reading default configuration from /opt/amazon/lib/python2.7/site-packages/algorithm/resources/default-input.json: {u'loss_insensitivity': u'0.01', u'epochs': u'15', u'feature_dim': u'auto', u'init_bias': u'0.0', u'lr_scheduler_factor': u'auto', u'num_calibration_samples': u'10000000', u'accuracy_top_k': u'3', u'_num_kv_servers': u'auto', u'use_bias': u'true', u'num_point_for_scaler': u'10000', u'_log_level': u'info', u'quantile': u'0.5', u'bias_lr_mult': u'auto', u'lr_sc


2019-11-04 02:00:24 Completed - Training job completed
Training seconds: 65
Billable seconds: 65


In [101]:
# deploy a model hosting endpoint
multiclass_predictor = multiclass_estimator.deploy(initial_instance_count=1, instance_type='ml.m4.xlarge')

--------------------------------------------------------------------------------------------------!

In [2]:

area = {"Queens":1.0,
"Staten Island":2.0,
"Brooklyn":3.0,
"Bronx":4.0,
"Manhattan":5.0}

cat = {
    "Hotdogs/Pretzels":1.0,
    "Donuts":2.0,
    "Bangladeshi":3.0,
    "Caribbean":4.0,
    "Chicken":5.0,
    "English":6.0,
    "Pizza":7.0,
    "Vegetarian":8.0,
    "Irish":9.0,
    "Not Listed/Not Applicable":10.0
}


In [5]:
import json 
import boto3 
client = boto3.client('runtime.sagemaker')

region = area["Manhattan"]
category = 54

values = str(region)+','+str(category)

response = client.invoke_endpoint(EndpointName='linear-learner-2019-11-04-01-57-20-572',
                                  ContentType='text/csv',
                                  Body=values)
result = json.loads(response['Body'].read().decode())

predict = result['predictions'][0]
print(predict)

grade = predict['predicted_label']

if(grade==0.0):
    letter = "A"
elif(grade==1.0):
    letter = "B"
else:
    letter = "C"
    
print("\nRestaurant Grade: "+letter)


{'score': [0.9371309280395508, 0.048006024211645126, 0.014863001182675362], 'predicted_label': 0.0}

Restaurant Grade: A
