In [72]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
data_location = "s3://sagemaker-biz-demo/automatedpricing/data/base_prod.csv"
dataset = pd.read_csv(data_location)
dataset

Unnamed: 0,sku_code,material,box_code,bursting_factor,flute_type,inner_height_mm,height_inch,inner_height,length_inch,updated_at,quantity,status,company_id,per_unit
0,BZ-SKU-0000027,"""Top paper golden Kraft rest brown Kraft""","""PC1""","""24x22x22x22x22""","""BB""","""646""","""3.9""","""687""","""7.9""",2018-11-01,795,3,4606,6.710000
1,BZ-SKU-0000027,"""Top paper golden Kraft rest brown Kraft""","""PC1""","""24x22x22x22x22""","""BB""","""646""","""3.9""","""687""","""7.9""",2018-11-01,12490,3,4606,7.100000
2,BZ-SKU-0004768,"""Paper""","""Regular 5P34""","""20x18x18x18x18""","""BC""","""300""","""11.8""","""730""","""28.7""",2018-02-07,10,3,4606,63.430000
3,BZ-SKU-0005542,"""Paper""","""5P34.2 Regular (3)""","""20x18x18x18x18""","""BC""","""300""","""11.8""","""730""","""28.7""",2018-02-07,2165,3,9508,67.000000
4,BZ-SKU-0005542,"""Paper""","""5P34.2 Regular (3)""","""20x18x18x18x18""","""BC""","""300""","""11.8""","""730""","""28.7""",2018-02-07,315,3,9508,67.000000
5,BZ-SKU-0004768,"""Paper""","""Regular 5P34""","""20x18x18x18x18""","""BC""","""300""","""11.8""","""730""","""28.7""",2018-02-07,825,3,4606,67.229947
6,BZ-SKU-0000068,"""Paper""","""A1""","""22x22x22""","""B""","""101.14""","""4.1""","""306.88""","""12.2""",2018-10-05,1000,4,4606,6.930000
7,BZ-SKU-0000068,"""Paper""","""A1""","""22x22x22""","""B""","""101.14""","""4.1""","""306.88""","""12.2""",2018-10-05,1998,3,4606,7.340000
8,BZ-SKU-0000068,"""Paper""","""A1""","""22x22x22""","""B""","""101.14""","""4.1""","""306.88""","""12.2""",2018-10-05,1000,1,4606,6.930000
9,BZ-SKU-0000068,"""Paper""","""A1""","""22x22x22""","""B""","""101.14""","""4.1""","""306.88""","""12.2""",2018-10-05,1000,3,4606,7.860000


In [62]:
#Data Processing
from sklearn.preprocessing import LabelEncoder 
labelencoder_dataset = LabelEncoder()
dataset["sku_code"] = labelencoder_dataset.fit_transform(dataset["sku_code"])
dataset["material"] = labelencoder_dataset.fit_transform(dataset["material"])
dataset["box_code"] = labelencoder_dataset.fit_transform(dataset["box_code"])
dataset["bursting_factor"] = labelencoder_dataset.fit_transform(dataset["bursting_factor"])
dataset["flute_type"] = labelencoder_dataset.fit_transform(dataset["flute_type"])
dataset["updated_at"] = labelencoder_dataset.fit_transform(dataset["updated_at"])
dataset["company_id"] = labelencoder_dataset.fit_transform(dataset["company_id"])

dataset["inner_height_mm"] = list(map(lambda x: float(x.replace("\"", "" )), dataset["inner_height_mm"]))
dataset["height_inch"] = list(map(lambda x: float(x.replace("\"", "" )), dataset["height_inch"]))
dataset["inner_height"] = list(map(lambda x: float(x.replace("\"", "" )), dataset["inner_height"]))
dataset["length_inch"] = list(map(lambda x: float(x.replace("\"", "" )), dataset["length_inch"]))

modelData = np.array(dataset.iloc[:, 0:13]).astype('float32')
target = np.array(dataset.iloc[:, 13]).astype('float32')

from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
modelData = sc_X.fit_transform(modelData)

array([[1.000e+00, 1.000e+00, 1.500e+01, ..., 7.950e+02, 3.000e+00,
        0.000e+00],
       [1.000e+00, 1.000e+00, 1.500e+01, ..., 1.249e+04, 3.000e+00,
        0.000e+00],
       [2.100e+01, 0.000e+00, 1.600e+01, ..., 1.000e+01, 3.000e+00,
        0.000e+00],
       ...,
       [7.000e+00, 1.000e+00, 7.000e+00, ..., 1.000e+04, 1.000e+00,
        0.000e+00],
       [7.000e+00, 1.000e+00, 7.000e+00, ..., 2.000e+04, 1.000e+00,
        2.000e+00],
       [7.000e+00, 1.000e+00, 7.000e+00, ..., 2.000e+03, 3.000e+00,
        2.000e+00]], dtype=float32)

In [63]:
# Setup sagemaker
import boto3
import sagemaker
import io
import os
import sagemaker.amazon.common as smac

sess = sagemaker.Session()
bucket = "sagemaker-biz-demo"
prefix = "automatedpricing/test_results"

buf = io.BytesIO()
smac.write_numpy_to_dense_tensor(buf, modelData, target)
buf.seek(0)

key = 'linearlearner'
boto3.resource('s3').Bucket(bucket).Object(os.path.join(prefix, 'train', key)).upload_fileobj(buf)
s3_train_data = 's3://{}/{}/train/{}'.format(bucket, prefix, key)
print('uploaded training data location: {}'.format(s3_train_data))

output_location = 's3://{}/{}/output'.format(bucket, prefix)
print('training artifacts will be uploaded to: {}'.format(output_location))


In [64]:
#defining region
containers = {
              'ap-south-1': '991648021394.dkr.ecr.ap-south-1.amazonaws.com/linear-learner:latest'
              }

containers[boto3.Session().region_name]


uploaded training data location: s3://sagemaker-biz-demo/automatedpricing/test_results/train/linearlearner
training artifacts will be uploaded to: s3://sagemaker-biz-demo/automatedpricing/test_results/output


In [65]:
#setup execution 
from sagemaker import get_execution_role
role = get_execution_role()
linear = sagemaker.estimator.Estimator(containers[boto3.Session().region_name],
                                       role=role, 
                                       train_instance_count=1, 
                                       train_instance_type='ml.c4.xlarge',
                                       output_path=output_location,
                                       sagemaker_session=sess)



In [66]:
linear.set_hyperparameters(feature_dim=13,
                           predictor_type='regressor',
                           mini_batch_size=100,
                           normalize_data=False)

linear.fit({'train': s3_train_data})
linear_predictor = linear.deploy(initial_instance_count=1,
                                 instance_type='ml.c4.xlarge')


2019-07-07 07:30:01 Starting - Starting the training job...
2019-07-07 07:30:05 Starting - Launching requested ML instances......
2019-07-07 07:31:07 Starting - Preparing the instances for training......
2019-07-07 07:32:29 Downloading - Downloading input data
2019-07-07 07:32:29 Training - Downloading the training image..
[31mDocker entrypoint called with argument(s): train[0m
[31m[07/07/2019 07:32:44 INFO 139741542360896] Reading default configuration from /opt/amazon/lib/python2.7/site-packages/algorithm/resources/default-input.json: {u'loss_insensitivity': u'0.01', u'epochs': u'15', u'init_bias': u'0.0', u'lr_scheduler_factor': u'auto', u'num_calibration_samples': u'10000000', u'accuracy_top_k': u'3', u'_num_kv_servers': u'auto', u'use_bias': u'true', u'num_point_for_scaler': u'10000', u'_log_level': u'info', u'quantile': u'0.5', u'bias_lr_mult': u'auto', u'lr_scheduler_step': u'auto', u'init_method': u'uniform', u'init_sigma': u'0.01', u'lr_scheduler_minimum_lr': u'auto', u'tar


2019-07-07 07:32:54 Uploading - Uploading generated training model
2019-07-07 07:32:54 Completed - Training job completed
Billable seconds: 42
-------------------------------------------------------------------------------------!

In [67]:
#train data
linear.fit({'train': s3_train_data})
linear_predictor = linear.deploy(initial_instance_count=1,
                                 instance_type='ml.c4.xlarge')



{'predictions': [{'score': 19.28443145751953}]}


In [68]:
#setup prediction
from sagemaker.predictor import csv_serializer, json_deserializer
linear_predictor.content_type = 'text/csv'
linear_predictor.serializer = csv_serializer
linear_predictor.deserializer = json_deserializer

Unnamed: 0,sku_code,material,box_code,bursting_factor,flute_type,inner_height_mm,height_inch,inner_height,length_inch,updated_at,quantity,status,company_id,per_unit,Predicted
0,1,1,15,5,1,646.00,3.9,687.00,7.9,46,795,3,0,6.710000,19.284431
1,1,1,15,5,1,646.00,3.9,687.00,7.9,46,12490,3,0,7.100000,18.713291
2,21,0,16,0,2,300.00,11.8,730.00,28.7,29,10,3,0,63.430000,67.455147
3,22,0,0,0,2,300.00,11.8,730.00,28.7,30,2165,3,2,67.000000,62.297081
4,22,0,0,0,2,300.00,11.8,730.00,28.7,30,315,3,2,67.000000,62.387428
5,21,0,16,0,2,300.00,11.8,730.00,28.7,29,825,3,0,67.229947,67.415344
6,5,0,1,1,0,101.14,4.1,306.88,12.2,44,1000,4,0,6.930000,14.219876
7,5,0,1,1,0,101.14,4.1,306.88,12.2,44,1998,3,0,7.340000,14.140439
8,5,0,1,1,0,101.14,4.1,306.88,12.2,44,1000,1,0,6.930000,14.127779
9,5,0,1,1,0,101.14,4.1,306.88,12.2,44,1000,3,0,7.860000,14.189178


In [69]:
# Pass the first row of data to the predictor
result = linear_predictor.predict(modelData[0])
print(result)

predictions = []
for array in modelData:
    result = linear_predictor.predict(array)
    predictions += [r['score'] for r in result['predictions']]
predictions = np.array(predictions)


Unnamed: 0,sku_code,material,box_code,bursting_factor,flute_type,inner_height_mm,height_inch,inner_height,length_inch,updated_at,quantity,status,company_id,per_unit,Predicted
0,1,1,15,5,1,646.00,3.9,687.00,7.9,46,795,3,0,6.710000,19.284431
1,1,1,15,5,1,646.00,3.9,687.00,7.9,46,12490,3,0,7.100000,18.713291
2,21,0,16,0,2,300.00,11.8,730.00,28.7,29,10,3,0,63.430000,67.455147
3,22,0,0,0,2,300.00,11.8,730.00,28.7,30,2165,3,2,67.000000,62.297081
4,22,0,0,0,2,300.00,11.8,730.00,28.7,30,315,3,2,67.000000,62.387428
5,21,0,16,0,2,300.00,11.8,730.00,28.7,29,825,3,0,67.229947,67.415344
6,5,0,1,1,0,101.14,4.1,306.88,12.2,44,1000,4,0,6.930000,14.219876
7,5,0,1,1,0,101.14,4.1,306.88,12.2,44,1998,3,0,7.340000,14.140439
8,5,0,1,1,0,101.14,4.1,306.88,12.2,44,1000,1,0,6.930000,14.127779
9,5,0,1,1,0,101.14,4.1,306.88,12.2,44,1000,3,0,7.860000,14.189178


In [70]:
# Push into our pandas dataframe
dataset['Predicted'] = predictions.astype('float32')

In [71]:
dataset

Unnamed: 0,sku_code,material,box_code,bursting_factor,flute_type,inner_height_mm,height_inch,inner_height,length_inch,updated_at,quantity,status,company_id,per_unit,Predicted
0,1,1,15,5,1,646.00,3.9,687.00,7.9,46,795,3,0,6.710000,19.284431
1,1,1,15,5,1,646.00,3.9,687.00,7.9,46,12490,3,0,7.100000,18.713291
2,21,0,16,0,2,300.00,11.8,730.00,28.7,29,10,3,0,63.430000,67.455147
3,22,0,0,0,2,300.00,11.8,730.00,28.7,30,2165,3,2,67.000000,62.297081
4,22,0,0,0,2,300.00,11.8,730.00,28.7,30,315,3,2,67.000000,62.387428
5,21,0,16,0,2,300.00,11.8,730.00,28.7,29,825,3,0,67.229947,67.415344
6,5,0,1,1,0,101.14,4.1,306.88,12.2,44,1000,4,0,6.930000,14.219876
7,5,0,1,1,0,101.14,4.1,306.88,12.2,44,1998,3,0,7.340000,14.140439
8,5,0,1,1,0,101.14,4.1,306.88,12.2,44,1000,1,0,6.930000,14.127779
9,5,0,1,1,0,101.14,4.1,306.88,12.2,44,1000,3,0,7.860000,14.189178
