In [173]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
data_location = "s3://sagemaker-biz-demo/automatedpricing/data/base_prod.csv"
dataset = pd.read_csv(data_location)
# NOTE(review): if the CSV carries an unnamed leading index column, pass
# index_col=0 so it is not read as a feature -- confirm against the file.
# Preview the first rows only instead of rendering the whole frame.
dataset.head(10)

Unnamed: 0,sku_code,material,box_code,bursting_factor,flute_type,inner_height_mm,height_inch,inner_height,length_inch,updated_at,quantity,status,company_id,per_unit
0,BZ-SKU-0000027,"""Top paper golden Kraft rest brown Kraft""","""PC1""","""24x22x22x22x22""","""BB""","""646""","""3.9""","""687""","""7.9""",2018-11-01,795,3,4606,6.710000
1,BZ-SKU-0000027,"""Top paper golden Kraft rest brown Kraft""","""PC1""","""24x22x22x22x22""","""BB""","""646""","""3.9""","""687""","""7.9""",2018-11-01,12490,3,4606,7.100000
2,BZ-SKU-0004768,"""Paper""","""Regular 5P34""","""20x18x18x18x18""","""BC""","""300""","""11.8""","""730""","""28.7""",2018-02-07,10,3,4606,63.430000
3,BZ-SKU-0005542,"""Paper""","""5P34.2 Regular (3)""","""20x18x18x18x18""","""BC""","""300""","""11.8""","""730""","""28.7""",2018-02-07,2165,3,9508,67.000000
4,BZ-SKU-0005542,"""Paper""","""5P34.2 Regular (3)""","""20x18x18x18x18""","""BC""","""300""","""11.8""","""730""","""28.7""",2018-02-07,315,3,9508,67.000000
5,BZ-SKU-0004768,"""Paper""","""Regular 5P34""","""20x18x18x18x18""","""BC""","""300""","""11.8""","""730""","""28.7""",2018-02-07,825,3,4606,67.229947
6,BZ-SKU-0000068,"""Paper""","""A1""","""22x22x22""","""B""","""101.14""","""4.1""","""306.88""","""12.2""",2018-10-05,1000,4,4606,6.930000
7,BZ-SKU-0000068,"""Paper""","""A1""","""22x22x22""","""B""","""101.14""","""4.1""","""306.88""","""12.2""",2018-10-05,1998,3,4606,7.340000
8,BZ-SKU-0000068,"""Paper""","""A1""","""22x22x22""","""B""","""101.14""","""4.1""","""306.88""","""12.2""",2018-10-05,1000,1,4606,6.930000
9,BZ-SKU-0000068,"""Paper""","""A1""","""22x22x22""","""B""","""101.14""","""4.1""","""306.88""","""12.2""",2018-10-05,1000,3,4606,7.860000


In [174]:
#Data Processing
from datetime import date, datetime

from sklearn.preprocessing import LabelEncoder

# Identifier / free-text columns that are replaced by integer codes.
categorical_columns = ["sku_code", "material", "box_code",
                       "bursting_factor", "flute_type", "company_id"]
# Measurements stored as strings wrapped in literal double quotes.
quoted_numeric_columns = ["inner_height_mm", "height_inch",
                          "inner_height", "length_inch"]
# Model inputs, in the order they are written to the training matrix.
feature_columns = ["sku_code", "material", "box_code", "bursting_factor",
                   "flute_type", "inner_height_mm", "height_inch",
                   "inner_height", "length_inch", "updated_at", "quantity",
                   "status", "company_id"]
target_column = "per_unit"
DATE_FORMAT = "%Y-%m-%d"

# One encoder per column, kept in a dict so every mapping can be inverted or
# re-applied to new rows later. A single shared encoder only remembers the
# classes of the last column it was fitted on.
label_encoders = {}
for column in categorical_columns:
    encoder = LabelEncoder()
    dataset[column] = encoder.fit_transform(dataset[column])
    label_encoders[column] = encoder
# Legacy name: the original shared encoder ended up fitted on company_id.
labelencoder_dataset = label_encoders["company_id"]

# Remove the embedded quotes and convert to float. Going through str keeps the
# step safe to re-run on a column that has already been converted.
for column in quoted_numeric_columns:
    dataset[column] = (dataset[column].astype(str)
                       .str.replace('"', '', regex=False)
                       .astype(float))


def _parse_date(text):
    """Parse a 'YYYY-MM-DD' string into a datetime."""
    return datetime.strptime(text, DATE_FORMAT)


def day_of_week(text):
    """Return the weekday (Monday == 0) of a 'YYYY-MM-DD' string."""
    return _parse_date(text).weekday()


def month(text):
    """Return the month number (1-12) of a 'YYYY-MM-DD' string."""
    return _parse_date(text).month


def week_number(text):
    """Return the strftime('%V') week number of a 'YYYY-MM-DD' string.

    Please read the docs on how week numbers are calculated; '%V' support
    depends on the platform's strftime implementation.
    """
    return _parse_date(text).strftime('%V')


# Replace the date with its age in calendar years relative to today.
# NOTE(review): the value depends on the day this cell runs, so the same row
# gets a different feature next year -- confirm this is intended.
current_year = date.today().year
dataset['updated_at'] = current_year - dataset['updated_at'].map(
    lambda text: _parse_date(text).year)
year = current_year  # legacy name: the original cell left `year` as this int

#define modelData and target
# Select by column name so an added or reordered column cannot silently shift
# the feature/target split.
modelData = np.array(dataset[feature_columns]).astype('float32')
target = np.array(dataset[target_column]).astype('float32')
dataset.head(10)

Unnamed: 0,sku_code,material,box_code,bursting_factor,flute_type,inner_height_mm,height_inch,inner_height,length_inch,updated_at,quantity,status,company_id,per_unit
0,1,1,15,5,1,646.00,3.9,687.00,7.9,1,795,3,0,6.710000
1,1,1,15,5,1,646.00,3.9,687.00,7.9,1,12490,3,0,7.100000
2,21,0,16,0,2,300.00,11.8,730.00,28.7,1,10,3,0,63.430000
3,22,0,0,0,2,300.00,11.8,730.00,28.7,1,2165,3,2,67.000000
4,22,0,0,0,2,300.00,11.8,730.00,28.7,1,315,3,2,67.000000
5,21,0,16,0,2,300.00,11.8,730.00,28.7,1,825,3,0,67.229947
6,5,0,1,1,0,101.14,4.1,306.88,12.2,1,1000,4,0,6.930000
7,5,0,1,1,0,101.14,4.1,306.88,12.2,1,1998,3,0,7.340000
8,5,0,1,1,0,101.14,4.1,306.88,12.2,1,1000,1,0,6.930000
9,5,0,1,1,0,101.14,4.1,306.88,12.2,1,1000,3,0,7.860000


In [163]:
# Sagemaker setup
import boto3
import sagemaker
import io
import os
import sagemaker.amazon.common as smac

sess = sagemaker.Session()
bucket = "sagemaker-biz-demo"
prefix = "automatedpricing/test_results2"

# Serialise the feature matrix and its labels into an in-memory buffer.
buf = io.BytesIO()
smac.write_numpy_to_dense_tensor(buf, modelData, target)
buf.seek(0)  # rewind so the upload starts at the first byte

key = 'linearlearner'
# S3 keys always use '/', so the key is formatted directly rather than with
# os.path.join (which follows the host OS separator). It is built once so the
# upload target and the reported URI cannot drift apart.
train_key = '{}/train/{}'.format(prefix, key)
boto3.resource('s3').Bucket(bucket).Object(train_key).upload_fileobj(buf)
s3_train_data = 's3://{}/{}'.format(bucket, train_key)
print('uploaded training data location: {}'.format(s3_train_data))

output_location = 's3://{}/{}/output'.format(bucket, prefix)
print('training artifacts will be uploaded to: {}'.format(output_location))


uploaded training data location: s3://sagemaker-biz-demo/automatedpricing/test_results2/train/linearlearner
training artifacts will be uploaded to: s3://sagemaker-biz-demo/automatedpricing/test_results2/output


In [164]:
# Training image URI for the linear-learner algorithm, keyed by AWS region name.
containers = {}
containers['ap-south-1'] = '991648021394.dkr.ecr.ap-south-1.amazonaws.com/linear-learner:latest'


In [165]:
# Show the image registered for the region of the default boto3 session.
active_region = boto3.Session().region_name
containers[active_region]

'991648021394.dkr.ecr.ap-south-1.amazonaws.com/linear-learner:latest'

In [166]:
#Execution setup
from sagemaker import get_execution_role

# Role handed to the estimator, as returned by get_execution_role().
role = get_execution_role()

# Image for the current region, then the remaining settings as keywords.
training_image = containers[boto3.Session().region_name]
estimator_options = {
    'role': role,
    'train_instance_count': 1,
    'train_instance_type': 'ml.c4.xlarge',
    'output_path': output_location,
    'sagemaker_session': sess,
}
linear = sagemaker.estimator.Estimator(training_image, **estimator_options)

In [167]:
#Hyperparameter setup
# feature_dim has to equal the number of columns in the training matrix, so it
# is read from modelData instead of being hard-coded.
# NOTE(review): normalize_data=False is kept as-is, but the features span very
# different magnitudes (label codes vs. quantities in the thousands) and the
# saved predictions are far from per_unit -- consider enabling it; confirm.
linear.set_hyperparameters(feature_dim=modelData.shape[1],
                           predictor_type='regressor',
                           mini_batch_size=100,
                           normalize_data=False)


In [168]:
# Train on the uploaded 'train' channel, then deploy the fitted model and keep
# the returned predictor for the cells below.
training_channels = {'train': s3_train_data}
linear.fit(training_channels)

hosting_options = dict(initial_instance_count=1, instance_type='ml.c4.xlarge')
linear_predictor = linear.deploy(**hosting_options)

2019-07-09 12:51:52 Starting - Starting the training job...
2019-07-09 12:51:58 Starting - Launching requested ML instances.........
2019-07-09 12:53:27 Starting - Preparing the instances for training...
2019-07-09 12:54:19 Downloading - Downloading input data......
2019-07-09 12:55:15 Training - Training image download completed. Training in progress.
[31mDocker entrypoint called with argument(s): train[0m
[31m[07/09/2019 12:55:19 INFO 140213383800640] Reading default configuration from /opt/amazon/lib/python2.7/site-packages/algorithm/resources/default-input.json: {u'loss_insensitivity': u'0.01', u'epochs': u'15', u'init_bias': u'0.0', u'lr_scheduler_factor': u'auto', u'num_calibration_samples': u'10000000', u'accuracy_top_k': u'3', u'_num_kv_servers': u'auto', u'use_bias': u'true', u'num_point_for_scaler': u'10000', u'_log_level': u'info', u'quantile': u'0.5', u'bias_lr_mult': u'auto', u'lr_scheduler_step': u'auto', u'init_method': u'uniform', u'init_sigma': u'0.01', u'lr_schedul

[31m[2019-07-09 12:55:19.797] [tensorio] [info] epoch_stats={"data_pipeline": "/opt/ml/input/data/train", "epoch": 8, "duration": 172, "num_examples": 9, "num_bytes": 85824}[0m
[31m#metrics {"Metrics": {"train_mse_objective": {"count": 1, "max": 793.0788537597656, "sum": 793.0788537597656, "min": 793.0788537597656}}, "EndTime": 1562676919.798098, "Dimensions": {"model": 0, "Host": "algo-1", "Operation": "training", "Algorithm": "Linear Learner", "epoch": 2}, "StartTime": 1562676919.798005}
[0m
[31m#metrics {"Metrics": {"train_mse_objective": {"count": 1, "max": 7614.258349456787, "sum": 7614.258349456787, "min": 7614.258349456787}}, "EndTime": 1562676919.798196, "Dimensions": {"model": 1, "Host": "algo-1", "Operation": "training", "Algorithm": "Linear Learner", "epoch": 2}, "StartTime": 1562676919.798175}
[0m
[31m#metrics {"Metrics": {"train_mse_objective": {"count": 1, "max": 7441.1615234375, "sum": 7441.1615234375, "min": 7441.1615234375}}, "EndTime": 1562676919.798256, "Dimen


2019-07-09 12:55:33 Uploading - Uploading generated training model
2019-07-09 12:55:33 Completed - Training job completed
Billable seconds: 75
--------------------------------------------------------------------------------------!

In [169]:
#  Prediction Set up
from sagemaker.predictor import csv_serializer, json_deserializer

# Requests go out as CSV; responses are decoded with the JSON deserializer.
predictor_settings = (
    ('content_type', 'text/csv'),
    ('serializer', csv_serializer),
    ('deserializer', json_deserializer),
)
for attribute, value in predictor_settings:
    setattr(linear_predictor, attribute, value)


In [170]:
# Pass the first row of data to the predictor
# Smoke test: a single feature row should come back with one score.
first_row = modelData[0]
result = linear_predictor.predict(first_row)
print(result)


{'predictions': [{'score': 110.3026351928711}]}


In [171]:
# Score every row of modelData and store the result next to the inputs.
# Rows are sent in batches (the CSV serializer writes one line per row of a
# 2-D array) instead of one HTTP request per row.
# NOTE(review): these are the same rows the model was trained on, so the scores
# say nothing about generalisation -- hold out a test set to evaluate.
PREDICT_BATCH_ROWS = 500  # rows per request; keeps each payload small
num_batches = max(1, int(np.ceil(len(modelData) / float(PREDICT_BATCH_ROWS))))

predictions = []
for batch in np.array_split(modelData, num_batches):
    if len(batch) == 0:
        continue  # nothing to score (only happens when modelData is empty)
    result = linear_predictor.predict(batch)
    predictions.extend(r['score'] for r in result['predictions'])
predictions = np.array(predictions)
assert len(predictions) == len(dataset), "expected one prediction per row"
# Push into our pandas dataframe
dataset['Predicted'] = predictions.astype('float32')
# Reminder: the endpoint keeps running after this cell; delete it when done,
# e.g. linear_predictor.delete_endpoint().

In [172]:
# Preview the first rows, including the 'Predicted' column, instead of
# rendering the whole frame.
dataset.head(10)

Unnamed: 0,sku_code,material,box_code,bursting_factor,flute_type,inner_height_mm,height_inch,inner_height,length_inch,updated_at,quantity,status,company_id,per_unit,year,Predicted
0,1,1,15,5,1,646.00,3.9,687.00,7.9,1,795,3,0,6.710000,2018,110.302635
1,1,1,15,5,1,646.00,3.9,687.00,7.9,1,12490,3,0,7.100000,2018,95.907799
2,21,0,16,0,2,300.00,11.8,730.00,28.7,1,10,3,0,63.430000,2018,31.560587
3,22,0,0,0,2,300.00,11.8,730.00,28.7,1,2165,3,2,67.000000,2018,31.038671
4,22,0,0,0,2,300.00,11.8,730.00,28.7,1,315,3,2,67.000000,2018,33.315758
5,21,0,16,0,2,300.00,11.8,730.00,28.7,1,825,3,0,67.229947,2018,30.557444
6,5,0,1,1,0,101.14,4.1,306.88,12.2,1,1000,4,0,6.930000,2018,11.918482
7,5,0,1,1,0,101.14,4.1,306.88,12.2,1,1998,3,0,7.340000,2018,9.833929
8,5,0,1,1,0,101.14,4.1,306.88,12.2,1,1000,1,0,6.930000,2018,9.350004
9,5,0,1,1,0,101.14,4.1,306.88,12.2,1,1000,3,0,7.860000,2018,11.062323


In [155]:
# NOTE(review): duplicate display left over from an earlier run -- its
# execution count (155) is lower than every other cell's and its saved output
# shows the un-encoded columns. Restart the kernel and run all cells before
# relying on it.
dataset.head(10)

Unnamed: 0,sku_code,material,box_code,bursting_factor,flute_type,inner_height_mm,height_inch,inner_height,length_inch,updated_at,quantity,status,company_id,per_unit,Predicted
0,BZ-SKU-0000027,"""Top paper golden Kraft rest brown Kraft""","""PC1""","""24x22x22x22x22""","""BB""","""646""","""3.9""","""687""","""7.9""",2018-11-01,795,3,4606,6.710000,110.302635
1,BZ-SKU-0000027,"""Top paper golden Kraft rest brown Kraft""","""PC1""","""24x22x22x22x22""","""BB""","""646""","""3.9""","""687""","""7.9""",2018-11-01,12490,3,4606,7.100000,95.907799
2,BZ-SKU-0004768,"""Paper""","""Regular 5P34""","""20x18x18x18x18""","""BC""","""300""","""11.8""","""730""","""28.7""",2018-02-07,10,3,4606,63.430000,31.560587
3,BZ-SKU-0005542,"""Paper""","""5P34.2 Regular (3)""","""20x18x18x18x18""","""BC""","""300""","""11.8""","""730""","""28.7""",2018-02-07,2165,3,9508,67.000000,31.038671
4,BZ-SKU-0005542,"""Paper""","""5P34.2 Regular (3)""","""20x18x18x18x18""","""BC""","""300""","""11.8""","""730""","""28.7""",2018-02-07,315,3,9508,67.000000,33.315758
5,BZ-SKU-0004768,"""Paper""","""Regular 5P34""","""20x18x18x18x18""","""BC""","""300""","""11.8""","""730""","""28.7""",2018-02-07,825,3,4606,67.229947,30.557444
6,BZ-SKU-0000068,"""Paper""","""A1""","""22x22x22""","""B""","""101.14""","""4.1""","""306.88""","""12.2""",2018-10-05,1000,4,4606,6.930000,11.918482
7,BZ-SKU-0000068,"""Paper""","""A1""","""22x22x22""","""B""","""101.14""","""4.1""","""306.88""","""12.2""",2018-10-05,1998,3,4606,7.340000,9.833929
8,BZ-SKU-0000068,"""Paper""","""A1""","""22x22x22""","""B""","""101.14""","""4.1""","""306.88""","""12.2""",2018-10-05,1000,1,4606,6.930000,9.350004
9,BZ-SKU-0000068,"""Paper""","""A1""","""22x22x22""","""B""","""101.14""","""4.1""","""306.88""","""12.2""",2018-10-05,1000,3,4606,7.860000,11.062323
