In [1]:
import time
import numpy as np
import pandas as pd
import json
import matplotlib.pyplot as plt
import datetime


# This code is derived from AWS SageMaker Samples:
# https://github.com/awslabs/amazon-sagemaker-examples/tree/master/introduction_to_amazon_algorithms/deepar_electricity
# https://github.com/awslabs/amazon-sagemaker-examples/tree/master/introduction_to_amazon_algorithms/deepar_synthetic

In [2]:
import boto3
import sagemaker
from sagemaker import get_execution_role

In [3]:
# A descriptive base job name makes the trained models and endpoints of each
# model variant easy to tell apart.
with_categories = False
base_job_name = (
    'deepar-biketrain-with-categories'
    if with_categories
    else 'deepar-biketrain-no-categories'
)

In [4]:
# S3 bucket and key prefix used by this notebook (replace the bucket with your own)
bucket = 'chandra-ml-sagemaker'
prefix = 'deepar/bikerental'

# Each model variant keeps its data in its own folder, which allows multiple
# training and test files for model development and testing.
s3_data_path = "/".join(
    [bucket, prefix, "data_with_categories" if with_categories else "data"]
)

# Location handed to the estimator as its output path
s3_output_path = "/".join([bucket, prefix, "output"])

In [5]:
s3_data_path,s3_output_path

('chandra-ml-sagemaker/deepar/bikerental/data',
 'chandra-ml-sagemaker/deepar/bikerental/output')

In [6]:
# In S3, the file name is referred to as the key name
# Files stored in S3 are automatically replicated across
# three different availability zones in the region where the bucket was created.
# http://boto3.readthedocs.io/en/latest/guide/s3.html
def write_to_s3(filename, bucket, key):
    """Upload the local file ``filename`` to the S3 bucket ``bucket`` as object ``key``.

    The file is opened in binary mode and handed to ``upload_fileobj``;
    whatever that call returns is passed back to the caller.
    """
    with open(filename, 'rb') as source_file:
        s3 = boto3.Session().resource('s3')
        target = s3.Bucket(bucket).Object(key)
        return target.upload_fileobj(source_file)

In [7]:
# Upload the training and test files to S3.
# The object keys are built from `prefix` instead of repeating the literal
# 'deepar/bikerental', so they follow the same
# <prefix>/<data folder>/<train|test>/ layout as `s3_data_path`. Changing
# `prefix` can therefore no longer send the uploads to a different location
# than the one the training channels are read from.
if with_categories:
    data_folder, file_suffix = 'data_with_categories', '_with_categories'
else:
    data_folder, file_suffix = 'data', ''

for split in ('train', 'test'):
    local_file = '{}{}.json'.format(split, file_suffix)
    s3_key = '{}/{}/{}/{}'.format(prefix, data_folder, split, local_file)
    write_to_s3(local_file, bucket, s3_key)

In [8]:
# Session object passed to the estimator and used later to create the endpoint
sagemaker_session = sagemaker.Session()
# Execution role handed to both the training job and the endpoint below
role = get_execution_role()

In [9]:
# We no longer have to maintain a mapping of container images by region
# Simply use the convenience method provided by sagemaker
# https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-algo-docker-registry-paths.html
# NOTE(review): get_image_uri appears to be the SageMaker SDK v1 helper; newer SDK
# versions are expected to use sagemaker.image_uris.retrieve instead - confirm
# against the installed SDK version before upgrading.
from sagemaker.amazon.amazon_estimator import get_image_uri
# Look up the DeepAR ('forecasting-deepar') container image for the region of the current boto3 session
image_name = get_image_uri(boto3.Session().region_name, 'forecasting-deepar')

In [10]:
image_name

'522234722520.dkr.ecr.us-east-1.amazonaws.com/forecasting-deepar:1'

In [11]:
# The time series holds hourly data and the goal is an hourly rental-count forecast
freq = 'H'

# Forecast horizon, i.e. how far into the future predictions can be made:
# 12 days of hourly values
prediction_length = 12 * 24

# AWS recommends starting with a context equal to the prediction length.
# The context controls how far into the past the network can see.
context_length = prediction_length

In [12]:
# Check Free Tier (if you are still under free-tier)
# At this time, m4.xlarge is offered as part of 2 months free tier
# https://aws.amazon.com/sagemaker/pricing/
# If you are outside of free-tier, you can also use ml.m5.xlarge  (newer generation instance)
# In this example, I am using ml.m5.xlarge for training
# Estimator for the DeepAR container: a single ml.m5.xlarge training instance,
# with the S3 output location given as an s3:// URI.
estimator = sagemaker.estimator.Estimator(
    image_name=image_name,
    role=role,
    train_instance_type='ml.m5.xlarge',
    train_instance_count=1,
    output_path="s3://{}".format(s3_output_path),
    base_job_name=base_job_name,
    sagemaker_session=sagemaker_session,
)

In [13]:
freq, context_length, prediction_length

('H', 288, 288)

In [14]:
# https://docs.aws.amazon.com/sagemaker/latest/dg/deepar_hyperparameters.html
# All values are given as strings; the two lengths come from the settings chosen earlier.
hyperparameters = dict(
    time_freq=freq,
    epochs="400",
    early_stopping_patience="40",
    mini_batch_size="64",
    learning_rate="5E-4",
    context_length=str(context_length),
    prediction_length=str(prediction_length),
)
# "auto" is requested only when the data set carries categories; otherwise the value is left empty
hyperparameters["cardinality"] = "auto" if with_categories else ''

In [15]:
hyperparameters

{'time_freq': 'H',
 'epochs': '400',
 'early_stopping_patience': '40',
 'mini_batch_size': '64',
 'learning_rate': '5E-4',
 'context_length': '288',
 'prediction_length': '288',
 'cardinality': ''}

In [16]:
# Apply the hyperparameter dictionary defined above to the estimator
estimator.set_hyperparameters(**hyperparameters)

In [17]:
# Each channel points at an S3 folder rather than a single file.
# A folder may hold multiple files; SageMaker will use all of them.
data_channels = {
    channel: "s3://{}/{}/".format(s3_data_path, channel)
    for channel in ("train", "test")
}

In [18]:
data_channels

{'train': 's3://chandra-ml-sagemaker/deepar/bikerental/data/train/',
 'test': 's3://chandra-ml-sagemaker/deepar/bikerental/data/test/'}

In [19]:
# Train the model on the train/test channels.
# In the run logged below this took roughly 40 minutes (22:12 -> 22:53)
# on the ml.m5.xlarge instance configured for the estimator.
estimator.fit(inputs=data_channels)

2020-04-23 22:12:59 Starting - Starting the training job...
2020-04-23 22:13:01 Starting - Launching requested ML instances......
2020-04-23 22:14:05 Starting - Preparing the instances for training......
2020-04-23 22:15:05 Downloading - Downloading input data...
2020-04-23 22:15:52 Training - Training image download completed. Training in progress..[34mArguments: train[0m
[34m[04/23/2020 22:15:55 INFO 139961471010624] Reading default configuration from /opt/amazon/lib/python2.7/site-packages/algorithm/resources/default-input.json: {u'num_dynamic_feat': u'auto', u'dropout_rate': u'0.10', u'mini_batch_size': u'128', u'test_quantiles': u'[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]', u'_tuning_objective_metric': u'', u'_num_gpus': u'auto', u'num_eval_samples': u'100', u'learning_rate': u'0.001', u'num_cells': u'40', u'num_layers': u'2', u'embedding_dimension': u'10', u'_kvstore': u'auto', u'_num_kv_servers': u'auto', u'cardinality': u'auto', u'likelihood': u'student-t', u'early_stopp

[34m[04/23/2020 22:17:00 INFO 139961471010624] Epoch[2] Batch[0] avg_epoch_loss=3.544548[0m
[34m[04/23/2020 22:17:00 INFO 139961471010624] #quality_metric: host=algo-1, epoch=2, batch=0 train loss <loss>=3.54454803467[0m
[34m[04/23/2020 22:17:05 INFO 139961471010624] Epoch[2] Batch[5] avg_epoch_loss=3.488328[0m
[34m[04/23/2020 22:17:05 INFO 139961471010624] #quality_metric: host=algo-1, epoch=2, batch=5 train loss <loss>=3.48832829793[0m
[34m[04/23/2020 22:17:05 INFO 139961471010624] Epoch[2] Batch [5]#011Speed: 60.72 samples/sec#011loss=3.488328[0m
[34m[04/23/2020 22:17:09 INFO 139961471010624] processed a total of 636 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 14779.155015945435, "sum": 14779.155015945435, "min": 14779.155015945435}}, "EndTime": 1587680229.646922, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1587680214.867705}
[0m
[34m[04/23/2020 22:17:09 INFO 139961471010624] #throughp

[34m[04/23/2020 22:18:18 INFO 139961471010624] Epoch[7] Batch[0] avg_epoch_loss=3.071953[0m
[34m[04/23/2020 22:18:18 INFO 139961471010624] #quality_metric: host=algo-1, epoch=7, batch=0 train loss <loss>=3.07195281982[0m
[34m[04/23/2020 22:18:23 INFO 139961471010624] Epoch[7] Batch[5] avg_epoch_loss=3.048494[0m
[34m[04/23/2020 22:18:23 INFO 139961471010624] #quality_metric: host=algo-1, epoch=7, batch=5 train loss <loss>=3.0484940211[0m
[34m[04/23/2020 22:18:23 INFO 139961471010624] Epoch[7] Batch [5]#011Speed: 61.60 samples/sec#011loss=3.048494[0m
[34m[04/23/2020 22:18:27 INFO 139961471010624] processed a total of 604 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 14693.672895431519, "sum": 14693.672895431519, "min": 14693.672895431519}}, "EndTime": 1587680307.504144, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1587680292.810413}
[0m
[34m[04/23/2020 22:18:27 INFO 139961471010624] #throughpu

[34m[04/23/2020 22:19:38 INFO 139961471010624] Epoch[12] Batch[5] avg_epoch_loss=2.906964[0m
[34m[04/23/2020 22:19:38 INFO 139961471010624] #quality_metric: host=algo-1, epoch=12, batch=5 train loss <loss>=2.90696430206[0m
[34m[04/23/2020 22:19:38 INFO 139961471010624] Epoch[12] Batch [5]#011Speed: 60.95 samples/sec#011loss=2.906964[0m
[34m[04/23/2020 22:19:43 INFO 139961471010624] processed a total of 625 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 14785.629987716675, "sum": 14785.629987716675, "min": 14785.629987716675}}, "EndTime": 1587680383.122089, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1587680368.336399}
[0m
[34m[04/23/2020 22:19:43 INFO 139961471010624] #throughput_metric: host=algo-1, train throughput=42.2703382143 records/second[0m
[34m[04/23/2020 22:19:43 INFO 139961471010624] #progress_metric: host=algo-1, completed 3 % of epochs[0m
[34m[04/23/2020 22:19:43 INFO 1399614710

[34m[04/23/2020 22:20:53 INFO 139961471010624] Epoch[17] Batch[5] avg_epoch_loss=2.853821[0m
[34m[04/23/2020 22:20:53 INFO 139961471010624] #quality_metric: host=algo-1, epoch=17, batch=5 train loss <loss>=2.85382080078[0m
[34m[04/23/2020 22:20:53 INFO 139961471010624] Epoch[17] Batch [5]#011Speed: 61.79 samples/sec#011loss=2.853821[0m
[34m[04/23/2020 22:20:57 INFO 139961471010624] processed a total of 616 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 14636.117935180664, "sum": 14636.117935180664, "min": 14636.117935180664}}, "EndTime": 1587680457.29685, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1587680442.660674}
[0m
[34m[04/23/2020 22:20:57 INFO 139961471010624] #throughput_metric: host=algo-1, train throughput=42.0873894974 records/second[0m
[34m[04/23/2020 22:20:57 INFO 139961471010624] #progress_metric: host=algo-1, completed 4 % of epochs[0m
[34m[04/23/2020 22:20:57 INFO 13996147101

[34m[04/23/2020 22:22:00 INFO 139961471010624] Epoch[21] Batch[10] avg_epoch_loss=2.829593[0m
[34m[04/23/2020 22:22:00 INFO 139961471010624] #quality_metric: host=algo-1, epoch=21, batch=10 train loss <loss>=2.81509947777[0m
[34m[04/23/2020 22:22:00 INFO 139961471010624] Epoch[21] Batch [10]#011Speed: 60.90 samples/sec#011loss=2.815099[0m
[34m[04/23/2020 22:22:00 INFO 139961471010624] processed a total of 653 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 15815.82498550415, "sum": 15815.82498550415, "min": 15815.82498550415}}, "EndTime": 1587680520.541405, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1587680504.725526}
[0m
[34m[04/23/2020 22:22:00 INFO 139961471010624] #throughput_metric: host=algo-1, train throughput=41.2874110865 records/second[0m
[34m[04/23/2020 22:22:00 INFO 139961471010624] #progress_metric: host=algo-1, completed 5 % of epochs[0m
[34m[04/23/2020 22:22:00 INFO 1399614710

[34m[04/23/2020 22:23:22 INFO 139961471010624] Epoch[27] Batch[0] avg_epoch_loss=2.725409[0m
[34m[04/23/2020 22:23:22 INFO 139961471010624] #quality_metric: host=algo-1, epoch=27, batch=0 train loss <loss>=2.72540926933[0m
[34m[04/23/2020 22:23:27 INFO 139961471010624] Epoch[27] Batch[5] avg_epoch_loss=2.717329[0m
[34m[04/23/2020 22:23:27 INFO 139961471010624] #quality_metric: host=algo-1, epoch=27, batch=5 train loss <loss>=2.71732902527[0m
[34m[04/23/2020 22:23:27 INFO 139961471010624] Epoch[27] Batch [5]#011Speed: 61.34 samples/sec#011loss=2.717329[0m
[34m[04/23/2020 22:23:31 INFO 139961471010624] processed a total of 626 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 14726.186990737915, "sum": 14726.186990737915, "min": 14726.186990737915}}, "EndTime": 1587680611.579872, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1587680596.853336}
[0m
[34m[04/23/2020 22:23:31 INFO 139961471010624] #thr

[34m[04/23/2020 22:24:53 INFO 139961471010624] Epoch[33] Batch[0] avg_epoch_loss=2.544800[0m
[34m[04/23/2020 22:24:53 INFO 139961471010624] #quality_metric: host=algo-1, epoch=33, batch=0 train loss <loss>=2.54480004311[0m
[34m[04/23/2020 22:24:58 INFO 139961471010624] Epoch[33] Batch[5] avg_epoch_loss=2.637818[0m
[34m[04/23/2020 22:24:58 INFO 139961471010624] #quality_metric: host=algo-1, epoch=33, batch=5 train loss <loss>=2.63781793912[0m
[34m[04/23/2020 22:24:58 INFO 139961471010624] Epoch[33] Batch [5]#011Speed: 61.39 samples/sec#011loss=2.637818[0m
[34m[04/23/2020 22:25:04 INFO 139961471010624] Epoch[33] Batch[10] avg_epoch_loss=2.645751[0m
[34m[04/23/2020 22:25:04 INFO 139961471010624] #quality_metric: host=algo-1, epoch=33, batch=10 train loss <loss>=2.65526957512[0m
[34m[04/23/2020 22:25:04 INFO 139961471010624] Epoch[33] Batch [10]#011Speed: 60.55 samples/sec#011loss=2.655270[0m
[34m[04/23/2020 22:25:04 INFO 139961471010624] processed a total of 683 examples

[34m[04/23/2020 22:26:11 INFO 139961471010624] Epoch[38] Batch[0] avg_epoch_loss=2.733540[0m
[34m[04/23/2020 22:26:11 INFO 139961471010624] #quality_metric: host=algo-1, epoch=38, batch=0 train loss <loss>=2.73354029655[0m
[34m[04/23/2020 22:26:16 INFO 139961471010624] Epoch[38] Batch[5] avg_epoch_loss=2.666913[0m
[34m[04/23/2020 22:26:16 INFO 139961471010624] #quality_metric: host=algo-1, epoch=38, batch=5 train loss <loss>=2.6669126749[0m
[34m[04/23/2020 22:26:16 INFO 139961471010624] Epoch[38] Batch [5]#011Speed: 61.19 samples/sec#011loss=2.666913[0m
[34m[04/23/2020 22:26:22 INFO 139961471010624] Epoch[38] Batch[10] avg_epoch_loss=2.702325[0m
[34m[04/23/2020 22:26:22 INFO 139961471010624] #quality_metric: host=algo-1, epoch=38, batch=10 train loss <loss>=2.74482073784[0m
[34m[04/23/2020 22:26:22 INFO 139961471010624] Epoch[38] Batch [10]#011Speed: 60.65 samples/sec#011loss=2.744821[0m
[34m[04/23/2020 22:26:22 INFO 139961471010624] processed a total of 643 examples[

[34m[04/23/2020 22:27:44 INFO 139961471010624] Epoch[44] Batch[0] avg_epoch_loss=2.737001[0m
[34m[04/23/2020 22:27:44 INFO 139961471010624] #quality_metric: host=algo-1, epoch=44, batch=0 train loss <loss>=2.73700118065[0m
[34m[04/23/2020 22:27:49 INFO 139961471010624] Epoch[44] Batch[5] avg_epoch_loss=2.636045[0m
[34m[04/23/2020 22:27:49 INFO 139961471010624] #quality_metric: host=algo-1, epoch=44, batch=5 train loss <loss>=2.63604545593[0m
[34m[04/23/2020 22:27:49 INFO 139961471010624] Epoch[44] Batch [5]#011Speed: 61.15 samples/sec#011loss=2.636045[0m
[34m[04/23/2020 22:27:55 INFO 139961471010624] Epoch[44] Batch[10] avg_epoch_loss=2.576937[0m
[34m[04/23/2020 22:27:55 INFO 139961471010624] #quality_metric: host=algo-1, epoch=44, batch=10 train loss <loss>=2.50600771904[0m
[34m[04/23/2020 22:27:55 INFO 139961471010624] Epoch[44] Batch [10]#011Speed: 60.96 samples/sec#011loss=2.506008[0m
[34m[04/23/2020 22:27:55 INFO 139961471010624] processed a total of 658 examples

[34m[04/23/2020 22:29:15 INFO 139961471010624] Epoch[50] Batch[0] avg_epoch_loss=2.571643[0m
[34m[04/23/2020 22:29:15 INFO 139961471010624] #quality_metric: host=algo-1, epoch=50, batch=0 train loss <loss>=2.57164263725[0m
[34m[04/23/2020 22:29:20 INFO 139961471010624] Epoch[50] Batch[5] avg_epoch_loss=2.649390[0m
[34m[04/23/2020 22:29:20 INFO 139961471010624] #quality_metric: host=algo-1, epoch=50, batch=5 train loss <loss>=2.64938950539[0m
[34m[04/23/2020 22:29:20 INFO 139961471010624] Epoch[50] Batch [5]#011Speed: 61.82 samples/sec#011loss=2.649390[0m
[34m[04/23/2020 22:29:26 INFO 139961471010624] Epoch[50] Batch[10] avg_epoch_loss=2.657359[0m
[34m[04/23/2020 22:29:26 INFO 139961471010624] #quality_metric: host=algo-1, epoch=50, batch=10 train loss <loss>=2.66692142487[0m
[34m[04/23/2020 22:29:26 INFO 139961471010624] Epoch[50] Batch [10]#011Speed: 60.88 samples/sec#011loss=2.666921[0m
[34m[04/23/2020 22:29:26 INFO 139961471010624] processed a total of 668 examples

[34m[04/23/2020 22:30:47 INFO 139961471010624] Epoch[56] Batch[0] avg_epoch_loss=2.640340[0m
[34m[04/23/2020 22:30:47 INFO 139961471010624] #quality_metric: host=algo-1, epoch=56, batch=0 train loss <loss>=2.64033961296[0m
[34m[04/23/2020 22:30:52 INFO 139961471010624] Epoch[56] Batch[5] avg_epoch_loss=2.608908[0m
[34m[04/23/2020 22:30:52 INFO 139961471010624] #quality_metric: host=algo-1, epoch=56, batch=5 train loss <loss>=2.60890809695[0m
[34m[04/23/2020 22:30:52 INFO 139961471010624] Epoch[56] Batch [5]#011Speed: 61.93 samples/sec#011loss=2.608908[0m
[34m[04/23/2020 22:30:56 INFO 139961471010624] processed a total of 611 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 14615.293979644775, "sum": 14615.293979644775, "min": 14615.293979644775}}, "EndTime": 1587681056.588921, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1587681041.973091}
[0m
[34m[04/23/2020 22:30:56 INFO 139961471010624] #thr

[34m[04/23/2020 22:32:08 INFO 139961471010624] Epoch[61] Batch[5] avg_epoch_loss=2.612033[0m
[34m[04/23/2020 22:32:08 INFO 139961471010624] #quality_metric: host=algo-1, epoch=61, batch=5 train loss <loss>=2.61203320821[0m
[34m[04/23/2020 22:32:08 INFO 139961471010624] Epoch[61] Batch [5]#011Speed: 61.47 samples/sec#011loss=2.612033[0m
[34m[04/23/2020 22:32:12 INFO 139961471010624] processed a total of 620 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 14738.193988800049, "sum": 14738.193988800049, "min": 14738.193988800049}}, "EndTime": 1587681132.648081, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1587681117.909355}
[0m
[34m[04/23/2020 22:32:12 INFO 139961471010624] #throughput_metric: host=algo-1, train throughput=42.0664793342 records/second[0m
[34m[04/23/2020 22:32:12 INFO 139961471010624] #progress_metric: host=algo-1, completed 15 % of epochs[0m
[34m[04/23/2020 22:32:12 INFO 139961471

[34m[04/23/2020 22:33:19 INFO 139961471010624] Epoch[66] Batch[0] avg_epoch_loss=2.580717[0m
[34m[04/23/2020 22:33:19 INFO 139961471010624] #quality_metric: host=algo-1, epoch=66, batch=0 train loss <loss>=2.58071660995[0m
[34m[04/23/2020 22:33:24 INFO 139961471010624] Epoch[66] Batch[5] avg_epoch_loss=2.566659[0m
[34m[04/23/2020 22:33:24 INFO 139961471010624] #quality_metric: host=algo-1, epoch=66, batch=5 train loss <loss>=2.5666590929[0m
[34m[04/23/2020 22:33:24 INFO 139961471010624] Epoch[66] Batch [5]#011Speed: 61.88 samples/sec#011loss=2.566659[0m
[34m[04/23/2020 22:33:28 INFO 139961471010624] processed a total of 634 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 14680.252075195312, "sum": 14680.252075195312, "min": 14680.252075195312}}, "EndTime": 1587681208.703015, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1587681194.0227}
[0m
[34m[04/23/2020 22:33:28 INFO 139961471010624] #throug

[34m[04/23/2020 22:34:48 INFO 139961471010624] Epoch[72] Batch[0] avg_epoch_loss=2.656508[0m
[34m[04/23/2020 22:34:48 INFO 139961471010624] #quality_metric: host=algo-1, epoch=72, batch=0 train loss <loss>=2.65650844574[0m
[34m[04/23/2020 22:34:53 INFO 139961471010624] Epoch[72] Batch[5] avg_epoch_loss=2.557988[0m
[34m[04/23/2020 22:34:53 INFO 139961471010624] #quality_metric: host=algo-1, epoch=72, batch=5 train loss <loss>=2.55798840523[0m
[34m[04/23/2020 22:34:53 INFO 139961471010624] Epoch[72] Batch [5]#011Speed: 61.45 samples/sec#011loss=2.557988[0m
[34m[04/23/2020 22:34:57 INFO 139961471010624] processed a total of 597 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 14669.08597946167, "sum": 14669.08597946167, "min": 14669.08597946167}}, "EndTime": 1587681297.832421, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1587681283.162891}
[0m
[34m[04/23/2020 22:34:57 INFO 139961471010624] #throug

[34m[04/23/2020 22:36:20 INFO 139961471010624] Epoch[78] Batch[0] avg_epoch_loss=2.512269[0m
[34m[04/23/2020 22:36:20 INFO 139961471010624] #quality_metric: host=algo-1, epoch=78, batch=0 train loss <loss>=2.51226949692[0m
[34m[04/23/2020 22:36:26 INFO 139961471010624] Epoch[78] Batch[5] avg_epoch_loss=2.513519[0m
[34m[04/23/2020 22:36:26 INFO 139961471010624] #quality_metric: host=algo-1, epoch=78, batch=5 train loss <loss>=2.51351877054[0m
[34m[04/23/2020 22:36:26 INFO 139961471010624] Epoch[78] Batch [5]#011Speed: 61.94 samples/sec#011loss=2.513519[0m
[34m[04/23/2020 22:36:31 INFO 139961471010624] Epoch[78] Batch[10] avg_epoch_loss=2.500819[0m
[34m[04/23/2020 22:36:31 INFO 139961471010624] #quality_metric: host=algo-1, epoch=78, batch=10 train loss <loss>=2.48558020592[0m
[34m[04/23/2020 22:36:31 INFO 139961471010624] Epoch[78] Batch [10]#011Speed: 61.07 samples/sec#011loss=2.485580[0m
[34m[04/23/2020 22:36:31 INFO 139961471010624] processed a total of 678 examples

[34m[04/23/2020 22:37:43 INFO 139961471010624] Epoch[83] Batch[5] avg_epoch_loss=2.473511[0m
[34m[04/23/2020 22:37:43 INFO 139961471010624] #quality_metric: host=algo-1, epoch=83, batch=5 train loss <loss>=2.47351090113[0m
[34m[04/23/2020 22:37:43 INFO 139961471010624] Epoch[83] Batch [5]#011Speed: 60.87 samples/sec#011loss=2.473511[0m
[34m[04/23/2020 22:37:47 INFO 139961471010624] processed a total of 616 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 14681.957006454468, "sum": 14681.957006454468, "min": 14681.957006454468}}, "EndTime": 1587681467.978164, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1587681453.29593}
[0m
[34m[04/23/2020 22:37:47 INFO 139961471010624] #throughput_metric: host=algo-1, train throughput=41.9559823308 records/second[0m
[34m[04/23/2020 22:37:47 INFO 139961471010624] #progress_metric: host=algo-1, completed 21 % of epochs[0m
[34m[04/23/2020 22:37:47 INFO 1399614710

[34m[04/23/2020 22:39:08 INFO 139961471010624] Epoch[89] Batch[0] avg_epoch_loss=2.555187[0m
[34m[04/23/2020 22:39:08 INFO 139961471010624] #quality_metric: host=algo-1, epoch=89, batch=0 train loss <loss>=2.55518651009[0m
[34m[04/23/2020 22:39:14 INFO 139961471010624] Epoch[89] Batch[5] avg_epoch_loss=2.588220[0m
[34m[04/23/2020 22:39:14 INFO 139961471010624] #quality_metric: host=algo-1, epoch=89, batch=5 train loss <loss>=2.58821956317[0m
[34m[04/23/2020 22:39:14 INFO 139961471010624] Epoch[89] Batch [5]#011Speed: 60.78 samples/sec#011loss=2.588220[0m
[34m[04/23/2020 22:39:18 INFO 139961471010624] processed a total of 622 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 14730.572938919067, "sum": 14730.572938919067, "min": 14730.572938919067}}, "EndTime": 1587681558.190585, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1587681543.459619}
[0m
[34m[04/23/2020 22:39:18 INFO 139961471010624] #thr

[34m[04/23/2020 22:40:37 INFO 139961471010624] Epoch[95] Batch[0] avg_epoch_loss=2.542497[0m
[34m[04/23/2020 22:40:37 INFO 139961471010624] #quality_metric: host=algo-1, epoch=95, batch=0 train loss <loss>=2.54249739647[0m
[34m[04/23/2020 22:40:42 INFO 139961471010624] Epoch[95] Batch[5] avg_epoch_loss=2.551905[0m
[34m[04/23/2020 22:40:42 INFO 139961471010624] #quality_metric: host=algo-1, epoch=95, batch=5 train loss <loss>=2.55190459887[0m
[34m[04/23/2020 22:40:42 INFO 139961471010624] Epoch[95] Batch [5]#011Speed: 62.39 samples/sec#011loss=2.551905[0m
[34m[04/23/2020 22:40:48 INFO 139961471010624] Epoch[95] Batch[10] avg_epoch_loss=2.478803[0m
[34m[04/23/2020 22:40:48 INFO 139961471010624] #quality_metric: host=algo-1, epoch=95, batch=10 train loss <loss>=2.39108073711[0m
[34m[04/23/2020 22:40:48 INFO 139961471010624] Epoch[95] Batch [10]#011Speed: 61.37 samples/sec#011loss=2.391081[0m
[34m[04/23/2020 22:40:48 INFO 139961471010624] processed a total of 643 examples

[34m[04/23/2020 22:41:54 INFO 139961471010624] Epoch[100] Batch[0] avg_epoch_loss=2.611029[0m
[34m[04/23/2020 22:41:54 INFO 139961471010624] #quality_metric: host=algo-1, epoch=100, batch=0 train loss <loss>=2.61102938652[0m
[34m[04/23/2020 22:41:59 INFO 139961471010624] Epoch[100] Batch[5] avg_epoch_loss=2.508635[0m
[34m[04/23/2020 22:41:59 INFO 139961471010624] #quality_metric: host=algo-1, epoch=100, batch=5 train loss <loss>=2.50863476594[0m
[34m[04/23/2020 22:41:59 INFO 139961471010624] Epoch[100] Batch [5]#011Speed: 61.55 samples/sec#011loss=2.508635[0m
[34m[04/23/2020 22:42:03 INFO 139961471010624] processed a total of 627 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 14699.090003967285, "sum": 14699.090003967285, "min": 14699.090003967285}}, "EndTime": 1587681723.777265, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1587681709.077904}
[0m
[34m[04/23/2020 22:42:03 INFO 139961471010624]

[34m[04/23/2020 22:43:11 INFO 139961471010624] Epoch[105] Batch[0] avg_epoch_loss=2.497335[0m
[34m[04/23/2020 22:43:11 INFO 139961471010624] #quality_metric: host=algo-1, epoch=105, batch=0 train loss <loss>=2.49733495712[0m
[34m[04/23/2020 22:43:16 INFO 139961471010624] Epoch[105] Batch[5] avg_epoch_loss=2.506161[0m
[34m[04/23/2020 22:43:16 INFO 139961471010624] #quality_metric: host=algo-1, epoch=105, batch=5 train loss <loss>=2.50616061687[0m
[34m[04/23/2020 22:43:16 INFO 139961471010624] Epoch[105] Batch [5]#011Speed: 61.51 samples/sec#011loss=2.506161[0m
[34m[04/23/2020 22:43:20 INFO 139961471010624] processed a total of 638 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 14673.171043395996, "sum": 14673.171043395996, "min": 14673.171043395996}}, "EndTime": 1587681800.844917, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1587681786.171305}
[0m
[34m[04/23/2020 22:43:20 INFO 139961471010624]

[34m[04/23/2020 22:44:37 INFO 139961471010624] Epoch[110] Batch[10] avg_epoch_loss=2.563106[0m
[34m[04/23/2020 22:44:37 INFO 139961471010624] #quality_metric: host=algo-1, epoch=110, batch=10 train loss <loss>=2.59839992523[0m
[34m[04/23/2020 22:44:37 INFO 139961471010624] Epoch[110] Batch [10]#011Speed: 61.37 samples/sec#011loss=2.598400[0m
[34m[04/23/2020 22:44:37 INFO 139961471010624] processed a total of 685 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 15698.955059051514, "sum": 15698.955059051514, "min": 15698.955059051514}}, "EndTime": 1587681877.36943, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1587681861.669889}
[0m
[34m[04/23/2020 22:44:37 INFO 139961471010624] #throughput_metric: host=algo-1, train throughput=43.6332275303 records/second[0m
[34m[04/23/2020 22:44:37 INFO 139961471010624] #progress_metric: host=algo-1, completed 27 % of epochs[0m
[34m[04/23/2020 22:44:37 INFO 1399

[34m[04/23/2020 22:45:57 INFO 139961471010624] Epoch[116] Batch[0] avg_epoch_loss=2.591634[0m
[34m[04/23/2020 22:45:57 INFO 139961471010624] #quality_metric: host=algo-1, epoch=116, batch=0 train loss <loss>=2.59163379669[0m
[34m[04/23/2020 22:46:03 INFO 139961471010624] Epoch[116] Batch[5] avg_epoch_loss=2.531063[0m
[34m[04/23/2020 22:46:03 INFO 139961471010624] #quality_metric: host=algo-1, epoch=116, batch=5 train loss <loss>=2.53106300036[0m
[34m[04/23/2020 22:46:03 INFO 139961471010624] Epoch[116] Batch [5]#011Speed: 61.77 samples/sec#011loss=2.531063[0m
[34m[04/23/2020 22:46:08 INFO 139961471010624] Epoch[116] Batch[10] avg_epoch_loss=2.497310[0m
[34m[04/23/2020 22:46:08 INFO 139961471010624] #quality_metric: host=algo-1, epoch=116, batch=10 train loss <loss>=2.45680689812[0m
[34m[04/23/2020 22:46:08 INFO 139961471010624] Epoch[116] Batch [10]#011Speed: 61.49 samples/sec#011loss=2.456807[0m
[34m[04/23/2020 22:46:08 INFO 139961471010624] processed a total of 654 e

[34m[04/23/2020 22:47:28 INFO 139961471010624] Epoch[122] Batch[0] avg_epoch_loss=2.497370[0m
[34m[04/23/2020 22:47:28 INFO 139961471010624] #quality_metric: host=algo-1, epoch=122, batch=0 train loss <loss>=2.49737024307[0m
[34m[04/23/2020 22:47:33 INFO 139961471010624] Epoch[122] Batch[5] avg_epoch_loss=2.482946[0m
[34m[04/23/2020 22:47:33 INFO 139961471010624] #quality_metric: host=algo-1, epoch=122, batch=5 train loss <loss>=2.48294568062[0m
[34m[04/23/2020 22:47:33 INFO 139961471010624] Epoch[122] Batch [5]#011Speed: 61.48 samples/sec#011loss=2.482946[0m
[34m[04/23/2020 22:47:37 INFO 139961471010624] processed a total of 626 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 14689.244985580444, "sum": 14689.244985580444, "min": 14689.244985580444}}, "EndTime": 1587682057.578372, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1587682042.888689}
[0m
[34m[04/23/2020 22:47:37 INFO 139961471010624]

[34m[04/23/2020 22:49:01 INFO 139961471010624] Epoch[128] Batch[5] avg_epoch_loss=2.487455[0m
[34m[04/23/2020 22:49:01 INFO 139961471010624] #quality_metric: host=algo-1, epoch=128, batch=5 train loss <loss>=2.48745524883[0m
[34m[04/23/2020 22:49:01 INFO 139961471010624] Epoch[128] Batch [5]#011Speed: 61.94 samples/sec#011loss=2.487455[0m
[34m[04/23/2020 22:49:05 INFO 139961471010624] processed a total of 622 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 14582.728147506714, "sum": 14582.728147506714, "min": 14582.728147506714}}, "EndTime": 1587682145.665682, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1587682131.082578}
[0m
[34m[04/23/2020 22:49:05 INFO 139961471010624] #throughput_metric: host=algo-1, train throughput=42.6529294366 records/second[0m
[34m[04/23/2020 22:49:05 INFO 139961471010624] #progress_metric: host=algo-1, completed 32 % of epochs[0m
[34m[04/23/2020 22:49:05 INFO 139961

[34m[04/23/2020 22:50:25 INFO 139961471010624] Epoch[134] Batch[0] avg_epoch_loss=2.514423[0m
[34m[04/23/2020 22:50:25 INFO 139961471010624] #quality_metric: host=algo-1, epoch=134, batch=0 train loss <loss>=2.51442265511[0m
[34m[04/23/2020 22:50:30 INFO 139961471010624] Epoch[134] Batch[5] avg_epoch_loss=2.470279[0m
[34m[04/23/2020 22:50:30 INFO 139961471010624] #quality_metric: host=algo-1, epoch=134, batch=5 train loss <loss>=2.47027870019[0m
[34m[04/23/2020 22:50:30 INFO 139961471010624] Epoch[134] Batch [5]#011Speed: 62.27 samples/sec#011loss=2.470279[0m
[34m[04/23/2020 22:50:34 INFO 139961471010624] processed a total of 595 examples[0m
[34m#metrics {"Metrics": {"update.time": {"count": 1, "max": 14516.695022583008, "sum": 14516.695022583008, "min": 14516.695022583008}}, "EndTime": 1587682234.761521, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1587682220.244391}
[0m
[34m[04/23/2020 22:50:34 INFO 139961471010624]

[34m[04/23/2020 22:51:55 INFO 139961471010624] Epoch[140] Batch[0] avg_epoch_loss=2.538957[0m
[34m[04/23/2020 22:51:55 INFO 139961471010624] #quality_metric: host=algo-1, epoch=140, batch=0 train loss <loss>=2.53895688057[0m
[34m[04/23/2020 22:52:00 INFO 139961471010624] Epoch[140] Batch[5] avg_epoch_loss=2.526720[0m
[34m[04/23/2020 22:52:00 INFO 139961471010624] #quality_metric: host=algo-1, epoch=140, batch=5 train loss <loss>=2.52672028542[0m
[34m[04/23/2020 22:52:00 INFO 139961471010624] Epoch[140] Batch [5]#011Speed: 62.00 samples/sec#011loss=2.526720[0m
[34m[04/23/2020 22:52:05 INFO 139961471010624] Epoch[140] Batch[10] avg_epoch_loss=2.544873[0m
[34m[04/23/2020 22:52:05 INFO 139961471010624] #quality_metric: host=algo-1, epoch=140, batch=10 train loss <loss>=2.56665525436[0m
[34m[04/23/2020 22:52:05 INFO 139961471010624] Epoch[140] Batch [10]#011Speed: 61.45 samples/sec#011loss=2.566655[0m
[34m[04/23/2020 22:52:05 INFO 139961471010624] processed a total of 644 e


2020-04-23 22:53:45 Uploading - Uploading generated training model[34m#metrics {"Metrics": {"model.score.time": {"count": 1, "max": 16841.02702140808, "sum": 16841.02702140808, "min": 16841.02702140808}}, "EndTime": 1587682420.673546, "Dimensions": {"Host": "algo-1", "Operation": "training", "Algorithm": "AWS/DeepAR"}, "StartTime": 1587682403.832611}
[0m
[34m[04/23/2020 22:53:40 INFO 139961471010624] #test_score (algo-1, RMSE): 40.1434449833[0m
[34m[04/23/2020 22:53:40 INFO 139961471010624] #test_score (algo-1, mean_absolute_QuantileLoss): 16355.638567985097[0m
[34m[04/23/2020 22:53:40 INFO 139961471010624] #test_score (algo-1, mean_wQuantileLoss): 0.13587803080489402[0m
[34m[04/23/2020 22:53:40 INFO 139961471010624] #test_score (algo-1, wQuantileLoss[0.1]): 0.07697326526651156[0m
[34m[04/23/2020 22:53:40 INFO 139961471010624] #test_score (algo-1, wQuantileLoss[0.2]): 0.12001783375002434[0m
[34m[04/23/2020 22:53:40 INFO 139961471010624] #test_score (algo-1, wQuantileLoss[

In [20]:
# Name of the estimator's latest training job; used below to create the endpoint
job_name = estimator.latest_training_job.name

In [21]:
# Hard code name for now as we stopped the notebook.  
# If you do this in a single sitting, you don't need to hard code
# job_name = 'deepar-biketrain-with-categories-2018-12-21-04-05-44-478'

In [22]:
# Show which training job the endpoint will be created from
print('job name: {}'.format(job_name))

job name: deepar-biketrain-no-categories-2020-04-23-22-12-59-148


In [23]:
# Create an endpoint for real-time predictions from the training job above.
# It is requested with one ml.m4.xlarge instance and reuses the container image
# and role that were passed to the estimator.
endpoint_name = sagemaker_session.endpoint_from_job(
    job_name=job_name,
    initial_instance_count=1,
    instance_type='ml.m4.xlarge',
    deployment_image=image_name,
    role=role
)

-----------------!

In [24]:
# Show the name of the endpoint that was just created
print('endpoint name: {}'.format(endpoint_name))

endpoint name: deepar-biketrain-no-categories-2020-04-23-22-12-59-148


In [25]:
# Don't forget to terminate the endpoint after completing the demo
# Otherwise, your account will accumulate hourly charges

# You can delete it from the SageMaker management console, through the command line, or through code

# sagemaker_session.delete_endpoint(endpoint_name)