In [None]:
import time
import numpy as np
import pandas as pd
import json
import matplotlib.pyplot as plt
import datetime

In [None]:
import boto3
import sagemaker
from sagemaker import get_execution_role

In [None]:
# Whether categorical features are in use; consulted further down when the
# DeepAR "cardinality" hyperparameter is chosen.
with_categories = False

# A descriptive base job name makes it easy to tell apart the training jobs,
# models and endpoints produced by different experiments.
base_job_name = "deepar-biketrain-with-dynamic-feat"

In [None]:
# S3 bucket and key prefix under which this model's data and artifacts live
bucket = "chandra-ml-sagemaker"
prefix = "deepar/bikerental"

# Separate sub-paths for data and output; this structure allows multiple
# training and test files for model development and testing.
# These paths carry no "s3://" scheme - it is prepended where they are used.
s3_data_path, s3_output_path = (
    template.format(bucket, prefix)
    for template in ("{}/{}/data_dynamic", "{}/{}/output")
)

In [None]:
# Show the resolved data and output locations as a quick sanity check
(s3_data_path, s3_output_path)

In [None]:
# File name is referred as key name in S3
# Files stored in S3 are automatically replicated across
# three different availability zones in the region where the bucket was created.
# http://boto3.readthedocs.io/en/latest/guide/s3.html
def write_to_s3(filename, bucket, key):
    """Upload a local file to S3.

    Args:
        filename: Path of the local file to upload.
        bucket: Name of the destination S3 bucket.
        key: Object key (the "file name" within the bucket) to write to.

    Returns:
        Whatever ``upload_fileobj`` returns for the target object.
    """
    # Binary mode: the file's bytes are streamed to S3 unchanged
    with open(filename, 'rb') as source_file:
        target_object = boto3.Session().resource('s3').Bucket(bucket).Object(key)
        return target_object.upload_fileobj(source_file)

In [None]:
# Upload one or more training files and test files to S3.
# Keys are derived from `prefix` (instead of repeating its value) so the uploads
# stay in step with the "data_dynamic" train/test locations used for training
# if the prefix is ever changed.
write_to_s3('train_dynamic_feat.json', bucket,
            '{}/data_dynamic/train/train_dynamic_feat.json'.format(prefix))
write_to_s3('test_dynamic_feat.json', bucket,
            '{}/data_dynamic/test/test_dynamic_feat.json'.format(prefix))

In [None]:
# Execution role handed to the estimator and to the endpoint deployment below
role = get_execution_role()
# SageMaker session used by the estimator and for creating the endpoint
sagemaker_session = sagemaker.Session()

In [None]:
# There is no need to maintain a per-region mapping of container images:
# the SageMaker SDK provides a convenience lookup for the built-in algorithms.
# https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-algo-docker-registry-paths.html
from sagemaker.amazon.amazon_estimator import get_image_uri

# Resolve the DeepAR forecasting image for the region of the current boto3 session
image_name = get_image_uri(
    region_name=boto3.Session().region_name,
    repo_name='forecasting-deepar'
)

In [None]:
# Display the resolved DeepAR container image URI
image_name

In [None]:
# The time series consists of hourly data, and hourly rental counts are predicted
freq = 'H'

# How far into the future predictions can be made:
# 12 days' worth of hourly forecasts (12 * 24 = 288 steps)
prediction_length = 12 * 24

# How far into the past the network can see.
# AWS recommends starting with the context equal to the prediction length.
context_length = prediction_length

In [None]:
# Instance selection notes:
# - If you are still under the free tier, check what it covers first.
#   At the time of writing, m4.xlarge was offered as part of a 2-month free tier.
#   https://aws.amazon.com/sagemaker/pricing/
# - Outside the free tier, ml.m5.xlarge (a newer generation instance) is an option.
# - Dynamic Feat: this notebook trains on a larger instance,
#   ml.c5.4xlarge = 16 CPU, 32 GB ('ml.c4.xlarge' -> 'ml.c5.4xlarge'),
#   because c4.xlarge ran into an out of memory error.
estimator = sagemaker.estimator.Estimator(
    image_name=image_name,
    role=role,
    train_instance_type='ml.c5.4xlarge',
    train_instance_count=1,
    # s3_output_path has no scheme, so add it here
    output_path="s3://" + s3_output_path,
    base_job_name=base_job_name,
    sagemaker_session=sagemaker_session
)

In [None]:
# Show the frequency and window settings that will be sent to DeepAR
(freq, context_length, prediction_length)

In [None]:
# DeepAR hyperparameter reference:
# https://docs.aws.amazon.com/sagemaker/latest/dg/deepar_hyperparameters.html

# Fixed training settings; every value is supplied as a string
hyperparameters = {
    "time_freq": freq,
    "epochs": "400",
    "early_stopping_patience": "40",
    "mini_batch_size": "64",
    "learning_rate": "5E-4",
}

# Settings derived from the notebook configuration above
hyperparameters.update({
    "context_length": str(context_length),
    "prediction_length": str(prediction_length),
    # "auto" when categorical features are used, otherwise an empty value
    "cardinality": "auto" if with_categories else '',
})

In [None]:
# Display the final hyperparameter dictionary before it is applied to the estimator
hyperparameters

In [None]:
# Apply the hyperparameters to the estimator; each dictionary entry is passed as a keyword argument
estimator.set_hyperparameters(**hyperparameters)

In [None]:
# Each channel simply points at an S3 path (a folder, not a single file).
# A path may hold multiple files; SageMaker will use all of them.
data_channels = {
    channel: template.format(s3_data_path)
    for channel, template in (
        ("train", "s3://{}/train/"),
        ("test", "s3://{}/test/"),
    )
}

In [None]:
# Display the train/test channel locations that will be passed to fit()
data_channels

In [None]:
# Launch the training job on the train/test channels.
# The original timing note for this step was around 35 minutes on an m4.xlarge
# instance; the estimator in this notebook is configured with ml.c5.4xlarge,
# so the actual duration may differ.
estimator.fit(data_channels)

In [None]:
# Remember the name of the training job that was just run; it is used below to create the endpoint
job_name = estimator.latest_training_job.name

In [None]:
# Fallback for sessions where the training cells above were skipped (for
# example after a kernel restart): reuse a previously completed training job.
# The guard keeps a freshly trained job from being silently replaced by this
# hard-coded, account-specific name when the notebook is run top to bottom.
if 'job_name' not in globals():
    job_name = 'deepar-biketrain-with-dynamic-feat-2018-12-17-17-54-02-175'

In [None]:
# Report which training job the endpoint will be created from
print('job name: {0}'.format(job_name))

In [None]:
# Stand up an endpoint for real-time predictions, backed by the model
# produced by the training job named in `job_name`.
# The result is kept as the endpoint name, which is needed for clean-up.
endpoint_name = sagemaker_session.endpoint_from_job(
    job_name=job_name,
    deployment_image=image_name,
    role=role,
    instance_type='ml.m4.xlarge',
    initial_instance_count=1
)

In [None]:
# Report the name of the endpoint that was created
print('endpoint name: {0}'.format(endpoint_name))

In [None]:
# Don't forget to terminate the endpoint after completing the demo
# Otherwise, your account will accumulate hourly charges

# You can delete it from the SageMaker management console, through the command line, or through code

#sagemaker_session.delete_endpoint(endpoint_name)