In [1]:
import sagemaker 
import boto3
from sagemaker import get_execution_role 

# IAM role the notebook runs under, and the SageMaker session that the
# estimator is built with further down.
role = get_execution_role()
session = sagemaker.Session()
# Take the region from the SageMaker session itself instead of a second,
# independent boto3 session, so the algorithm image is looked up in the same
# region the session submits jobs to.
region_name = session.boto_region_name

In [2]:
# Bucket name and key prefix from which the S3 input/output locations are built.
s3_bucket, prefix = "sagemaker-cookbook-bucket", "chapter11"

In [3]:
# All three locations share the same bucket/prefix root: the two dataset files
# sit under input/, and output/ is the folder handed to the estimator.
chapter_root = f"s3://{s3_bucket}/{prefix}"
training_s3_input_location = f"{chapter_root}/input/training.jsonl"
test_s3_input_location = f"{chapter_root}/input/test.jsonl"
training_s3_output_location = f"{chapter_root}/output/"

In [4]:
from sagemaker.inputs import TrainingInput

# One input channel per dataset file, both declared with content type "json".
# NOTE(review): the files end in ".jsonl"; DeepAR is documented to infer the
# format only from .json/.json.gz/.parquet extensions, which is presumably why
# the content type is spelled out here — confirm against the DeepAR docs.
train, test = (
    TrainingInput(s3_data=s3_uri, content_type="json")
    for s3_uri in (training_s3_input_location, test_s3_input_location)
)

In [8]:
# Restore `prediction_length` from IPython's %store storage (it has to have been
# saved there with `%store prediction_length` before this notebook runs), then
# display the restored value.
%store -r prediction_length
prediction_length

50

In [10]:
# Restore `freq` from IPython's %store storage (saved earlier with `%store freq`)
# and display it; it is passed to the estimator as the `time_freq` hyperparameter.
%store -r freq
freq

'H'

In [11]:
# Use the same value for the `context_length` hyperparameter as for
# `prediction_length`.
context_length = prediction_length

In [12]:
from sagemaker.image_uris import retrieve 

# Look up the ECR image URI of the "forecasting-deepar" algorithm (image
# version "1") for the notebook's region, then display it.
container = retrieve(
    framework="forecasting-deepar",
    region=region_name,
    version="1",
)
container

'522234722520.dkr.ecr.us-east-1.amazonaws.com/forecasting-deepar:1'

In [13]:
# Generic estimator around the DeepAR container image: one ml.c4.2xlarge
# training instance, with results written under the output S3 location.
estimator = sagemaker.estimator.Estimator(
    image_uri=container,
    role=role,
    instance_type="ml.c4.2xlarge",
    instance_count=1,
    sagemaker_session=session,
    output_path=training_s3_output_location,
)

In [14]:
# Hyperparameters for the training job, collected in one mapping and unpacked
# into the call. The context and prediction lengths are passed as strings; the
# remaining values are passed as given.
deepar_hyperparameters = {
    "time_freq": freq,
    "context_length": str(context_length),
    "prediction_length": str(prediction_length),
    "num_cells": 40,
    "num_layers": 3,
    "likelihood": "gaussian",
    "epochs": 20,
    "mini_batch_size": 32,
    "learning_rate": 0.001,
    "dropout_rate": 0.05,
    "early_stopping_patience": 10,
}
estimator.set_hyperparameters(**deepar_hyperparameters)

In [15]:
# Map channel names to their inputs; this mapping is what gets passed to fit().
data_channels = dict(train=train, test=test)

In [16]:
# Start the training job with the "train" and "test" channels. The job's log
# output is streamed into the cell output until the job reports completion.
estimator.fit(inputs=data_channels)

2021-03-30 16:04:51 Starting - Starting the training job...
2021-03-30 16:04:55 Starting - Launching requested ML instancesProfilerReport-1617120291: InProgress
.........
2021-03-30 16:06:36 Starting - Preparing the instances for training......
2021-03-30 16:07:44 Downloading - Downloading input data...
2021-03-30 16:08:17 Training - Training image download completed. Training in progress.[34mArguments: train[0m
[34m[03/30/2021 16:08:16 INFO 139972648842624] Reading default configuration from /opt/amazon/lib/python3.6/site-packages/algorithm/resources/default-input.json: {'_kvstore': 'auto', '_num_gpus': 'auto', '_num_kv_servers': 'auto', '_tuning_objective_metric': '', 'cardinality': 'auto', 'dropout_rate': '0.10', 'early_stopping_patience': '', 'embedding_dimension': '10', 'learning_rate': '0.001', 'likelihood': 'student-t', 'mini_batch_size': '128', 'num_cells': '40', 'num_dynamic_feat': 'auto', 'num_eval_samples': '100', 'num_layers': '2', 'test_quantiles': '[0.1, 0.2, 0.3, 0.4,

[34m[03/30/2021 16:08:20 INFO 139972648842624] Epoch[2] Batch[0] avg_epoch_loss=7.584526[0m
[34m[03/30/2021 16:08:20 INFO 139972648842624] #quality_metric: host=algo-1, epoch=2, batch=0 train loss <loss>=7.584526062011719[0m
[34m[03/30/2021 16:08:20 INFO 139972648842624] Epoch[2] Batch[5] avg_epoch_loss=7.730430[0m
[34m[03/30/2021 16:08:20 INFO 139972648842624] #quality_metric: host=algo-1, epoch=2, batch=5 train loss <loss>=7.730430285135905[0m
[34m[03/30/2021 16:08:20 INFO 139972648842624] Epoch[2] Batch [5]#011Speed: 331.65 samples/sec#011loss=7.730430[0m
[34m[03/30/2021 16:08:21 INFO 139972648842624] Epoch[2] Batch[10] avg_epoch_loss=7.588910[0m
[34m[03/30/2021 16:08:21 INFO 139972648842624] #quality_metric: host=algo-1, epoch=2, batch=10 train loss <loss>=7.4190857887268065[0m
[34m[03/30/2021 16:08:21 INFO 139972648842624] Epoch[2] Batch [10]#011Speed: 256.11 samples/sec#011loss=7.419086[0m
[34m[03/30/2021 16:08:21 INFO 139972648842624] processed a total of 337 ex

[34m[03/30/2021 16:08:30 INFO 139972648842624] Epoch[10] Batch[5] avg_epoch_loss=6.499100[0m
[34m[03/30/2021 16:08:30 INFO 139972648842624] #quality_metric: host=algo-1, epoch=10, batch=5 train loss <loss>=6.499099572499593[0m
[34m[03/30/2021 16:08:30 INFO 139972648842624] Epoch[10] Batch [5]#011Speed: 354.07 samples/sec#011loss=6.499100[0m
[34m[03/30/2021 16:08:31 INFO 139972648842624] processed a total of 302 examples[0m
[34m#metrics {"StartTime": 1617120510.0263429, "EndTime": 1617120511.1115274, "Dimensions": {"Algorithm": "AWS/DeepAR", "Host": "algo-1", "Operation": "training"}, "Metrics": {"update.time": {"sum": 1085.1130485534668, "count": 1, "min": 1085.1130485534668, "max": 1085.1130485534668}}}
[0m
[34m[03/30/2021 16:08:31 INFO 139972648842624] #throughput_metric: host=algo-1, train throughput=278.27789831479583 records/second[0m
[34m[03/30/2021 16:08:31 INFO 139972648842624] #progress_metric: host=algo-1, completed 55.0 % of epochs[0m
[34m[03/30/2021 16:08:31 


2021-03-30 16:08:52 Uploading - Uploading generated training model
2021-03-30 16:08:52 Completed - Training job completed
[34m[03/30/2021 16:08:40 INFO 139972648842624] processed a total of 309 examples[0m
[34m#metrics {"StartTime": 1617120519.334677, "EndTime": 1617120520.4030344, "Dimensions": {"Algorithm": "AWS/DeepAR", "Host": "algo-1", "Operation": "training"}, "Metrics": {"update.time": {"sum": 1067.5427913665771, "count": 1, "min": 1067.5427913665771, "max": 1067.5427913665771}}}
[0m
[34m[03/30/2021 16:08:40 INFO 139972648842624] #throughput_metric: host=algo-1, train throughput=289.4139472233307 records/second[0m
[34m[03/30/2021 16:08:40 INFO 139972648842624] #progress_metric: host=algo-1, completed 95.0 % of epochs[0m
[34m[03/30/2021 16:08:40 INFO 139972648842624] #quality_metric: host=algo-1, epoch=18, train loss <loss>=6.56499719619751[0m
[34m[03/30/2021 16:08:40 INFO 139972648842624] loss did not improve[0m
[34m[03/30/2021 16:08:40 INFO 139972648842624] Epoch[

In [17]:
# Display the S3 URI of the model archive (model.tar.gz) produced by the
# training job.
estimator.model_data

's3://sagemaker-cookbook-bucket/chapter11/output/forecasting-deepar-2021-03-30-16-04-51-022/output/model.tar.gz'

In [18]:
# Host the trained model on a single ml.m4.xlarge instance; `predictor` is the
# handle returned for the resulting endpoint.
predictor = estimator.deploy(instance_type="ml.m4.xlarge", initial_instance_count=1)

---------------!

In [19]:
# Display the name of the endpoint that the deploy call created.
predictor.endpoint_name

'forecasting-deepar-2021-03-30-16-09-46-185'

In [20]:
# Copy the endpoint name into a plain variable and persist it with %store so
# that another notebook or kernel can restore it via `%store -r endpoint_name`.
endpoint_name = predictor.endpoint_name

%store endpoint_name

Stored 'endpoint_name' (str)
