In [1]:
import time
import numpy as np
import pandas as pd
import json
import matplotlib.pyplot as plt
import boto3
import sagemaker
from sagemaker import get_execution_role
import pickle
import io
# Seed NumPy's global random generator so any np.random draws repeat across runs.
np.random.seed(1)

In [2]:
# NOTE(review): looks like a leftover kernel sanity check (prints the literal "3");
# nothing else in the notebook depends on it — candidate for removal.
print("3")

3


In [3]:
# Window sizes, later stringified and forwarded as the DeepAR hyperparameters
# of the same names.
context_length = 5
prediction_length = 5

In [4]:
# Frequency code forwarded to the estimator as the `time_freq` hyperparameter.
freq = "D"

In [5]:
# Look up the execution role that is later passed to the Estimator and the endpoint.
# NOTE(review): `get_execution_role` is already imported in the first cell and
# `role` is assigned again in the next cell, so this cell appears redundant.
from sagemaker import get_execution_role
role = get_execution_role()

In [6]:
# S3 layout: all project artefacts live under <bucket>/<prefix>/.
bucket = "bit-coin-data"
prefix = "bit-coin-forecast"

# Bucket-relative locations (no "s3://" scheme; it is prepended where the
# paths are used).
s3_data_path = f"{bucket}/{prefix}/data"
s3_output_path = f"{bucket}/{prefix}/output"

# SageMaker session and execution role shared by the training and deployment cells.
sagemaker_session = sagemaker.Session()
role = get_execution_role()

In [7]:
import os
def downloadDirectoryFroms3(bucketName, remoteDirectoryName):
    """Download every object under an S3 key prefix to the same relative local path.

    Each object is written below the current working directory at a path equal
    to its key; missing parent directories are created first.

    Args:
        bucketName: Name of the S3 bucket to read from.
        remoteDirectoryName: Key prefix; every object whose key starts with it
            is processed.

    Notes:
        Keys ending in "/" (zero-byte "folder" placeholder objects) are created
        as local directories and not downloaded, because a download target that
        is a directory path cannot be written as a file.
    """
    s3_resource = boto3.resource('s3')
    bucket = s3_resource.Bucket(bucketName)
    for obj in bucket.objects.filter(Prefix=remoteDirectoryName):
        local_dir = os.path.dirname(obj.key)
        # dirname is "" for a key without "/"; os.makedirs("") would raise.
        if local_dir:
            # exist_ok replaces the racy exists()-then-makedirs() sequence.
            os.makedirs(local_dir, exist_ok=True)
        if obj.key.endswith("/"):
            # Placeholder "folder" object: the directory now exists, nothing to fetch.
            continue
        bucket.download_file(obj.key, obj.key)  # save to same path

In [8]:
# Mirror the project's S3 prefix into the working directory.
downloadDirectoryFroms3(
    bucketName="bit-coin-data",
    remoteDirectoryName="bit-coin-forecast",
)

In [9]:
# Unpickle the ground-truth series collection from the downloaded copy.
# NOTE: pickle executes arbitrary code on load — only use files from a trusted bucket.
with open('bit-coin-forecast/data/output/ground_truth.pkl', mode='rb') as pkl_file:
    time_series = pickle.load(pkl_file)

In [10]:
# Unpickle the training series collection from the downloaded copy.
# NOTE: pickle executes arbitrary code on load — only use files from a trusted bucket.
with open('bit-coin-forecast/data/output/train.pkl', mode='rb') as pkl_file:
    time_series_training = pickle.load(pkl_file)

In [11]:
# Check the type of the first element of the unpickled ground-truth collection.
print(type(time_series[0]))

<class 'pandas.core.series.Series'>


In [12]:
def series_to_obj(ts, cat=None):
    """Convert one time series into a plain dict record.

    Args:
        ts: Series-like object; ``ts.index[0]`` supplies the start stamp and
            iterating ``ts`` yields the target values.
        cat: Optional value stored under the ``"cat"`` key; omitted when None.

    Returns:
        dict with ``"start"`` (string form of the first index entry),
        ``"target"`` (list of values) and, if given, ``"cat"``.
    """
    record = {"start": str(ts.index[0]), "target": list(ts)}
    if cat is None:
        return record
    record["cat"] = cat
    return record


def series_to_jsonline(ts, cat=None):
    """Serialise one time series as a single JSON string (no trailing newline).

    Builds the record with ``series_to_obj(ts, cat)`` and dumps it with
    ``json.dumps``.
    """
    record = series_to_obj(ts, cat)
    return json.dumps(record)

In [13]:
encoding = "utf-8"
FILE_TRAIN = "train.json"
FILE_TEST = "test.json"


def _write_jsonlines(path, series_list):
    """Write each series in ``series_list`` to ``path`` as one JSON object per line."""
    # Binary mode with an explicit encode keeps the bytes (and "\n" line
    # endings) identical on every platform.
    with open(path, "wb") as f:
        for ts in series_list:
            f.write(series_to_jsonline(ts).encode(encoding))
            f.write("\n".encode(encoding))


# The training channel gets the training series; the test channel gets the
# full ground-truth series.
_write_jsonlines(FILE_TRAIN, time_series_training)
_write_jsonlines(FILE_TEST, time_series)

s3 = boto3.client("s3")
s3.upload_file(FILE_TRAIN, bucket, prefix + "/data/train/" + FILE_TRAIN)
# Fixed: the test file used to be uploaded under ".../data/test/train.json"
# (copy-paste of FILE_TRAIN). If an earlier run already left that object in the
# bucket, delete it so the test channel does not contain the data twice.
s3.upload_file(FILE_TEST, bucket, prefix + "/data/test/" + FILE_TEST)

In [14]:
from sagemaker import image_uris

# Resolve the DeepAR container image for the current region.
# `get_image_uri` is deprecated in sagemaker>=2 (it only emits a rename warning
# and forwards); `image_uris.retrieve` is the supported replacement.
image_uri = image_uris.retrieve(
    framework="forecasting-deepar",
    region=boto3.Session().region_name,
)

The method get_image_uri has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
Defaulting to the only supported framework/algorithm version: 1. Ignoring framework/algorithm version: 1.


In [15]:
# Generic Estimator wrapping the container image resolved above; model
# artefacts are written below the project's S3 output path.
estimator = sagemaker.estimator.Estimator(
    image_uri=image_uri,
    role=role,
    sagemaker_session=sagemaker_session,
    # Training hardware: a single instance.
    instance_type="ml.c4.xlarge",
    instance_count=1,
    output_path=f"s3://{s3_output_path}",
)

In [16]:
# Training hyperparameters; every value except `time_freq` is passed as a string.
hyperparameters = dict(
    # Data/window settings taken from the configuration cells.
    time_freq=freq,
    context_length=str(context_length),
    prediction_length=str(prediction_length),
    # Network shape.
    num_cells="40",
    num_layers="3",
    likelihood="gaussian",
    # Optimisation settings.
    epochs="20",
    mini_batch_size="32",
    learning_rate="0.001",
    dropout_rate="0.05",
    early_stopping_patience="10",
)

In [17]:
# Attach the hyperparameter dict to the estimator (each key becomes a keyword argument).
estimator.set_hyperparameters(**hyperparameters)

In [18]:
from time import gmtime, strftime

# One S3 prefix per input channel, matching the keys the JSON Lines files were
# uploaded under.
data_channels = {
    "train": f"s3://{s3_data_path}/train/",
    "test": f"s3://{s3_data_path}/test/",
}

# A UTC timestamp suffix gives each run a distinct job name.
timestamp = strftime("%Y-%m-%d-%H-%M-%S", gmtime())
job_name = f'jumpstart-example-deepar-{timestamp}'
estimator.fit(inputs=data_channels, job_name=job_name)

2022-09-27 14:16:44 Starting - Starting the training job...
2022-09-27 14:17:07 Starting - Preparing the instances for trainingProfilerReport-1664288204: InProgress
.........
2022-09-27 14:18:44 Downloading - Downloading input data...
2022-09-27 14:19:09 Training - Downloading the training image......
2022-09-27 14:20:15 Training - Training image download completed. Training in progress..
Arguments: train
  if num_device is 1 and 'dist' not in kvstore:
  from collections import Mapping, MutableMapping, Sequence
[09/27/2022 14:20:21 INFO 140081561163584] Reading default configuration from /opt/amazon/lib/python3.8/site-packages/algorithm/resources/default-input.json: {'_kvstore': 'auto', '_num_gpus': 'auto', '_num_kv_servers': 'auto', '_tuning_objective_metric': '', 'cardinality': 'auto', 'dropout_rate': '0.10', 'early_stopping_patience': '', 'embedding_dimension': '10', 'learning_rate': '0.001', 'likelihood': 'student-t', 'mini_batch_size': '128', 'num_cells': '40'

In [19]:
# Re-read the job name from the estimator so this cell does not depend on the
# `job_name` variable set in the training cell.
job_name = estimator.latest_training_job.name

# Create an endpoint from the finished training job; the call returns the
# endpoint's name.
endpoint_name = sagemaker_session.endpoint_from_job(
    job_name=job_name,
    image_uri=image_uri,
    role=role,
    instance_type="ml.m4.xlarge",
    initial_instance_count=1,
)

-------!

In [20]:
# Display the endpoint name returned by the deployment cell.
endpoint_name

'jumpstart-example-deepar-2022-09-27-14-16-44'

In [21]:
# Wrap the endpoint name in a dict (key "epn") so it can be pickled and shared.
dic = dict(epn=endpoint_name)

In [22]:
# Persist the endpoint-name dict to a local pickle file.
with open('epn.pkl', mode='wb') as pkl_out:
    pickle.dump(dic, pkl_out)

In [23]:
# Publish the pickled endpoint name next to the other output artefacts in S3.
s3.upload_file(
    Filename="epn.pkl",
    Bucket=bucket,
    Key=prefix + "/data/output/" + "epn.pkl",
)