In [None]:
#
# Initialization
#

import os
import sys

import ipynbname
from pathlib import Path

# The source root is the directory one level above the folder that holds this notebook. Putting it on sys.path
#  makes the local packages importable; an IDE may need the same source root configured in its project settings
#  for these imports to resolve there too.
MODULE_PATH = ipynbname.path().parent.parent
sys.path.append(str(MODULE_PATH))

# Work from the source root so relative paths resolve the same way in every cell
os.chdir(MODULE_PATH)

# Sanity check: the directory printed here is expected to be `./src`
print(f"Current working directory [{os.getcwd()}]")

# Local artifacts live in an `out` directory beside the source root (disposable and git-ignored)
local_artifact_dir = Path.cwd().parent / "out"
local_artifact_dir.mkdir(exist_ok=True, parents=True)

# Autoreload imports at the beginning of cell execution, so edits to the local modules are picked up without
#  restarting the kernel.
#  https://ipython.org/ipython-doc/3/config/extensions/autoreload.html
%load_ext autoreload
%autoreload 2

In [None]:
#
# Setup utils
#

import subprocess

from utils.logger_util import LoggerUtil
from utils.utils import Utils
from utils import config

# Notebook-wide helpers. The logger is built from the model id and a `logs` directory under the local artifact
#  directory, and is the object handed to the Parse/Upload/Train/Tune helpers created in later cells.
LOGGER = LoggerUtil(config.MODEL_ID, local_artifact_dir / "logs")
UTILS = Utils(LOGGER)

# NOTE(review): presumably reports details of the runtime environment -- confirm in utils.utils
UTILS.describe_env()

# Instance type to use on AWS, and the local-mode instance type for this machine (CPU unless a GPU is detected)
AWS_INSTANCE = 'ml.m5.large'
LOCAL_INSTANCE = 'local'

# Probe for a GPU by running `nvidia-smi`; a zero exit status is taken to mean GPU computation is available.
#  Only a failure to launch the binary is treated as "no GPU": OSError covers FileNotFoundError (binary missing)
#  and PermissionError (binary not executable). Anything else, e.g. a KeyboardInterrupt while the probe runs,
#  propagates instead of being silently swallowed.
try:
    if subprocess.call('nvidia-smi') == 0:
        LOCAL_INSTANCE = 'local_gpu'
except OSError:
    print("The nvidia-smi binary was not found and thus GPU computation is not supported. Using the default CPU "
          "computation")

# Change this to your desired instance type
INSTANCE_TYPE = LOCAL_INSTANCE
# True when the selected instance type is the local one; later cells branch on this to skip the S3 upload
IS_LOCAL = LOCAL_INSTANCE == INSTANCE_TYPE

In [None]:
#
# Parse dataset
#

from data_processing.parse import Parse

# Parsing helper; it shares the notebook-wide logger
PARSE = Parse(LOGGER)

# Datasets are kept under the disposable local artifact directory
dataset_dir_path = local_artifact_dir / "datasets"

# Creates train and test dataset CSVs
# NOTE(review): presumably written into dataset_dir_path under the TRAIN/TEST dataset filenames that later cells
#  upload and read -- confirm in data_processing.parse
PARSE.split_train_test_dataset(dataset_dir_path)

In [None]:
#
# Setup local/aws environment. If aws, upload the datasets to S3
#

from data_processing.upload import Upload
from sagemaker import LocalSession

UPLOAD = Upload(LOGGER, config.MODEL_ID)

sagemaker_session = None

if not IS_LOCAL:
    # AWS mode: take the session from the uploader, push both dataset CSVs to S3, and point the dataset and
    #  model-output URIs at the locations the uploader exposes
    sagemaker_session = UPLOAD.sagemaker_session

    UPLOAD.upload_to_sagemaker_s3_bucket(dataset_dir_path, PARSE.TRAIN_DATASET_FILENAME)
    UPLOAD.upload_to_sagemaker_s3_bucket(dataset_dir_path, PARSE.TEST_DATASET_FILENAME)

    dataset_dir_uri = UPLOAD.s3_dataset_dir_uri
    model_output_dir_uri = UPLOAD.s3_model_output_uri
else:
    # Local mode: nothing is uploaded; datasets and model output are addressed with file:// URIs
    LOGGER.log("Notebook is set to local mode, not uploading to S3")

    model_output_dir_path = local_artifact_dir / "models"
    model_output_dir_path.mkdir(parents=True, exist_ok=True)

    dataset_dir_uri = f"file://{dataset_dir_path}"
    model_output_dir_uri = f"file://{model_output_dir_path}"

    # Local session settings: `local_code` enabled, `container_root` pointed at the models directory
    sagemaker_session = LocalSession()
    sagemaker_session.config = dict(
        local=dict(local_code=True, container_root=str(model_output_dir_path))
    )

In [None]:
#
# Configure sagemaker and estimator
#

from ml.train import Train

# Training helper; it shares the notebook-wide logger
TRAIN = Train(LOGGER)

# Create the estimator from config.SM_ROLE, the selected instance type and the session prepared in the previous
#  cell, then fit it on the dataset directory URI (a file:// URI in local mode, the uploader's S3 URI otherwise)
estimator = TRAIN.create_model(config.SM_ROLE, INSTANCE_TYPE, sagemaker_session)
TRAIN.fit_model(estimator, dataset_dir_uri)

In [None]:
#
# Load model and define test data and variables to visually evaluate the model
#

from gluonts.model.predictor import Predictor
from gluonts.dataset.util import to_pandas
from gluonts.dataset.common import ListDataset

import pandas as pd
import matplotlib.pyplot as plt
from itertools import islice


# TODO: Use deepar.model_fn?
# SageMaker appends a random temp directory to the model output path chosen earlier, so the location is taken
#  from TRAIN (whose path includes that temp directory) instead of reusing the earlier value
model_output_dir_path = TRAIN.model_dir_path / "model"
predictor = Predictor.deserialize(Path(model_output_dir_path))

# First CSV row is the header and the first column becomes the index
test_dataset_filename = dataset_dir_path / config.TEST_DATASET_FILENAME
df = pd.read_csv(test_dataset_filename, header=0, index_col=0)

# Three views of the "close" column that share a start but stop at different index labels. The `None` cutoff
#  keeps the whole column and stays first so the entire test dataset can easily be graphed below.
test_data = ListDataset(
    [
        {"start": df.index[0], "target": df["close"][:cutoff]}
        for cutoff in (None, "2020-11-20 12:20:00", "2021-01-20 18:50:00")
    ],
    freq="5min"
)

# Graph the first 200 points of the first (complete) entry
to_pandas(next(iter(test_data)))[:200].plot(linewidth=2, figsize=(12, 5))
plt.grid()
plt.legend(["close"])
plt.show()

def plot_prob_forecasts(ts_list, forecast_list, plot_length=100, prediction_intervals=(50.0, 90.0)):
    """Plot each observed series together with its probabilistic forecast, one figure per pair.

    Series and forecasts are paired positionally; iteration stops at the shorter of the two inputs.

    Args:
        ts_list: Iterable of observed series. Each item must support slicing and
            ``.plot(figsize=..., linewidth=...)``.
        forecast_list: Iterable of forecasts. Each item must support
            ``.plot(prediction_intervals=..., color=...)``.
        plot_length: Number of trailing observations of each series to draw.
        prediction_intervals: Interval percentages passed to the forecast plot and named in the legend.
    """
    # Loop-invariant, so built once. Interval labels are listed in reverse of the order given.
    legend = ["observations", "median prediction"] + [
        f"{k}% prediction interval" for k in prediction_intervals
    ][::-1]

    # Iterate over the arguments only: the function must not depend on notebook globals such as `forecasts`,
    # which is defined in a later cell. zip() already stops at the shorter input, so no explicit bound is needed.
    for target, forecast in zip(ts_list, forecast_list):
        # The forecast is plotted right after the series, before the figure is shown
        target[-plot_length:].plot(figsize=(10, 7), linewidth=2)
        forecast.plot(prediction_intervals=prediction_intervals, color='g')
        plt.grid(which='both')
        plt.legend(legend, loc="upper left")
        plt.show()

In [None]:
#
# Visually evaluate the model by graphing some prediction test results
#

from gluonts.evaluation.backtest import make_evaluation_predictions

# Ask the predictor for sample-based forecasts over test_data; the call returns the forecasts together with the
#  series they are to be compared against.
# NOTE(review): presumably each series is truncated by the prediction window before forecasting -- confirm
#  against the gluonts `make_evaluation_predictions` documentation
forecast_it, ts_it = make_evaluation_predictions(
    dataset=test_data,  # test dataset
    predictor=predictor,  # predictor
    num_samples=100,  # number of sample paths we want for evaluation
)

# Materialize both results as lists so they can be indexed and iterated more than once
forecasts = list(forecast_it)
# Summary below is printed for the first forecast only
forecast_entry = forecasts[0]
tss = list(ts_it)

print(f"Number of sample paths: {forecast_entry.num_samples}")
print(f"Dimension of samples: {forecast_entry.samples.shape}")
print(f"Start date of the forecast window: {forecast_entry.start_date}")
print(f"Frequency of the time series: {forecast_entry.freq}")

# One figure per (series, forecast) pair; plot_prob_forecasts is defined in the previous cell
plot_prob_forecasts(tss, forecasts)

In [None]:
#
# NOTE: FURTHER CELLS ARE COMPATIBLE WITH AWS SAGEMAKER ONLY, LOCAL MODE WILL NOT WORK
# Hyperparameter tune the model
#

from ml.tune import Tune

# Tuning helper; it shares the notebook-wide logger
TUNE = Tune(LOGGER)

# Per-file dataset locations: each CSV filename appended to the dataset directory URI
train_dataset_uri = f"{dataset_dir_uri}/{PARSE.TRAIN_DATASET_FILENAME}"
test_dataset_uri = f"{dataset_dir_uri}/{PARSE.TEST_DATASET_FILENAME}"

# Build a tuner from the estimator created in the training cell and fit it against both dataset URIs.
# NOTE(review): whether fit_tuner blocks until the job finishes is defined in ml.tune -- confirm there
tuner = TUNE.create_tuner(estimator)
TUNE.fit_tuner(tuner, train_dataset_uri, test_dataset_uri)

In [None]:
#
# Get updates for Hyperparameter tune job. Ensure this is completed before going to the next cell
#

# NOTE(review): whether this call blocks until the tune job completes, or only reports the current status and
#  must be re-run, is defined in ml.tune -- confirm there
TUNE.get_tune_job_update()

In [None]:
#
# Evaluate the metrics of the tune job
#

# Relies on the tune job from the earlier cells having completed (see the note on the previous cell).
# NOTE(review): the metrics reported and their format are defined in ml.tune -- confirm there
TUNE.report_job_analytics()
