In [None]:
#
# Notebook configuration
#

# Name handed to the logger and, in AWS mode, to the S3 upload helper.
MODEL_NAME = 'giia'

# Dataset to split and train on. Alternative (monthly) dataset kept for quick switching:
# DATASET = "datasets/SandP_1995_2019_monthly.csv"
DATASET = 'datasets/SandP_1995_2020_daily.csv'

# IAM execution role passed to the estimator factory.
SM_ROLE = "arn:aws:iam::941048668662:role/service-role/AmazonSageMaker-ExecutionRole-20191206T145896"

# Instance selection: point INSTANCE_TYPE at AWS_INSTANCE to run on SageMaker instead of locally.
LOCAL_INSTANCE = 'local'
AWS_INSTANCE = 'ml.m5.large'
INSTANCE_TYPE = LOCAL_INSTANCE

# True when the notebook is configured for local mode.
IS_LOCAL = INSTANCE_TYPE == LOCAL_INSTANCE

In [None]:
#
# Initialization
#

import os
import sys

# Set notebook's src module path. Note that you may have to update your IDE's project settings to do the same for the
#  local library imports to work the same
module_path = os.path.dirname(os.path.dirname(os.path.abspath(''))) + '/src'
sys.path.append(module_path)

# Keep paths consistent throughout notebook
os.chdir(module_path)

# Autoreload imports at the beginning of cell execution.
#  https://ipython.org/ipython-doc/3/config/extensions/autoreload.html
%load_ext autoreload
%autoreload 2

from utils.logging import LoggerUtil
from utils.utils import Utils

LOGGER = LoggerUtil(MODEL_NAME)
UTILS = Utils(LOGGER)

LOGGER.log("Current working directory [{}]".format(os.getcwd()))
UTILS.describe_env()

In [None]:
#
# Split the dataset into train and test sets
#

from data_processing.parse import Parse

PARSE = Parse(LOGGER)

# Per the original note, the parser creates the train and test CSVs; it hands back
# two paths, train first and test second.
split_paths = PARSE.split_train_test_dataset(DATASET)
train_dataset_path, test_dataset_path = split_paths

In [None]:
#
# Setup local/aws environment. If aws, upload the datasets to S3
#

from data_processing.upload import Upload
from sagemaker.local import LocalSession

UPLOAD = Upload(LOGGER)
sagemaker_session = None


def _as_local_file_uri(path):
    """Return `path` as a file:// URI rooted at the current working directory.

    A path that already starts with file:// is returned unchanged, so running this
    cell more than once does not stack a second prefix onto the earlier result.
    """
    if str(path).startswith("file://"):
        return path
    return "file://{}/{}".format(os.getcwd(), path)


if IS_LOCAL:
    LOGGER.log("Notebook is set to local mode, not uploading")
    train_dataset_path = _as_local_file_uri(train_dataset_path)
    test_dataset_path = _as_local_file_uri(test_dataset_path)

    # Local-mode session; 'local_code' is switched on in its config.
    sagemaker_session = LocalSession()
    sagemaker_session.config = {'local': {'local_code': True}}
else:
    # The dataset path variables are overwritten with whatever the upload helper returns.
    # NOTE(review): re-running this cell in AWS mode feeds those returned paths back into the
    #  helper; re-run the parse cell first unless the helper is known to tolerate that.
    train_dataset_path, test_dataset_path = UPLOAD.upload_to_sagemaker_s3_bucket(
        MODEL_NAME, train_dataset_path, test_dataset_path)
    # NOTE(review): sagemaker_session stays None in AWS mode. Presumably the estimator factory
    #  falls back to a default session -- confirm, or restore the line below.
    # sagemaker_session = UPLOAD.sagemaker_session

In [None]:
#
# Build the estimator and fit it on the train/test datasets
#

from ml.train import Train

TRAIN = Train(LOGGER)

# Echo the training data location so the cell output records which input was used.
print(train_dataset_path)

# sagemaker_session is the one set up by the environment cell (None unless running locally).
estimator = TRAIN.create_model(
    SM_ROLE,
    INSTANCE_TYPE,
    sagemaker_session,
)
TRAIN.fit_model(
    estimator,
    train_dataset_path,
    test_dataset_path,
)

In [None]:
#
# Hyperparameter tuning
#
# NOTE: THIS CELL AND ALL CELLS BELOW ARE COMPATIBLE WITH AWS SAGEMAKER ONLY; LOCAL MODE WILL NOT WORK
#

from ml.tune import Tune

TUNE = Tune(LOGGER)

# Wrap the estimator built in the training cell in a tuner, then start the tuning run
# against the same train/test inputs.
tuner = TUNE.create_tuner(estimator)
tuning_job_name = TUNE.fit(
    tuner,
    train_dataset_path,
    test_dataset_path,
)

In [None]:
#
# Get a status update for the hyperparameter tuning job started in the previous cell.
# Re-run this cell until the job has completed before moving on to the next cell.
# Requires TUNE from the hyperparameter tuning cell (AWS SageMaker only, per the note there).
#

TUNE.get_tune_job_update()

In [None]:
#
# Evaluate the metrics of the tune job.
# Requires TUNE from the hyperparameter tuning cell; run only after the update cell above
# shows the tuning job as completed.
#

TUNE.report_job_analytics()
