In [1]:
#
# Setup constants
#

# Where the estimator runs: 'local' or a SageMaker instance type.
# Switch INSTANCE_TYPE to AWS_INSTANCE to run on AWS.
LOCAL_INSTANCE = 'local'
AWS_INSTANCE = 'ml.m5.large'
INSTANCE_TYPE = LOCAL_INSTANCE
IS_LOCAL = INSTANCE_TYPE == LOCAL_INSTANCE

# Name of the model; passed to the upload step together with the dataset paths.
MODEL_NAME = "giia"

# Source CSV handed to the train/test split.
# Alternative (monthly) dataset: "datasets/SandP_1995_2019_monthly.csv"
DATASET = "datasets/SandP_1995_2020_daily.csv"

# Role ARN passed to the estimator factory.
# NOTE(review): account-specific value hard-coded in the notebook — consider reading it from configuration.
SM_ROLE = 'arn:aws:iam::941048668662:role/service-role/AmazonSageMaker-ExecutionRole-20191206T145896'

In [7]:
#
# Initialization
#

import os
import sys

# Set notebook's src module path. Note that you may have to update your IDE's project settings to do the same for the
#  local library imports to work the same
module_path = os.path.dirname(os.path.dirname(os.path.abspath(''))) + '/src'
sys.path.append(module_path)

# Keep paths consistent throughout notebook
os.chdir(module_path)

# Autoreload imports at the beginning of cell execution.
#  https://ipython.org/ipython-doc/3/config/extensions/autoreload.html
%load_ext autoreload
%autoreload 2

from utils.logging import LoggerUtil
from utils.utils import Utils

LOGGER = LoggerUtil(MODEL_NAME)
UTILS = Utils(LOGGER)

LOGGER.log("Current working directory [{}]".format(os.getcwd()))
UTILS.describe_env()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
2020-05-26 07:18:23.124532 Current working directory [/Users/jbeckman/Projects/plutus-realm/gai/AmazonSageMaker-Gia/src]
2020-05-26 07:18:23.124798 1.6.0
2020-05-26 07:18:23.124965 The GPU count is [0]


In [34]:
#
# Parse dataset
#

from data_processing.parse import Parse

PARSE = Parse(LOGGER)

# Split DATASET into a train and a test CSV and keep the two resulting paths
# (the recorded output logs the first/last sample and "datasets/train.csv").
(train_dataset_path, test_dataset_path) = PARSE.split_train_test_dataset(
    DATASET
)

2020-05-26 08:04:50.738965 First sample:
2020-05-26 08:04:50.739919                   Open        High         Low       Close   Adj Close  \
Date                                                                     
1995-01-03  459.209991  459.269989  457.200012  459.109985  459.109985   

               Volume  
Date                   
1995-01-03  262450000  
2020-05-26 08:04:50.752809 
Last sample:
2020-05-26 08:04:50.753572                   Open         High          Low        Close    Adj Close  \
Date                                                                         
2020-04-09  2776.98999  2818.570068  2762.360107  2789.820068  2789.820068   

                Volume  
Date                    
2020-04-09  7880140000  
2020-05-26 08:04:50.768708 datasets/train.csv


In [41]:
#
# Upload dataset(s) to S3
# Dataset retrieved from:
#   https://finance.yahoo.com/quote/%5EGSPC/history?period1=788936400&period2=1564545600&interval=1mo&filter=history&frequency=1mo
# NOTE(review): the URL above requests monthly data (interval=1mo) while DATASET
#  currently points at a daily file — confirm the source of the daily dataset.
#

from data_processing.upload import Upload
from sagemaker.local import LocalSession

UPLOAD = Upload(LOGGER)

# No explicit session is handed to the estimator factory in the next cell.
# A local-mode branch used to live here (file:// dataset paths plus a LocalSession
# configured with {'local': {'local_code': True}}, and UPLOAD.sagemaker_session for
# the AWS case); it is currently disabled, so the datasets are uploaded to S3 even
# when INSTANCE_TYPE is 'local'.
sagemaker_session = None

# This cell overwrites the local dataset paths with the values returned by the upload.
# Skip the upload when both paths already are S3 URIs, so that re-running the cell
# does not feed S3 URIs back in as if they were local files.
already_uploaded = all(
    str(path).startswith("s3://")
    for path in (train_dataset_path, test_dataset_path)
)

if already_uploaded:
    LOGGER.log("Datasets already uploaded [{}] [{}], skipping upload".format(
        train_dataset_path, test_dataset_path))
else:
    train_dataset_path, test_dataset_path = UPLOAD.upload_to_sagemaker_s3_bucket(
        MODEL_NAME, train_dataset_path, test_dataset_path)

2020-05-26 08:08:26.467392 Data will be uploaded to: 
2020-05-26 08:08:26.696060 Uploading file to s3://sagemaker-us-east-1-941048668662/giia/train/train.csv
2020-05-26 08:08:28.644484 Uploading file to s3://sagemaker-us-east-1-941048668662/giia/test/test.csv
2020-05-26 08:08:29.855871 Date,Open,High,Low,Close,Adj Close,Volume
...


In [44]:
#
# Configure sagemaker and estimator
#

from ml.train import Train

TRAIN = Train(LOGGER)

# Show which training input location is about to be used.
print(train_dataset_path)

# Build the estimator for the configured role / instance type / session ...
estimator = TRAIN.create_model(
    SM_ROLE,
    INSTANCE_TYPE,
    sagemaker_session,
)

# ... and fit it on the train and test datasets.
TRAIN.fit_model(
    estimator,
    train_dataset_path,
    test_dataset_path,
)

s3://sagemaker-us-east-1-941048668662/giia/train
Creating tmpa55gic40_algo-1-cg1ka_1 ... 
[1BAttaching to tmpa55gic40_algo-1-cg1ka_12mdone[0m
[36malgo-1-cg1ka_1  |[0m 2020-05-26 12:20:59,247 sagemaker-containers INFO     Imported framework sagemaker_mxnet_container.training
[36malgo-1-cg1ka_1  |[0m 2020-05-26 12:20:59,259 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)
[36malgo-1-cg1ka_1  |[0m 2020-05-26 12:20:59,273 sagemaker_mxnet_container.training INFO     MXNet training environment: {'SM_HOSTS': '["algo-1-cg1ka"]', 'SM_NETWORK_INTERFACE_NAME': 'eth0', 'SM_HPS': '{"dropout_rate":0.02,"epochs":6,"num_layers":4,"prediction_length":13}', 'SM_USER_ENTRY_POINT': 'ml/train.py', 'SM_FRAMEWORK_PARAMS': '{}', 'SM_RESOURCE_CONFIG': '{"current_host":"algo-1-cg1ka","hosts":["algo-1-cg1ka"]}', 'SM_INPUT_DATA_CONFIG': '{"test":{"TrainingInputMode":"File"},"train":{"TrainingInputMode":"File"}}', 'SM_OUTPUT_DATA_DIR': '/opt/ml/output/data', 'SM_CHANNELS': '[

In [None]:
#
# NOTE: FURTHER CELLS ARE COMPATIBLE WITH AWS SAGEMAKER ONLY, LOCAL MODE WILL NOT WORK
# Hyperparameter tune the model
#

from ml.tune import Tune

# Enforce the note above: stop with a clear message instead of letting the tuning
# calls fail later when the notebook is configured for local mode.
if IS_LOCAL:
    raise RuntimeError(
        "Hyperparameter tuning is only supported on AWS SageMaker; "
        "set INSTANCE_TYPE = AWS_INSTANCE and re-run the notebook "
        "(current INSTANCE_TYPE [{}])".format(INSTANCE_TYPE))

TUNE = Tune(LOGGER)

# Create a tuner around the estimator from the training cell and start the tuning job
# on the same train/test datasets; the returned job name is kept for reference.
tuner = TUNE.create_tuner(estimator)
tuning_job_name = TUNE.fit(tuner, train_dataset_path, test_dataset_path)

In [None]:
#
# Get updates for the hyperparameter tuning job. Ensure the job has completed before
# going to the next cell.
# NOTE(review): whether this call blocks until completion or only reports the current
#  status is not visible from this notebook — confirm in ml/tune.
#

TUNE.get_tune_job_update()

In [None]:
#
# Evaluate the metrics of the tuning job.
# Uses the TUNE helper created in the tuning cell; run only after the tuning job has completed.
#

TUNE.report_job_analytics()
