In [9]:
#
# Setup constants
#

import subprocess

# Name and version of the model; later cells join them as "<name>-<version>" for the
# logger and for the S3 upload.
MODEL_NAME = 'giia'
MODEL_VERSION = '0.3.3'

# Source CSV handed to the parser. The monthly file is kept as a commented-out alternative.
# DATASET = "datasets/SandP_1995_2019_monthly.csv"
DATASET = 'datasets/SandP_1995_2020_daily.csv'

# IAM role ARN passed to the estimator factory (split across two literals for line length only).
SM_ROLE = (
    'arn:aws:iam::941048668662:role/service-role/'
    'AmazonSageMaker-ExecutionRole-20191206T145896'
)

# Candidate value for INSTANCE_TYPE when the notebook should not run in local mode.
AWS_INSTANCE = 'ml.m5.large'

# Local-mode instance type. Starts as CPU-only and is switched to 'local_gpu' only when
# `nvidia-smi` can be launched and exits with status 0.
LOCAL_INSTANCE = 'local'
try:
    if subprocess.call('nvidia-smi') == 0:
        LOCAL_INSTANCE = 'local_gpu'
except OSError:
    # A missing or non-executable binary surfaces as an OSError subclass (e.g. FileNotFoundError).
    # Catching only that keeps KeyboardInterrupt and genuine bugs from being silently swallowed.
    print("The nvidia-smi binary was not found and thus GPU computation is not supported. Using the default CPU "
          "computation")

# Change this to your desired instance type
INSTANCE_TYPE = LOCAL_INSTANCE
# True when the selected instance type is the local one detected above.
IS_LOCAL = LOCAL_INSTANCE == INSTANCE_TYPE

# Placeholder only: the initialization cell assigns the real value through `%cache`.
# Re-running this cell afterwards resets MODULE_PATH to "" until that cell is run again.
MODULE_PATH = ""

The nvidia-smi binary was not found and thus GPU computation is not supported. Using the default CPU computation


In [7]:
#
# Initialization
#

import os
import sys
import cache_magic
from pathlib import Path

# Set notebook's src module path. Note that you may have to update your IDE's project settings to do the same for the
#  local library imports to work the same
%cache MODULE_PATH = os.path.dirname(Path().resolve())
sys.path.append(MODULE_PATH)

# Keep paths consistent throughout notebook
os.chdir(MODULE_PATH)

# Autoreload imports at the beginning of cell execution.
#  https://ipython.org/ipython-doc/3/config/extensions/autoreload.html
%load_ext autoreload
%autoreload 2

from utils.logging import LoggerUtil
from utils.utils import Utils

LOGGER = LoggerUtil(f"{MODEL_NAME}-{MODEL_VERSION}")
UTILS = Utils(LOGGER)

LOGGER.log("Current working directory [{}]".format(os.getcwd()))
UTILS.describe_env()

loading cached value for variable 'MODULE_PATH'. Time since pickling  0:00:50.522172
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
2021-01-22 12:11:54.711336 Current working directory [/Users/jbeckman/projects/capia/src]
2021-01-22 12:11:54.711798 1.7.0
2021-01-22 12:11:54.712229 The GPU count is [0]


In [3]:
#
# Parse dataset
#

from data_processing.parse import Parse

# Project-local parser, constructed with the notebook-wide LOGGER.
PARSE = Parse(LOGGER)

# Creates train and test dataset CSVs
# The two returned values are reused by the environment-setup cell: joined onto the working directory in local
# mode, or handed to the S3 upload otherwise. Presumably they are relative CSV paths - confirm in Parse.
train_dataset, test_dataset = PARSE.split_train_test_dataset(DATASET)

2021-01-22 12:11:06.526441 First sample:
2021-01-22 12:11:06.527064                   Open        High         Low       Close   Adj Close  \
Date                                                                     
1995-01-03  459.209991  459.269989  457.200012  459.109985  459.109985   

               Volume  
Date                   
1995-01-03  262450000  
2021-01-22 12:11:06.533394 
Last sample:
2021-01-22 12:11:06.533692                   Open         High          Low        Close    Adj Close  \
Date                                                                         
2020-04-09  2776.98999  2818.570068  2762.360107  2789.820068  2789.820068   

                Volume  
Date                    
2020-04-09  7880140000  
2021-01-22 12:11:06.539092 datasets/train.csv


In [4]:
#
# Setup local/aws environment. If aws, upload the datasets to S3
#

from data_processing.upload import Upload
from sagemaker import LocalSession

# Uploader helper, constructed with the notebook-wide LOGGER.
UPLOAD = Upload(LOGGER)

# Stays None only if neither branch below gets as far as assigning a session.
sagemaker_session = None

if not IS_LOCAL:
    # AWS mode: the uploader returns the dataset locations and also exposes the session to use.
    train_dataset_path, test_dataset_path = UPLOAD.upload_to_sagemaker_s3_bucket(
        f"{MODEL_NAME}-{MODEL_VERSION}", train_dataset, test_dataset
    )
    sagemaker_session = UPLOAD.sagemaker_session
else:
    # Local mode: nothing is uploaded; the datasets are addressed as file:// URLs under the working directory.
    LOGGER.log("Notebook is set to local mode, not uploading to S3")
    train_dataset_path = f"file://{os.getcwd()}/{train_dataset}"
    test_dataset_path = f"file://{os.getcwd()}/{test_dataset}"

    sagemaker_session = LocalSession()
    sagemaker_session.config = {'local': {'local_code': True}}

2021-01-22 12:11:08.404008 Notebook is set to local mode, not uploading to S3


In [5]:
#
# Configure sagemaker and estimator
#

from ml.train import Train

# Project-local training helper, constructed with the notebook-wide LOGGER.
TRAIN = Train(LOGGER)

# Build the estimator from the configured role, instance type and session, then fit it on the train/test dataset
# locations prepared by the environment-setup cell.
# NOTE(review): the last recorded run of this cell failed with
#   "ValueError: Unsupported mxnet version: 1.7.0.post1"
# and the message lists 1.7.0 among the supported versions. Presumably the framework version is chosen inside
# Train.create_model (possibly from the installed mxnet version) - confirm and pass a supported version string.
estimator = TRAIN.create_model(SM_ROLE, INSTANCE_TYPE, sagemaker_session)
TRAIN.fit_model(estimator, train_dataset_path, test_dataset_path)

ValueError: Unsupported mxnet version: 1.7.0.post1. You may need to upgrade your SDK version (pip install -U sagemaker) for newer mxnet versions. Supported mxnet version(s): 0.12.1, 1.0.0, 1.1.0, 1.2.1, 1.3.0, 1.4.0, 1.4.1, 1.6.0, 1.7.0, 1.8.0, 0.12, 1.0, 1.1, 1.2, 1.3, 1.4, 1.6, 1.7, 1.8.

In [None]:
#
# NOTE: FURTHER CELLS ARE COMPATIBLE WITH AWS SAGEMAKER ONLY, LOCAL MODE WILL NOT WORK
# Hyperparameter tune the model
#

from ml.tune import Tune

# Project-local tuning helper, constructed with the notebook-wide LOGGER.
TUNE = Tune(LOGGER)

# The tuner is built from the estimator created in the training cell and fitted on the same train/test dataset
# locations. `tuning_job_name` is not referenced by any other cell visible in this notebook.
tuner = TUNE.create_tuner(estimator)
tuning_job_name = TUNE.fit_tuner(tuner, train_dataset_path, test_dataset_path)

In [None]:
#
# Get updates for Hyperparameter tune job. Ensure this is completed before going to the next cell
#

# Called without arguments; presumably TUNE keeps track of the job started by fit_tuner - confirm in ml.tune.
TUNE.get_tune_job_update()

In [None]:
#
# Evaluate the metrics of the tune job
#

# Called without arguments; presumably reports on the tuning job tracked by TUNE - confirm in ml.tune.
TUNE.report_job_analytics()
