In [1]:
#
# Setup constants
#

import subprocess

MODEL_NAME = "giia"
MODEL_VERSION = "0.3.3"
# Identifier combining model name and version; printed so it is visible in the cell output
MODEL_ID = f"{MODEL_NAME}-{MODEL_VERSION}"
print(MODEL_ID)

DATASET = "../freqtrade/user_data/data/binance/ETH_BTC-5m.json"
SRC_DATASET_DIR = "../freqtrade/user_data/data/binance"
SM_ROLE ='arn:aws:iam::941048668662:role/service-role/AmazonSageMaker-ExecutionRole-20191206T145896'

AWS_INSTANCE = 'ml.m5.large'
LOCAL_INSTANCE = 'local'
try:
    # A zero exit status from nvidia-smi is treated as "GPU computation is available"
    if subprocess.call('nvidia-smi') == 0:
        LOCAL_INSTANCE = 'local_gpu'
except OSError:
    # OSError (e.g. FileNotFoundError) is what subprocess raises when the binary cannot be launched. Catching only
    #  that keeps the CPU fallback without also swallowing KeyboardInterrupt or unrelated programming errors.
    print("The nvidia-smi binary was not found and thus GPU computation is not supported. Using the default CPU "
          "computation")

# Change this to your desired instance type
INSTANCE_TYPE = LOCAL_INSTANCE
# Either local-mode instance type counts as local. Comparing against LOCAL_INSTANCE alone would report False when
#  'local' is chosen explicitly on a machine where the GPU probe above selected 'local_gpu'.
IS_LOCAL = INSTANCE_TYPE in ('local', 'local_gpu')

MODULE_PATH = ""

giia-0.3.3
The nvidia-smi binary was not found and thus GPU computation is not supported. Using the default CPU computation


In [2]:
#
# Initialization
#

import os
import sys
import cache_magic
from pathlib import Path

# Set notebook's src module path. Note that you may have to update your IDE's project settings to do the same for the
#  local library imports to work the same.
# The expression yields the parent of the kernel's current directory. It is assigned through the %cache magic; the
#  recorded output of this cell shows a previously pickled value being loaded for MODULE_PATH.
# NOTE(review): presumably a cached value takes precedence over re-evaluating the expression, so a stale cache would
#  keep pointing at an old location if the project is moved -- confirm against the cache_magic documentation
%cache MODULE_PATH = os.path.dirname(Path().resolve())
sys.path.append(MODULE_PATH)

# Keep paths consistent throughout notebook: from here on the working directory is MODULE_PATH
os.chdir(MODULE_PATH)

# Place all local artifacts in a disposable, git-ignored directory: an "out" directory next to the working directory
#  set by the chdir above. It is created on demand and reused if it already exists.
local_artifact_dir = Path(os.getcwd()).parent / "out"
local_artifact_dir.mkdir(parents=True, exist_ok=True)

# Autoreload imports at the beginning of cell execution.
#  https://ipython.org/ipython-doc/3/config/extensions/autoreload.html
%load_ext autoreload
%autoreload 2

# Project-local imports; presumably resolved through the MODULE_PATH entry appended to sys.path above
from utils.logger_util import LoggerUtil
from utils.utils import Utils

# Notebook-wide helpers reused by the later cells. The logger is handed the model id and a "logs" path under the
#  local artifact directory.
LOGGER = LoggerUtil(MODEL_ID, local_artifact_dir / "logs")
UTILS = Utils(LOGGER)

# The recorded output shows this logging the working directory, the MXNet version and the GPU count
UTILS.describe_env()

%cache magic is now registered in ipython
loading cached value for variable 'MODULE_PATH'. Time since pickling  51 days, 9:10:31.054788
2021-03-14 21:21:36.548124 Background logger started
2021-03-14 21:21:36.548711 Current working directory [/Users/jbeckman/projects/capia/src]
2021-03-14 21:21:36.548886 The MXNet version is [1.6.0]
2021-03-14 21:21:36.549064 The GPU count is [0]


In [9]:
#
# Parse dataset
#

from data_processing.parse import Parse

PARSE = Parse(LOGGER)

# Generated datasets are kept under the disposable local artifact directory
dataset_dir_path = local_artifact_dir.joinpath("datasets")

# Produce the train and test dataset CSVs from the source dataset directory
PARSE.split_train_test_dataset(
    Path(SRC_DATASET_DIR),
    dataset_dir_path,
)

2021-03-14 21:30:26.320264 First sample:


AttributeError: 'NoneType' object has no attribute 'info'

In [None]:
#
# Setup local/aws environment. If aws, upload the datasets to S3
#

from data_processing.upload import Upload
from sagemaker import LocalSession

UPLOAD = Upload(LOGGER, MODEL_ID)

sagemaker_session = None

if not IS_LOCAL:
    # AWS mode: reuse the uploader's session and push both dataset files to S3
    sagemaker_session = UPLOAD.sagemaker_session

    for dataset_filename in (PARSE.TRAIN_DATASET_FILENAME, PARSE.TEST_DATASET_FILENAME):
        UPLOAD.upload_to_sagemaker_s3_bucket(dataset_dir_path, dataset_filename)
    dataset_dir_uri = UPLOAD.s3_dataset_dir_uri

    model_output_dir_uri = UPLOAD.s3_model_output_uri
else:
    # Local mode: nothing is uploaded; datasets and model output are addressed with file:// URIs
    sagemaker_session = LocalSession()
    sagemaker_session.config = {'local': {'local_code': True}}

    LOGGER.log("Notebook is set to local mode, not uploading to S3")
    dataset_dir_uri = f"file://{dataset_dir_path}"

    # Model output goes into a "models" directory under the local artifact directory
    model_output_dir_path = local_artifact_dir.joinpath("models")
    model_output_dir_path.mkdir(parents=True, exist_ok=True)
    model_output_dir_uri = f"file://{model_output_dir_path}"

In [None]:
#
# Configure sagemaker and estimator
#

from ml.train import Train

TRAIN = Train(LOGGER)

# Create the estimator for the selected role, instance type, output location and session, then fit it on the
#  dataset directory
estimator = TRAIN.create_model(
    SM_ROLE,
    INSTANCE_TYPE,
    model_output_dir_uri,
    sagemaker_session,
)
TRAIN.fit_model(estimator, dataset_dir_uri)

In [None]:
#
# Graph some prediction test results
#

import matplotlib.pyplot as plt
from gluonts.model.predictor import Predictor

#TODO: Should really use deepar.model_fn
# Load the predictor from the local filesystem path. The URI variant (model_output_dir_uri) carries a "file://"
#  scheme, and wrapping that string in Path() yields a relative path starting with "file:" that does not exist.
# Note that model_output_dir_path is only assigned by the local-mode branch of the environment setup cell.
model_path = model_output_dir_path
print(model_path)
predictor = Predictor.deserialize(model_path)

def plot_prob_forecasts(ts_entry, forecast_entry, plot_length=150, prediction_intervals=(50.0, 90.0)):
    """Plot the tail of an observed series together with its probabilistic forecast.

    :param ts_entry: Observed series. It is sliced with ``[-plot_length:]`` and the result must expose
        ``plot(ax=...)`` (presumably a pandas Series/DataFrame -- confirm against the caller).
    :param forecast_entry: Forecast object exposing ``plot(prediction_intervals=..., color=...)``.
    :param plot_length: Number of trailing observations to draw. Defaults to 150.
    :param prediction_intervals: Percentages of the prediction intervals to draw. Defaults to ``(50.0, 90.0)``.
    :return: None. The figure is rendered with ``plt.show()``.
    """
    # Materialize once so any iterable (list, generator) can be used for both the legend and the forecast plot
    prediction_intervals = tuple(prediction_intervals)

    # Interval labels appear in the reverse of the order the intervals were given
    interval_labels = [f"{k}% prediction interval" for k in reversed(prediction_intervals)]
    legend = ["observations", "median prediction"] + interval_labels

    _, ax = plt.subplots(1, 1, figsize=(10, 7))
    ts_entry[-plot_length:].plot(ax=ax)  # plot the time series
    forecast_entry.plot(prediction_intervals=prediction_intervals, color='g')
    plt.grid(which="both")
    plt.legend(legend, loc="upper left")
    plt.show()

# NOTE(review): `tss` and `forecasts` are not assigned in any cell visible in this notebook, so on a fresh kernel
#  this line presumably raises NameError -- confirm where the observed series and forecasts are meant to come from
plot_prob_forecasts(tss[0], forecasts[0])

In [None]:
#
# NOTE: FURTHER CELLS ARE COMPATIBLE WITH AWS SAGEMAKER ONLY, LOCAL MODE WILL NOT WORK
# Hyperparameter tune the model
#

from ml.tune import Tune

TUNE = Tune(LOGGER)

# Address each dataset file by appending its filename to the dataset directory URI
train_dataset_uri, test_dataset_uri = (
    f"{dataset_dir_uri}/{filename}"
    for filename in (PARSE.TRAIN_DATASET_FILENAME, PARSE.TEST_DATASET_FILENAME)
)

# Wrap the existing estimator in a tuner and start the tuning run on both datasets
tuner = TUNE.create_tuner(estimator)
TUNE.fit_tuner(
    tuner,
    train_dataset_uri,
    test_dataset_uri,
)

In [None]:
#
# Get updates for Hyperparameter tune job. Ensure this is completed before going to the next cell
# Relies on the TUNE object created in the hyperparameter tuning cell. As the last expression of the cell, any
#  value this call returns is rendered as the cell output.
#

TUNE.get_tune_job_update()

In [None]:
#
# Evaluate the metrics of the tune job
# Relies on the TUNE object created in the hyperparameter tuning cell. As the last expression of the cell, any
#  value this call returns is rendered as the cell output.
#

TUNE.report_job_analytics()
