In [58]:
# Define IAM role and the SageMaker/boto3 handles used by the rest of the notebook
import boto3
import re
import sagemaker  # needed here: sagemaker.Session() is called below, before the general imports cell runs
from sagemaker import get_execution_role

region = boto3.Session().region_name

session = sagemaker.Session()
bucket = 'btc-alt-daily-ohlcv'   # bucket the raw training CSV is read from
prefix = 'sagemaker/autopilot'   # key prefix for the uploaded train/test files and the job output

# Execution role; passed as RoleArn when the AutoML job is created.
role = get_execution_role()

# Low-level SageMaker client used to create and describe the AutoML job.
sm = boto3.Session().client(service_name='sagemaker',region_name=region)

In [59]:
import numpy as np                                # For matrix operations and numerical processing
import pandas as pd                               # For munging tabular data
import matplotlib.pyplot as plt                   # For charts and visualizations
from IPython.display import Image                 # For displaying images in the notebook
from IPython.display import display               # For displaying outputs in the notebook
from time import gmtime, strftime                 # For labeling SageMaker models, endpoints, etc.
import sys                                        # For writing outputs to notebook
import math                                       # For ceiling function
import json                                       # For parsing hosting outputs
import os                                         # For manipulating filepath names
import sagemaker                                  # Amazon SageMaker's Python SDK provides many helper functions
from sagemaker.predictor import csv_serializer    # Converts strings for HTTP POST requests on inference

In [60]:
# Read the training CSV named by data_key from S3, then discard its 'idx' column.
data_key = 'ltc-daily-train.csv'
data_location = f's3://{bucket}/{data_key}'

data = pd.read_csv(data_location)
data = data.drop(columns='idx')

# Display settings: show every column, but cap rendered rows so output stays on one page.
pd.options.display.max_columns = 500
pd.options.display.max_rows = 10

In [61]:
# Preview the loaded frame; the display.max_rows=10 option set above truncates the rendering.
data

Unnamed: 0,T,Open,High,Low,Close,Volume BTC,Volume USD,BV,C,H,L,O,V,P
0,2014-03-19,619.78,633.66,612.22,620.00,4.86,3.013790e+03,0.216336,0.029000,0.031500,0.029000,0.031500,7.201305,0.00
1,2014-03-20,620.00,625.02,590.00,590.00,8.49,5.127290e+03,0.105921,0.022600,0.022600,0.022600,0.022600,4.686776,0.25
2,2014-03-21,590.00,615.23,580.00,590.00,16.27,9.667690e+03,0.054864,0.026000,0.026000,0.022500,0.022500,2.147940,0.00
3,2014-03-22,590.00,596.00,554.05,572.30,11.84,6.716210e+03,0.777287,0.026000,0.029000,0.025600,0.029000,29.579544,0.00
4,2014-03-23,572.30,596.00,566.36,574.22,1.31,7.587000e+02,0.109937,0.026300,0.030000,0.026000,0.026000,4.098219,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2065,2019-11-25,6908.36,7380.00,6515.00,7122.14,18716.60,1.293666e+08,40.729751,0.006415,0.006761,0.006305,0.006370,6211.986042,0.00
2066,2019-11-26,7122.14,7344.91,7018.78,7159.22,7643.85,5.456624e+07,10.312589,0.006550,0.006600,0.006397,0.006400,1579.469402,0.00
2067,2019-11-27,7159.22,7676.27,6847.72,7527.84,15156.99,1.103491e+08,18.368477,0.006360,0.006555,0.006320,0.006555,2853.567822,0.00
2068,2019-11-28,7527.84,7659.92,7372.19,7436.72,6925.58,5.209053e+07,23.064838,0.006300,0.006380,0.006234,0.006346,3662.899037,0.00


In [62]:
# Relative frequency of each value of the target column 'P', as a one-column frame.
target_column = data['P']
counts = target_column.value_counts(normalize=True).to_frame()

We want to see the relative frequency of each target value, in case the model simply predicts 0.00 for everything in order to achieve a "high" accuracy.

In [63]:
# Show the share of rows carrying each target value 'P' across the whole dataset.
counts

Unnamed: 0,P
0.0,0.771014
0.25,0.109179
1.0,0.057005
0.5,0.043961
0.75,0.018841


In [64]:
# Positional split: the first 1600 rows are used for training, the remainder for testing.
train_data = data.iloc[:1600]
test_data = data.iloc[1600:]

# Copy of the test split without the target column 'P'.
test_data_no_target = test_data.drop('P', axis=1)

In [65]:
# Training split: written locally with its header row, then uploaded under <prefix>/train.
train_file = 'train_data.csv'
train_data.to_csv(train_file, header=True, index=False)
train_data_s3_path = session.upload_data(path=train_file, key_prefix=f'{prefix}/train')
print(f'Train data uploaded to: {train_data_s3_path}')

# Test split: target column already removed, written without a header row, uploaded under <prefix>/test.
test_file = 'test_data.csv'
test_data_no_target.to_csv(test_file, header=False, index=False)
test_data_s3_path = session.upload_data(path=test_file, key_prefix=f'{prefix}/test')
print(f'Test data uploaded to: {test_data_s3_path}')

Train data uploaded to: s3://sagemaker-us-east-1-575764779739/sagemaker/autopilot/train/train_data.csv
Test data uploaded to: s3://sagemaker-us-east-1-575764779739/sagemaker/autopilot/test/test_data.csv


In [68]:
# NOTE: from this cell on, `bucket` is the session's default bucket and no longer
# the bucket name assigned in the setup cell.
bucket = session.default_bucket()

# Input channel: everything under <prefix>/train, with column 'P' as the target.
input_data_config = [
    {
        'DataSource': {
            'S3DataSource': {
                'S3DataType': 'S3Prefix',
                'S3Uri': f's3://{bucket}/{prefix}/train',
            },
        },
        'TargetAttributeName': 'P',
    },
]

# Location where the job's output is written.
output_data_config = {'S3OutputPath': f's3://{bucket}/{prefix}/output'}

In [69]:
from time import gmtime, sleep, strftime

# Job name = fixed prefix + UTC day-hour-minute-second stamp.
timestamp_suffix = strftime('%d-%H-%M-%S', gmtime())
auto_ml_job_name = f'automl-alt-predict-{timestamp_suffix}'
print(f'AutoMLJobName: {auto_ml_job_name}')

# Submit the AutoML job; the API response is the cell's displayed output.
sm.create_auto_ml_job(
    AutoMLJobName=auto_ml_job_name,
    InputDataConfig=input_data_config,
    OutputDataConfig=output_data_config,
    RoleArn=role,
)

AutoMLJobName: automl-alt-predict-16-05-07-25


{'AutoMLJobArn': 'arn:aws:sagemaker:us-east-1:575764779739:automl-job/automl-alt-predict-16-05-07-25',
 'ResponseMetadata': {'RequestId': 'eacad778-0d26-4a9d-93c8-dab3d19d4b2c',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'eacad778-0d26-4a9d-93c8-dab3d19d4b2c',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '101',
   'date': 'Thu, 16 Jan 2020 05:07:25 GMT'},
  'RetryAttempts': 0}}

In [None]:

# Poll the AutoML job every 30 seconds until it reaches a terminal status,
# printing "<status> - <secondary status>" once per poll.
print('JobStatus - Secondary Status')
print('------------------------------')

while True:
    describe_response = sm.describe_auto_ml_job(AutoMLJobName=auto_ml_job_name)
    job_run_status = describe_response['AutoMLJobStatus']
    print(job_run_status + " - " + describe_response['AutoMLJobSecondaryStatus'])

    # Check before sleeping so a finished job does not cost an extra 30 s wait,
    # and so each poll is printed exactly once.
    if job_run_status in ('Failed', 'Completed', 'Stopped'):
        break
    sleep(30)

# Surface the failure reason, when the service reports one, instead of leaving
# the reader with only the bare status line.
if job_run_status == 'Failed':
    print('FailureReason: ' + str(describe_response.get('FailureReason', 'not reported')))

JobStatus - Secondary Status
------------------------------
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - AnalyzingData
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - FeatureEngineering
InProgress - 

In [71]:
# Fetch the job description again and keep its 'BestCandidate' entry.
best_candidate = sm.describe_auto_ml_job(
    AutoMLJobName=auto_ml_job_name
)['BestCandidate']
best_candidate_name = best_candidate['CandidateName']

# Raw candidate dict followed by two blank lines, then a short summary.
print(best_candidate, end='\n\n\n')
print(f"CandidateName: {best_candidate_name}")
print(f"FinalAutoMLJobObjectiveMetricName: {best_candidate['FinalAutoMLJobObjectiveMetric']['MetricName']}")
print(f"FinalAutoMLJobObjectiveMetricValue: {best_candidate['FinalAutoMLJobObjectiveMetric']['Value']}")

{'CandidateName': 'tuning-job-1-a694ed9e07db43e6b7-236-9e0f2e8a', 'FinalAutoMLJobObjectiveMetric': {'MetricName': 'validation:accuracy', 'Value': 0.9440990090370178}, 'ObjectiveStatus': 'Succeeded', 'CandidateSteps': [{'CandidateStepType': 'AWS::SageMaker::ProcessingJob', 'CandidateStepArn': 'arn:aws:sagemaker:us-east-1:575764779739:processing-job/db-1-16ecf0ce001f49e6ad14c0388fbc61de71e389d143504951a7f4ad5e86', 'CandidateStepName': 'db-1-16ecf0ce001f49e6ad14c0388fbc61de71e389d143504951a7f4ad5e86'}, {'CandidateStepType': 'AWS::SageMaker::TrainingJob', 'CandidateStepArn': 'arn:aws:sagemaker:us-east-1:575764779739:training-job/automl-alt-dpp2-1-46df3f7948a949e7a70d2c6b3f8e9d0647f38bb127084', 'CandidateStepName': 'automl-alt-dpp2-1-46df3f7948a949e7a70d2c6b3f8e9d0647f38bb127084'}, {'CandidateStepType': 'AWS::SageMaker::TransformJob', 'CandidateStepArn': 'arn:aws:sagemaker:us-east-1:575764779739:transform-job/automl-alt-dpp2-csv-1-bfc99e82f9084f0cbdfb321c64d1765f0c6e18fab', 'CandidateStepNa

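## 94.4% accuracy!!!!!??

The best candidate reports a `validation:accuracy` of about 0.944. That is surprisingly high given that roughly 77% of all target values are 0.00, so the class balance of the train and test splits is inspected next.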

In [72]:
# Relative frequency of each target value 'P', computed separately for the train and test splits.
train_counts, test_counts = (
    split['P'].value_counts(normalize=True).to_frame()
    for split in (train_data, test_data)
)

In [75]:
# Show the share of each target value 'P' within the training split.
train_counts

Unnamed: 0,P
0.0,0.77375
0.25,0.08125
1.0,0.07375
0.5,0.046875
0.75,0.024375


In [74]:
# Show the share of each target value 'P' within the test split, for comparison with train_counts.
test_counts

Unnamed: 0,P
0.0,0.761702
0.25,0.204255
0.5,0.034043
