In [5]:
!pip install boto3 numpy pandas sagemaker



In [None]:
# --- Imports: standard library first, then third-party ----------------------
import os
import re
from time import gmtime, strftime

import boto3
import numpy as np
import pandas as pd
import sagemaker as sage
from sagemaker import get_execution_role

# --- Configuration -----------------------------------------------------------
# Key prefix passed to sess.upload_data() when the training data is uploaded.
prefix = 'heart-sagemaker-demo'

# IAM role as returned by get_execution_role(), and the SageMaker session
# object that the following cells reuse.
role = get_execution_role()
sess = sage.Session()

In [8]:
# Local path handed to sess.upload_data().
WORK_DIRECTORY = 'data'

# Upload under the `prefix` key prefix; the value returned by upload_data()
# is kept in `data_location` for later cells.
data_location = sess.upload_data(
    path=WORK_DIRECTORY,
    key_prefix=prefix,
)

In [9]:
import json

# Parse the JSON document in ../src/params.json into `hyperparams`.
with open('../src/params.json') as params_file:
    hyperparams = json.load(params_file)

In [10]:
# Show the parsed contents of params.json as this cell's output.
hyperparams

{'model_params': {'iterations': 1000,
  'eval_metric': 'PRAUC',
  'early_stopping_rounds': 40},
 'data_params': {'pos_class': 1,
  'cat_features': ['sex',
   'chest-pain-type',
   'fasting-blood-sugar',
   'resting-ecg',
   'exercise-angina',
   'slope',
   'colored-vessels',
   'thal',
   'datetime',
   'postalcode'],
  'target': 'narrowing-diagnosis',
  'metrics': ['roc_auc', 'accuracy', 'lift', 'hosmer_lemeshow']}}

In [11]:
# Add (or overwrite) the top-level 'debug' entry with True.
hyperparams.update({'debug': True})

In [None]:
# Metric definitions that can be used in the training job and later viewed in
# the training analytics output.
#
# Every regex captures the text between "<name>=" and the next ";".
# The terminating ";" is required: a lazy group "(.*?)" with nothing after it
# always matches the empty string, so no value would ever be captured.
metric_definitions = [
    {
        "Name": "roc_auc",
        "Regex": "roc_auc=(.*?);",
    },
    {
        "Name": "accuracy",
        "Regex": "accuracy=(.*?);",
    },
    {
        "Name": "lift",
        "Regex": "lift=(.*?);",
    },
    {
        "Name": "hosmer_lemeshow",
        "Regex": "hosmer_lemeshow=(.*?);",
    },
]

In [24]:
# account = sess.boto_session.client('sts').get_caller_identity()['Account']
# region = sess.boto_session.region_name
# image = '{}.dkr.ecr.{}.amazonaws.com/heart:latest'.format(account, region)

# tree = sage.estimator.Estimator(image,
#                                 role, 1, 'ml.c4.2xlarge',
#                                 output_path="s3://{}/output".format(sess.default_bucket()),
#                                 sagemaker_session=sess,
#                                 tags=[{'Key': 'CostGroup', 'Value': 'DEV-TA-ANALYTICS'}],
#                                 )

# tree.fit(data_location, {'training': data_location})
# # tree.fit(data_location, {'train': data_location})

In [None]:
# tree

In [14]:
from sagemaker.predictor import csv_serializer

# NOTE(review): `tree` is only assigned in the commented-out estimator cell
# above, so this cell fails on a fresh kernel unless that cell is restored.
predictor = tree.deploy(
    initial_instance_count=1,
    instance_type='ml.m4.xlarge',
    serializer=csv_serializer,
)

-------------!

In [20]:
# Re-attach to an existing training job by name instead of training again.
estimator = sage.estimator.Estimator.attach(
    training_job_name='heart-release-demo-2021-02-11-09-03-25-738',
)

# S3 URI (session default bucket + 'po123-out') passed to the transformer below.
output_path = "s3://{}/{}".format(
    sess.default_bucket(),
    'po123-out',
)


2021-02-11 09:08:13 Starting - Preparing the instances for training
2021-02-11 09:08:13 Downloading - Downloading input data
2021-02-11 09:08:13 Training - Training image download completed. Training in progress.
2021-02-11 09:08:13 Uploading - Uploading generated training model
2021-02-11 09:08:13 Completed - Training job completed


In [21]:
# Build a batch-transform object from the attached estimator; results are
# written under `output_path`.
transformer_options = dict(
    instance_count=1,
    instance_type='ml.m4.xlarge',
    output_path=output_path,
    assemble_with='Line',
    accept='text/csv',
)
transformer = estimator.transformer(**transformer_options)

In [22]:
# Upload the local 'data' path under the 'p123' key prefix.
# This rebinds `data_location`, which the earlier upload cell also assigns.
data_location = sess.upload_data(
    path='data',
    key_prefix='p123',
)

In [23]:
# Start the batch transform on the uploaded input and block until it is done.
# NOTE(review): input_filter '$[:-1]' presumably drops the last CSV column
# (the label) before inference; confirm against the input file layout.
transformer.transform(
    data=data_location,
    content_type='text/csv',
    split_type='Line',
    input_filter='$[:-1]',
    logs=True,
)
transformer.wait()

...........................
[32m2021-02-11T12:08:58.018:[sagemaker logs]: MaxConcurrentTransforms=1, MaxPayloadInMB=6, BatchStrategy=MULTI_RECORD[0m
[34mStarting the inference server with 4 workers.[0m
[34m2021/02/11 12:08:57 [crit] 10#10: *1 connect() to unix:/tmp/gunicorn.sock failed (2: No such file or directory) while connecting to upstream, client: 169.254.255.130, server: , request: "GET /ping HTTP/1.1", upstream: "http://unix:/tmp/gunicorn.sock:/ping", host: "169.254.255.131:8080"[0m
[34m169.254.255.130 - - [11/Feb/2021:12:08:57 +0000] "GET /ping HTTP/1.1" 502 173 "-" "Go-http-client/1.1"[0m
[34m169.254.255.130 - - [11/Feb/2021:12:08:57 +0000] "GET /ping HTTP/1.1" 502 173 "-" "Go-http-client/1.1"[0m
[34m2021/02/11 12:08:57 [crit] 10#10: *3 connect() to unix:/tmp/gunicorn.sock failed (2: No such file or directory) while connecting to upstream, client: 169.254.255.130, server: , request: "GET /ping HTTP/1.1", upstream: "http://unix:/tmp/gunicorn.sock:/ping", host: "169.

In [47]:
# Download the transform output object from the session's default bucket to a
# local file, then print its contents.
# NOTE(review): `transform_output_folder` is not assigned in any visible cell;
# confirm where it is defined before relying on a fresh-kernel run.
s3_client = sess.boto_session.client('s3')
bucket_name = sess.default_bucket()
output_key = "{}/payload.csv.out".format(transform_output_folder)
local_copy = '/tmp/payload.csv.out'
s3_client.download_file(bucket_name, output_key, local_copy)

with open(local_copy) as f:
    results = list(f)  # one list entry per line, newline characters kept

print("Transform results: \n{}".format(''.join(results)))

Transform results: 
1
1
1
1
0
1
1
0
1
1
0
0
0
0
0
1
0
1
0
1
0
0
1
0
1
1
1
1
0
0
1
0
1
1
0
0
0
0
1
0
1
1
0
1
0
1
1
1
1
0
0
0
0
0
1
0
0
0
0
0
0

