In [1]:
import pandas as pd
from time import time
import sagemaker
from sagemaker import get_execution_role
import json
import boto3

## Initializations

In [2]:
# One SageMaker session drives everything below: the account ID and the region
# are both read from its underlying boto3 session so the image URI is always
# built from a single, consistent set of credentials/configuration.
sess = sagemaker.Session()
role = get_execution_role()
bucket = 'sagemaker-churns-prediction'
region_name = sess.boto_session.region_name
account = sess.boto_session.client('sts').get_caller_identity()['Account']

# URI of the custom random-forest container image in this account's ECR registry.
image = '{}.dkr.ecr.{}.amazonaws.com/sagemaker-random-forest:latest'.format(account, region_name)
print(f'Account: {account}')
print(f'Role: {role}')
print(f'Image: {image}')

Account: 254464376720
Role: arn:aws:iam::254464376720:role/service-role/AmazonSageMaker-ExecutionRole-20200522T014344
Image: 254464376720.dkr.ecr.us-east-1.amazonaws.com/sagemaker-random-forest:latest


## Batch Transform

In [3]:
# Batch transform results are written under this prefix of the project bucket.
transform_output_folder = "batch-transform-output"
output_path = "s3://{}/{}".format(bucket, transform_output_folder)

# model.tar.gz artifact under the bucket's output/ prefix (presumably written by
# the training job of the same name -- confirm before reusing this notebook).
# Built from `bucket` so the model and the transform output cannot drift apart.
model_path = "s3://{}/{}".format(
    bucket,
    "output/sagemaker-random-forest-2020-06-02-21-36-46-091/output/model.tar.gz",
)
model = sagemaker.model.Model(
    model_data=model_path,
    image=image,  # custom random-forest image in this account's ECR
    role=role,  # execution role obtained from get_execution_role()
)

In [4]:
# Keyword settings for the batch transformer, collected in one place so they
# are easy to scan and tweak before the transformer is created.
transformer_settings = dict(
    instance_count=1,
    instance_type='ml.m4.xlarge',
    output_path=output_path,
    assemble_with='Line',
    accept='text/csv',
)
transformer = model.transformer(**transformer_settings)

In [5]:
# Full S3 URI of the CSV file to score; the bare name on the last line makes
# the notebook display it.
test_data_key = 'input_data/churn_test.csv'
test_data_location = 's3://{}/{}'.format(bucket, test_data_key)
test_data_location

's3://sagemaker-churns-prediction/input_data/churn_test.csv'

In [6]:
# Start the batch transform job on the test CSV, splitting the input into
# records on line boundaries.
transformer.transform(
    test_data_location,
    content_type='text/csv',
    split_type='Line',
)

In [7]:
# Block until the transform job started above finishes.
transformer.wait()

.......................[34mStarting the inference server with 4 workers.[0m
[34m[2020-06-02 21:48:04 +0000] [11] [INFO] Starting gunicorn 19.10.0[0m
[34m[2020-06-02 21:48:04 +0000] [11] [INFO] Listening at: unix:/tmp/gunicorn.sock (11)[0m
[34m[2020-06-02 21:48:04 +0000] [11] [INFO] Using worker: gevent[0m
[34m[2020-06-02 21:48:04 +0000] [16] [INFO] Booting worker with pid: 16[0m
[34m[2020-06-02 21:48:04 +0000] [17] [INFO] Booting worker with pid: 17[0m
[34m[2020-06-02 21:48:04 +0000] [18] [INFO] Booting worker with pid: 18[0m
[34m[2020-06-02 21:48:04 +0000] [19] [INFO] Booting worker with pid: 19[0m
[34m169.254.255.130 - - [02/Jun/2020:21:48:37 +0000] "GET /ping HTTP/1.1" 200 1 "-" "Go-http-client/1.1"[0m
[34m169.254.255.130 - - [02/Jun/2020:21:48:37 +0000] "GET /execution-parameters HTTP/1.1" 404 2 "-" "Go-http-client/1.1"[0m
[34mInvoked with 10000 records[0m
[34m[2.48 5.0 5.0 1.0 0.0 2 True 100.0 0 0 1 1 0][0m
[34m169.254.255.130 - - [02/Jun/2020:21:48:37 +00

## View Output

In [10]:
# Download the transform output object from S3 to a local file.
s3_client = sess.boto_session.client('s3')
output_key = "{}/churn_test.csv.out".format(transform_output_folder)
local_copy = '/tmp/churn_test.csv.out'
s3_client.download_file(bucket, output_key, local_copy)

# Read the output line by line and strip surrounding whitespace from each
# line before loading it into a single-column DataFrame.
with open(local_copy) as f:
    results = f.readlines()
prediction_result = pd.DataFrame({'Transform results': [line.strip() for line in results]})
prediction_result.head(10)

Unnamed: 0,Transform results
0,False
1,False
2,False
3,False
4,False
5,False
6,True
7,False
8,False
9,True
