#### Imports 

In [1]:
from sagemaker import get_execution_role
from time import gmtime, strftime
import pandas as pd
import sagemaker
import boto3
import time

#### Essentials

In [2]:
bucket = sagemaker.Session().default_bucket()
prefix = 'loan-default-prediction'
region = 'us-east-1'

In [3]:
batch_input = f's3://{bucket}/{prefix}/batch_input/'
batch_input

's3://sagemaker-us-east-1-892313895307/loan-default-prediction/batch_input/'

In [4]:
batch_output = f's3://{bucket}/{prefix}/batch_output/'
batch_output

's3://sagemaker-us-east-1-892313895307/loan-default-prediction/batch_output/'

In [5]:
current_timestamp = strftime('%Y-%m-%d-%H-%M-%S', gmtime())

In [6]:
automl_job_name = 'LOAN-DEFAULT-PREDICTION-3'  # Copy this from the console
model_name = f'autopilot-best-model-{current_timestamp}'
transform_job_name = f'autopilot-batch-job-{current_timestamp}'

In [7]:
session = boto3.Session()
sagemaker_execution_role = get_execution_role()
sagemaker_session = sagemaker.session.Session()
sagemaker_client = boto3.client('sagemaker', region_name=region)
s3_client = boto3.client('s3')

Couldn't call 'get_role' to get Role ARN from role name AmazonSageMaker-ExecutionRole-20210527T121473 to get Role path.
Assuming role was created in SageMaker AWS console, as the name contains `AmazonSageMaker-ExecutionRole`. Defaulting to Role ARN with service-role in path. If this Role ARN is incorrect, please add IAM read permissions to your role or supply the Role Arn directly.


#### Copy batch input data from local to S3

In [8]:
!aws s3 cp ./data/train/loans_unlabeled.csv {batch_input}

upload: data/train/loans_unlabeled.csv to s3://sagemaker-us-east-1-892313895307/loan-default-prediction/batch_input/loans_unlabeled.csv


#### Get the best model using Autopilot job name

In [9]:
best_candidate = sagemaker_client.describe_auto_ml_job(AutoMLJobName=automl_job_name)["BestCandidate"]
best_candidate_name = best_candidate["CandidateName"]
print(f"CandidateName: {best_candidate_name}")
print(f'FinalAutoMLJobObjectiveMetricName: {best_candidate["FinalAutoMLJobObjectiveMetric"]["MetricName"]}')
print(f'FinalAutoMLJobObjectiveMetricValue: {best_candidate["FinalAutoMLJobObjectiveMetric"]["Value"]}')

CandidateName: tuning-job-1-fe12f12c5a7642a59c-199-88a01bbf
FinalAutoMLJobObjectiveMetricName: validation:f1
FinalAutoMLJobObjectiveMetricValue: 0.6585800051689148


In [10]:
model = sagemaker_client.create_model(Containers=best_candidate["InferenceContainers"], 
                                      ModelName=model_name, 
                                      ExecutionRoleArn=sagemaker_execution_role)

print(f'Model ARN corresponding to the best candidate is : {model["ModelArn"]}')

Model ARN corresponding to the best candidate is : arn:aws:sagemaker:us-east-1:892313895307:model/autopilot-best-model-2021-06-28-19-59-36


#### Create Batch Transform job 

In [11]:
transform_input = {
    "DataSource": {"S3DataSource": {"S3DataType": "S3Prefix", "S3Uri": batch_input}},
    "ContentType": "text/csv",
    "CompressionType": "None",
    "SplitType": "Line",
}

transform_output = {
    "S3OutputPath": batch_output,
}

transform_resources = {"InstanceType": "ml.m5.4xlarge", "InstanceCount": 1}

sagemaker_client.create_transform_job(
    TransformJobName=transform_job_name,
    ModelName=model_name,
    TransformInput=transform_input,
    TransformOutput=transform_output,
    TransformResources=transform_resources,
)

{'TransformJobArn': 'arn:aws:sagemaker:us-east-1:892313895307:transform-job/autopilot-batch-job-2021-06-28-19-59-36',
 'ResponseMetadata': {'RequestId': '405c4a3c-9a52-4374-ac6e-f6ded900e4c0',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '405c4a3c-9a52-4374-ac6e-f6ded900e4c0',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '116',
   'date': 'Mon, 28 Jun 2021 19:59:49 GMT'},
  'RetryAttempts': 0}}

#### Check the status of the running job

In [14]:
print("[JobStatus]\n")


describe_response = sagemaker_client.describe_transform_job(TransformJobName=transform_job_name)
job_run_status = describe_response["TransformJobStatus"]
print(job_run_status)

while job_run_status not in ("Failed", "Completed", "Stopped"):
    describe_response = sagemaker_client.describe_transform_job(TransformJobName=transform_job_name)
    job_run_status = describe_response["TransformJobStatus"]
    print(job_run_status)
    time.sleep(30)

[JobStatus]

InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
InProgress
Completed


#### Download the output of the batch transform job from S3 to local

In [21]:
s3_output_key = f"{prefix}/batch_output/loans_unlabeled.csv.out"
local_inference_results_path = "./data/train/inference_results.csv"

s3 = boto3.resource("s3")


inference_results_bucket = s3.Bucket(sagemaker_session.default_bucket())

inference_results_bucket.download_file(s3_output_key, local_inference_results_path)

#### Inspect the results

In [23]:
data = pd.read_csv(local_inference_results_path, sep=";")
pd.set_option("display.max_rows", 10)  
data

Unnamed: 0,1
0,0
1,1
2,1
3,1
4,1
...,...
9994,1
9995,0
9996,0
9997,0
