This notebook was developed using the `Python 3 (Data Science)` kernel on an `ml.t3.medium` instance.

In [None]:
import sagemaker
import boto3

# Folder names: `local_prefix` is the local data directory, `prefix` the S3 key prefix.
local_prefix = 'winequality'
prefix = f'sagemaker-studio-book/chapter08/{local_prefix}'

# SageMaker session, execution role and the session's default bucket.
session = sagemaker.Session()
role = sagemaker.get_execution_role()
bucket = session.default_bucket()

# Low-level SageMaker client (used later to poll transform job status).
region = boto3.Session().region_name
sm = boto3.Session().client(service_name='sagemaker', region_name=region)

In [None]:
import pandas as pd
import numpy as np
from time import gmtime, strftime, sleep
import json
import io
from urllib.parse import urlparse
from sklearn.metrics import f1_score, precision_score, classification_report, confusion_matrix

In [None]:
from sagemaker import AutoML

# Attach to the AutoML job with this name so its candidates can be queried below.
automl_job_name = 'white-wine-predict-quality'
automl = AutoML.attach(auto_ml_job_name=automl_job_name)

In [None]:
TOP_N_CANDIDATES = 3

# Request the candidates ordered by final objective metric, highest first,
# limited to TOP_N_CANDIDATES results.
candidates = automl.list_candidates(
    max_results=TOP_N_CANDIDATES,
    sort_by='FinalObjectiveMetricValue',
    sort_order='Descending',
)

for candidate in candidates:
    objective = candidate['FinalAutoMLJobObjectiveMetric']
    print("Candidate name: ", candidate['CandidateName'])
    print("Objective metric name: ", objective['MetricName'])
    print("Objective metric value: ", objective['Value'])
    print('\n')

In [None]:
# Name of the target column (the values to predict).
target_attribute_name = 'quality'

# S3 object fed to the batch transform jobs; presumably the test set with the
# target column removed (going by the file name) -- TODO confirm it is uploaded.
test_file_basename = 'winequality-white-test-notarget.csv'
test_file =  f's3://{bucket}/{prefix}/{test_file_basename}'

# Local test set; the target column is read from it later to score predictions.
test_data = pd.read_csv(f'{local_prefix}/winequality-white-test.csv')

In [None]:
# Response fields requested from each candidate model; passed to
# automl.create_model(inference_response_keys=...) when the models are built.
inference_response_keys = ['predicted_label']

In [None]:
# Root S3 location for inference results; every candidate gets its own sub-folder.
s3_transform_output_path = 's3://{}/{}/inference-results/'.format(bucket, prefix)

transformers = []

for candidate in candidates:
    candidate_name = candidate['CandidateName']
    output_path = s3_transform_output_path + candidate_name + '/'

    # Build a model from the candidate, named after the candidate itself.
    model = automl.create_model(
        name=candidate_name,
        candidate=candidate,
        inference_response_keys=inference_response_keys,
    )

    # One transformer per candidate, writing to that candidate's sub-folder.
    transformers.append(
        model.transformer(
            instance_count=1,
            instance_type='ml.m5.xlarge',
            assemble_with='Line',
            output_path=output_path,
        )
    )

print("Setting up {} Batch Transform Jobs in `transformers`".format(len(transformers)))

In [None]:
# Launch every transform job with wait=False so the loop does not block on each one.
for transformer in transformers:
    transformer.transform(
        data=test_file,
        content_type='text/csv',
        split_type='Line',
        wait=False,
    )
    print("Starting transform job {}".format(transformer._current_job_name))

In [None]:
# Statuses after which a transform job no longer changes state. 'Stopped' must be
# included: without it a manually stopped job would keep this loop polling forever.
TERMINAL_TRANSFORM_STATUSES = ('Completed', 'Failed', 'Stopped')

pending_complete = True

# Poll all jobs until none of them is still in a non-terminal state.
while pending_complete:
    num_transform_jobs = 0  # number of jobs that have not reached a terminal status
    for transformer in transformers:
        desc = sm.describe_transform_job(TransformJobName=transformer._current_job_name)
        if desc['TransformJobStatus'] not in TERMINAL_TRANSFORM_STATUSES:
            num_transform_jobs += 1
    pending_complete = num_transform_jobs > 0
    print("{} out of {} transform jobs are running.".format(num_transform_jobs, len(transformers)))
    # Only wait when there is something left to wait for; skip the final 30s pause.
    if pending_complete:
        sleep(30)

# Report the final status of every job.
for transformer in transformers:
    desc = sm.describe_transform_job(TransformJobName=transformer._current_job_name)
    print("Transform job '{}' finished with status {}".format(transformer._current_job_name, desc['TransformJobStatus']))

In [None]:
def get_csv_from_s3(s3uri, file_name):
    """Download an S3 object and return its content as UTF-8 text.

    Args:
        s3uri: S3 URI of the "folder" holding the object, e.g.
            ``s3://bucket/some/prefix/``. Leading and trailing slashes of the
            path part are ignored; a URI without any path (``s3://bucket``)
            is accepted and addresses the bucket root.
        file_name: Name of the object below that location.

    Returns:
        The object's body decoded as a UTF-8 string.

    Any exception raised by boto3 or by the decoding is not caught here and
    propagates to the caller.
    """
    parsed_url = urlparse(s3uri)
    bucket_name = parsed_url.netloc
    key_prefix = parsed_url.path.strip('/')
    # With an empty prefix, joining with '/' would yield '/file_name', which is
    # a different (and almost certainly non-existent) key than 'file_name'.
    if key_prefix:
        object_key = '{}/{}'.format(key_prefix, file_name)
    else:
        object_key = file_name
    s3 = boto3.resource('s3')
    obj = s3.Object(bucket_name, object_key)
    return obj.get()["Body"].read().decode('utf-8')

predictions = []

# For each transformer, fetch the object named '<input file>.out' from its
# output location and parse it as a header-less CSV.
for transformer in transformers:
    print(transformer.output_path)
    output_file_name = '{}.out'.format(test_file_basename)
    pred_csv = get_csv_from_s3(transformer.output_path, output_file_name)
    prediction_frame = pd.read_csv(io.StringIO(pred_csv), header=None)
    predictions.append(prediction_frame)

In [None]:
# Values of the target column from the local test set, used as the reference
# when scoring each candidate's predictions.
labels = test_data[target_attribute_name]

In [None]:
# Score each candidate's predictions against the reference labels.
for prediction, candidate in zip(predictions, candidates):
    objective = candidate['FinalAutoMLJobObjectiveMetric']
    print("Candidate name: ", candidate['CandidateName'])
    print("Objective metric name: ", objective['MetricName'])
    print("Objective metric value: ", objective['Value'])

    # F1 and precision under both averaging schemes, stored as [f1, precision].
    scores = {}
    for avg in ('macro', 'weighted'):
        f1 = f1_score(labels, prediction, average=avg)
        precision = precision_score(labels, prediction, average=avg)
        scores[avg] = [f1, precision]
        print('f1 = %.2f, Precision = %.2f (%s)' % (f1, precision, avg))

    print(classification_report(labels, prediction))
    print(confusion_matrix(labels, prediction))
    print()