In [None]:
# Name of the S3 bucket that holds the test data (used below to build
# test_data_path). Replace the placeholder before running the notebook.
s3_bucket = '<INSERT S3 BUCKET HERE>'

In [None]:
# Key prefix inside the bucket; the test CSV is read from
# <prefix>/output/test/data.csv (see test_data_path).
prefix = 'pipeline'

In [None]:
# Name of the inference endpoint the Predictor attaches to.
# NOTE(review): assumes this endpoint is already deployed — confirm.
endpoint_name = "AutoGluonEndpoint"

In [None]:
from sagemaker.predictor import Predictor

import sagemaker

In [None]:
# SageMaker session object; handed to the Predictor further down.
session = sagemaker.Session()

In [None]:
# Local scratch directory that receives the downloaded test CSV.
# -p: do not fail if the directory already exists.
!mkdir -p tmp

In [None]:
# Full S3 URI of the test CSV, assembled from the bucket and prefix above.
test_data_path = f"s3://{s3_bucket}/{prefix}/output/test/data.csv"

In [None]:
!aws s3 cp {test_data_path} tmp/test_data.csv

In [None]:
import pandas as pd

# header=None tells pandas the file has no header row: the first line is
# read as data and the columns get default integer labels.
test_df = pd.read_csv("tmp/test_data.csv", header=None)
# Bare expression: rendered as the cell's output.
test_df

In [None]:
# Give the first column a readable name. It is the column the helper
# functions below drop from the payload and read back as the actual result.
first_column_label = test_df.columns[0]
test_df.rename(columns={first_column_label: "is_cancelled"}, inplace=True)

In [None]:
# Display the frame again to check that the first column is now "is_cancelled".
test_df

In [None]:
# Client handle for the endpoint named above, bound to the session created
# earlier. Serializer/deserializer are configured in the next cell.
predictor = Predictor(
    endpoint_name=endpoint_name,
    sagemaker_session=session,
)

In [None]:
from sagemaker.serializers import CSVSerializer
from sagemaker.deserializers import JSONDeserializer

# Wire format for the endpoint: request bodies go out through the CSV
# serializer, responses come back through the JSON deserializer
# (predict() below indexes the result by the 'probabilities' key).
predictor.serializer = CSVSerializer()
predictor.deserializer = JSONDeserializer()

In [None]:
import csv

def get_test_payload(index, test_df=test_df):
    """Build the CSV request body for one test record.

    Parameters
    ----------
    index : int
        Positional (``iloc``) row index into ``test_df``.
    test_df : pandas.DataFrame
        Frame holding the test records, including the ``is_cancelled``
        column. The default is bound to the notebook's ``test_df`` at the
        moment this cell is executed.

    Returns
    -------
    str
        The row's values, without ``is_cancelled``, joined by commas on a
        single line. Non-numeric values are wrapped in double quotes.
    """
    features = test_df.drop(columns=['is_cancelled'])
    target_record = features.iloc[index]

    # Series.to_csv writes one value per line when header and index are off.
    serialized = target_record.to_csv(
        header=False,
        index=False,
        quotechar='"',
        quoting=csv.QUOTE_NONNUMERIC
    )

    # splitlines(), not split(): split() breaks on *any* whitespace, which
    # would tear a quoted value such as "Resort Hotel" into two fields and
    # corrupt the payload. splitlines() also copes with \r\n terminators.
    predictor_values = serialized.splitlines()
    return ','.join(predictor_values)

In [None]:
def get_test_actual_result(index, test_df=test_df):
    """Return the ``is_cancelled`` value of the test record at ``index``.

    ``index`` is positional (``iloc``). ``test_df`` defaults to the
    notebook's frame as it was bound when this cell was executed.
    """
    record = test_df.iloc[index]
    return record['is_cancelled']

In [None]:
def predict(index, predictor=predictor):
    """Classify one test record through the endpoint.

    Builds the CSV payload for row ``index``, sends it via ``predictor``,
    prints the raw response, and returns the predicted class.

    Returns
    -------
    int
        0 when the first probability is strictly larger than the second,
        otherwise 1 (ties resolve to 1).
    """
    response = predictor.predict([get_test_payload(index)])
    print(response)

    # Exactly one record was sent, so 'probabilities' is unpacked as a single
    # two-element entry; any other shape raises on unpacking.
    ((p_class_0, p_class_1),) = response['probabilities']

    return 0 if p_class_0 > p_class_1 else 1

In [None]:
# Smoke test: run a single prediction (row 5) before looping over many rows.
predict(5)

In [None]:
from time import sleep

# Parallel lists: actual_list[k] and predicted_list[k] belong to the same row.
actual_list = []
predicted_list = []

# Evaluate up to 100 records. The count is capped at the frame length so a
# test set with fewer than 100 rows does not raise IndexError from iloc.
sample_count = min(100, len(test_df))

for i in range(sample_count):
    actual = get_test_actual_result(i)
    predicted = predict(i)
    print(f"[iteration # {i}]")
    print(f"actual = {actual}; predicted = {predicted}")

    actual_list.append(actual)
    predicted_list.append(predicted)

    # Short pause between requests.
    # NOTE(review): presumably to avoid hammering the endpoint — confirm.
    sleep(0.05)

In [None]:
from sklearn.metrics import classification_report

# Display names for the two classes, in label order: 0 -> 'not cancelled',
# 1 -> 'cancelled'.
target_names = ['not cancelled', 'cancelled']

# labels=[0, 1] pins that mapping explicitly. Without it the report infers
# the classes from the data and raises ValueError when only one class shows
# up in the evaluated sample (one class vs. two target names).
print(classification_report(
    actual_list,
    predicted_list,
    labels=[0, 1],
    target_names=target_names,
))

In [None]:
from sklearn.metrics import accuracy_score

# Fraction of evaluated records whose prediction matches the actual value;
# shown as the cell's output.
accuracy_score(actual_list, predicted_list)

In [None]:
# Clean-up: delete the endpoint that `predictor` is attached to. Run this
# last — the prediction cells above need the endpoint to exist.
predictor.delete_endpoint()