# Baseline LightGBM Evaluation

## Import libraries

In [1]:
import json
import boto3
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

## Load Train Data

In [2]:
# Download the data from s3 buckets
s3 = boto3.client("s3")
data_bucket = 'ads-508-group-6-final'

data_path = "data.csv"

# downloading the test data from data_bucket
s3.download_file(data_bucket, 'churn_model_data/train/data.csv', data_path)

In [3]:
newline, bold, unbold = '\n', '\033[1m', '\033[0m'

# read the data
test_data = pd.read_csv(data_path, header=None)
test_data.columns = ['Target'] + [f"Feature_{i}" for i in range(1, test_data.shape[1])]

num_examples, num_columns = test_data.shape
print(f"{bold}The test dataset contains {num_examples} examples and {num_columns} columns.{unbold}\n")

# prepare the ground truth target and predicting features to send into the endpoint.
ground_truth_label, features = test_data.iloc[:, :1], test_data.iloc[:, 1:]

print(f"{bold}The first 5 observations of the data: {unbold} \n")
test_data.head(5)

[1mThe test dataset contains 110673 examples and 1093 columns.[0m

[1mThe first 5 observations of the data: [0m 



Unnamed: 0,Target,Feature_1,Feature_2,Feature_3,Feature_4,Feature_5,Feature_6,Feature_7,Feature_8,Feature_9,...,Feature_1083,Feature_1084,Feature_1085,Feature_1086,Feature_1087,Feature_1088,Feature_1089,Feature_1090,Feature_1091,Feature_1092
0,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,...,1083.0,1084.0,1085.0,1086.0,1087.0,1088.0,1089.0,1090.0,1091.0,1092.0
1,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.458333,0.0,0.10106,0.367112,-0.22254,-0.333333,-0.313837,0.019701,0.0,0.45
2,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,1.291667,-0.297297,-0.390507,-1.044013,-0.440974,-0.333333,-0.343278,-0.157604,0.0,1.0
3,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,...,0.333333,-0.432432,-0.385317,0.477752,10.702828,0.166667,0.304122,0.409771,5.254663,0.4
4,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.458333,5.594595,5.053682,-0.206079,4.261801,5.166667,4.226595,-0.144095,4.060302,-0.425


## Quering Endpoint

In [4]:
content_type = "text/csv"
def query_endpoint(encoded_tabular_data):
    endpoint_name = 'jumpstart-ftc-lgb-classification-model'
    client = boto3.client('runtime.sagemaker')
    response = client.invoke_endpoint(EndpointName=endpoint_name, ContentType=content_type, Body=encoded_tabular_data)
    return response

def parse_response(query_response):
    model_predictions = json.loads(query_response["Body"].read())
    predicted_probabilities = np.array(model_predictions['probabilities'])
    return predicted_probabilities

# split the test data into smaller size of batches to query the endpoint due to the large size of test data. 
batch_size = 1500
predict_prob = []
for i in np.arange(0, num_examples, step=batch_size):
    try:
        query_response_batch = query_endpoint(features.iloc[i:(i+batch_size), :].to_csv(header=False, index=False).encode("utf-8"))
    except Exception as e:
        if e.response is not None and 'Error' in e.response and e.response.get('Error').get('Code') == 'ModelError':
            raise Exception(
                 "Backend scripts have been updated in February '22 to standardize response "
                 "format of endpoint response."
                 "Previous endpoints may not support verbose response type used in this notebook."
                 f"To use this notebook, please launch the endpoint again. Error: {e}."
            )
        else:
            raise
    try:
        predict_prob_batch = parse_response(query_response_batch)  # prediction probability per batch
    except (TypeError, KeyError) as e:
        raise Exception(
              "Backend scripts have been updated in February '22 to standardize response "
              "format of endpoint response."
               "Response from previous endpoints may not be consistent with this notebook."
               f"To use this notebook, please launch the endpoint again. Error: {e}."
       )
    predict_prob.append(predict_prob_batch)
    

predict_prob = np.concatenate(predict_prob, axis=0)
predict_label = np.argmax(predict_prob, axis=1)

Exception: Backend scripts have been updated in February '22 to standardize response format of endpoint response.Previous endpoints may not support verbose response type used in this notebook.To use this notebook, please launch the endpoint again. Error: An error occurred (ModelError) when calling the InvokeEndpoint operation: Received client error (413) from primary and could not load the entire response body. See https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#logEventViewer:group=/aws/sagemaker/Endpoints/jumpstart-ftc-lgb-classification-model in account 765790021581 for more information..

## So this is either due to Amazon not updating associated scripts due to update or dimensionality of training array. Regardless, this leaves us to implement XGBoost.

## Evaluating Train Data

In [None]:
# Visualize the predictions results by plotting the confusion matrix.
conf_matrix = confusion_matrix(y_true=ground_truth_label.values, y_pred=predict_label)
fig, ax = plt.subplots(figsize=(7.5, 7.5))
ax.matshow(conf_matrix, cmap=plt.cm.Blues, alpha=0.3)
for i in range(conf_matrix.shape[0]):
    for j in range(conf_matrix.shape[1]):
        ax.text(x=j, y=i,s=conf_matrix[i, j], va='center', ha='center', size='xx-large')
 
plt.xlabel('Predictions', fontsize=18)
plt.ylabel('Actuals', fontsize=18)
plt.title('Confusion Matrix', fontsize=18)
plt.show()

In [None]:
# Measure the prediction results quantitatively.
eval_accuracy = accuracy_score(ground_truth_label.values, predict_label)
eval_f1_macro = f1_score(ground_truth_label.values, predict_label, average="macro")
eval_f1_micro = f1_score(ground_truth_label.values, predict_label, average="micro")

print (
    f"{bold}Evaluation result on test data{unbold}:{newline}"
    f"{bold}{accuracy_score.__name__}{unbold}: {eval_accuracy}{newline}"
    f"{bold}F1 Macro{unbold}: {eval_f1_macro}{newline}"
    f"{bold}F1 Micro{unbold}: {eval_f1_micro}{newline}"
)

## Load Validation Data

In [None]:
# Download the data from s3 buckets
s3 = boto3.client("s3")
data_bucket = 'ads-508-group-6-final'

data_path = "data.csv"

# downloading the test data from data_bucket
s3.download_file(data_bucket, 'churn_model_data/validation/data.csv', data_path)

In [None]:
newline, bold, unbold = '\n', '\033[1m', '\033[0m'

# read the data
test_data = pd.read_csv(data_path, header=None)
test_data.columns = ['Target'] + [f"Feature_{i}" for i in range(1, test_data.shape[1])]

num_examples, num_columns = test_data.shape
print(f"{bold}The test dataset contains {num_examples} examples and {num_columns} columns.{unbold}\n")

# prepare the ground truth target and predicting features to send into the endpoint.
ground_truth_label, features = test_data.iloc[:, :1], test_data.iloc[:, 1:]

print(f"{bold}The first 5 observations of the data: {unbold} \n")
test_data.head(5)

## Quering Endpoint

In [None]:
content_type = "text/csv"
def query_endpoint(encoded_tabular_data):
    endpoint_name = 'jumpstart-ftc-lgb-classification-model'
    client = boto3.client('runtime.sagemaker')
    response = client.invoke_endpoint(EndpointName=endpoint_name, ContentType=content_type, Body=encoded_tabular_data)
    return response

def parse_response(query_response):
    model_predictions = json.loads(query_response["Body"].read())
    predicted_probabilities = np.array(model_predictions['probabilities'])
    return predicted_probabilities

# split the test data into smaller size of batches to query the endpoint due to the large size of test data. 
batch_size = 1500
predict_prob = []
for i in np.arange(0, num_examples, step=batch_size):
    try:
        query_response_batch = query_endpoint(features.iloc[i:(i+batch_size), :].to_csv(header=False, index=False).encode("utf-8"))
    except Exception as e:
        if e.response is not None and 'Error' in e.response and e.response.get('Error').get('Code') == 'ModelError':
            raise Exception(
                 "Backend scripts have been updated in February '22 to standardize response "
                 "format of endpoint response."
                 "Previous endpoints may not support verbose response type used in this notebook."
                 f"To use this notebook, please launch the endpoint again. Error: {e}."
            )
        else:
            raise
    try:
        predict_prob_batch = parse_response(query_response_batch)  # prediction probability per batch
    except (TypeError, KeyError) as e:
        raise Exception(
              "Backend scripts have been updated in February '22 to standardize response "
              "format of endpoint response."
               "Response from previous endpoints may not be consistent with this notebook."
               f"To use this notebook, please launch the endpoint again. Error: {e}."
       )
    predict_prob.append(predict_prob_batch)
    

predict_prob = np.concatenate(predict_prob, axis=0)
predict_label = np.argmax(predict_prob, axis=1)

## Evaluating Validation Data

In [None]:
# Visualize the predictions results by plotting the confusion matrix.
conf_matrix = confusion_matrix(y_true=ground_truth_label.values, y_pred=predict_label)
fig, ax = plt.subplots(figsize=(7.5, 7.5))
ax.matshow(conf_matrix, cmap=plt.cm.Blues, alpha=0.3)
for i in range(conf_matrix.shape[0]):
    for j in range(conf_matrix.shape[1]):
        ax.text(x=j, y=i,s=conf_matrix[i, j], va='center', ha='center', size='xx-large')
 
plt.xlabel('Predictions', fontsize=18)
plt.ylabel('Actuals', fontsize=18)
plt.title('Confusion Matrix', fontsize=18)
plt.show()

In [None]:
# Measure the prediction results quantitatively.
eval_accuracy = accuracy_score(ground_truth_label.values, predict_label)
eval_f1_macro = f1_score(ground_truth_label.values, predict_label, average="macro")
eval_f1_micro = f1_score(ground_truth_label.values, predict_label, average="micro")

print (
    f"{bold}Evaluation result on test data{unbold}:{newline}"
    f"{bold}{accuracy_score.__name__}{unbold}: {eval_accuracy}{newline}"
    f"{bold}F1 Macro{unbold}: {eval_f1_macro}{newline}"
    f"{bold}F1 Micro{unbold}: {eval_f1_micro}{newline}"
)

## Load Test Data

In [None]:
# Download the data from s3 buckets
s3 = boto3.client("s3")
data_bucket = 'ads-508-group-6-final'

data_path = "data.csv"

# downloading the test data from data_bucket
s3.download_file(data_bucket, 'churn_model_data/test/data.csv', data_path)

In [None]:
newline, bold, unbold = '\n', '\033[1m', '\033[0m'

# read the data
test_data = pd.read_csv(data_path, header=None)
test_data.columns = ['Target'] + [f"Feature_{i}" for i in range(1, test_data.shape[1])]

num_examples, num_columns = test_data.shape
print(f"{bold}The test dataset contains {num_examples} examples and {num_columns} columns.{unbold}\n")

# prepare the ground truth target and predicting features to send into the endpoint.
ground_truth_label, features = test_data.iloc[:, :1], test_data.iloc[:, 1:]

print(f"{bold}The first 5 observations of the data: {unbold} \n")
test_data.head(5)

## Quering Endpoint

In [None]:
content_type = "text/csv"
def query_endpoint(encoded_tabular_data):
    endpoint_name = 'jumpstart-ftc-lgb-classification-model'
    client = boto3.client('runtime.sagemaker')
    response = client.invoke_endpoint(EndpointName=endpoint_name, ContentType=content_type, Body=encoded_tabular_data)
    return response

def parse_response(query_response):
    model_predictions = json.loads(query_response["Body"].read())
    predicted_probabilities = np.array(model_predictions['probabilities'])
    return predicted_probabilities

# split the test data into smaller size of batches to query the endpoint due to the large size of test data. 
batch_size = 1500
predict_prob = []
for i in np.arange(0, num_examples, step=batch_size):
    try:
        query_response_batch = query_endpoint(features.iloc[i:(i+batch_size), :].to_csv(header=False, index=False).encode("utf-8"))
    except Exception as e:
        if e.response is not None and 'Error' in e.response and e.response.get('Error').get('Code') == 'ModelError':
            raise Exception(
                 "Backend scripts have been updated in February '22 to standardize response "
                 "format of endpoint response."
                 "Previous endpoints may not support verbose response type used in this notebook."
                 f"To use this notebook, please launch the endpoint again. Error: {e}."
            )
        else:
            raise
    try:
        predict_prob_batch = parse_response(query_response_batch)  # prediction probability per batch
    except (TypeError, KeyError) as e:
        raise Exception(
              "Backend scripts have been updated in February '22 to standardize response "
              "format of endpoint response."
               "Response from previous endpoints may not be consistent with this notebook."
               f"To use this notebook, please launch the endpoint again. Error: {e}."
       )
    predict_prob.append(predict_prob_batch)
    

predict_prob = np.concatenate(predict_prob, axis=0)
predict_label = np.argmax(predict_prob, axis=1)

## Evaluating Test Data

In [None]:
# Visualize the predictions results by plotting the confusion matrix.
conf_matrix = confusion_matrix(y_true=ground_truth_label.values, y_pred=predict_label)
fig, ax = plt.subplots(figsize=(7.5, 7.5))
ax.matshow(conf_matrix, cmap=plt.cm.Blues, alpha=0.3)
for i in range(conf_matrix.shape[0]):
    for j in range(conf_matrix.shape[1]):
        ax.text(x=j, y=i,s=conf_matrix[i, j], va='center', ha='center', size='xx-large')
 
plt.xlabel('Predictions', fontsize=18)
plt.ylabel('Actuals', fontsize=18)
plt.title('Confusion Matrix', fontsize=18)
plt.show()

In [None]:
# Measure the prediction results quantitatively.
eval_accuracy = accuracy_score(ground_truth_label.values, predict_label)
eval_f1_macro = f1_score(ground_truth_label.values, predict_label, average="macro")
eval_f1_micro = f1_score(ground_truth_label.values, predict_label, average="micro")

print (
    f"{bold}Evaluation result on test data{unbold}:{newline}"
    f"{bold}{accuracy_score.__name__}{unbold}: {eval_accuracy}{newline}"
    f"{bold}F1 Macro{unbold}: {eval_f1_macro}{newline}"
    f"{bold}F1 Micro{unbold}: {eval_f1_micro}{newline}"
)