In [None]:
import boto3
import numpy as np
from sklearn.metrics import confusion_matrix

from sagemaker.predictor import Predictor
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer

# NOTE(review): despite its name this is a low-level boto3 SageMaker client,
# not a sagemaker.Session; it is not passed to the Predictor below.
sagemaker_session = boto3.Session().client("sagemaker")

# The evaluation code sends a dict to predict() and indexes the response with
# ["predictions"], so both directions must be JSON. The Predictor defaults
# would forward the payload untouched and hand back raw bytes instead.
predictor = Predictor(
    endpoint_name="pest-classifier-release",
    serializer=JSONSerializer(),
    deserializer=JSONDeserializer(),
)

print("Predictor")

def get_all_s3_objects(s3, **base_kwargs):
    """Download every ``.jpg`` object matched by an S3 listing.

    Pages through ``s3.list_objects_v2`` (1000 keys per request), following
    continuation tokens until the listing is no longer truncated, prints the
    complete listing, and then fetches the body of each key ending in ``.jpg``.

    Args:
        s3: Client exposing ``list_objects_v2`` and ``get_object``.
        **base_kwargs: Keyword arguments forwarded to ``list_objects_v2``.
            ``Bucket`` is required; it is also used for the log line and for
            the downloads.

    Returns:
        A list of ``{'key': <object key>, 'data': <result of Body.read()>}``
        dicts, in listing order.
    """
    listing = []
    next_token = None
    finished = False
    while not finished:
        request = dict(MaxKeys=1000, **base_kwargs)
        if next_token:
            request['ContinuationToken'] = next_token
        page = s3.list_objects_v2(**request)
        # A page with no matches carries no 'Contents' entry at all.
        listing.extend(page.get('Contents', []))
        if page.get('IsTruncated'):
            next_token = page.get('NextContinuationToken')
        else:
            finished = True

    bucket = base_kwargs['Bucket']
    print(str(bucket) + " objects: " + str(listing))

    # Only the JPEG images are fetched; other keys are listed but skipped.
    return [
        {
            'key': entry['Key'],
            'data': s3.get_object(Bucket=bucket, Key=entry['Key'])['Body'].read(),
        }
        for entry in listing
        if entry['Key'].endswith('.jpg')
    ]



# Download the test images, one listing per class prefix. Each call returns a
# list of {'key', 'data'} dicts holding the object key and the image bytes.
s3 = boto3.client('s3')
bucket_name = "flylite-model-testing-pictures"
s3_objects = get_all_s3_objects(s3, Bucket=bucket_name, Prefix="gbm")
s3_objects1 = get_all_s3_objects(s3, Bucket=bucket_name, Prefix="neither")
s3_objects2 = get_all_s3_objects(s3, Bucket=bucket_name, Prefix="slf")

# Show only the keys: every entry also carries the raw image bytes, which
# would flood the cell output if the whole list were printed.
print([item['key'] for item in s3_objects])




In [None]:
# Evaluate the deployed classifier on the test images fetched above.

# Class names in the index order used to decode the model's output vector.
# The same order is used for the confusion matrix, so the per-class metric
# arrays printed below are ordered gbm, neither, slf.
CLASS_NAMES = ["gbm", "neither", "slf"]

# get_all_s3_objects() has already downloaded every .jpg and returned
# {'key', 'data'} dicts, so the lists are used directly (there is no 'Key' or
# 'Bucket' entry to look up and no need to download again).
# s3_objects / s3_objects1 / s3_objects2 were listed under the "gbm" /
# "neither" / "slf" prefixes respectively, which gives each image's true label.
labelled_sets = [
    ("gbm", s3_objects),
    ("neither", s3_objects1),
    ("slf", s3_objects2),
]

data = []
true_labels = []
for class_name, items in labelled_sets:
    data.extend(items)
    true_labels.extend([class_name] * len(items))
true_labels = np.array(true_labels)

if not data:
    raise ValueError("No test images were found; nothing to evaluate.")

# Predict the label of each image and store the predicted label for each image in a list
predicted_labels = []
for item in data:
    # NOTE(review): item['data'] is raw bytes, which a JSON serializer cannot
    # encode as-is; confirm the request format the endpoint expects (e.g.
    # base64 text) before relying on this call.
    response = predictor.predict({"image_bytes": item['data']})
    class_scores = response["predictions"][0]
    predicted_labels.append(CLASS_NAMES[int(np.argmax(class_scores))])

# Create the confusion matrix (rows: true class, columns: predicted class)
cm = confusion_matrix(true_labels, predicted_labels, labels=CLASS_NAMES)


def _safe_divide(numerator, denominator):
    """Element-wise division that yields 0 wherever the denominator is 0."""
    numerator = np.asarray(numerator, dtype=float)
    denominator = np.asarray(denominator, dtype=float)
    return np.divide(
        numerator,
        denominator,
        out=np.zeros_like(numerator),
        where=denominator != 0,
    )


# Calculate the metrics. A class that was never predicted (or never present)
# gets 0 instead of NaN and a divide-by-zero warning.
correct_per_class = np.diagonal(cm)
accuracy = np.trace(cm) / float(np.sum(cm))
precision = _safe_divide(correct_per_class, np.sum(cm, axis=0))
recall = _safe_divide(correct_per_class, np.sum(cm, axis=1))
f1_score = _safe_divide(2 * precision * recall, precision + recall)

# Print the metrics
print("Accuracy: {:.2f}%".format(accuracy * 100))
print("Precision: {}".format(precision))
print("Recall: {}".format(recall))
print("F1 score: {}".format(f1_score))