In [2]:
import boto3
import numpy as np
from sklearn.metrics import confusion_matrix

from sagemaker.predictor import Predictor
from sagemaker.session import Session

sagemaker_session = Session()

predictor = Predictor(
    endpoint_name="pest-classifier-release",
    sagemaker_session=sagemaker_session
)

def get_all_s3_objects(session, bucket):
    continuation_token = None
    objects = []
    while True:
        list_kwargs = dict(MaxKeys=1000, Bucket=bucket)
        if continuation_token:
            list_kwargs['ContinuationToken'] = continuation_token
        response = session.list_objects_v2(**list_kwargs)
        if 'Contents' in response:
            objects += (response['Contents'])
        if not response.get('IsTruncated'):  # At the end of the list?
            break
        continuation_token = response.get('NextContinuationToken')    
    
    return objects



# Get a list of all the S3 objects that contain the test data images' URIs
s3 = boto3.client('s3')
bucket_name = "flylite-model-testing-pictures" 
uris = get_all_s3_objects(s3, bucket_name);
print("Num of uris: " + str(len(uris)))


Num of uris: 274


In [3]:
print(uris[0])

{'Key': 'gbm/04074856-3AFC-4A7A-8EEE-3604FE1E430B_1_105_c.jpeg', 'LastModified': datetime.datetime(2023, 3, 20, 19, 41, 12, tzinfo=tzlocal()), 'ETag': '"c27b1ae841ce1969fc684e6a79ca4800"', 'Size': 223885, 'StorageClass': 'STANDARD'}


In [4]:
from ast import literal_eval
import os
from PIL import Image #Make sure to pip install Pillow
import io
import base64



def get_size_format(b, factor=1024, suffix="B"):
    """
    Scale bytes to its proper byte format
    e.g:
        1253656 => '1.20MB'
        1253656678 => '1.17GB'
    """
    for unit in ["", "K", "M", "G", "T", "P", "E", "Z"]:
        if b < factor:
            return f"{b:.2f}{unit}{suffix}"
        b /= factor
    return f"{b:.2f}Y{suffix}"

def compress_img(image_data, new_size_ratio=0.9, quality=90, width=None, height=None, to_jpg=True):
    # load the image to memory
    
    img = Image.open(io.BytesIO(image_data))
    # print the original image shape
    print("[*] Image shape:", img.size)
    # get the original image size in bytes
    image_size = len(image_data)
    # print the size before compression/resizing
    print("[*] Size before compression:", get_size_format(image_size))
    if new_size_ratio < 1.0:
        # if resizing ratio is below 1.0, then multiply width & height with this ratio to reduce image size
        img = img.resize((int(img.size[0] * new_size_ratio), int(img.size[1] * new_size_ratio)), Image.ANTIALIAS)
        # print new image shape
        print("[+] New Image shape:", img.size)
    elif width and height:
        # if width and height are set, resize with them instead
        img = img.resize((width, height), Image.ANTIALIAS)
        # print new image shape
        print("[+] New w/h Image shape:", img.size)
    # split the filename and extension
    filename, ext = os.path.splitext("test.jpg")
    # make new filename appending _compressed to the original file name
    #if to_jpg:
        # change the extension to JPEG
    #    new_filename = f"{filename}_compressed.jpg"
    #else:
        # retain the same extension of the original image
    #    new_filename = f"{filename}_compressed{ext}"
    #try:
        # save the image with the corresponding quality and optimize set to True
    #    img.save(new_filename, quality=quality, optimize=True)
    #except OSError:
        # convert the image to RGB mode first
    #    img = img.convert("RGB")
        # save the image with the corresponding quality and optimize set to True
    #    img.save(new_filename, quality=quality, optimize=True)
    #print("[+] New file saved:", new_filename)
    # get the new image size in bytes
    #new_image_size = os.path.getsize(new_filename)
    
    
    imgByteArr = io.BytesIO()
    # image.save expects a file-like as a argument
    img.save(imgByteArr, format='JPEG')
    # Turn the BytesIO object back into a bytes object
    imgByteArr = imgByteArr.getvalue()
    
    new_image_size = len(imgByteArr)
    # print the new size in a good format
    print("[+] Size after compression:", get_size_format(new_image_size))
    # calculate the saving bytes
    saving_diff = new_image_size - image_size
    # print the saving percentage
    #print(f"[+] Image size change: {saving_diff/image_size*100:.2f}% of the original image size.")
    print("Output type: " + str(type(img.tobytes())))
    
    img.close()
    
    return imgByteArr
    

classes = {}
for uri in uris:
    label = uri['Key'][0:(uri['Key'].find('/'))]
    if label not in classes.keys():
        classes[label] = {'TP': 0, 'FP': 0, 'FN': 0}
        print("Added label " + label)
        
for uri in uris:
    label = uri['Key'][0:(uri['Key'].find('/'))]
    
    s3_object = s3.get_object(Bucket=bucket_name, Key=uri['Key'])['Body'].read()
    #print(uri['Key'])
    #print(type(s3_object))
    
    print("Predicting image " + str(uri['Key']) + " with size " + get_size_format(len(s3_object)))
    while len(s3_object) > 4000000:
        print("Compressing...")
        s3_object = compress_img(s3_object, new_size_ratio=.9)
    
    result = predictor.predict(s3_object, {"ContentType": "application/x-image", "Accept": "application/json;verbose"})
    prediction = literal_eval(result.decode('utf-8'))['predicted_label']
    
    #print("Prediction: " + prediction)
    
    if prediction == label:
        classes[label]['TP'] += 1
    else:
        classes[prediction]['FP'] += 1
        classes[label]['FN'] += 1

print(classes)
    

Added label gbm
Added label neither
Added label slf
Predicting image gbm/04074856-3AFC-4A7A-8EEE-3604FE1E430B_1_105_c.jpeg with size 218.64KB
Predicting image gbm/0678498C-0AC9-45C4-A316-043D3B6CB1E2_1_105_c.jpeg with size 191.63KB
Predicting image gbm/067B1DCB-68C1-4DC5-84C9-F512754ECB91_1_105_c.jpeg with size 224.74KB
Predicting image gbm/0887148D-1DBB-4A15-8BCD-BC1EE166CF62_1_105_c.jpeg with size 184.77KB
Predicting image gbm/08884276-D8A8-4389-8559-8AB057823E51_1_105_c.jpeg with size 204.27KB
Predicting image gbm/0A28FE9C-0CF6-4546-8105-F49C167C3848_1_105_c.jpeg with size 244.19KB
Predicting image gbm/0BADC8A7-085E-48D3-BB23-E5D8885394FE_1_105_c.jpeg with size 247.15KB
Predicting image gbm/0F94C399-6F42-4DBF-A9BA-91AB218A5BF9_1_105_c.jpeg with size 233.56KB
Predicting image gbm/12C37139-723B-4872-B133-28BC43C60061_1_105_c.jpeg with size 184.35KB
Predicting image gbm/174B61DD-D545-4851-B348-FBAC4BAF853E_1_105_c.jpeg with size 179.00KB
Predicting image gbm/19257DFE-CEBE-4205-BC43-8C0



[+] New Image shape: (3628, 2721)
[+] Size after compression: 661.34KB
Output type: <class 'bytes'>
Predicting image gbm/IMG_2520.jpg with size 4.69MB
Compressing...
[*] Image shape: (4032, 3024)
[*] Size before compression: 4.69MB
[+] New Image shape: (3628, 2721)
[+] Size after compression: 592.29KB
Output type: <class 'bytes'>
Predicting image gbm/IMG_2521.jpg with size 5.33MB
Compressing...
[*] Image shape: (4032, 3024)
[*] Size before compression: 5.33MB
[+] New Image shape: (3628, 2721)
[+] Size after compression: 834.50KB
Output type: <class 'bytes'>
Predicting image gbm/IMG_2522.jpg with size 5.91MB
Compressing...
[*] Image shape: (4032, 3024)
[*] Size before compression: 5.91MB
[+] New Image shape: (3628, 2721)
[+] Size after compression: 765.53KB
Output type: <class 'bytes'>
Predicting image gbm/IMG_2523.jpg with size 5.76MB
Compressing...
[*] Image shape: (4032, 3024)
[*] Size before compression: 5.76MB
[+] New Image shape: (3628, 2721)
[+] Size after compression: 777.43KB
O

In [10]:

# Precision = TP / ( TP + FP) for each class
def calc_precision(TP, FP):
    return TP / (TP + FP)

# Recall = TP / ( TP + FN ) for each class
def calc_recall(TP, FN):
    return TP / (TP+FN)

# F1-score = (2 * precision * recall) / ( precision + recall)
def calc_f1_score(precision, recall):
    return (2*precision*recall) / (precision + recall)



# Create the confusion matrix
#cm = confusion_matrix(true_labels, predicted_labels, labels=["gbm", "slf", "neither"])

# Calculate the metrics
#accuracy = np.trace(cm) / float(np.sum(cm))
#precision = np.diagonal(cm) / np.sum(cm, axis=0)
#recall = np.diagonal(cm) / np.sum(cm, axis=1)
#f1_score = 2 * precision * recall / (precision + recall)

for c in classes.keys():
    precision = calc_precision(classes[c]['TP'],classes[c]['FP'])
    recall = calc_precision(classes[c]['TP'],classes[c]['FN'])
    print(c + ":")
    print("Precision: " + str(precision))
    print("Recall: " + str(recall))
    print("FScore: " + str(calc_f1_score(precision, recall)))
    print()

# Print the metrics
#print("Accuracy: {:.2f}%".format(accuracy * 100))
#print("Precision: {}".format(precision))
#print("Recall: {}".format(recall))
#print("F1 score: {}".format(f1_score))

gbm:
Precision: 1.0
Recall: 1.0
FScore: 1.0

neither:
Precision: 1.0
Recall: 0.9928057553956835
FScore: 0.996389891696751

slf:
Precision: 0.975609756097561
Recall: 1.0
FScore: 0.9876543209876543

