# Import

In [85]:
import csv
import os
import json
import posixpath
import requests
from PIL import Image

import pandas as pd
import numpy as np

import pprint
import boto3
import sagemaker

# Set Up

## Global Parameters

In [86]:
# AWS region handed to the boto3 session
region = 'us-east-1'

# S3 bucket, plus the two key levels that are joined into the test-image prefix
bucket = 'dsba-6190-final-team-project'
key_level_1, key_level_2 = 'imgs', 'test'

# Name of the endpoint that the SageMaker runtime client invokes
endpoint_name = 'image-classification-drivers-endpoint'

## AWS Functions

In [87]:
# One boto3 session, pinned to the configured region, backs every AWS handle below
boto_sess = boto3.session.Session(region_name=region)

# Low-level clients: SageMaker runtime (used to invoke the endpoint) and S3
runtime = boto_sess.client('runtime.sagemaker')
s3_client = boto_sess.client('s3')

# Resource-style S3 interface (used later to download a file from the bucket)
s3_res = boto_sess.resource(service_name='s3')

# Import Test Images
The test images are located in the project S3 bucket, under the key prefix **imgs/test**.

## Generate List of Images
First, we create a list of all objects in the project bucket, under prefix **imgs/test**. 

In [88]:
# Request parameters: objects in the project bucket whose key starts with the
# prefix built from key_level_1 / key_level_2
s3_list_obj_param = {
    "Bucket" : bucket,
    "Prefix" : posixpath.join(key_level_1, key_level_2)
}

# A single list_objects_v2 call returns at most 1,000 keys, so walk every page
# with a paginator and merge the per-page listings.
paginator = s3_client.get_paginator('list_objects_v2')
all_contents = []
for page in paginator.paginate(**s3_list_obj_param):
    # 'Contents' is absent from a page when nothing matches the prefix
    all_contents.extend(page.get('Contents', []))

# Keep the shape the following cell relies on: a dict exposing the listing
# under 'Contents'
response = {
    "Contents" : all_contents,
    "KeyCount" : len(all_contents)
}

The initial call returns a large amount of metadata we are not interested in, so we strip it down to just the contents of the list. Each entry in the contents is still a dictionary holding the object key (the file name) plus additional metadata, so we strip it down further and keep only the file name, i.e., the value stored under `Key`.

We also add a filter to ensure all the contents we are collecting are JPG images.

In [89]:
# Extract Contents from Item Search Response
# ('Contents' is missing when no object matches the prefix, so default to empty)
test_imgs_contents = response.get('Contents', [])

# Collect the key of every object whose name ends in .jpg (case-insensitive).
# Checking the extension, rather than looking for 'jpg' anywhere in the key,
# keeps out objects that merely contain those letters elsewhere in their path.
test_imgs_list = [
    item['Key']
    for item in test_imgs_contents
    if item['Key'].lower().endswith('.jpg')
]

# Inspect initial images in list to ensure proper execution
print(test_imgs_list[:5])

['imgs/test/img_1.jpg', 'imgs/test/img_10.jpg', 'imgs/test/img_100.jpg', 'imgs/test/img_1000.jpg', 'imgs/test/img_100000.jpg']


In [90]:
# Local destination for the sample image
file_name = "img_test/test_image.jpg"

# The download fails if the target folder is missing, so create it up front
# (exist_ok makes the cell safe to re-run)
os.makedirs(os.path.dirname(file_name), exist_ok=True)

# Download one of the listed test images (index 5) to use as the sample payload
s3_res.Bucket(bucket).download_file(test_imgs_list[5], file_name)

In [91]:
# Read the downloaded image as raw bytes and wrap them in a bytearray
with open(file_name, mode='rb') as image_file:
    payload = bytearray(image_file.read())

In [92]:
# Send the sample image to the SageMaker endpoint and print the decoded JSON body.
# Exception is caught instead of using a bare except so that KeyboardInterrupt /
# SystemExit still propagate, and the underlying error is reported rather than hidden.
try:
    response = runtime.invoke_endpoint(EndpointName=endpoint_name,
                                       ContentType='application/x-image',
                                       Body=payload)
    result = response['Body'].read()
    result = json.loads(result)
    pprint.pprint(result)

except Exception as err:
    print("Error Raised. Check status of endpoint.")
    print(type(err).__name__, err, sep=": ")

[0.0015890575014054775,
 0.006669325288385153,
 0.0009492913377471268,
 0.8281391263008118,
 0.012725784443318844,
 0.0016796393319964409,
 0.09592464566230774,
 0.023216068744659424,
 0.028154902160167694,
 0.0009521319298073649]


# Test REST API
We want to test that our REST API is up and running. If we POST an image to the REST API, we should receive a JSON response containing a list of the image's probabilities for each class.

First we define some of the variables.

In [93]:
# Host of the REST API
host_url = 'https://96gfw8ry96.execute-api.us-east-1.amazonaws.com'

# Path that is appended to the host to form the POST location
page = 'Test/predict'

# Headers sent with every POST; the request body is the raw image
headers = {'Content-Type': 'image/jpg'}

POST Inputs.

In [94]:
# Image to POST
# (read inside a context manager so the file handle is closed right away)
with open(file_name, 'rb') as image_file:
    data = image_file.read()

# POST Location
url = posixpath.join(host_url, page)

POST API

In [95]:
# POST the sample image to the REST API and print the decoded JSON response.
# Only request failures and undecodable bodies are caught, so KeyboardInterrupt
# still propagates, and the underlying error is reported alongside the hint.
try:
    response = requests.post(url=url,
                             data=data,
                             headers=headers,
                             timeout=60)  # never wait indefinitely on a dead API

    # Turn 4xx/5xx answers into an exception instead of parsing an error body
    response.raise_for_status()

    #Response Output
    result = json.loads(response.text)
    pprint.pprint(result)

except (requests.exceptions.RequestException, ValueError) as err:
    print("Error Raised. Check status of REST API and SageMaker Endpoint.")
    print(type(err).__name__, err, sep=": ")

{'payload': [0.0015890575014054775,
             0.006669325288385153,
             0.0009492913377471268,
             0.8281391263008118,
             0.012725784443318844,
             0.0016796393319964409,
             0.09592464566230774,
             0.023216068744659424,
             0.028154902160167694,
             0.0009521319298073649]}


# Test Kaggle Images

In order to submit to the Kaggle competition, we loop through the test images and collect the probability JSON returned for each one.

In [96]:
# Components of the local Kaggle test-image folder, starting one level up
up_dir = "../"
folder_1, folder_2, folder_3 = "imgs", "kaggle", "test"

# Joined with forward slashes: "../imgs/kaggle/test"
test_img_path = posixpath.join(up_dir, *(folder_1, folder_2, folder_3))

Create list of files

In [97]:
# os.listdir returns entries in arbitrary order and includes anything that
# happens to live in the folder (e.g. checkpoint directories, hidden files).
# Keep only .jpg files and sort them so every run processes the same images
# in the same order.
test_img_list = sorted(
    name
    for name in os.listdir(path=test_img_path)
    if name.lower().endswith('.jpg')
)

Loop through the file list, generating image probabilities for each one.

In [98]:
# Maps image file name -> list of class probabilities returned by the REST API
dict_test_prob = {}

# Reuse one HTTP session so the connection is kept alive across the many requests
with requests.Session() as session:
    for image in test_img_list:
        image_path = posixpath.join(test_img_path, image)

        # Context manager closes each image file as soon as it has been read
        with open(image_path, 'rb') as image_file:
            data = image_file.read()

        response = session.post(url=url,
                                data=data,
                                headers=headers,
                                timeout=60)  # never hang forever on one image

        # Fail with the HTTP status instead of an opaque JSON/KeyError below
        response.raise_for_status()

        result = json.loads(response.text)
        dict_test_prob[image] = result['payload']

KeyboardInterrupt: 

Convert the dictionary of probabilities to a dataframe.

In [None]:
# Column layout of the output: image name followed by one column per class
df_header = ['img', 'c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9']

# One row per image. Naming the probability columns at construction time keeps
# this working when dict_test_prob is empty, and the chained form avoids
# mutating the frame in place.
df = (
    pd.DataFrame.from_dict(dict_test_prob, orient='index', columns=df_header[1:])
    .rename_axis('img')   # the index holds the image file names
    .reset_index()        # move them into the leading 'img' column
)

Export dataframe to csv.

In [None]:
# Destination of the results file
output_csv = "../kaggle_test_results/test_image_output.csv"

# to_csv does not create missing folders, so make sure the target exists first
os.makedirs(os.path.dirname(output_csv), exist_ok=True)

# Write the results without the positional index column
df.to_csv(path_or_buf=output_csv, index = False)