# Exploratory Data Science

Data exploration and understanding the task at hand are fundamental steps in the Machine Learning workflow.
In this notebook, we'll take an opportunity to explore the use case, data and models we'll be using.

We have been tasked with developing an application which can identify objects in static and live images. In this notebook we use a pre-trained machine learning model, and explore how it works on static photos. 

To begin, we install and import a range of Python packages:

In [None]:
!pip install onnxruntime
!pip install huggingface_hub

In [None]:
from os import environ

import numpy as np
from onnxruntime import InferenceSession
from PIL import Image, ImageColor, ImageDraw, ImageFont, ImageOps
from huggingface_hub import hf_hub_download

from classes import classes

import json


print('Imported libraries')

## Import our image

In the next cell we import the image we want to test our model on.

In [None]:
# Path to the sample photo used throughout this notebook.
sample_image = 'sample-images/AFRICAN CROWNED CRANE.jpg'
sample = Image.open(sample_image)
# A bare expression on the last line of a cell makes the notebook display it.
sample

This image shows a bird: an African crowned crane. We need to convert the image to an array so that the ONNX model we will use can process it.

In [None]:
def transform(image, model_image_size=(260, 260)):
    """Convert a PIL image into the batched float array fed to the model.

    The image is letterboxed to the model input size, scaled from 0-255 to
    0-1, reordered from height-width-channel to channel-height-width, and
    given a leading batch axis of length 1.

    Args:
        image: PIL image to convert.
        model_image_size: ``(height, width)`` of the model input. Defaults to
            ``(260, 260)``.

    Returns:
        A ``float32`` NumPy array of shape ``(1, 3, height, width)``.
    """
    # letterbox_image unpacks its size as (width, height), so flip the order.
    target_size = tuple(reversed(model_image_size))
    boxed_image = letterbox_image(image, target_size)
    image_data = np.array(boxed_image, dtype='float32')
    image_data /= 255.
    # Channels first, then add the batch axis.
    image_data = np.transpose(image_data, [2, 0, 1])
    image_data = np.expand_dims(image_data, 0)
    return image_data


def letterbox_image(image, size):
    """Resize an image to fit inside ``size`` without distortion, padding with grey.

    The image is scaled by a single factor so that it fits within the target
    while keeping its aspect ratio, then pasted centred onto a grey
    ``(128, 128, 128)`` RGB canvas of exactly ``size``.

    Args:
        image: PIL image to resize.
        size: target ``(width, height)`` in pixels.

    Returns:
        A new RGB PIL image of dimensions ``size``.
    """
    iw, ih = image.size
    w, h = size
    scale = min(w / iw, h / ih)
    # int() truncates, so a very elongated image could end up with a side of
    # 0 pixels, which is not a valid resize target. Keep at least one pixel.
    nw = max(1, int(iw * scale))
    nh = max(1, int(ih * scale))

    resized = image.resize((nw, nh), Image.Resampling.BICUBIC)
    new_image = Image.new('RGB', size, (128, 128, 128))
    # Centre the resized image; the remaining border stays grey.
    new_image.paste(resized, ((w - nw) // 2, (h - nh) // 2))
    return new_image

In [None]:
# Convert the sample image into the array form produced by transform().
converted_image = transform(sample)
converted_image

## Load in a model

In [None]:
def get_model_hf(
    repo_id="dennisjooo/Birds-Classifier-EfficientNetB2",
    model_filename="model.onnx",
    config_filename="config.json",
):
    """Download the ONNX model and its config from the Hugging Face Hub.

    Only the download happens here. Parsing the config and creating the
    inference session are left to the caller, which needs those objects
    anyway; this avoids loading the model twice and leaving a file handle
    open.

    Args:
        repo_id: Hub repository to download from.
        model_filename: name of the model file inside the repository.
        config_filename: name of the config file inside the repository.

    Returns:
        A ``(model_location, config_location)`` tuple holding the two values
        returned by ``hf_hub_download``.
    """
    model_location = hf_hub_download(repo_id=repo_id, filename=model_filename)
    config_location = hf_hub_download(repo_id=repo_id, filename=config_filename)

    print(f'Downloaded model to {model_location}')
    print(f'Downloaded config to {config_location}')

    return model_location, config_location

def get_model_s3(model_name="model.onnx", config_name="config.json"):
    """Download the ONNX model and its config from an S3 bucket.

    Connection settings are read from the ``AWS_S3_ENDPOINT``,
    ``AWS_ACCESS_KEY_ID``, ``AWS_SECRET_ACCESS_KEY`` and ``AWS_S3_BUCKET``
    environment variables. Each object is saved in the current working
    directory under the same name as its key.

    Only the download happens here. Parsing the config and creating the
    inference session are left to the caller, so a non-default ``model_name``
    is not tied to a file literally named ``model.onnx``.

    Args:
        model_name: object key (and local filename) of the model.
        config_name: object key (and local filename) of the config.

    Returns:
        A ``(model_name, config_name)`` tuple of the local filenames.
    """
    # Imported here so boto3 is only needed when S3 is actually used.
    import boto3

    s3_endpoint_url = environ.get('AWS_S3_ENDPOINT')
    s3_access_key = environ.get('AWS_ACCESS_KEY_ID')
    s3_secret_key = environ.get('AWS_SECRET_ACCESS_KEY')
    s3_bucket_name = environ.get('AWS_S3_BUCKET')

    print('Imported s3 library')

    s3 = boto3.client(
        's3', endpoint_url=s3_endpoint_url,
        aws_access_key_id=s3_access_key, aws_secret_access_key=s3_secret_key,
    )
    s3.download_file(s3_bucket_name, model_name, model_name)
    s3.download_file(s3_bucket_name, config_name, config_name)
    print('Downloaded model.')

    return model_name, config_name

In [None]:
# REPO_ID = "dennisjooo/Birds-Classifier-EfficientNetB2"
# MODEL_FILENAME = "model.onnx"
# CONFIG_FILENAME = "config.json"

In [None]:
# model_location = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)
# config_location = hf_hub_download(repo_id=REPO_ID, filename=CONFIG_FILENAME)

# print(f'Downloaded model to {model_location}')
# print(f'Downloaded config to {config_location}')

In [None]:
# f = open(config_location)
# config = json.load(f)

You should be able to see that this file has been added to your file directory on the left hand side of the screen.

Let's now use the model to classify our sample image.

In [None]:
# Fetch the model and config from the Hugging Face Hub.
model_location, config_location = get_model_hf()
# Alternative source: download both files from S3 instead.
# model_location, config_location = get_model_s3(model_name="model-latest.onnx", config_name="config-latest.json")

In [None]:
# Load the model configuration. The context manager closes the file even if
# parsing the JSON fails.
with open(config_location) as f:
    config = json.load(f)

In [None]:
# Create a CPU inference session and feed the converted image under the
# 'pixel_values' input name.
session = InferenceSession(model_location, providers=['CPUExecutionProvider'])
# NOTE(review): the empty output list presumably requests every model output — confirm.
raw_result = session.run(
    [], {'pixel_values': converted_image,}
)
raw_result

In [None]:
def postprocess(raw_result, id2label=None, top_k=4):
    """Turn raw model scores into the top labels and their normalised scores.

    For each row of ``raw_result`` the ``top_k`` largest values are selected
    and ordered from highest to lowest. If any of them is positive, only the
    positive ones are kept; otherwise the whole row is shifted so that its
    minimum is zero. The kept values are then divided by their sum.

    Args:
        raw_result: iterable of 1-D NumPy arrays of per-class scores, one
            array per image.
        id2label: mapping from class index (as a string) to label. Defaults
            to ``config["id2label"]`` from the module-level ``config``.
        top_k: maximum number of classes to keep per image. Must be at
            least 1. Defaults to 4.

    Returns:
        A ``(scores, detected_classes)`` tuple of lists with one entry per
        image: an array of normalised scores and the matching list of labels,
        both ordered from highest to lowest score.

    Raises:
        ValueError: if ``top_k`` is less than 1.
    """
    if top_k < 1:
        raise ValueError("top_k must be at least 1")
    if id2label is None:
        # Backward-compatible default: the config loaded earlier in the notebook.
        id2label = config["id2label"]

    scores, detected_classes = [], []
    for sample in raw_result:
        # Never ask for more classes than the model produces.
        k = min(sample.shape[0], top_k)
        # argpartition finds the k largest without a full sort; then order them.
        ind = np.argpartition(sample, -k)[-k:]
        ind = ind[np.argsort(-sample[ind])]
        positive = sample[ind] > 0
        if positive.any():
            ind = ind[positive]
        else:
            # No positive score among the top k: shift so values are non-negative.
            sample = sample - sample.min()

        detected_classes.append([id2label[str(i)] for i in ind])
        selected = sample[ind]
        scores.append(selected / selected.sum())

    return scores, detected_classes

In [None]:
# Unpack the list of model outputs; the first output array is passed as
# postprocess's raw_result argument.
result = postprocess(*raw_result)
result

The post-processed result is a pair of lists: the normalized scores of the top predictions and the corresponding class labels. Each score indicates how strongly the model favours that label relative to the other labels shown.

We can use a helper function to display this information alongside the original image.

In [None]:
def draw_result(image, scores, classes):
    """Return the image with its predicted labels written beside it.

    Args:
        image: path (or file object) of the image to open.
        scores: list whose first entry holds the scores to display.
        classes: list whose first entry holds the labels matching ``scores``.

    Returns:
        A PIL image: the original, widened by a white strip on the right
        that lists one ``label | percent%`` line per prediction.
    """
    source = Image.open(image)
    # The text starts just past the right edge of the original picture.
    text_x = source.width + 5

    # Extend the canvas by 200 white pixels on the right-hand side only.
    canvas = ImageOps.expand(source, border=(0, 0, 200, 0), fill=(255, 255, 255))

    lines = [
        f"{classes[0][i]} | {round(score*100,1)}%\n"
        for i, score in enumerate(scores[0])
    ]
    ImageDraw.Draw(canvas).text(
        (text_x, 5), "".join(lines), (0, 0, 0), font=ImageFont.load_default()
    )
    return canvas

In [None]:
# result unpacks into draw_result's scores and classes arguments.
draw_result(sample_image, *result)

Fantastic! So you've seen how we can use a pre-trained model to identify objects in images. In the next notebooks, we will deploy this model using RHODS Model Serving, which allows us to use it as part of a larger application.

## Upload model to S3

In [None]:
import boto3

# S3 connection settings come from the environment; environ.get returns None
# for any variable that is not set.
s3_endpoint_url = environ.get('AWS_S3_ENDPOINT')
s3_access_key = environ.get('AWS_ACCESS_KEY_ID')
s3_secret_key = environ.get('AWS_SECRET_ACCESS_KEY')

print('Imported s3 library')

In [None]:
# Destination bucket for the upload below.
# NOTE(review): get_model_s3 reads its bucket from AWS_S3_BUCKET, while this
# name is hard-coded — confirm the two are meant to differ.
s3_bucket_name = 'model'

In [None]:
# Connect to S3 and upload the model and config under fixed object keys.
s3 = boto3.client(
    's3', endpoint_url=s3_endpoint_url,
    aws_access_key_id=s3_access_key, aws_secret_access_key=s3_secret_key,
)
s3.upload_file(model_location, s3_bucket_name, 'model.onnx')
s3.upload_file(config_location, s3_bucket_name, 'config.json')