### Preprocessing Inputs of Pre-trained Models

In [14]:
# OpenCV reads images in BGR channel order; the loaded image is a NumPy array

import cv2
import numpy as np
import urllib

# Sample image loaded once at module level; main() passes it to pose_estimation().
# NOTE(review): cv2.imread is documented to return None (rather than raise) when
# the file cannot be read - confirm 'images/img1.jpg' exists relative to the CWD.
POSE_IMAGE = cv2.imread('images/img1.jpg')

# TODO: shape -> [1x3x256x456] - format [BxCxHxW]
def pose_estimation(input_image):
    '''
    Preprocess an image for the pose estimation model.

    A copy of the input is resized to 456x256 (width x height), its axes are
    reordered from HxWxC to CxHxW, and a leading batch axis is added.
    The shape before and after the resize/transposition is printed.

    Returns an array of shape (1, 3, 256, 456), i.e. [BxCxHxW].
    '''
    target_height, target_width = 256, 456

    # Work on a copy so the caller's array is never touched
    frame = np.copy(input_image)
    print("original shape {}".format(frame.shape))

    # cv2.resize takes its target size as (width, height)
    resized = cv2.resize(frame, (target_width, target_height))
    channels_first = resized.transpose(2, 0, 1)
    print('preprocessing shape image {}'.format(channels_first.shape))

    return channels_first.reshape(1, 3, target_height, target_width)
    
def text_detection(input_image):
    '''
    Preprocess an image for the text detection model.

    A copy of the input is resized with cv2.resize() to 1280x768
    (width x height), its axes are reordered from HxWxC to CxHxW,
    and a leading batch axis is added.

    Returns an array of shape (1, 3, 768, 1280).
    '''
    target_height, target_width = 768, 1280

    # cv2.resize takes its target size as (width, height)
    resized = cv2.resize(np.copy(input_image), (target_width, target_height))
    channels_first = resized.transpose((2, 0, 1))
    return channels_first.reshape(1, 3, target_height, target_width)
    
def main():
    '''
    Run the pose estimation preprocessing on the module-level POSE_IMAGE.
    '''
    # Model choice: pose estimation. The preprocessed array is not used further here.
    preprocessed = pose_estimation(POSE_IMAGE)
    
# Run main() only when this file is executed directly, not when it is imported
if __name__ == "__main__":
    main()


original shape (513, 693, 3)
preprocessing shape image (3, 256, 456)


In [None]:
# There are 2 types of pre-trained model sets: https://software.intel.com/en-us/openvino-toolkit/documentation/pretrained-models
# Public Model Set = must be run through the Model Optimizer; the original models are available for training and fine-tuning
# Free Model Set = already converted to Intermediate Representation (IR) format; the original models are not available; obtained with the Model Downloader

################### Using Model Downloader tool ######################
# Human Pose Estimation: All precision levels

#cd /opt/intel/openvino/deployment_tools/tools/model_downloader
#sudo ./downloader.py --name vehicle-attributes-recognition-barrier-0039 --precisions INT8 -o /home/workspace
# sudo ./downloader.py --name text-detection-0004 --precisions FP16 -o /home/workspace

In [None]:
def handle_pose(output, input_shape):
    '''
    Post-process the output of the Pose Estimation model.

    Only the 'Mconv7_stage2_L2' blob (the keypoint heatmaps) is used;
    the Part Affinity Fields are ignored.

    output: mapping from blob name to array; the heatmap blob is indexed
        as [batch][map] and only batch entry 0 is read.
    input_shape: shape of the original input; its first two entries are
        used as (height, width).

    Returns an array of shape (number of heatmaps, height, width) holding
    every heatmap resized back to the input size.
    '''
    keypoint_maps = output['Mconv7_stage2_L2']

    # Zero-filled destination, one (height x width) slice per heatmap
    resized_maps = np.zeros(
        [keypoint_maps.shape[1], input_shape[0], input_shape[1]]
    )

    # cv2.resize expects (width, height), hence the reversed first two dims
    target_size = input_shape[0:2][::-1]
    for index, single_map in enumerate(keypoint_maps[0]):
        resized_maps[index] = cv2.resize(single_map, target_size)

    return resized_maps


def handle_text(output, input_shape):
    '''
    Post-process the output of the Text Detection model.

    Only the 'model/segm_logits/add' blob (per-pixel text / no text
    classification) is used; the linkage between pixels and their
    neighbors is ignored.

    output: mapping from blob name to array; the blob is indexed as
        [batch][class] and only batch entry 0 is read.
    input_shape: shape of the original input; its first two entries are
        used as (height, width).

    Returns an array of shape (number of classes, height, width) holding
    every class map resized back to the input size.
    '''
    class_maps = output['model/segm_logits/add']

    # np.empty leaves the buffer uninitialised; every slice is overwritten below
    resized_maps = np.empty(
        [class_maps.shape[1], input_shape[0], input_shape[1]]
    )

    # cv2.resize expects (width, height), hence the reversed first two dims
    target_size = input_shape[0:2][::-1]
    for index, single_map in enumerate(class_maps[0]):
        resized_maps[index] = cv2.resize(single_map, target_size)

    return resized_maps


def handle_car(output, input_shape):
    '''
    Post-process the output of the Car Metadata model.

    output: mapping with the blobs 'color' and 'type' (arrays of scores).
    input_shape: accepted for a uniform handler signature; not used here.

    Returns a (color_index, type_index) pair: the position of the highest
    score in the flattened 'color' blob and in the flattened 'type' blob.
    '''
    # Flatten each blob first so argmax yields a single class index
    return tuple(
        np.argmax(output[blob_name].flatten())
        for blob_name in ('color', 'type')
    )


def handle_output(model_type):
    '''
    Select the output handler matching model_type.

    "POSE" maps to handle_pose, "TEXT" to handle_text and "CAR_META" to
    handle_car. Any other value yields None.
    '''
    if model_type == "POSE":
        return handle_pose
    if model_type == "TEXT":
        return handle_text
    if model_type == "CAR_META":
        return handle_car
    # Unrecognised model type: no handler available
    return None
