# Cloud APIs for Computer Vision: Up and Running in 15 Minutes

This code is part of [Chapter 8 - Cloud APIs for Computer Vision: Up and Running in 15 Minutes](https://learning.oreilly.com/library/view/practical-deep-learning/9781492034858/ch08.html).

## Compile Results for OCR

In this file we will compile the results using the ground truth and the collected data for all the test images. 

Please update:

- `PATH_TO_DATA_DIR`: Add the absolute path to the `data` directory, including the trailing `/`, because file names are appended to it directly (example: `/path/to/data-may-2020/`).

In [1]:
# Root of the data directory. Every path in this notebook is built by plain
# string concatenation (data_path + "<file name>"), so the value must end with
# a path separator.
data_path = "<PATH_TO_DATA_DIR>"

In [2]:
import json
from collections import Counter

# Read the ground-truth image ids, one list entry per line of the CSV file
# (line endings are not stripped).
with open(data_path + "val-image-ids-final.csv") as f:
    image_ids = list(f)

# Read the ground-truth text for the images.
with open(data_path + "ground-truth.json") as f:
    ground_truth = json.loads(f.read())

In [3]:
# Read the JSON dump of the Google OCR results.

with open(data_path + "google-ocr-jsondump.json") as f:
    google_results = json.loads(f.read())

In [4]:
# Read the JSON dump of the Microsoft OCR results.

with open(data_path + "msft-ocr-jsondump.json") as f:
    microsoft_results = json.loads(f.read())

In [5]:
# Read the JSON dump of the Amazon OCR results.

with open(data_path + "amazon-ocr-jsondump.json") as f:
    amazon_results = json.loads(f.read())

In [6]:
# Helper functions to convert an image name to an image id and vice versa.


def get_id_from_name(name):
    """Return the integer image id encoded in an image file name.

    Accepts a bare numeric name (``"580235.jpg"``) as well as a prefixed name
    such as the ones built by ``get_name_from_id``
    (``".../COCO_train2014_000000580235.jpg"``). Any directory part is ignored.

    Raises:
        ValueError: if the file name does not end in a number.
    """
    stem = name.split("/")[-1].split(".jpg")[0]
    # Keep only the text after the last underscore so that a prefix such as
    # "COCO_train2014_" does not break the integer conversion.
    return int(stem.rsplit("_", 1)[-1])


def get_name_from_id(image_id):
    """Build the image path for ``image_id`` inside ``legible-images``.

    The result is ``data_path`` followed by
    ``legible-images/COCO_train2014_000000<image_id>.jpg``.
    """
    # NOTE(review): the fixed "000000" prefix is prepended regardless of how
    # many digits the id has -- confirm this matches the stored file names.
    image_file = "COCO_train2014_" + "000000" + str(image_id) + ".jpg"
    return data_path + "legible-images/" + image_file

### Parsing

Each cloud provider sends the results in slightly different formats and we need to parse each of them correctly. So, we will develop a parsing function unique to each cloud provider.

#### Ground Truth Parsing

In [7]:
def parse(l):
    """Normalise a list of ground-truth words.

    Entries shorter than two characters are dropped and the remaining ones
    are lower-cased. Order and duplicates are preserved; a new list is
    returned.
    """
    return [word.lower() for word in l if len(word) >= 2]

#### Parsing Google Results

In [8]:
def parse_google_response(l):
    """Return the distinct lower-cased tokens found in a Google result list.

    For every entry both the whole lower-cased entry and each of its
    whitespace-separated words are collected. Duplicates are removed, so the
    order of the returned list is arbitrary.
    """
    unique_tokens = set()
    for entry in l:
        lowered = entry.lower()
        unique_tokens.add(lowered)
        unique_tokens.update(lowered.split())
    return list(unique_tokens)

#### Parsing Microsoft Results

In [9]:
def microsoft_name(image_id):
    """Return the bare file name ``COCO_train2014_000000<image_id>.jpg``.

    This name (without any directory) is what the Microsoft results are
    looked up by.
    """
    return "COCO_train2014_000000{!s}.jpg".format(image_id)


def parse_microsoft_response(results_dict):
    """Return the lower-cased words of a decoded Microsoft OCR response.

    Only responses whose ``status`` is ``"Succeeded"`` are parsed; any other
    status yields an empty list. The text of every recognised line is
    lower-cased and split on whitespace, keeping order and duplicates.
    """
    if results_dict["status"] != "Succeeded":
        return []

    words = []
    for line in results_dict["recognitionResult"]["lines"]:
        words += line["text"].lower().split()
    return words

#### Parsing Amazon Results

In [10]:
def parse_amazon_response(l):
    """Return the lower-cased words of an Amazon OCR result list.

    Empty entries are skipped; every other entry is lower-cased and split on
    whitespace. Order and duplicates are preserved.
    """
    tokens = []
    for entry in l:
        if len(entry) < 1:
            continue
        tokens += entry.lower().split()
    return tokens

Let's create a final function that compares the ground truth value with the values returned from the cloud providers. A couple of things to keep in mind:

1. The ground truth must exist in order for a comparison to occur.
2. The predictions must be available and must exist in order to do any parsing for that cloud provider.
3. We are normalizing each word in both the ground truth and the predictions in order to do a word-by-word comparison.

In [11]:
def calculate_score(ground_truth, predictions, arg):
    """Print the word-level score of one cloud provider's OCR results.

    For every image in ``ground_truth`` the normalised ground-truth words are
    matched against the provider's parsed words. Matching is multiset based:
    each predicted word can satisfy at most one ground-truth word. Images
    without a usable prediction still add their words to the total.

    Args:
        ground_truth: mapping of image id to the list of ground-truth words.
        predictions: the provider's results. For "google" and "amazon" they
            are looked up by ``get_name_from_id(image_id)``; for "microsoft"
            by ``microsoft_name(image_id)``, with JSON-encoded values.
        arg: provider to score: "google", "microsoft" or "amazon".

    Raises:
        ValueError: if ``arg`` is not one of the supported providers.
    """
    # Fail early instead of hitting an undefined (or stale) prediction later.
    if arg not in ("google", "microsoft", "amazon"):
        raise ValueError("Unknown cloud provider: {!r}".format(arg))

    total = 0
    correct = 0
    for key in ground_truth:
        parsed_ground_truth = parse(ground_truth[key])
        if not parsed_ground_truth:
            continue
        total += len(parsed_ground_truth)

        if arg == "microsoft":
            raw = predictions.get(microsoft_name(key))
            if raw is None:
                continue
            decoded = json.loads(raw)  # decode once and reuse
            if not decoded:
                continue
            pred = parse_microsoft_response(decoded)
        else:
            raw = predictions.get(get_name_from_id(key))
            if raw is None or len(raw) < 2:
                continue
            if arg == "google":
                # The first entry is skipped (presumably the full-text
                # entry -- TODO confirm); only the remaining ones are scored.
                pred = parse_google_response(raw[1:])
            else:
                pred = parse_amazon_response(raw)

        # Multiset intersection: a predicted word is consumed by at most one
        # ground-truth word.
        overlap = Counter(parsed_ground_truth) & Counter(pred)
        correct += sum(overlap.values())

    print("Total = ", total, "\tCorrect = ", correct)
    # Guard against an empty ground truth, which would divide by zero.
    print(float(correct) / float(total) if total else 0.0)

In [12]:
# Overall word-level score for the Amazon OCR results.
calculate_score(ground_truth, amazon_results, "amazon")

Total =  10031 	Correct =  12
0.0011962914963612801


In [13]:
# Overall word-level score for the Google OCR results.
calculate_score(ground_truth, google_results, "google")

Total =  10031 	Correct =  12
0.0011962914963612801


In [14]:
# Overall word-level score for the Microsoft OCR results.
calculate_score(ground_truth, microsoft_results, "microsoft")

Total =  10031 	Correct =  14
0.0013956734124214933


If you want to see the results for each image, use the block of code below to print out results and ground truth for each image.

In [15]:
# Print, image by image, the ground truth next to each provider's parsed OCR
# words. An image is reported only when all three providers have a usable
# result for it; otherwise the image is skipped entirely.
for key in ground_truth:
    parsed_ground_truth = parse(ground_truth[key])
    image_name = get_name_from_id(key)

    # Amazon
    amazon_prediction = amazon_results.get(image_name)
    if not amazon_prediction:
        continue
    amazon_prediction = parse_amazon_response(amazon_prediction)

    # Microsoft (stored values are JSON strings; decode once and reuse)
    microsoft_prediction = microsoft_results.get(microsoft_name(key))
    if microsoft_prediction is None:
        continue
    microsoft_prediction = json.loads(microsoft_prediction)
    if not microsoft_prediction:
        continue
    microsoft_prediction = parse_microsoft_response(microsoft_prediction)

    # Google (the first entry of each result is not scored)
    google_prediction = google_results.get(image_name)
    if google_prediction is None or len(google_prediction) < 2:
        continue
    google_prediction = parse_google_response(google_prediction[1:])

    # Scoring
    # Number of distinct ground-truth words that also appear in the prediction
    truth_words = set(parsed_ground_truth)
    google_score = len(truth_words.intersection(google_prediction))
    microsoft_score = len(truth_words.intersection(microsoft_prediction))
    amazon_score = len(truth_words.intersection(amazon_prediction))

    # Stats for understanding results
    print("\nImage ID ", key)
    print("Ground Truth = ", sorted(parsed_ground_truth))
    for provider, score, prediction in (
        ("Amazon", amazon_score, amazon_prediction),
        ("Google", google_score, google_prediction),
        ("Microsoft", microsoft_score, microsoft_prediction),
    ):
        if prediction:
            print(
                provider + "\n\tScore = ",
                score,
                "\n\tOCR Results = ",
                sorted(prediction),
            )
        else:
            print("No " + provider + " Score")
    print("-" * 100)


Image ID  580235
Ground Truth =  ['de', 'de', 'de', 'jean', 'luz', 'of', 'st', 'tourisme']
Amazon
	Score =  5 
	OCR Results =  ['de', 'de', 'de', 'de', 'de', 'de', 'jean', 'jean', 'luz', 'luz', 'office', 'office', 'st', 'st', 'tourisme', 'tourisme']
Google
	Score =  5 
	OCR Results =  ['de', 'jean', 'luz', 'o', 'office', 'st', 'tourisme']
Microsoft
	Score =  5 
	OCR Results =  ['@', 'de', 'de', 'de', 'jean', 'luz', 'office', 'st', 'tourisme']
----------------------------------------------------------------------------------------------------

Image ID  580933
Ground Truth =  ['11', 'pierrephotography', 'tejada']
Amazon
	Score =  1 
	OCR Results =  ['$1', '$1', 'pierrefhocrapny', 'pierrefhocrapny', 'tejada', 'tejada']
Google
	Score =  1 
	OCR Results =  ['pierrefho', 'rapny', 'tejada']
Microsoft
	Score =  1 
	OCR Results =  ['pierrefe', 'tejada']
----------------------------------------------------------------------------------------------------

Image ID  581018
Ground Truth =  ['even