# Cloud APIs for Computer Vision: Up and Running in 15 Minutes

This code is part of [Chapter 8 - Cloud APIs for Computer Vision: Up and Running in 15 Minutes](https://learning.oreilly.com/library/view/practical-deep-learning/9781492034858/ch08.html).

## Compile Results for OCR

In this file we will compile the results using the ground truth and the collected data for all the test images. Please edit `PATH_TO_IMAGES` with the path to the test images that were used for the experiments. If you used different filenames for the prediction files or the validation ids, please edit those filenames accordingly.

In [1]:
import json

# Load ground-truth image ids (one per line; trailing newlines are kept).
# The encoding is explicit so the read does not depend on the platform's
# locale default.
with open('./data/val_imgIds_final.csv', encoding='utf-8') as f:
    imgIds = f.readlines()

# Load ground-truth text, keyed by image id.
with open('./data/ground_truth.json', encoding='utf-8') as f:
    ground_truth = json.load(f)

In [3]:
# Load Google OCR json file.
# The encoding is explicit because OCR output can contain non-ASCII text and
# open() otherwise falls back to the platform's locale encoding.
with open('./data/google_ocr.json', encoding='utf-8') as f:
    google = json.load(f)

In [4]:
# Load Microsoft OCR json file.
# The encoding is explicit because OCR output can contain non-ASCII text and
# open() otherwise falls back to the platform's locale encoding.
with open('./data/msft_ocr.json', encoding='utf-8') as f:
    microsoft = json.load(f)

In [5]:
# Load Amazon OCR json file.
# The encoding is explicit because OCR output can contain non-ASCII text and
# open() otherwise falls back to the platform's locale encoding.
with open('./data/amazon_ocr.json', encoding='utf-8') as f:
    amazon = json.load(f)

In [6]:
# Helper functions to get the image name from an image id and vice versa.

def get_id_from_name(name):
    """Return the integer image id encoded in an image file path.

    Takes the last "/"-separated component of ``name``, keeps the text that
    precedes the first ".jpg", and converts it with ``int`` (so leading zeros
    are dropped).
    """
    basename = name.rsplit("/", 1)[-1]
    stem, _, _ = basename.partition(".jpg")
    return int(stem)

def get_name_from_id(imgId):
    """Return the image file name used as a key for the given image id.

    The name is the literal ``PATH_TO_IMAGES`` placeholder, a fixed "000000"
    prefix, the id rendered with ``str``, and a ".jpg" suffix.
    """
    return f"PATH_TO_IMAGES000000{imgId!s}.jpg"

### Parsing

Each cloud provider sends the results in slightly different formats and we need to parse each of them correctly. So, we will develop a parsing function unique to each cloud provider.

#### Ground Truth Parsing

In [15]:
def parse(l):
    """Return the lower-cased entries of ``l`` that are at least two characters long.

    Order and duplicates are preserved.
    """
    return [word.lower() for word in l if len(word) >= 2]

#### Google Specific Parsing

In [1]:
def parse_goog(l):
    """Return the unique lower-cased tokens found in a list of strings.

    Each entry contributes both its whole lower-cased text and every
    whitespace-separated word of that text. Duplicates are removed through a
    ``set``, so the order of the returned list is arbitrary.
    """
    tokens = []
    for text in l:
        lowered = text.lower()
        tokens += [lowered, *lowered.split()]
    return list(set(tokens))

#### Microsoft Specific Parsing

In [2]:
def msft_name(imgId):
    """Return the bare image file name (no directory) for the given image id.

    The name is a fixed "000000" prefix, the id rendered with ``str``, and a
    ".jpg" suffix.
    """
    return f"000000{imgId!s}.jpg"


def parse_msft(dic):
    """Return the lower-cased words from a decoded Microsoft OCR response.

    When ``dic["status"]`` is not "Succeeded" an empty list is returned.
    Otherwise the "text" of every entry in
    ``dic["recognitionResult"]["lines"]`` is lower-cased and split on
    whitespace, and the words are collected in order.
    """
    if dic["status"] != "Succeeded":
        return []
    return [
        word
        for line in dic["recognitionResult"]["lines"]
        for word in line["text"].lower().split()
    ]

#### Amazon Specific Parsing

In [18]:
def parse_ama(l):
    """Return the lower-cased whitespace-separated words of the string ``l``.

    Order and duplicates are preserved.
    """
    return [token.lower() for token in l.split()]

Let's create a final function that compares the ground truth value with the values returned from the cloud providers. A couple of things to keep in mind:

1. The ground truth must exist in order for a comparison to occur.
2. The predictions must be available and must exist in order to do any parsing for that cloud provider.
3. We are normalizing each word in both the ground truth and the predictions in order to do a word-by-word comparison.

In [19]:
def calculate_score(gound_truth, predictions, arg):
    """Print the word-level accuracy of one provider against the ground truth.

    For every image with at least one ground-truth word, each ground-truth
    word that also occurs in the provider's parsed prediction counts as
    correct. A matched prediction word is consumed, so it cannot be matched
    twice. Images whose prediction is missing or too short still add their
    words to the total, so they count against the provider.

    Args:
        gound_truth: Mapping of image id to a list of ground-truth words.
            The misspelled parameter name is kept so existing keyword
            callers keep working.
        predictions: Mapping of image file name to the raw provider output.
            The expected value depends on ``arg``: a list for "goog" (the
            first element is skipped), a JSON string for "msft", and a plain
            string for "ama".
        arg: Provider selector: "goog", "msft" or "ama".

    Raises:
        ValueError: If ``arg`` is not one of the supported providers.
        KeyError: If ``predictions`` has no entry for an image that has
            ground-truth words.
    """
    if arg not in ("goog", "msft", "ama"):
        # Fail early instead of hitting an undefined (or stale) `pred` below.
        raise ValueError(
            "arg must be one of 'goog', 'msft' or 'ama', got %r" % (arg,))

    total = 0
    correct = 0
    # Iterate the mapping that was passed in, not a module-level global.
    for each in gound_truth:
        gt = parse(gound_truth[each])
        if not gt:
            continue
        total += len(gt)
        if arg == "goog":
            pred = predictions[get_name_from_id(each)]
            # Fewer than two entries leaves nothing after the first is dropped.
            if pred is None or len(pred) < 2:
                continue
            pred = parse_goog(pred[1:])
        elif arg == "msft":
            raw = predictions[msft_name(each)]
            if raw is None:
                continue
            # Decode the JSON payload once and reuse it.
            pred = json.loads(raw)
            if not pred:
                continue
            pred = parse_msft(pred)
        else:  # "ama"
            pred = predictions[get_name_from_id(each)]
            if pred is None or len(pred) < 2:
                continue
            pred = parse_ama(pred)
        for each_word in gt:
            if each_word in pred:
                # Consume the match so repeated words need repeated hits.
                pred.remove(each_word)
                correct += 1
    print("Total = ", total, "\tCorrect = ", correct)
    # Avoid ZeroDivisionError when there are no ground-truth words at all.
    print(float(correct) / float(total) if total else 0.0)

In [20]:
# Score the Amazon OCR results against the ground truth (prints totals and accuracy).
calculate_score(ground_truth, amazon, "ama")

Total =  10031 	Correct =  4583
0.4568836606519789


In [21]:
# Score the Google OCR results against the ground truth (prints totals and accuracy).
calculate_score(ground_truth, google, "goog")

Total =  10031 	Correct =  3192
0.3182135380321005


In [22]:
# Score the Microsoft OCR results against the ground truth (prints totals and accuracy).
calculate_score(ground_truth, microsoft, "msft")

Total =  10031 	Correct =  5524
0.5506928521583092


If you want to see the results for each image, use the block of code below to print out results and ground truth for each image.

In [23]:
# Print a per-image breakdown for every image where Microsoft matched at least
# one distinct ground-truth word while Amazon and Google matched none. Images
# for which any provider has no usable result are skipped.
for each in ground_truth:
    # Ground truth
    gt = parse(ground_truth[each])

    # Amazon
    ama = amazon[get_name_from_id(each)]
    # Test for a missing/empty result before using the value.
    if not ama:
        continue
    ama = parse_ama(ama)

    # Microsoft (the stored value is a JSON string; decode it once)
    msft_raw = microsoft[msft_name(each)]
    if msft_raw is None:
        continue
    msft = json.loads(msft_raw)
    if not msft:
        continue
    msft = parse_msft(msft)

    # Google (the first entry is skipped, so at least two are required)
    goog = google[get_name_from_id(each)]
    if goog is None or len(goog) < 2:
        continue
    goog = parse_goog(goog[1:])

    # Scoring
    # Count the distinct ground-truth words that each provider also returned.
    gt_words = set(gt)
    goog_score = len(gt_words.intersection(goog))
    msft_score = len(gt_words.intersection(msft))
    ama_score = len(gt_words.intersection(ama))

    # Stats for understanding results: only Microsoft got anything right.
    if msft_score > 0 and ama_score == 0 and goog_score == 0:
        print("\nImage ID ", each)
        print("Ground Truth = ", sorted(gt))
        print("Amazon\n\tScore = ", ama_score, "\n\tOCR Results = ",
              sorted(ama))
        print("Microsoft\n\tScore = ", msft_score, "\n\tOCR Results = ",
              sorted(msft))
        print("Google\n\tScore = ", goog_score, "\n\tOCR Results = ",
              sorted(goog))


Image ID  366863
Ground Truth =  ['albani', 'eu', 'raarup.']
Amazon
	Score =  0 
	OCR Results =  ['abani', 'raarup.eu']
Microsoft
	Score =  1 
	OCR Results =  ['@raarup.eu', 'albani']
Google
	Score =  0 
	OCR Results =  ['alban', 'oraarup.eu']

Image ID  494860
Ground Truth =  ['albarto', 'carrasco', 'casado', 'madrio']
Amazon
	Score =  0 
	OCR Results =  ['aibeto', 'cado', 'carrarco', 'nadrid']
Microsoft
	Score =  1 
	OCR Results =  ['all.', 'carrasco', 'ceredo', 'to']
Google
	Score =  0 
	OCR Results =  ['alberto', 'caa', 'cad', '北']

Image ID  383620
Ground Truth =  ['classico', 'eri']
Amazon
	Score =  0 
	OCR Results =  ['clssico', 'erio']
Microsoft
	Score =  1 
	OCR Results =  ['classico', 'fric']
Google
	Score =  0 
	OCR Results =  ['lassico']

Image ID  427920
Ground Truth =  ['coastal', 'coastal']
Amazon
	Score =  0 
	OCR Results =  ['ccoastal', 'sehc0', 'tcoastal']
Microsoft
	Score =  1 
	OCR Results =  ['(coastal', 'coastal']
Google
	Score =  0 
	OCR Results =  ['asta', 'ast