In [140]:
import io
import os
import json
import queue
import requests

from google.cloud import vision
from tqdm.notebook import tqdm
from concurrent.futures import ThreadPoolExecutor

In [141]:
# Sets the GOOGLE_APPLICATION_CREDENTIALS environment variable for this kernel to a JSON key file.
# NOTE(review): presumably this is what vision.ImageAnnotatorClient() authenticates with - confirm.
# NOTE(review): the value is an absolute, machine-specific Windows path; it has to be edited
# before the notebook can run on another machine. Do not append a comment to the %env line itself.
%env GOOGLE_APPLICATION_CREDENTIALS=C:\Users\Ethan\Desktop\repos\princeton-reverse-book-cover-search\api-keys\princeton-reverse-book-cover-5c6099bda6ff.json

env: GOOGLE_APPLICATION_CREDENTIALS=C:\Users\Ethan\Desktop\repos\princeton-reverse-book-cover-search\api-keys\princeton-reverse-book-cover-5c6099bda6ff.json


In [142]:
def annotate(path, client=None):
    """Return the Cloud Vision web-detection annotations for one image.

    Args:
        path: Image location. Values starting with ``http`` or ``gs:`` are
            sent to the API as a remote image URI; anything else is opened
            as a local file and its bytes are uploaded.
        client: Optional ``vision.ImageAnnotatorClient`` to reuse across
            calls. When omitted, a new client is created for this call,
            which is what the function always did before this parameter
            existed.

    Returns:
        The ``web_detection`` field of the API response.

    Raises:
        OSError: If a local ``path`` cannot be opened or read.
        RuntimeError: If the API response carries an error message.
    """
    if client is None:
        client = vision.ImageAnnotatorClient()

    if path.startswith(('http', 'gs:')):
        # Remote image: the service fetches it from the URI itself.
        image = vision.Image()
        image.source.image_uri = path
    else:
        # Local image: read the raw bytes and send them inline.
        with open(path, 'rb') as image_file:
            image = vision.Image(content=image_file.read())

    response = client.web_detection(image=image)

    # Per-image failures are reported inside the response rather than raised,
    # so surface them instead of returning an empty annotation set.
    if response.error.message:
        raise RuntimeError(
            f'Web detection failed for {path!r}: {response.error.message}'
        )

    return response.web_detection

In [143]:
class BookCover:
    """Web-detection results for a single book-cover image.

    Constructing an instance calls ``annotate`` on the given image, so
    creation performs the annotation request.
    """

    def __init__(self, image_url):
        """Annotate ``image_url`` and remember its base file name.

        Args:
            image_url: Path or URI of the cover image; passed unchanged to
                ``annotate``.
        """
        # Final path component only, e.g. 'cover.jpg' for './dir/cover.jpg'.
        self.image_file_name = os.path.basename(image_url)
        # Raw web-detection result as returned by annotate().
        self.annotations = annotate(image_url)

    def to_dict(self):
        """Return a JSON-serializable summary of this cover.

        Returns:
            A dict with ``image_name`` (the base file name) and
            ``custom_annotations``, which holds the URLs of pages with
            matching images, of full and partial matching images, and the
            score/description pairs of the detected web entities.
        """
        annotations = self.annotations
        custom_annotations = {
            'pages_with_matching_images': [page.url for page in annotations.pages_with_matching_images],
            'full_matching_images': [image.url for image in annotations.full_matching_images],
            'partial_matching_images': [image.url for image in annotations.partial_matching_images],
            'web_entities': [{'score': entity.score, 'description': entity.description}
                             for entity in annotations.web_entities],
        }
        return {
            "image_name": self.image_file_name,
            "custom_annotations": custom_annotations,
        }

    def __dict__(self):
        """Compatibility alias for :meth:`to_dict`.

        Defining ``__dict__`` as a method shadows the normal instance
        attribute dictionary, so ``obj.__dict__`` yields this bound method
        instead of the attribute mapping. It is kept only because existing
        code calls ``book_cover.__dict__()``; new code should call
        ``to_dict()``.
        """
        return self.to_dict()

    def __repr__(self):
        """Return the ``to_dict()`` summary encoded as a JSON string."""
        return json.dumps(self.to_dict())

    # str() and repr() intentionally produce the same JSON text.
    __str__ = __repr__

In [144]:
# Directory holding the cover images to annotate.
IMAGE_DIR = './data/book-test-set/book'

# os.listdir returns entries in arbitrary order; sorting makes the processing
# order (and therefore the order of the collected results) reproducible.
images = sorted(os.listdir(IMAGE_DIR))
image_paths = [f'{IMAGE_DIR}/{image}' for image in images]

# Collected BookCover objects, in the same order as `image_paths`.
results = queue.Queue()

with tqdm(total=len(images)) as pbar, ThreadPoolExecutor(max_workers=8) as executor:
    # BookCover(...) runs on the worker threads; executor.map hands the
    # finished objects back here in input order. A named loop variable is
    # used instead of `_`, which IPython reserves for the last cell output.
    for book_cover in executor.map(BookCover, image_paths):
        results.put(book_cover)
        pbar.update(1)

  0%|          | 0/480 [00:00<?, ?it/s]

In [145]:
# Empty the queue with non-blocking reads. qsize() is only approximate, and a
# blocking get() on an already-empty queue would hang the cell forever.
collected_covers = []
while True:
    try:
        collected_covers.append(results.get_nowait())
    except queue.Empty:
        break

# Put the items back so that re-running this cell exports the same data
# instead of overwriting the file with an empty list.
for book_cover in collected_covers:
    results.put(book_cover)

book_covers = {"book_covers": [book_cover.__dict__() for book_cover in collected_covers]}
print("Writing to file...")
with open(r'./data/book-test-set/book_covers.json', 'w') as outfile:
    json.dump(book_covers, outfile)
print("Done!")

Writing to file...
Done!
