## Generating Annotations for Images

Uses the Google Cloud Vision API to generate annotations describing the content of each image and listing visually similar images.

In [None]:
import io
import os
import json
import queue

from google.cloud import vision
from tqdm.notebook import tqdm
from concurrent.futures import ThreadPoolExecutor

In [None]:
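# Point the Google Cloud client at the credentials JSON file (machine-specific absolute path; adjust for your environment).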
%env GOOGLE_APPLICATION_CREDENTIALS=C:\Users\Ethan\Desktop\repos\princeton-reverse-book-cover-search\api-keys\princeton-reverse-book-cover-5c6099bda6ff.json

In [None]:
def annotate(path):
    """Fetch Google Vision web-detection annotations for a single image.

    Parameters
    ----------

    path : str
        Location of the image. Values starting with ``"http"`` or ``"gs:"``
        are handed to the API as an image URI; anything else is opened as a
        local file and its bytes are sent instead.

    Returns
    -------

    The ``web_detection`` field of the API's web-detection response.

    """
    client = vision.ImageAnnotatorClient()

    is_remote = path.startswith(("http", "gs:"))
    if not is_remote:
        # Local file: read the raw bytes and embed them in the request.
        with open(path, "rb") as image_file:
            raw_bytes = image_file.read()
        image = vision.Image(content=raw_bytes)
    else:
        # Remote image: let the API fetch it from the given URI.
        image = vision.Image()
        image.source.image_uri = path

    # FIXME: probably better to replace this with a batch request, but this is fine.
    response = client.web_detection(image=image)
    return response.web_detection

In [None]:
class BookCover:
    """Pair an image's file name with the web annotations fetched for it.

    Creating an instance calls ``annotate`` on the given location, so
    construction performs the annotation request.

    Parameters
    ----------

    image_url : str
        Path or URI of the image; forwarded unchanged to ``annotate``.

    """

    def __init__(self, image_url):
        # Only the final path component is kept as the image's name.
        self.image_file_name = os.path.basename(image_url)
        # Whatever ``annotate`` returned for this image.
        self.annotations = annotate(image_url)

    @staticmethod
    def _message_to_dict(message):
        """Convert an annotation result into a plain dictionary.

        Prefers a ``to_dict`` converter exposed on the message's class (the
        conversion entry point proto-plus messages provide), because calling
        ``dict(...)`` directly only works for mapping-like objects. Falls
        back to ``dict(message)`` when no such converter exists.
        """
        converter = getattr(type(message), "to_dict", None)
        if callable(converter):
            return converter(message)
        return dict(message)

    def to_dict(self):
        """Return the file name and annotations as a plain dictionary.

        Returns
        -------

        dict
            ``{"image_name": <file name>, "response": <annotations as dict>}``
        """
        return {
            "image_name": self.image_file_name,
            "response": self._message_to_dict(self.annotations),
        }

    def __dict__(self):
        """Backward-compatible alias for :meth:`to_dict`.

        Kept because existing code calls ``book_cover.__dict__()``. Defining
        ``__dict__`` as a method shadows the usual instance-attribute mapping
        (for example, ``vars(obj)`` no longer returns the attributes), so new
        code should call :meth:`to_dict` instead.
        """
        return self.to_dict()

    def __str__(self):
        """Return the dictionary form serialised as a JSON string."""
        return json.dumps(self.to_dict())

    def __repr__(self):
        """Return the same JSON string as ``__str__``."""
        return json.dumps(self.to_dict())

#### Calling API on Each Image
Fetches the data for each image and stores it in a `BookCover` object, which contains just the filename and the API response.

In [None]:
IMAGES_DIR = r"./data/all-book-set/covers"
# NOTE(review): only the first five directory entries are processed; this
# looks like a debugging limit -- confirm before running on the full set.
images = os.listdir(IMAGES_DIR)[:5]
# Finished BookCover objects are collected here for the cells below.
results = queue.Queue()

image_paths = [f'{IMAGES_DIR}/{image}' for image in images]
# Annotate the images on a thread pool, advancing the progress bar as each
# result is yielded (executor.map yields in input order).
with tqdm(total=len(images)) as pbar, ThreadPoolExecutor(max_workers=16) as executor:
    for cover in executor.map(BookCover, image_paths):
        results.put(cover)
        pbar.update(1)

#### Writing all Responses to File
Writing all the gathered data to an output file.

In [None]:
# Drain the results queue into a plain list, preserving queue order.
responses = []
while True:
    try:
        responses.append(results.get_nowait())
    except queue.Empty:
        break

In [None]:
# Convert every collected BookCover to its dictionary form and wrap the list
# under a single "data" key; the bare name on the last line displays it.
serialized_covers = [cover.__dict__() for cover in responses]
book_cover_data = {"data": serialized_covers}
book_cover_data

In [None]:
OUTPUT_DIR = r"./data/all-book-set"
FILE_NAME = r"book_covers.json"

output_path = f"{OUTPUT_DIR}/{FILE_NAME}"
print("Writing to file...")
# Some book titles contain non-ASCII characters, so the file is opened as
# UTF-8 and ensure_ascii is disabled to write them as-is rather than escaped.
with open(output_path, mode="w", encoding="utf-8") as outfile:
    json.dump(book_cover_data, outfile, indent=4, ensure_ascii=False)
print("Done!")