In [12]:
import io
from google.cloud import vision
from google.cloud.vision import types

from enum import Enum
from PIL import Image, ImageDraw

In [3]:
# Read the first converted page as raw bytes and wrap them in the image
# message that the text-detection request below takes as its `image` argument.
with open("./convert/7.012noteslindrew-0.png", "rb") as image_file:
    image = types.Image(content=image_file.read())

In [5]:
# Vision API annotator client; shared by the detection call in the next cell
# and by render_doc_text() further down.
client = vision.ImageAnnotatorClient()



In [6]:
# Run document text detection on the image loaded above. Later cells read
# `response.full_text_annotation` (dumped to disk and walked for bounding boxes).
response = client.document_text_detection(image=image)

In [11]:
# Dump the full text annotation for offline inspection.
# The encoding is pinned to UTF-8 because OCR output may contain non-ASCII
# characters, and the platform default encoding is not guaranteed to handle them.
# NOTE: the file name is spelled "reponse.txt" (sic); kept unchanged so anything
# that already reads this file keeps working.
with open("reponse.txt", "w", encoding="utf-8") as f:
    f.write(str(response.full_text_annotation))

In [14]:
class FeatureType(Enum):
    """Levels of the OCR text hierarchy, ordered from outermost to innermost.

    Passed as the ``feature`` argument of ``get_document_bounds`` to select
    which level's bounding boxes are collected.
    """

    PAGE = 1    # outermost level: one entry per page
    BLOCK = 2   # blocks within a page
    PARA = 3    # paragraphs within a block
    WORD = 4    # words within a paragraph
    SYMBOL = 5  # innermost level: symbols within a word

In [15]:
def draw_boxes(image, bounds, color):
    """Outline each bounding polygon in ``bounds`` on ``image``.

    Args:
        image: Image object handed to ``ImageDraw.Draw``; it is drawn on in place.
        bounds: Iterable of objects exposing ``vertices``; the first four
            vertices' ``x``/``y`` attributes define the polygon corners.
        color: Outline colour for every polygon.

    Returns:
        The same ``image`` object that was passed in.
    """
    canvas = ImageDraw.Draw(image)

    for box in bounds:
        corners = box.vertices
        # Flatten the four corners into [x0, y0, x1, y1, x2, y2, x3, y3].
        outline_xy = []
        for idx in range(4):
            outline_xy.append(corners[idx].x)
            outline_xy.append(corners[idx].y)
        # Second positional argument is the fill (none); third is the outline.
        canvas.polygon(outline_xy, None, color)

    return image

In [16]:
def get_document_bounds(response, feature):
    """Collect the bounding boxes of every ``feature``-level element in ``response``.

    Args:
        response: Text-detection result; only
            ``response.full_text_annotation.pages`` and the
            blocks/paragraphs/words/symbols nested below it are read.
        feature: The ``FeatureType`` member selecting which level to collect.

    Returns:
        list: ``bounding_box`` objects in document order. For
        ``FeatureType.PAGE`` each page contributes the bounding box of its
        last block; a page with no blocks contributes nothing.
    """
    bounds = []
    document = response.full_text_annotation

    # Walk the page > block > paragraph > word > symbol hierarchy and pick up
    # the boxes at the requested level only.
    for page in document.pages:
        # Tracked per page so the PAGE branch never reads an unbound name
        # (first page empty) or a stale block left over from an earlier page.
        last_block = None

        for block in page.blocks:
            last_block = block

            for paragraph in block.paragraphs:
                for word in paragraph.words:
                    if feature == FeatureType.SYMBOL:
                        bounds.extend(
                            symbol.bounding_box for symbol in word.symbols)

                    if feature == FeatureType.WORD:
                        bounds.append(word.bounding_box)

                if feature == FeatureType.PARA:
                    bounds.append(paragraph.bounding_box)

            if feature == FeatureType.BLOCK:
                bounds.append(block.bounding_box)

        # NOTE(review): this uses the last block's box as the page-level box,
        # matching the previous behaviour for non-empty pages. Presumably the
        # page message carries no bounding_box of its own -- confirm against
        # the Vision API reference.
        if feature == FeatureType.PAGE and last_block is not None:
            bounds.append(last_block.bounding_box)

    # The list `bounds` contains the coordinates of the bounding boxes.
    return bounds

In [17]:
def render_doc_text(filein, fileout=None):
    """Run text detection on an image file and overlay the detected boxes.

    The file is sent through the module-level ``client``; the resulting
    bounds are drawn in blue for ``FeatureType.PAGE``, red for
    ``FeatureType.PARA`` and yellow for ``FeatureType.WORD``, in that order.

    Args:
        filein: Path of the image to analyse and annotate.
        fileout: Destination passed to ``image.save``. Any falsy value
            (``None``, ``0``, ``""``) shows the annotated image with
            ``image.show()`` instead of saving it.
    """
    with io.open(filein, 'rb') as image_file:
        content = image_file.read()
    response = client.document_text_detection(
        image=types.Image(content=content))

    image = Image.open(filein)

    # Drawing order matters: later layers are painted over earlier ones.
    layers = (
        (FeatureType.PAGE, 'blue'),
        (FeatureType.PARA, 'red'),
        (FeatureType.WORD, 'yellow'),
    )
    for feature, color in layers:
        draw_boxes(image, get_document_bounds(response, feature), color)

    # Truthiness check replaces `is not 0`, which compared identity against
    # an int literal (SyntaxWarning on Python 3.8+); 0 still means "show".
    if fileout:
        image.save(fileout)
    else:
        image.show()

In [18]:
render_doc_text("./convert/7.012noteslindrew-3.png", "tmp3.png")