In [104]:
import argparse
from enum import Enum
import io
import os
from pprint import pprint

from google.cloud import vision
from PIL import Image, ImageDraw


class FeatureType(Enum):
    PAGE = 1
    BLOCK = 2
    PARA = 3
    WORD = 4
    SYMBOL = 5

# Grant credentials to notebook
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'credentials.json'


def draw_boxes(image, bounds, color):
    """Draw a border around the image using the hints in the vector list."""
    draw = ImageDraw.Draw(image)

    for bound in bounds:
        draw.polygon(
            [
                bound.vertices[0].x,
                bound.vertices[0].y,
                bound.vertices[1].x,
                bound.vertices[1].y,
                bound.vertices[2].x,
                bound.vertices[2].y,
                bound.vertices[3].x,
                bound.vertices[3].y,
            ],
            None,
            color,
        )
    return image

def get_document_bounds(image_file, feature):
    document = getAnnotations(image_file)
    
    bounds = []

    # Collect specified feature bounds by enumerating all document features
    # Lmao this is so hacky
    for page in document.pages:
        for block in page.blocks:
            for paragraph in block.paragraphs:
                for word in paragraph.words:
                    for symbol in word.symbols:
                        if feature == FeatureType.SYMBOL:
                            bounds.append(symbol.bounding_box)

                    if feature == FeatureType.WORD:
                        bounds.append(word.bounding_box)

                if feature == FeatureType.PARA:
                    bounds.append(paragraph.bounding_box)

            if feature == FeatureType.BLOCK:
                bounds.append(block.bounding_box)

    # The list `bounds` contains the coordinates of the bounding boxes.
    return bounds

def getAnnotations(image_file):
    """Returns document bounds given an image."""
    client = vision.ImageAnnotatorClient()

    # Convert to io byte format
    buffer = io.BytesIO()
    image_file.save(buffer, format="PNG")

    image = vision.Image(content=buffer.getvalue())

    response = client.document_text_detection(image=image)
    document = response.full_text_annotation
    return document


def render_doc_text(filein, fileout):
    image = Image.open(filein)
    render_doc_text_fromimg(image, fileout)

def render_doc_text_fromimg(img, fileout):
    bounds = get_document_bounds(img, FeatureType.BLOCK)
    draw_boxes(img, bounds, "blue")
    bounds = get_document_bounds(img, FeatureType.PARA)
    draw_boxes(img, bounds, "red")
    bounds = get_document_bounds(img, FeatureType.WORD)
    draw_boxes(img, bounds, "yellow")

    if fileout != 0:
        img.save(fileout)
    else:
        img.show()

def get_doc_metadata(img):
    annotations = getAnnotations(img)
    
    return annotations

In [105]:
# Execution sandbox
# Create a pdf with the img as background
from PIL import Image, ImageDraw
from io import BytesIO
from reportlab.pdfgen import canvas
from reportlab.lib.utils import ImageReader
from PyPDF2 import PdfMerger
import math

# render_doc_text("resources/ocrtest2.png", "out/doc_ocr_out.png")

import reportlab.rl_config
reportlab.rl_config.warnOnMissingFontGlyphs = 0

from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.cidfonts import UnicodeCIDFont
pdfmetrics.registerFont(UnicodeCIDFont('HeiseiMin-W3'))

def imgToPdf(img, pdf_merger, alpha=0.0):
    
    annotations = get_doc_metadata(img)

    # Create in-memory PDF files
    pdf_buffer = BytesIO()
    can = canvas.Canvas(pdf_buffer)
    fontSize = 16
    can.setFont('HeiseiMin-W3', fontSize)

    # Also throw the image onto the canvas
    imgReader = ImageReader(img)

# Write img to the canvas (stretch to fill page)
    can.drawImage(imgReader, 0, 0, can._pagesize[0], can._pagesize[1])
    textObj = can.beginText(0, 0)
    textObj.setFillColor('black', alpha=alpha)
    
    
    for (i, page) in enumerate(annotations.pages):
    # Just gonna assume we're on page 1 here with the request
        for (j, block) in enumerate(page.blocks):
            for (k, paragraph) in enumerate(block.paragraphs):
            # Get paragraph text
                for (l, word) in enumerate(paragraph.words):
                    for (m, symbol) in enumerate(word.symbols):
                        text = symbol.text
                
                        # Draw paragraph text
                        poly = symbol.bounding_box.vertices
                        coords = [(p.x, img.size[1]-p.y) for p in poly]
                        
                        # First quadrant
                        # coords = [(p.x, p.y) for p in poly]
                    
                    # Coords are currently image-size, convert to pdf-size
                        pdf_coords = [(p[0] * can._pagesize[0] / img.size[0], p[1] * can._pagesize[1] / img.size[1]) for p in coords]
                
                        # Width is height, square characters (get dist between corners)
                        charDims = pdf_coords[1][0] - pdf_coords[0][0]
                        fontScale = charDims / fontSize
                
                        # Move pointer to where the char should be
                        minx = min([p[0] for p in pdf_coords])
                        maxy = max([p[1] for p in pdf_coords])
                        desiredPos = (minx, maxy - charDims)
                        
                        # Set font to scale with char height
                        textObj.setTextTransform(fontScale, 0, 0, fontScale, desiredPos[0], desiredPos[1])
                        # textObj.setTextTransform(textObj.getX(), textObj.getY(), 1, 1, 1 ,1)
                        
                        textObj.textOut(text)
    can.drawText(textObj)


    
    # Test draw string
    # can.drawString(100, 100, "你好")
    # can.drawString(130, 100, "你好")

    # Save PDF file
    can.save()
    pdf_buffer.seek(0)

    # Merge PDF files
    pdf_merger.append(pdf_buffer)

In [110]:
# Break apart cbz into individual images

from zipfile import ZipFile
from PIL import Image
import io

pdf_merger = PdfMerger()

fname = 'Chapter 1_ 人生万事塞弱が虎'
with ZipFile('resources/'+fname+".cbz", 'r') as zip:
    with zip.open("001.jpg") as file:
        img_data = io.BytesIO(file.read())
        img = Image.open(img_data)
        imgToPdf(img, pdf_merger, alpha=0)
        
        # render_doc_text_fromimg(img, "out/1pageboxes.png")
    

# store pdf to file
pdf_merger.write('out/1pagetest.pdf')
                

In [109]:
# Break apart cbz into individual images

from zipfile import ZipFile
from PIL import Image
import io

pdf_merger = PdfMerger()

fname = 'BSD/Chapter 2 Huo ruBao Dan - Unknown'
with ZipFile('resources/'+fname+".cbz", 'r') as zip:
    nameList = zip.namelist()
    
    #Lexicographic sort
    nameList.sort()
    
    for name in nameList:
        
        if name.endswith('.jpg') or name.endswith('.png'):
            with zip.open(name) as file:
                img_data = io.BytesIO(file.read())
                img = Image.open(img_data)
                imgToPdf(img, pdf_merger)
            
            print("Page "+name+" processed.")

# store pdf to file
pdf_merger.write('out/'+fname+'.pdf')
                

Page 001.jpg processed.
Page 002.jpg processed.
Page 003.jpg processed.
Page 004.jpg processed.
Page 005.jpg processed.
Page 006.jpg processed.
Page 007.jpg processed.
Page 008.jpg processed.
Page 009.jpg processed.
Page 010.jpg processed.
Page 011.jpg processed.
Page 012.jpg processed.
Page 013.jpg processed.
Page 014.jpg processed.
Page 015.jpg processed.
Page 016.jpg processed.
Page 017.jpg processed.
Page 018.jpg processed.
Page 019.jpg processed.
Page 020.jpg processed.
Page 021.jpg processed.
Page 022.jpg processed.
Page 023.jpg processed.
Page 024.jpg processed.
Page 025.jpg processed.
Page 026.jpg processed.
Page 027.jpg processed.
Page 028.jpg processed.
Page 029.jpg processed.
Page 030.jpg processed.
Page 031.jpg processed.
Page 032.jpg processed.
Page 033.jpg processed.
Page 034.jpg processed.
Page 035.jpg processed.
Page 036.jpg processed.
Page 037.jpg processed.
Page 038.jpg processed.
Page 039.jpg processed.
Page 040.jpg processed.
Page 041.jpg processed.
Page 042.jpg pro

FileNotFoundError: [Errno 2] No such file or directory: 'out/BSD/Chapter 2 Huo ruBao Dan - Unknown.pdf'

