In [None]:
# Master switch: when True, the preview cells below also write their displayed
# image (normalized to 8-bit) to a file under images/.
save = True

In [None]:
import os, io
import cv2
import json
import numpy as np # scientific computing
import pandas as pd
import matplotlib.pyplot as plt # plotting
import matplotlib.image as mpimg # reading images
from collections import deque
from skimage.color import rgb2gray # converting rgb images to grayscale

# Service-account key file for the Google Cloud clients created further down
# (presumably picked up through this environment variable — confirm).
# NOTE(review): the key file name is hard-coded and resolved relative to the
# working directory; make sure the key itself is not committed.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = r'japaneseocr-1-8883b9dcab0a.json'

In [None]:
# Load the three sample images as grayscale arrays. The [:, :, :3] slice keeps
# only the first three channels, so a fourth (alpha) channel never reaches
# rgb2gray.
img1, img2, img3 = (
    rgb2gray(mpimg.imread(image_path)[:, :, :3])
    for image_path in (
        'images/test_image.png',
        'images/bird.png',
        'images/two_words.jpg',
    )
)

In [None]:
def ResizeWithAspectRatio(image, width=None, height=None, inter=cv2.INTER_AREA):
    """Resize ``image`` to a target width or height, keeping its aspect ratio.

    Args:
        image: Array whose first two dimensions are (rows, columns).
        width: Target width in pixels. Takes precedence when both ``width``
            and ``height`` are given.
        height: Target height in pixels; only used when ``width`` is None.
        inter: Interpolation flag forwarded to ``cv2.resize``.

    Returns:
        ``image`` itself when neither ``width`` nor ``height`` is given,
        otherwise the resized image returned by ``cv2.resize``.
    """
    (h, w) = image.shape[:2]

    if width is None and height is None:
        return image

    if width is None:
        scale = height / float(h)
        # Clamp to 1 px: truncation can produce 0 for very thin images, and
        # cv2.resize rejects an empty target size.
        dim = (max(1, int(w * scale)), height)
    else:
        scale = width / float(w)
        dim = (width, max(1, int(h * scale)))

    return cv2.resize(image, dim, interpolation=inter)

In [None]:
# Preview the unprocessed test image (img1) at 700 px width.
resize = ResizeWithAspectRatio(img1, width=700)
cv2.imshow('Original', resize)
cv2.waitKey(0)  # wait for a key press in the preview window
cv2.destroyAllWindows()

if save:
    # Stretch to the 0-255 range and convert to 8-bit before writing.
    result = cv2.normalize(
        resize,
        dst=None,
        alpha=0,
        beta=255,
        norm_type=cv2.NORM_MINMAX,
        dtype=cv2.CV_8U,
    )
    cv2.imwrite('images/original_image.png', result)


In [None]:
# Disabled matplotlib helpers (plot_result / plot_results). They are wrapped in
# a bare triple-quoted string, so nothing below is defined or executed; the
# cell only evaluates to that string.
'''
def plot_result(img, figsize_, method, filename, save=False):
    plt.figure(figsize=figsize_)
    plt.axis('off')
    plt.title(method)
    plt.imshow(img, cmap='gray')
    if save:
        plt.savefig(filename)
    plt.show()

def plot_results(result1, result2, figsize_, method, save=False):
    fig, axs = plt.subplots(2, 2, figsize=figsize_)
    axs[0, 0].set_title('Before ' + method)
    axs[0, 0].axis('off')
    axs[0, 0].imshow(result1, cmap='gray')
    axs[0, 1].set_title('After ' + method)
    axs[0, 1].imshow(result2, cmap='gray')
    if save:
        plt.savefig(method + '_results.png')
    plt.show()
'''

In [None]:
# Quick visual check of the two-word sample image (img3).
resize = ResizeWithAspectRatio(img3, width=700) # scale the preview to 700 px wide
cv2.imshow('Test', resize)
cv2.waitKey(0)  # wait for a key press in the preview window
cv2.destroyAllWindows()

### Sharpen

In [None]:
def sharpen(img):
    """Filter ``img`` with a 3x3 cross-shaped sharpening kernel.

    The centre weight is 5 and the four direct neighbours are -1, so the
    weights sum to 1. ``ddepth=-1`` is passed to ``cv2.filter2D``.
    """
    cross_kernel = np.array(
        [
            [0, -1, 0],
            [-1, 5, -1],
            [0, -1, 0],
        ],
        dtype=np.float32,
    )
    return cv2.filter2D(img, -1, cross_kernel)

def laplacian(img):
    """Filter ``img`` with a 3x3 kernel of centre weight 9 and -1 elsewhere.

    All eight neighbours are weighted -1, so the weights sum to 1.
    ``ddepth=-1`` is passed to ``cv2.filter2D``.
    """
    full_kernel = -np.ones((3, 3), dtype=np.float32)
    full_kernel[1, 1] = 9
    return cv2.filter2D(img, -1, full_kernel)

def same(img):
    """Filter ``img`` with the 3x3 identity kernel (single 1 in the centre).

    Serves as a baseline next to the sharpening filters; ``ddepth=-1`` is
    passed to ``cv2.filter2D``.
    """
    identity_kernel = np.zeros((3, 3), dtype=np.float32)
    identity_kernel[1, 1] = 1
    return cv2.filter2D(img, -1, identity_kernel)


In [None]:
# Cross-kernel sharpening of img1, previewed at 700 px width.
sharpened_image = sharpen(img1)
resize = ResizeWithAspectRatio(sharpened_image, width=700)
cv2.imshow('Sharpened', resize)
cv2.waitKey(0)  # wait for a key press in the preview window
cv2.destroyAllWindows()

if save:
    # Stretch to the 0-255 range and convert to 8-bit before writing.
    result = cv2.normalize(
        resize,
        dst=None,
        alpha=0,
        beta=255,
        norm_type=cv2.NORM_MINMAX,
        dtype=cv2.CV_8U,
    )
    cv2.imwrite('images/sharpened_image.png', result)


In [None]:
# Eight-neighbour sharpening of img1, previewed at 700 px width.
laplacian_image = laplacian(img1)
resize = ResizeWithAspectRatio(laplacian_image, width=700)
cv2.imshow('Laplacian', resize)
cv2.waitKey(0)  # wait for a key press in the preview window
cv2.destroyAllWindows()

if save:
    # Stretch to the 0-255 range and convert to 8-bit before writing.
    result = cv2.normalize(
        resize,
        dst=None,
        alpha=0,
        beta=255,
        norm_type=cv2.NORM_MINMAX,
        dtype=cv2.CV_8U,
    )
    cv2.imwrite('images/laplacian_image.png', result)


In [None]:
# Baseline: img1 passed through the identity kernel.
# The result is stored under its own name; assigning it to `same` replaced the
# `same` function with an array, so running this cell a second time failed.
same_image = same(img1)
resize = ResizeWithAspectRatio(same_image, width=700)
cv2.imshow('Same', resize)
cv2.waitKey(0)  # wait for a key press in the preview window
cv2.destroyAllWindows()
if save:
    # Stretch to the 0-255 range and convert to 8-bit before writing.
    result = cv2.normalize(resize, dst=None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8U)
    cv2.imwrite('images/same_image.png', result)


In [None]:
# Unsharp mask: blur img1 with sigma 2.0, then combine
# 2.5 * original - 1.5 * blurred.
gaussian = cv2.GaussianBlur(img1, ksize=(0, 0), sigmaX=2.0)
unsharp_image = cv2.addWeighted(src1=img1, alpha=2.5, src2=gaussian, beta=-1.5, gamma=0)

resize = ResizeWithAspectRatio(unsharp_image, width=700)
cv2.imshow('Unsharp Image', resize)
cv2.waitKey(0)  # wait for a key press in the preview window
cv2.destroyAllWindows()

if save:
    # Stretch to the 0-255 range and convert to 8-bit before writing.
    result = cv2.normalize(
        resize,
        dst=None,
        alpha=0,
        beta=255,
        norm_type=cv2.NORM_MINMAX,
        dtype=cv2.CV_8U,
    )
    cv2.imwrite('images/unsharp_image.png', result)


### Shifting (as a percentage of the picture)

In [None]:
def shiftUp(img, percent):
    """Translate ``img`` upward by ``percent`` percent of its height.

    The output canvas keeps the input's width and height.
    """
    rows, cols = img.shape[:2]
    offset_y = -(rows * percent / 100)
    # 2x3 affine matrix for a pure translation by (0, offset_y).
    affine = np.float32([[1, 0, 0], [0, 1, offset_y]])
    return cv2.warpAffine(src=img, M=affine, dsize=(cols, rows))

def shiftDown(img, percent):
    """Translate ``img`` downward by ``percent`` percent of its height.

    The output canvas keeps the input's width and height.
    """
    rows, cols = img.shape[:2]
    offset_y = rows * percent / 100
    # 2x3 affine matrix for a pure translation by (0, offset_y).
    affine = np.float32([[1, 0, 0], [0, 1, offset_y]])
    return cv2.warpAffine(src=img, M=affine, dsize=(cols, rows))

def shiftLeft(img, percent):
    """Translate ``img`` to the left by ``percent`` percent of its width.

    The horizontal offset is derived from the image width (it was previously
    derived from the height, which gave the wrong distance for non-square
    images). The output canvas keeps the input's width and height.
    """
    height, width = img.shape[:2]
    tx, ty = -(width * percent / 100), 0
    # 2x3 affine matrix for a pure translation by (tx, ty).
    translation_matrix = np.array([[1, 0, tx],
                                   [0, 1, ty]], dtype=np.float32)
    return cv2.warpAffine(src=img, M=translation_matrix, dsize=(width, height))

def shiftRight(img, percent):
    """Translate ``img`` to the right by ``percent`` percent of its width.

    The horizontal offset is derived from the image width (it was previously
    derived from the height, which gave the wrong distance for non-square
    images). The output canvas keeps the input's width and height.
    """
    height, width = img.shape[:2]
    tx, ty = (width * percent / 100), 0
    # 2x3 affine matrix for a pure translation by (tx, ty).
    translation_matrix = np.array([[1, 0, tx],
                                   [0, 1, ty]], dtype=np.float32)
    return cv2.warpAffine(src=img, M=translation_matrix, dsize=(width, height))

In [None]:
# Apply four translations in sequence (up 7, down 88, right 34, left 55);
# each step operates on the previous step's output.
shifted_image1 = shiftLeft(
    shiftRight(
        shiftDown(
            shiftUp(img1, 7),
            88,
        ),
        34,
    ),
    55,
)
resize = ResizeWithAspectRatio(shifted_image1, width=700)
cv2.imshow('Shifted', resize)
cv2.waitKey(0)  # wait for a key press in the preview window
cv2.destroyAllWindows()

if save:
    # Stretch to the 0-255 range and convert to 8-bit before writing.
    result = cv2.normalize(
        resize,
        dst=None,
        alpha=0,
        beta=255,
        norm_type=cv2.NORM_MINMAX,
        dtype=cv2.CV_8U,
    )
    cv2.imwrite('images/shifted_image1.png', result)


In [None]:
# Apply four translations in sequence (up 6, down 88, right 5, left 45);
# each step operates on the previous step's output.
shifted_image2 = shiftLeft(
    shiftRight(
        shiftDown(
            shiftUp(img1, 6),
            88,
        ),
        5,
    ),
    45,
)
resize = ResizeWithAspectRatio(shifted_image2, width=700)
cv2.imshow('Shifted', resize)
cv2.waitKey(0)  # wait for a key press in the preview window
cv2.destroyAllWindows()

if save:
    # Stretch to the 0-255 range and convert to 8-bit before writing.
    result = cv2.normalize(
        resize,
        dst=None,
        alpha=0,
        beta=255,
        norm_type=cv2.NORM_MINMAX,
        dtype=cv2.CV_8U,
    )
    cv2.imwrite('images/shifted_image2.png', result)


In [None]:
def crop_image(img, height_percent, width_percent):
    """Return the sub-region of ``img`` bounded by two percentage ranges.

    Args:
        img: Array whose first two dimensions are (rows, columns).
        height_percent: ``(start, end)`` pair, in percent of the height.
        width_percent: ``(start, end)`` pair, in percent of the width.

    Returns:
        The slice ``img[top:bottom, left:right]``, where each bound is the
        given percentage of the matching dimension truncated to an int.
    """
    top_pct, bottom_pct = height_percent
    left_pct, right_pct = width_percent
    rows, cols = img.shape[:2]
    row_span = slice(int(rows * top_pct / 100), int(rows * bottom_pct / 100))
    col_span = slice(int(cols * left_pct / 100), int(cols * right_pct / 100))
    return img[row_span, col_span]
    

In [None]:
# Keep the top 50% of the rows and the left 35% of the columns of img3.
cropped = crop_image(img3, (0,50), (0,35))
resize = ResizeWithAspectRatio(cropped, width=700) # scale the preview to 700 px wide
cv2.imshow('Cropped', resize)
cv2.waitKey(0)  # wait for a key press in the preview window
cv2.destroyAllWindows()

### Function to Process Data into Anki Card Format

In [None]:
# Data arrives in JSON format, obtained by running Google Document AI
def _pop_text(queue):
    """Pop the oldest entry of ``queue`` and return its text, or '' if empty."""
    return queue.popleft()['text'] if queue else ''


def format_anki(data):
    """Group Document AI entities into Anki card rows.

    Args:
        data: Iterable of entity dicts with a "type" and a "mentionText" key.
            Entities of type "English" and "Kanji" must also provide
            ``pageAnchor.pageRefs[0].boundingPoly.normalizedVertices``.
            Entities whose type is not one of "English", "Kana", "Kanji",
            "Sentence_English" or "Sentence_Japanese" are ignored.

    Returns:
        One list per "Kana" entity, in input order:
        ``[kana, kanji, japanese sentence, english translation,
        english example sentence]``. A field is '' when no entity is left
        for it, instead of raising IndexError as before.
    """
    kana, kanji, jap_sen, eng_trans, eng_ex_sen = (deque() for _ in range(5))

    # entity type -> (destination queue, whether its bounding box is kept)
    routes = {
        "English": (eng_trans, True),
        "Kana": (kana, False),
        "Kanji": (kanji, True),
        "Sentence_English": (eng_ex_sen, False),
        "Sentence_Japanese": (jap_sen, False),
    }
    for obj in data:
        route = routes.get(obj["type"])
        if route is None:
            continue
        queue, needs_bounding = route
        entry = {"text": obj["mentionText"]}
        if needs_bounding:
            entry["bounding"] = obj["pageAnchor"]["pageRefs"][0]["boundingPoly"]["normalizedVertices"]
        queue.append(entry)

    # Create the fields of the anki card, one card per kana entry.
    cards = []
    while kana:
        kana_text = _pop_text(kana)

        # The next kanji is used for this card only when its first vertex has
        # a smaller normalized y than the next English translation
        # (presumably: it sits above it on the page); otherwise it is left for
        # a later card. With no translation left to compare against, the
        # kanji is simply consumed.
        if kanji and (
            not eng_trans
            or kanji[0]['bounding'][0]['y'] < eng_trans[0]['bounding'][0]['y']
        ):
            kanji_text = _pop_text(kanji)
        else:
            kanji_text = ''

        jap_text = _pop_text(jap_sen)
        eng_trans_text = _pop_text(eng_trans)
        eng_ex_text = _pop_text(eng_ex_sen)

        cards.append([kana_text, kanji_text, jap_text, eng_trans_text, eng_ex_text])

    return cards


### Image OCR using Google Cloud Vision API (Not being used)

In [None]:
from importlib.resources import path
from google.cloud import vision
from google.cloud import vision_v1
from google.cloud.vision_v1 import types

# Module-level Cloud Vision client; detectText below calls it as a global.
# NOTE(review): presumably authenticates through the
# GOOGLE_APPLICATION_CREDENTIALS variable set in the import cell — confirm.
client = vision.ImageAnnotatorClient()

def detectText(img):
    """Run Cloud Vision text detection on an image file and print the results.

    For every annotation, the detected text and the vertices of its bounding
    polygon are printed. Nothing is returned.

    Args:
        img: Path of the image file to send.

    Raises:
        Exception: If the API response carries an error message.
    """
    with open(img, 'rb') as image_file:
        content = image_file.read()

    image = vision_v1.types.Image(content=content)
    response = client.text_detection(image=image)

    # Check for an API error before reporting anything, so a failed request
    # is not presented as an (empty) list of detections.
    if response.error.message:
        raise Exception(
            "{}\nFor more info on error messages, check: "
            "https://cloud.google.com/apis/design/errors".format(response.error.message)
        )

    print("Texts:")
    for text in response.text_annotations:
        print(f'\n"{text.description}"')

        vertices = [
            f"({vertex.x},{vertex.y})" for vertex in text.bounding_poly.vertices
        ]

        print("bounds: {}".format(",".join(vertices)))

FILE_NAME = 'images/test_image1.jpg'
FOLDER_PATH = r'./'

# detectText prints its findings itself and returns nothing, so it is called
# directly; wrapping it in print() only added a stray "None" line.
detectText(FILE_NAME)

### Image OCR using Google Document AI API

In [None]:
from typing import Optional
from google.api_core.client_options import ClientOptions
from google.cloud import documentai  # type: ignore
from google.cloud import documentai_v1

# Settings passed to process_document_sample at the bottom of this cell.
project_id = "japaneseocr-1"
location = "us" # Format is "us" or "eu"
processor_id = "eaf216b404f6b455" # The processor must be created before running this cell
file_path = "images/test_image1.jpg"
mime_type = "image/jpeg" # Refer to https://cloud.google.com/document-ai/docs/file-types for supported file types
field_mask = "text,entities"  # Optional. The fields to return in the Document object (",pages.pageNumber" is currently left out).
# processor_version_id = "YOUR_PROCESSOR_VERSION_ID" # Optional. Processor version to use
# Read as a global inside process_document_sample: True writes the cards built
# by format_anki to data.json, False writes the whole document JSON instead.
process_data = True


def process_document_sample(
    project_id: str,
    location: str,
    processor_id: str,
    file_path: str,
    mime_type: str,
    field_mask: Optional[str] = None,
    processor_version_id: Optional[str] = None,
) -> None:
    """Send one file to a Document AI processor and write the result to data.json.

    Only page 1 of the file is requested. What gets written depends on the
    module-level ``process_data`` flag: when truthy, each entity is reduced to
    its "type", "mentionText", "pageAnchor" and "id" fields, passed through
    ``format_anki`` and the resulting cards are dumped; otherwise the complete
    document, converted to a dict, is dumped.

    Args:
        project_id: Google Cloud project that owns the processor.
        location: Processor location; also used to build the API endpoint.
        processor_id: Identifier of the processor to call.
        file_path: Path of the file to process.
        mime_type: MIME type of the file at ``file_path``.
        field_mask: Optional field mask forwarded with the request.
        processor_version_id: Optional processor version; when given, the
            request targets that version instead of the processor itself.
    """
    # The `api_endpoint` must be set for any location other than "us".
    endpoint_options = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com")
    client = documentai.DocumentProcessorServiceClient(client_options=endpoint_options)

    # Full resource name of either the processor version
    # (.../processors/{processor_id}/processorVersions/{processor_version_id})
    # or the processor (.../processors/{processor_id}).
    if processor_version_id:
        name = client.processor_version_path(
            project_id, location, processor_id, processor_version_id
        )
    else:
        name = client.processor_path(project_id, location, processor_id)

    # Read the file into memory and wrap the bytes for the API.
    with open(file_path, "rb") as source_file:
        file_bytes = source_file.read()
    raw_document = documentai.RawDocument(content=file_bytes, mime_type=mime_type)

    # Restrict processing to the first page. See
    # https://cloud.google.com/document-ai/docs/reference/rest/v1/ProcessOptions
    page_selector = documentai.ProcessOptions.IndividualPageSelector(pages=[1])
    process_options = documentai.ProcessOptions(individual_page_selector=page_selector)

    request = documentai.ProcessRequest(
        name=name,
        raw_document=raw_document,
        field_mask=field_mask,
        process_options=process_options,
    )

    # `Document` attributes are listed at
    # https://cloud.google.com/document-ai/docs/reference/rest/v1/Document
    document = client.process_document(request=request).document

    # Convert the document to plain Python data via its JSON form.
    dict_obj = json.loads(documentai_v1.Document.to_json(document))

    if process_data:
        kept_fields = ("type", "mentionText", "pageAnchor", "id")
        filtered_data = [
            {field: entity[field] for field in kept_fields}
            for entity in dict_obj["entities"]
        ]
        payload = format_anki(filtered_data)
    else:
        payload = dict_obj

    # Write the selected payload to data.json
    with open("data.json", mode='w') as my_file:
        json.dump(payload, my_file)

# Run the Document AI request with the settings defined at the top of the cell.
process_document_sample(
    project_id=project_id,
    location=location,
    processor_id=processor_id,
    file_path=file_path,
    mime_type=mime_type,
    field_mask=field_mask,
)