In [None]:
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import os

# import matplotlib.pyplot as plt
import tesserocr
from tesserocr import PyTessBaseAPI, RIL
import pytesseract
from IPython.display import HTML
import easyocr
import numpy as np

# from paddleocr import PaddleOCR

In [None]:
# Folders holding the sample chapter pages used by the OCR experiments below.
MANGA_IMAGES_DIR = r"C:\Workspace\Learning\Projects\Anuvad\python\sampleImages\manga\shinju_no_necktar\chapter_0085"
MANHUA_IMAGES_DIR = r"C:\Workspace\Learning\Projects\Anuvad\python\sampleImages\manhua\my_harem_grew_so_large\chapter_0001"

# Raw directory listing plus a separately sorted copy, so that indexing into
# the sorted list is stable across runs.
manga_images = os.listdir(MANGA_IMAGES_DIR)
manga_images_sorted = list(manga_images)
manga_images_sorted.sort()

manhua_images = os.listdir(MANHUA_IMAGES_DIR)
manhua_images_sorted = list(manhua_images)
manhua_images_sorted.sort()

print("Manga Images in Sorted Order:\n", manga_images_sorted, "\n\n")
print("Manhua Images in Sorted Order:\n", manhua_images_sorted, "\n\n")

In [None]:
def preprocess_image(img):
    """Return an upscaled, thresholded grayscale version of ``img``.

    The image is converted to mode "L", enlarged to three times its width and
    height with bicubic resampling, and then every pixel value is remapped:
    values of 75 or below map to 0 (``False``), values above 75 are raised
    by 100.

    Args:
        img: A PIL image.

    Returns:
        The processed PIL image.
    """

    def blank_or_brighten(p):
        # Same mapping as `p > 75 and p + 100`: False for dark pixels,
        # otherwise the brightened value.
        if p > 75:
            return p + 100
        return False

    grey = img.convert("L")
    tripled_size = [3 * side for side in img.size]
    enlarged = grey.resize(tripled_size, Image.BICUBIC)
    return enlarged.point(blank_or_brighten)

    # # Apply a binary threshold
    # threshold = 128
    # binary_img = img.point(lambda p: 255 if p > threshold else 0)

    # return binary_img

    # # Convert to grayscale
    # img = img.convert("L")

    # # Apply a median filter for noise reduction
    # img = img.filter(ImageFilter.MedianFilter(size=3))

    # # Enhance contrast
    # enhancer = ImageEnhance.Contrast(img)
    # img = enhancer.enhance(2)  # Adjust this value as needed

    # # Apply binary thresholding
    # img = img.point(lambda p: 255 if p > 128 else 0)

    # # Convert to numpy array for morphological operations
    # img_np = np.array(img)

    # # Apply dilation and erosion (morphological operations)
    # img_np = ImageOps.invert(
    #     Image.fromarray(img_np)
    # )  # Invert colors for morphological operations
    # img_np = img_np.filter(ImageFilter.MinFilter(3))  # Dilation
    # img_np = img_np.filter(ImageFilter.MaxFilter(3))  # Erosion

    # # Convert back to binary
    # img_np = ImageOps.invert(img_np)
    # img_np = img_np.point(lambda p: 255 if p > 128 else 0)

    # # Convert back to PIL image
    # processed_img = Image.fromarray(np.array(img_np))

    # return processed_img


def pil_to_cv2(img):
    """Convert a PIL image into a BGR-ordered NumPy array for OpenCV.

    The image is first converted to RGB mode, turned into a NumPy array, and
    finally has its channels reordered from RGB to BGR.

    Args:
        img: A PIL image in any mode that ``convert("RGB")`` accepts.

    Returns:
        The BGR pixel array produced by ``cv2.cvtColor``.
    """
    rgb_pixels = np.array(img.convert("RGB"))
    bgr_pixels = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
    return bgr_pixels


def drawImage(
    img,
    bboxes=None,
    scalling_factor=1.0,
    threashold=70.0,
    font_path=None,
    font_size=20,
):
    """Overlay OCR boxes on a copy of ``img`` and display the result.

    For every box whose ``conf`` is strictly greater than ``threashold`` a
    green rectangle is drawn from (x1, y1) to (x2, y2), the recognised text is
    written in red 10 px above the top-left corner, and the confidence
    (formatted with no decimals) is written in blue 30 px above it.

    Args:
        img: PIL image to annotate. It is not modified; all drawing happens on
            a copy.
        bboxes: Iterable of dicts with the keys "x1", "y1", "x2", "y2",
            "text" and "conf". ``None`` means "no boxes". Entries that lack
            any of the four coordinate keys are skipped.
        scalling_factor: Factor applied to both dimensions of the annotated
            image before it is displayed.
        threashold: Boxes with ``conf <= threashold`` are not drawn.
        font_path: TrueType font used for the labels. Defaults to SimSun in
            the Windows fonts directory (the previously hard-coded location);
            pass another path on systems where that file does not exist.
        font_size: Point size of the label font.

    Raises:
        OSError: If the font file cannot be loaded by ``ImageFont.truetype``.
    """
    if bboxes is None:
        bboxes = []
    if font_path is None:
        font_path = os.path.join(r"C:\Windows\Fonts", "simsun.ttc")

    # convert() already returns a new image; copy() in the other case so the
    # caller's image is never drawn on.
    canvas = img.convert("RGB") if img.mode == "L" else img.copy()
    draw = ImageDraw.Draw(canvas)
    font = ImageFont.truetype(font_path, size=font_size)

    coord_keys = ("x1", "y1", "x2", "y2")
    for box in bboxes:
        if any(key not in box for key in coord_keys):
            # Nothing to draw for a detection without a usable position.
            continue
        conf = box["conf"]
        if conf <= threashold:
            continue
        x1, y1, x2, y2 = (box[key] for key in coord_keys)
        draw.rectangle(((x1, y1), (x2, y2)), outline=(0, 255, 0), width=2)
        draw.text(
            (x1, y1 - 10),
            box["text"],
            font=font,
            fill=(255, 0, 0),
            stroke_width=0,
        )
        draw.text(
            (x1, y1 - 30),
            f"{conf:.0f}",
            font=font,
            fill=(0, 0, 255),
            stroke_width=1,
        )

    scaled_size = tuple(int(scalling_factor * side) for side in canvas.size)
    display(canvas.resize(scaled_size))


def drawImageFile(image_path, bboxes=[], scalling_factor=1.0, threashold=70.0):
    """Open the image at ``image_path`` and render it through ``drawImage``.

    Args:
        image_path: Path of the image file to open.
        bboxes: Box dicts forwarded unchanged to ``drawImage``.
        scalling_factor: Display scale forwarded to ``drawImage``.
        threashold: Confidence cut-off forwarded to ``drawImage``.
    """
    # The context manager releases the underlying file handle as soon as the
    # image has been drawn and displayed.
    with Image.open(image_path) as image:
        drawImage(image, bboxes, scalling_factor, threashold)


def use_tessorocr(image_path, lang="eng", do_preprocess=False):
    """Run tesserocr on an image file and show what it found.

    Prints the recognised text and the per-word confidences, then displays
    the first element of every entry returned by ``GetRegions()`` and by
    ``GetComponentImages(RIL.TEXTLINE, True)`` (presumably the cropped
    component images).

    Args:
        image_path: Path of the image file to read.
        lang: Tesseract language code passed to ``PyTessBaseAPI``.
        do_preprocess: When true, the opened image is run through
            ``preprocess_image`` before recognition.
    """
    img = Image.open(image_path)
    if do_preprocess:
        # preprocess_image works on a PIL image; passing the path string here
        # (as the code used to) fails on the first ``.convert`` call.
        img = preprocess_image(img)

    # print(tesserocr.image_to_text(img, lang=lang))
    with PyTessBaseAPI(lang=lang) as api:  # type:ignore
        api.SetImage(img)

        print(api.GetUTF8Text())
        print(api.AllWordConfidences())
        regions = api.GetRegions()
        boxes = api.GetComponentImages(RIL.TEXTLINE, True)
        print("regions\n\n")
        for region in regions:
            display(region[0])
        print("\n\nboxes\n\n")
        print(boxes, "\n\n")
        for component in boxes:
            display(component[0])


def _parse_tesseract_tsv(tsv_text):
    """Convert Tesseract TSV output into the box dicts used by ``drawImage``.

    The first line is treated as the header and skipped. Rows with fewer than
    11 tab-separated fields (blank or truncated lines) are ignored, and a
    missing 12th field is read as empty text, so a row whose trailing empty
    text column was stripped no longer raises ``IndexError``.
    """
    id_columns = (
        "level",
        "page_num",
        "block_num",
        "par_num",
        "line_num",
        "word_num",
    )
    parsed = []
    for row in tsv_text.split("\n")[1:]:
        # Tolerate CRLF line endings.
        fields = row.rstrip("\r").split("\t")
        if len(fields) < 11:
            continue
        entry = {name: int(value) for name, value in zip(id_columns, fields)}
        left, top, width, height = (int(value) for value in fields[6:10])
        entry.update(
            x1=left,
            y1=top,
            x2=left + width,
            y2=top + height,
            width=width,
            height=height,
            conf=float(fields[10]) if fields[10] else 0.0,
            text=fields[11] if len(fields) > 11 else "",
        )
        parsed.append(entry)
    return parsed


def use_pytesseract(
    image_path,
    lang="eng",
    config="--oem 1 --psm 3",
    scalling_factor=1.0,
    threashold=70.0,
    do_preprocess=False,
):
    """Run pytesseract on an image file, print the text and draw the boxes.

    The plain-text result is printed first; the TSV result of
    ``image_to_data`` is then parsed into box dicts and handed to
    ``drawImage`` together with the (optionally preprocessed) image.

    Args:
        image_path: Path of the image file to read.
        lang: Tesseract language code.
        config: Extra command-line flags passed to Tesseract.
        scalling_factor: Display scale forwarded to ``drawImage``.
        threashold: Confidence cut-off forwarded to ``drawImage``.
        do_preprocess: When true, the image is run through
            ``preprocess_image`` before recognition.
    """
    img = Image.open(image_path)
    if do_preprocess:
        img = preprocess_image(img)

    ocr_text = pytesseract.image_to_string(img, lang=lang, config=config)
    print("Text Output: \n\n------------\n", ocr_text, "\n")

    tsv_text = pytesseract.image_to_data(img, lang=lang, config=config)
    temp_boxes = _parse_tesseract_tsv(tsv_text)
    drawImage(img, temp_boxes, scalling_factor, threashold)


def use_EasyOCR(
    image_path, lang, scalling_factor=1.0, threashold=70.0, do_preprocess=False
):
    """Run EasyOCR on an image file, print the text and draw the boxes.

    Each detection is unpacked as ``(corner_points, text, confidence)``. The
    confidence is multiplied by 100 so it is comparable with ``threashold``,
    and the corner points are reduced to the axis-aligned rectangle that
    encloses all of them.

    Args:
        image_path: Path of the image file to read.
        lang: EasyOCR language code to load in addition to English.
        scalling_factor: Display scale forwarded to ``drawImage``.
        threashold: Confidence cut-off (0-100) forwarded to ``drawImage``.
        do_preprocess: When true, the image is run through
            ``preprocess_image`` before recognition.
    """
    img = Image.open(image_path)
    if do_preprocess:
        img = preprocess_image(img)

    # Avoid requesting English twice when it is also the target language.
    languages = ["en"] if lang == "en" else ["en", lang]
    reader = easyocr.Reader(languages)

    # Always hand EasyOCR a NumPy array: its documented inputs are a file
    # path, bytes or an array, so an arbitrary PIL image is not reliable.
    # Colour images go through pil_to_cv2 to get OpenCV-style BGR ordering.
    ocr_input = np.array(img) if img.mode == "L" else pil_to_cv2(img)
    detections = reader.readtext(ocr_input)

    recognised_lines = []
    tmp_boxes = []
    for points, text, conf in detections:
        recognised_lines.append(text)
        if len(points) < 4:
            # No usable quadrilateral: keep the text in the printed output but
            # do not emit a box that drawImage could not position.
            continue
        xs = [point[0] for point in points]
        ys = [point[1] for point in points]
        tmp_boxes.append(
            {
                # min/max over all corners keeps rotated quads fully enclosed
                # and guarantees x1 <= x2 and y1 <= y2.
                "x1": min(xs),
                "y1": min(ys),
                "x2": max(xs),
                "y2": max(ys),
                "text": text,
                "conf": conf * 100,
            }
        )

    tmp_text = "".join(line + "\n" for line in recognised_lines)
    print("\n Extracted Text:\n\n", tmp_text)
    drawImage(
        img, bboxes=tmp_boxes, scalling_factor=scalling_factor, threashold=threashold
    )


# def use_PaddleOCR(
#     image_path, lang, scalling_factor=1.0, threashold=70.0, do_preprocess=False
# ):
#     img = Image.open(image_path)
#     if do_preprocess:
#         img = preprocess_image(img)
#     ocr = PaddleOCR(use_angle_cls=True, lang="en")
#     data = ocr.ocr(np.array(img) if img.mode == "L" else img)
#     print(data)
#     return
#     tmp_text = ""
#     tmp_boxes = []
#     for box in data:
#         tmp_data = {}
#         tmp_coords, text, conf = box
#         if len(tmp_coords) > 3:
#             x1, y1, x2, y2 = (
#                 tmp_coords[0][0],
#                 tmp_coords[0][1],
#                 tmp_coords[2][0],
#                 tmp_coords[2][1],
#             )
#             tmp_data["x1"], tmp_data["y1"], tmp_data["x2"], tmp_data["y2"] = (
#                 x1,
#                 y1,
#                 x2,
#                 y2,
#             )
#         tmp_data["text"] = text
#         tmp_data["conf"] = conf * 100
#         tmp_boxes.append(tmp_data)
#         tmp_text += text + "\n"
#     print("\n Extracted Text:\n\n", tmp_text)
#     drawImage(
#         img, bboxes=tmp_boxes, scalling_factor=scalling_factor, threashold=threashold
#     )

In [None]:
# Which page to process, which kind of comic it is, and which OCR engine's
# language codes to use.
img_idx = 6
mode = "manhua"
ocr_tool = "easyocr"

# Language code per (content type, OCR engine); any other combination keeps
# the "eng" default.
LANG = {
    ("manhua", "easyocr"): "ch_sim",
    ("manhua", "tesseract"): "chi_sim",
    ("manga", "easyocr"): "ja",
    ("manga", "tesseract"): "jpn",
}.get((mode, ocr_tool), "eng")

# Every mode other than "manhua" reads from the manga folder.
if mode == "manhua":
    image_path = os.path.join(MANHUA_IMAGES_DIR, manhua_images_sorted[img_idx])
else:
    image_path = os.path.join(MANGA_IMAGES_DIR, manga_images_sorted[img_idx])

# use_tessorocr(image_path, lang="chi_sim")
# use_pytesseract(image_path, lang=LANG, config="--oem 1 --psm 6", threashold=50.0)
use_EasyOCR(
    image_path,
    lang=LANG,
    threashold=0.0,
    scalling_factor=1.0,
    do_preprocess=False,
)

# use_PaddleOCR(
#     image_path, lang=LANG, threashold=0.0, scalling_factor=1.0, do_preprocess=False
# )

In [None]:
# import os


# def prepend_zeros(filename: str, char_to_extract_num: str = ".") -> str:
#     numeric_part, rest = filename.split(char_to_extract_num, 1)
#     # Convert the numeric part to an integer
#     num = int(numeric_part)

#     # Format the numeric part to have exactly 6 digits with leading zeros
#     new_numeric_part = f"{num:06d}"

#     # Construct the new filename
#     new_filename = f"{new_numeric_part}{char_to_extract_num}{rest}"
#     return new_filename


# def rename_images_in_folder(folder, char_to_extract_num="."):
#     for filename in os.listdir(folder):
#         # Check if it's an image file
#         if os.path.isfile(os.path.join(folder, filename)) and filename.lower().endswith(
#             (".jpg", ".jpeg", ".png", ".bmp", ".tiff")
#         ):
#             # Generate new filename with sequence number
#             new_filename = prepend_zeros(filename, char_to_extract_num)
#             source = os.path.join(folder, filename)
#             destination = os.path.join(folder, new_filename)
#             # Rename the file
#             os.rename(source, destination)
#             print(f"{filename}  ->   {new_filename}")


# rename_images_in_folder(
#     r"C:\Workspace\Learning\Projects\Anuvad\python\sampleImages\manga\shinju_no_necktar\chapter_0085",
#     "-",
# )
# rename_images_in_folder(
#     r"C:\Workspace\Learning\Projects\Anuvad\python\sampleImages\manhua\my_harem_grew_so_large\chapter_0001",
#     ".",
# )

In [None]:
# import glob
# from PIL import Image

# image_paths = glob.iglob("sampleImages/manhua/*/*/*", recursive=True)
# # for image_path in image_paths:
# #     print(image_path)
# print(next(image_paths))
# img = Image.open(next(image_paths))
# print(img.mode)
# img = img.convert("RGB")
# print(img.mode)

In [4]:
import numpy as np

# Sample detections used to exercise merge_boxes below.
# Each box is a list of four [x, y] corner points.
boxes = [
    [[131.0, 40.0], [178.0, 40.0], [178.0, 61.0], [131.0, 61.0]],
    [[131.0, 77.0], [182.0, 77.0], [182.0, 97.0], [131.0, 97.0]],
    [[214.0, 77.0], [309.0, 77.0], [309.0, 97.0], [214.0, 97.0]],
    [[102.0, 276.0], [404.0, 276.0], [404.0, 320.0], [102.0, 320.0]],
    [[110.0, 327.0], [402.0, 327.0], [402.0, 366.0], [110.0, 366.0]],
    [[103.0, 369.0], [404.0, 373.0], [404.0, 419.0], [102.0, 415.0]],
    [[108.0, 428.0], [324.0, 428.0], [324.0, 467.0], [108.0, 467.0]],
    [[455.0, 1439.0], [763.0, 1439.0], [763.0, 1488.0], [455.0, 1488.0]],
    [[174.0, 1462.0], [263.0, 1452.0], [286.0, 1647.0], [197.0, 1657.0]],
    [[456.0, 1488.0], [761.0, 1492.0], [760.0, 1536.0], [455.0, 1532.0]],
    [[458.0, 1536.0], [640.0, 1542.0], [638.0, 1588.0], [457.0, 1582.0]],
]

# Recognised text for each box; texts[i] is paired with boxes[i].
texts = [
    "免",
    "aoz",
    "nh.con",
    "我，夜孤楼，世",
    "上唯二能修炼纯",
    "阳功之人，慕仙",
    "门现任门主",
    "符剑双修，年纪",
    "-L",
    "轻轻就晋入陆地",
    "神仙境界",
]

# One score per entry above (presumably the recognition confidence in 0..1 —
# TODO confirm against the OCR run that produced this sample).
scores = [
    0.9947852492332458,
    0.9692819714546204,
    0.9721117615699768,
    0.9939132928848267,
    0.9856699109077454,
    0.992316722869873,
    0.9826822280883789,
    0.9934597015380859,
    0.616097092628479,
    0.9820842742919922,
    0.999176561832428,
]


def calculate_centroid(box):
    """Return the centroid of ``box`` as an ``(x, y)`` tuple.

    The centroid is the arithmetic mean of the first coordinate and of the
    second coordinate over all points in ``box``.

    Args:
        box: Sequence of points, each indexable as ``point[0]`` (x) and
            ``point[1]`` (y).

    Raises:
        ZeroDivisionError: If ``box`` is empty.
    """
    point_count = len(box)
    total_x = 0
    total_y = 0
    for point in box:
        total_x += point[0]
        total_y += point[1]
    return total_x / point_count, total_y / point_count


def are_boxes_close(box1, box2, threshold):
    """Tell whether the centroids of two boxes are closer than ``threshold``.

    Args:
        box1: First box, a sequence of (x, y) points.
        box2: Second box, a sequence of (x, y) points.
        threshold: Distance limit; the comparison is strict (``<``).

    Returns:
        The result of comparing the Euclidean distance between the two
        centroids with ``threshold``.
    """
    cx1, cy1 = calculate_centroid(box1)
    cx2, cy2 = calculate_centroid(box2)
    delta_x = cx1 - cx2
    delta_y = cy1 - cy2
    separation = np.sqrt(delta_x ** 2 + delta_y ** 2)
    return separation < threshold


def merge_boxes(boxes, texts, threshold=100):
    """Greedily group nearby boxes and merge each group into one rectangle.

    Boxes are visited in order. Each box that has not yet been absorbed
    becomes the seed of a new group and absorbs every later, still-free box
    that ``are_boxes_close`` reports as close to the seed (closeness is always
    measured against the seed, not against other group members).

    Args:
        boxes: Sequence of boxes, each a sequence of (x, y) points.
        texts: Sequence of strings; ``texts[i]`` belongs to ``boxes[i]``.
        threshold: Distance limit forwarded to ``are_boxes_close``.

    Returns:
        ``(merged_boxes, merged_texts)``. A group of one keeps its original
        box and text. A larger group yields the axis-aligned rectangle around
        all of its points (as four corner points) and its texts joined with a
        single space.
    """
    merged_boxes, merged_texts = [], []
    consumed = set()
    box_count = len(boxes)

    for seed_idx in range(box_count):
        if seed_idx in consumed:
            continue
        seed_box = boxes[seed_idx]

        # Later boxes that are still free and close to the seed.
        neighbours = [
            other_idx
            for other_idx in range(seed_idx + 1, box_count)
            if other_idx not in consumed
            and are_boxes_close(seed_box, boxes[other_idx], threshold)
        ]
        members = [seed_idx] + neighbours
        consumed.update(members)

        if not neighbours:
            # Lone box: pass it through untouched.
            merged_boxes.append(seed_box)
            merged_texts.append(texts[seed_idx])
            continue

        xs = [point[0] for idx in members for point in boxes[idx]]
        ys = [point[1] for idx in members for point in boxes[idx]]
        left, right = min(xs), max(xs)
        top, bottom = min(ys), max(ys)
        merged_boxes.append(
            [[left, top], [right, top], [right, bottom], [left, bottom]]
        )
        merged_texts.append(" ".join([texts[idx] for idx in members]))

    return merged_boxes, merged_texts


# Merge the sample detections with the default threshold and show the
# resulting rectangles and texts.
merged_boxes, merged_texts = merge_boxes(boxes, texts)
print(
    "\n\nMerged Boxes:\n",
    merged_boxes,
    "\n\nMerged Texts:\n",
    merged_texts,
)



Merged Boxes:
 [[[131.0, 40.0], [182.0, 40.0], [182.0, 97.0], [131.0, 97.0]], [[214.0, 77.0], [309.0, 77.0], [309.0, 97.0], [214.0, 97.0]], [[102.0, 276.0], [404.0, 276.0], [404.0, 419.0], [102.0, 419.0]], [[108.0, 428.0], [324.0, 428.0], [324.0, 467.0], [108.0, 467.0]], [[455.0, 1439.0], [763.0, 1439.0], [763.0, 1536.0], [455.0, 1536.0]], [[174.0, 1462.0], [263.0, 1452.0], [286.0, 1647.0], [197.0, 1657.0]], [[458.0, 1536.0], [640.0, 1542.0], [638.0, 1588.0], [457.0, 1582.0]]] 

Merged Texts:
 ['免 aoz', 'nh.con', '我，夜孤楼，世 上唯二能修炼纯 阳功之人，慕仙', '门现任门主', '符剑双修，年纪 轻轻就晋入陆地', '-L', '神仙境界']
