# Define data and result directories

In [1]:
import os

# Root folders for the input pages and for everything the pipeline writes,
# relative to the notebook's working directory.
data_folder = "../data"
result_folder = "../results"
# Relative path of the one chapter processed here. Despite the name this is a
# single path string, not a list; it will be changed to a list later.
manga_list = "vi/Ruri Dragon (Oneshot)/Ch. None"

# Ruri Dragon
manga_folder = os.path.join(data_folder, manga_list)

# Per-chapter result sub-folders.
individual_result_folder = os.path.join(result_folder, manga_list)
json_output_dir = os.path.join(individual_result_folder, "json_results")
result_image_output_dir = os.path.join(individual_result_folder, "image_results")

cut_texts_dir = os.path.join(individual_result_folder, "cut_texts")

# os.listdir() returns entries in arbitrary order; sort them so the pages are
# processed (and logged) in the same order on every run and platform.
# Both directories must already exist, otherwise listdir raises.
raw_images = sorted(os.listdir(manga_folder))
json_files = sorted(os.listdir(json_output_dir))

# Destination of the re-OCR'd JSON files; created on first run.
easy_ocr_viet_ocr_result_dir = os.path.join(
    individual_result_folder, "easy_ocr_viet_ocr_result"
)
os.makedirs(easy_ocr_viet_ocr_result_dir, exist_ok=True)

# Initialize models

In [2]:
import easyocr
from vietocr.tool.config import Cfg
from vietocr.tool.predictor import Predictor

# Stage 1: EasyOCR reader for Vietnamese, used to locate text regions.
reader = easyocr.Reader(["vi"])

# Stage 2: VietOCR predictor built from the "vgg_transformer" configuration,
# with its weights read from a local file.
# NOTE: the variable is called `detector`, but it holds the VietOCR Predictor.
config = Cfg.load_config_from_name("vgg_transformer")
# Alternative weights: "../models/viet_ocr/custom_weight/transformerocr-1.pth"
config["weights"] = "../models/viet_ocr/pretrained_weight/vgg_transformer.pth"
config["cnn"]["pretrained"] = False
detector = Predictor(config)



In [None]:
# Experimental function code; not tested yet.

import os
import json
from PIL import Image


def get_transcript_from_image(img_path, reader, detector):
    """
    Run the two-stage OCR pipeline on a single cropped image.

    1. ``reader`` (EasyOCR) detects the text regions of the image.
    2. The axis-aligned bounding box of every region is cropped and handed
       to ``detector`` (VietOCR) for recognition.
    3. The non-blank recognized texts are joined with single spaces.

    Args:
        img_path: Path of the image file to transcribe.
        reader: Object whose ``readtext(path, detail=1, paragraph=False)``
            returns 3-tuples; the first item of each is a sequence of
            ``[x, y]`` corner points.
        detector: Object whose ``predict(image)`` returns a string.

    Returns:
        The merged transcript, or ``""`` when nothing is detected or every
        recognition comes back blank.
    """
    results = reader.readtext(img_path, detail=1, paragraph=False)

    if not results:
        return ""  # no text detected

    # Decode the image once for all regions instead of once per region, and
    # release the file handle as soon as the pixel data has been copied.
    with Image.open(img_path) as source:
        page = source.convert("RGB")

    line_texts = []
    for coords, _, _ in results:  # coords = [[x1,y1],[x2,y2],[x3,y3],[x4,y4]]
        xs = [pt[0] for pt in coords]
        ys = [pt[1] for pt in coords]
        cropped = page.crop((min(xs), min(ys), max(xs), max(ys)))

        # Best effort: one unreadable region must not discard the others.
        try:
            text_viet = detector.predict(cropped)
        except Exception as e:
            print(f"⚠️ VietOCR failed on {img_path}: {e}")
            text_viet = ""

        if text_viet.strip():
            line_texts.append(text_viet)

    return " ".join(line_texts)


def process_json_file(
    json_file,
    json_output_dir,
    cut_texts_dir,
    easy_ocr_viet_ocr_result_dir,
    reader,
    detector,
):
    """
    Re-run OCR on the essential text regions of one page and save the result.

    The annotation file ``json_file`` is loaded from ``json_output_dir``. For
    every entry flagged in ``is_essential_text``, the cut image
    ``<cut_texts_dir>/<base>/<base>_<idx:03>.png`` is transcribed with
    ``get_transcript_from_image`` and the text replaces ``ocr[idx]``. The
    original text is kept when the entry is not essential, the cut image is
    missing, the pipeline raises, or the transcript is empty.

    Args:
        json_file: File name (not path) of the annotation JSON.
        json_output_dir: Directory containing ``json_file``.
        cut_texts_dir: Directory with one sub-folder of cut images per page,
            named after the JSON file without its extension.
        easy_ocr_viet_ocr_result_dir: Directory the updated JSON is written
            to under the same file name; created if it does not exist.
        reader: EasyOCR reader forwarded to ``get_transcript_from_image``.
        detector: VietOCR predictor forwarded to ``get_transcript_from_image``.

    Returns:
        None. When the page has no cut-image folder, nothing is written.
    """
    base_name = os.path.splitext(json_file)[0]
    json_path = os.path.join(json_output_dir, json_file)
    cut_page_dir = os.path.join(cut_texts_dir, base_name)

    if not os.path.exists(cut_page_dir):
        print(f"⚠️ No cut_texts found for {json_file}, skipping...")
        return

    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    original_ocr = data["ocr"]
    new_ocr = []
    # The region itself is not needed here; zipping with "texts" only keeps
    # the entry count tied to both lists, as before.
    for idx, (_, is_essential) in enumerate(
        zip(data["texts"], data["is_essential_text"])
    ):
        fallback_text = original_ocr[idx]

        if not is_essential:
            new_ocr.append(fallback_text)
            continue

        cut_img_path = os.path.join(cut_page_dir, f"{base_name}_{idx:03}.png")
        if not os.path.exists(cut_img_path):
            print(f"⚠️ Missing cut image {cut_img_path}, keeping original OCR.")
            new_ocr.append(fallback_text)
            continue

        # Best effort: a failure on one region keeps that region's old text.
        try:
            vi_text = get_transcript_from_image(cut_img_path, reader, detector)
        except Exception as e:
            print(f"❌ OCR pipeline failed for {cut_img_path}: {e}")
            vi_text = ""

        # An empty transcript also falls back to the original OCR text.
        new_ocr.append(vi_text or fallback_text)

    data["ocr"] = new_ocr

    # Do not rely on the caller having created the output directory.
    os.makedirs(easy_ocr_viet_ocr_result_dir, exist_ok=True)
    out_path = os.path.join(easy_ocr_viet_ocr_result_dir, json_file)
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=4)

    print(f"✅ Saved result for {json_file} → {out_path}")


# Batch run: re-OCR every JSON annotation file listed for the chapter.
for json_file in json_files:
    # Anything that is not a .json file is ignored.
    if not json_file.endswith(".json"):
        continue
    process_json_file(
        json_file=json_file,
        json_output_dir=json_output_dir,
        cut_texts_dir=cut_texts_dir,
        easy_ocr_viet_ocr_result_dir=easy_ocr_viet_ocr_result_dir,
        reader=reader,
        detector=detector,
    )

✅ Saved result for 04.json → ../results\vi/Ruri Dragon (Oneshot)/Ch. None\easy_ocr_viet_ocr_result\04.json


Individual test

In [9]:
import os

# Smoke test: transcribe every file in the multi-line sample folder and print
# one "<file name>: <transcript>" line per image.
test_image_dir = "../data/vi/test_data/multiple_line"
test_images = os.listdir(test_image_dir)

for test_image in test_images:
    img_path = os.path.join(test_image_dir, test_image)

    # Reuse the same pipeline function as the batch run above.
    final_text = get_transcript_from_image(img_path, reader, detector)

    # An empty transcript is shown as "(empty)" so the line is never blank.
    print(f"{test_image}: {final_text if final_text else '(empty)'}")

03_006.png: Ừ, TRÔNG CON GIỐNG HỆT BỐ CON ĐẤY
04_001.png: VÌ CON LÀ CON LAI GIỮA NGƯỜI VÀ RỒNG.
05_004.png: BÌNH TĨNH NÃO, ĐẦU PHẢI NÓ SẼ GIẾT CON HAY GÌ ĐÂU
vnexpress.png: Một phần nguyên nhân khiến Mỹ không năm được vụ Israel tập kích lãnh thổ Qatar là do tập trung giám sát nơi khác, theo quan chức CENTCOM. "Đòn tấn công của Israel nhằm vào mục tiêu Hamas ở Qatar hoàn toàn không có dấu hiệu hay cảnh báo trước, vì toàn bộ phương tiện giám sát và sự chú ý của chúng tôi đều không nằm ở đó. Không ai nghĩ điều này sẽ diễn ra", tướng Derek France, chỉ huy lực lượng không quân thuộc Bộ tư lệnh Trung tâm Mỹ (CENTCOM), cho biết hôm 24/9. CENTCOM là đơn vị đặc trách toàn bộ hoạt động của quân đội Mỹ tại Trung Đông. Phát biểu được tướng France đưa ra trong buổi thảo luận về hệ quả 

In [12]:
import os

# Smoke test: transcribe every file in the single-line sample folder and print
# one "<file name>: <transcript>" line per image.
test_image_dir = "../data/vi/test_data/one_line"
test_images = os.listdir(test_image_dir)

for test_image in test_images:
    img_path = os.path.join(test_image_dir, test_image)

    # Reuse the same pipeline function as the batch run above.
    final_text = get_transcript_from_image(img_path, reader, detector)

    # An empty transcript is shown as "(empty)" so the line is never blank.
    print(f"{test_image}: {final_text if final_text else '(empty)'}")

04_001.png: VÌ CON LÀ
05_004.png: BÌNH TĨNH
