In [1]:
import paddle
# Report the installed Paddle version, what paddle.is_compiled_with_cuda()
# returns, and the device string from paddle.device.get_device().
print(f"paddle: {paddle.__version__}")
print(f"cuda available: {paddle.is_compiled_with_cuda()}")
print(f"device: {paddle.device.get_device()}")




paddle: 3.2.2
cuda available: True
device: gpu:0


In [2]:
from pathlib import Path

# Recognition dataset root; both label files sit directly inside it.
DATA_DIR = Path("train_data/rec")
TRAIN_GT = DATA_DIR.joinpath("rec_gt_train.txt")
VAL_GT = DATA_DIR.joinpath("rec_gt_val.txt")

# Fail fast if the label files or the train/val image folders are missing.
assert all(gt_file.exists() for gt_file in (TRAIN_GT, VAL_GT)), "ไม่เจอ rec_gt_train/val.txt"
assert all((DATA_DIR / split).exists() for split in ("train", "val")), "ไม่เจอโฟลเดอร์ train/val"

def read_gt(p, data_dir=None):
    """Parse and validate a TAB-separated recognition label file.

    Every non-blank line must have the form ``<image path><TAB><text>``. Only
    the first TAB splits the line, so the text part may itself contain TABs.
    Blank (whitespace-only) lines are skipped.

    Args:
        p: ``pathlib.Path`` of the UTF-8 encoded label file.
        data_dir: Directory that the image paths are relative to. When
            omitted, the module-level ``DATA_DIR`` is used.

    Returns:
        A list of ``(img_rel, text)`` tuples in file order.

    Raises:
        ValueError: A non-blank line contains no TAB. The message reports the
            1-based line number so it matches what an editor shows.
        FileNotFoundError: A referenced image does not exist under the
            image root.
    """
    root = DATA_DIR if data_dir is None else Path(data_dir)
    pairs = []
    lines = p.read_text(encoding="utf-8").splitlines()
    for line_no, line in enumerate(lines, start=1):
        if not line.strip():
            # A blank line carries no sample; skipping it avoids a misleading
            # "no TAB" error.
            continue
        if "\t" not in line:
            raise ValueError(f"บรรทัดที่ {line_no} ไม่มี TAB (\\t): {line[:120]}")
        img_rel, text = line.split("\t", 1)
        img_path = root / img_rel
        if not img_path.exists():
            raise FileNotFoundError(f"ไม่เจอรูป: {img_path}")
        pairs.append((img_rel, text))
    return pairs

# Load both label files, then show how many samples each holds and the first
# training sample.
train_pairs, val_pairs = read_gt(TRAIN_GT), read_gt(VAL_GT)

print(f"train lines: {len(train_pairs)}")
print(f"val lines: {len(val_pairs)}")
print(f"sample: {train_pairs[0]}")

# Build the character dictionary from every train + val label. Spaces are left
# out of the file (the original note says use_space_char=True is used at
# training time).
all_texts = [label for _, label in train_pairs + val_pairs]
chars = {ch for label in all_texts for ch in label if ch != " "}
# Longest label, counted in characters and including spaces.
max_len = max(map(len, all_texts), default=0)

# One character per line, ordered by code point.
DICT_PATH = DATA_DIR / "dict_custom.txt"
dict_body = "".join(f"{ch}\n" for ch in sorted(chars, key=ord))
DICT_PATH.write_text(dict_body, encoding="utf-8")

print(f"dict saved: {DICT_PATH}")
print(f"charset_size: {len(chars)}")
print(f"max_text_length in labels: {max_len}")


train lines: 50
val lines: 128
sample: ('train/train_000000.jpg', 'TOTAL ฿ 1,234.50')
dict saved: train_data/rec/dict_custom.txt
charset_size: 109
max_text_length in labels: 84


In [3]:
import yaml
from pathlib import Path

# Pretrained weights to fine-tune from; stop immediately if they are missing.
PRETRAIN = Path("pretrain_models/th_PP-OCRv5_mobile_rec_pretrained.pdparams")
assert PRETRAIN.exists(), f"ไม่เจอไฟล์ pretrained: {PRETRAIN}"

# Locate the Thai config automatically, because its path may differ between
# versions. rglob yields matches in no guaranteed order, so sort them to make
# the pick reproducible when more than one file matches.
candidates = sorted(Path("configs").rglob("th_PP-OCRv5_mobile_rec*.y*ml"))
assert candidates, "หา config th_PP-OCRv5_mobile_rec ไม่เจอใน configs/"
BASE_CFG = candidates[0]
print("base config:", BASE_CFG)

# safe_load returns None for an empty document; fall back to an empty mapping
# so the setdefault-based overrides below still work.
cfg = yaml.safe_load(BASE_CFG.read_text(encoding="utf-8")) or {}

# ---------- Global ----------
# character_dict_path is not overridden here, so the base config's value is
# kept. The alternative is str(DICT_PATH), the dict built from the labels.
#
# max_text_length: raised to the longest label plus a margin of 5. Some configs
# keep this value in the Train/Eval transforms or in PostProcess rather than in
# Global; it is set in Global in case the config reads it from there.
# NOTE(review): values the base config copies from Global via YAML anchors are
# plain copies after loading and are not changed by this assignment. Confirm
# whether any other section needs the same value.
global_cfg = cfg.setdefault("Global", {})
global_cfg.update(
    {
        "pretrained_model": str(PRETRAIN),
        "save_model_dir": "./output/th_rec_finetune",
        "epoch_num": 20,
        "use_space_char": True,
        "print_batch_step": 50,
        "max_text_length": int(max_len) + 5,
        "use_gpu": True,
    }
)
# ---------- Dataset paths ----------
# Point the Train and Eval datasets at the local data, creating any missing
# intermediate sections instead of assuming the base config has them.
train_dataset = cfg.setdefault("Train", {}).setdefault("dataset", {})
train_dataset["data_dir"] = "./train_data/rec"
train_dataset["label_file_list"] = ["./train_data/rec/rec_gt_train.txt"]

eval_section = cfg.setdefault("Eval", {})
eval_dataset = eval_section.setdefault("dataset", {})
eval_dataset["data_dir"] = "./train_data/rec"
eval_dataset["label_file_list"] = ["./train_data/rec/rec_gt_val.txt"]
# "loader" gets the same setdefault treatment as "dataset"; indexing it
# directly raised KeyError when the base config had no Eval.loader section.
eval_section.setdefault("loader", {})["batch_size_per_card"] = 1

# ---------- Learning rate (a lower LR is recommended for fine-tuning) ----------
# Only override when the lr entry is a mapping; any other kind of value is
# left as the base config defined it.
lr_cfg = cfg.setdefault("Optimizer", {}).setdefault("lr", {})
if isinstance(lr_cfg, dict):
    lr_cfg["learning_rate"] = 1e-4

# Serialize the patched config next to the stock configs, keeping the key order
# and writing non-ASCII characters unescaped.
OUT_CFG = Path("configs/rec/my_th_rec_finetune.yaml")
OUT_CFG.parent.mkdir(parents=True, exist_ok=True)
rendered_cfg = yaml.safe_dump(cfg, sort_keys=False, allow_unicode=True)
OUT_CFG.write_text(rendered_cfg, encoding="utf-8")
print(f"saved: {OUT_CFG}")


base config: configs/rec/PP-OCRv5/multi_language/th_PP-OCRv5_mobile_rec.yaml
saved: configs/rec/my_th_rec_finetune.yaml


In [4]:
import os, sys, subprocess


# Launch tools/train.py through paddle.distributed.launch with the generated
# config. Output is captured, so nothing is shown until the process exits.
cmd = [sys.executable, "-m", "paddle.distributed.launch", "tools/train.py",
       "-c", "configs/rec/my_th_rec_finetune.yaml"]
p = subprocess.run(cmd, text=True, capture_output=True)

# Show only the last 4000 characters of each stream to keep the cell output small.
print("returncode =", p.returncode)
print("\n===== STDOUT (tail) =====")
print(p.stdout[-4000:])
print("\n===== STDERR (tail) =====")
print(p.stderr[-4000:])

# Raise on a non-zero exit code (after the tails are visible) so that a
# Run All does not carry on to the evaluation and export cells.
p.check_returncode()


returncode = 0

===== STDOUT (tail) =====
34 MB
[2025/12/17 10:27:07] ppocr INFO: save model in ./output/th_rec_finetune/latest
[2025/12/17 10:27:12] ppocr INFO: epoch: [13/20], global_step: 39, lr: 0.000088, acc: 0.120000, norm_edit_dis: 0.668568, CTCLoss: 19.720331, NRTRLoss: 1.145633, loss: 20.867764, avg_reader_cost: 0.03702 s, avg_batch_cost: 0.11233 s, avg_samples: 4.0, ips: 35.61085 samples/s, eta: 0:00:39, max_mem_reserved: 13529 MB, max_mem_allocated: 13234 MB
[2025/12/17 10:27:13] ppocr INFO: save model in ./output/th_rec_finetune/latest
[2025/12/17 10:27:18] ppocr INFO: epoch: [14/20], global_step: 42, lr: 0.000082, acc: 0.120000, norm_edit_dis: 0.669518, CTCLoss: 18.214540, NRTRLoss: 1.128917, loss: 19.333504, avg_reader_cost: 0.04475 s, avg_batch_cost: 0.12087 s, avg_samples: 4.0, ips: 33.09323 samples/s, eta: 0:00:34, max_mem_reserved: 13529 MB, max_mem_allocated: 13234 MB
[2025/12/17 10:27:19] ppocr INFO: save model in ./output/th_rec_finetune/latest
[2025/12/17 10:27:23

In [5]:
from pathlib import Path
import yaml

# Re-read the generated config from disk and report which character
# dictionary it points at and how many entries that gives.
cfg_file = Path("configs/rec/my_th_rec_finetune.yaml")
cfg = yaml.safe_load(cfg_file.read_text(encoding="utf-8"))
global_section = cfg["Global"]
dict_path = Path(global_section["character_dict_path"])
use_space = global_section.get("use_space_char", False)

dict_lines = dict_path.read_text(encoding="utf-8").splitlines()
n_lines = len(dict_lines)
# One extra entry is counted for the space character when it is enabled.
space_slot = 1 if use_space else 0
print(f"dict_path: {dict_path}")
print(f"dict lines: {n_lines}")
print(f"use_space_char: {use_space}")
print(f"effective charset size (incl space if enabled): {n_lines + space_slot}")


dict_path: ppocr/utils/dict/ppocrv5_th_dict.txt
dict lines: 524
use_space_char: True
effective charset size (incl space if enabled): 525


In [6]:
import subprocess
import sys
from pathlib import Path

# Checkpoint to evaluate. Despite the variable name, this is the file saved
# for epoch 20, not a checkpoint selected by score.
best = "./output/th_rec_finetune/iter_epoch_20.pdparams"
cmd = [sys.executable, "tools/eval.py", "-c", "configs/rec/my_th_rec_finetune.yaml",
       "-o", f"Global.pretrained_model={best}"]

# Run the evaluation once and reuse the captured output for both the log file
# and the printed tails (previously the whole evaluation was executed twice).
p = subprocess.run(cmd, text=True, capture_output=True)

# Keep the complete output on disk; create the directory in case it is missing.
log_path = Path("output/th_rec_finetune/eval.log")
log_path.parent.mkdir(parents=True, exist_ok=True)
log_path.write_text(f"{p.stdout}\n===== STDERR =====\n{p.stderr}", encoding="utf-8")

print("returncode =", p.returncode)
print("\n===== STDOUT (tail) =====")
print(p.stdout[-4000:])
print("\n===== STDERR (tail) =====")
print(p.stderr[-4000:])

# Same contract as the earlier check=True: a failed evaluation raises
# CalledProcessError, but only after the log and the tails are available.
p.check_returncode()


returncode = 0

===== STDOUT (tail) =====
5/12/17 10:29:13] ppocr INFO:     loss_config_list : 
[2025/12/17 10:29:13] ppocr INFO:         CTCLoss : None
[2025/12/17 10:29:13] ppocr INFO:         NRTRLoss : None
[2025/12/17 10:29:13] ppocr INFO:     name : MultiLoss
[2025/12/17 10:29:13] ppocr INFO: Metric : 
[2025/12/17 10:29:13] ppocr INFO:     ignore_space : False
[2025/12/17 10:29:13] ppocr INFO:     main_indicator : acc
[2025/12/17 10:29:13] ppocr INFO:     name : RecMetric
[2025/12/17 10:29:13] ppocr INFO: Optimizer : 
[2025/12/17 10:29:13] ppocr INFO:     beta1 : 0.9
[2025/12/17 10:29:13] ppocr INFO:     beta2 : 0.999
[2025/12/17 10:29:13] ppocr INFO:     lr : 
[2025/12/17 10:29:13] ppocr INFO:         learning_rate : 0.0001
[2025/12/17 10:29:13] ppocr INFO:         name : Cosine
[2025/12/17 10:29:13] ppocr INFO:         warmup_epoch : 5
[2025/12/17 10:29:13] ppocr INFO:     name : Adam
[2025/12/17 10:29:13] ppocr INFO:     regularizer : 
[2025/12/17 10:29:13] ppocr INFO:        

In [7]:
import os, sys, subprocess

# Checkpoint to export. Despite the variable name, this is the file saved for
# epoch 20, not a checkpoint selected by score.
best = r"./output/th_rec_finetune/iter_epoch_20.pdparams"

# Export the inference model with tools/export_model.py, overriding the
# checkpoint and the output directory on the command line.
cmd = [
    sys.executable, "tools/export_model.py",
    "-c", "configs/rec/my_th_rec_finetune.yaml",
    "-o",
    f"Global.pretrained_model={best}",
    "Global.save_inference_dir=./inference/th_rec_finetune"]

p = subprocess.run(cmd, text=True, capture_output=True)

# Show only the last 4000 characters of each stream to keep the cell output small.
print("returncode =", p.returncode)
print("\n===== STDOUT (tail) =====")
print(p.stdout[-4000:])
print("\n===== STDERR (tail) =====")
print(p.stderr[-4000:])

# Raise on a non-zero exit code (after the tails are visible) instead of
# letting a failed export pass as a printed number.
p.check_returncode()


returncode = 0

===== STDOUT (tail) =====
Skipping import of the encryption module.
[2025/12/17 10:33:13] ppocr INFO: load pretrain successful from ./output/th_rec_finetune/iter_epoch_20
[2025/12/17 10:33:13] ppocr INFO: Export inference config file to ./inference/th_rec_finetune/inference.yml
Skipping import of the encryption module
[2025/12/17 10:33:16] ppocr INFO: inference model is saved to ./inference/th_rec_finetune/inference


===== STDERR (tail) =====
W1217 10:33:12.769980 15205 gpu_resources.cc:114] Please NOTE: device: 0, GPU Compute Capability: 7.5, Driver API Version: 13.0, Runtime API Version: 12.6
W1217 10:33:14.638943 15205 eager_utils.cc:3441] Paddle static graph(PIR) not support input out tensor for now!!!!!

