In [1]:
import sys
sys.path.append("./EasyOCR")

In [2]:
import torch
from torchvision import transforms
from PIL import Image
from collections import OrderedDict
import importlib
import numpy as np
import torch.nn.functional as F

from easyocr.utils import CTCLabelConverter
from easyocr.config import recognition_models

# === Step 1: Load Your Custom Character Set from File ===
def load_character_set(file_path='./models/synth/custom_char.txt'):
    """Loads the character set from a text file."""
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            # .strip() removes any potential leading/trailing whitespace
            characters = f.read()
        return characters
    except FileNotFoundError:
        print(f"⚠️ Error: Character set file not found at '{file_path}'")
        return None
    
character = load_character_set()

# === Step 2: Define Model Path and Parameters ===
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model_config = recognition_models['gen1']['thai_g1']
model_path = f'./models/synth/{model_config["filename"]}'
# model_path = f'./models/synth/checkpoint_10.pth'

# Initialize converter with your custom character set
converter = CTCLabelConverter(character)
num_class = len(converter.character)
ignore_idx = []

# Network parameters must match your trained model's architecture
network_params = {
    'input_channel': 1,
    'output_channel': 512,
    'hidden_size': 512,
    'num_class': num_class  # Pass the correct number of classes
}


# === Step 3: Build the Model and Load Your Weights ===
model_pkg = importlib.import_module("easyocr.model.model")
model = model_pkg.Model(**network_params)

# Load the saved weights
state_dict = torch.load(model_path, map_location=device)

# Remove 'module.' prefix if it exists (from DataParallel training)
new_state_dict = OrderedDict()
for k, v in state_dict.items():
    name = k[7:] if k.startswith('module.') else k
    new_state_dict[name] = v

# Load the weights into the model
# Use strict=True because the model architecture and state_dict should match perfectly.
model.load_state_dict(new_state_dict, strict=True)

# Set the model to evaluation mode
model = model.to(device)
model.eval()

print("✅ Custom model loaded successfully for inference!")

⚠️ Error: Character set file not found at './models/synth/custom_char.txt'


TypeError: 'NoneType' object is not iterable

In [3]:
# === Step 4: Preprocessing ===
def preprocess_image(pil_img, contrast_factor=1.0):
    transform = transforms.Compose([
        transforms.Resize((64, 600)),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])
    return transform(pil_img).unsqueeze(0).to(device)

def custom_mean(probs):
    return float(torch.mean(probs).item()) if len(probs) > 0 else 0.0

# === Step 5: Prediction Function (EasyOCR-style) ===
def predict_text(image_path, contrast_ths=0.1, adjust_contrast=0.5, decoder='greedy', beamWidth=5):
    pil_img = Image.open(image_path).convert('L')
    return _predict_with_contrast_retry(pil_img, contrast_ths, adjust_contrast, decoder, beamWidth)

def _predict_with_contrast_retry(pil_img, contrast_ths, adjust_contrast, decoder, beamWidth):
    decoded_text, confidence = _predict(pil_img, decoder, beamWidth, contrast_factor=1.0)
    if confidence < contrast_ths:
        decoded_text, confidence = _predict(pil_img, decoder, beamWidth, contrast_factor=adjust_contrast)
    return decoded_text, confidence

def _predict(pil_img, decoder='greedy', beamWidth=5, contrast_factor=1.0):
    image_tensor = preprocess_image(pil_img, contrast_factor)
    batch_size = image_tensor.size(0)
    batch_max_length = 25

    with torch.no_grad():
        text_for_pred = torch.LongTensor(batch_size, batch_max_length + 1).fill_(0).to(device)
        preds = model(image_tensor, text_for_pred)  # [B, C, T]
        preds_size = torch.IntTensor([preds.size(1)] * batch_size)

        probs = F.softmax(preds, dim=2)
        probs[:, :, ignore_idx] = 0.0  # filter out ignored characters
        norm = probs.sum(dim=2, keepdim=True)
        probs = probs / norm

        if decoder == 'greedy':
            _, preds_index = probs.max(2)
            preds_index_flat = preds_index.view(-1).cpu().numpy()
            preds_str = converter.decode_greedy(preds_index_flat, preds_size.cpu())[0]
        elif decoder == 'beamsearch':
            preds_np = probs.cpu().numpy()
            preds_str = converter.decode_beamsearch(preds_np, beamWidth=beamWidth)[0]
        elif decoder == 'wordbeamsearch':
            preds_np = probs.cpu().numpy()
            preds_str = converter.decode_wordbeamsearch(preds_np, beamWidth=beamWidth)[0]

        # Confidence calculation
        values, indices = probs.max(2)
        mask = indices != 0  # ignore blank tokens
        filtered = [v[m] for v, m in zip(values, mask)]
        confidence = custom_mean(filtered[0]) if filtered else 0.0

    return preds_str, confidence

In [4]:
# === Step 6: Run Prediction ===
image_path = './Receipts/test_tiny.jpg'
text, conf = predict_text(image_path)
print(f"📝 Predicted Text: {text}")
print(f"📈 Confidence: {conf:.2f}")

📝 Predicted Text: นหส์ทแปะย่โปรงวินญุจศจหนนธิททาๆหลือชู้
📈 Confidence: 0.74


In [5]:
image_path = './Receipts/clear_sample.jpg'
text, conf = predict_text(image_path)
print(f"📝 Predicted Text: {text}")
print(f"📈 Confidence: {conf:.2f}")

📝 Predicted Text: บริษัทโฟลว์แอคเคาท์ทดสอบ
📈 Confidence: 0.96


In [6]:
import re

def keep_thai_eng_num(text):
    # Thai: \u0E00-\u0E7F
    # English letters: a-zA-Z
    # Digits: 0-9
    return ''.join(re.findall(r'[\u0E00-\u0E7Fa-zA-Z0-9]+', text))


raw_text = text
clean_text = keep_thai_eng_num(raw_text)
print(f"📝 Clean Thai text: {clean_text}")

📝 Clean Thai text: บริษัทโฟลว์แอคเคาท์ทดสอบ
