Load the PARSeq small v1.0 model from PyTorch Hub:

In [None]:
import torch

# Query the entry points published by the PARSeq hub repo. The returned list is
# not used below; the call is kept from the original cell.
torch.hub.list('baudm/parseq')

# Load the pretrained 'parseq' entry point and switch it to eval mode right away:
# without .eval() dropout stays active, so repeated evaluations of the same image
# can give different predictions and the reported metrics are not reproducible.
model = torch.hub.load('baudm/parseq', 'parseq', pretrained=True).eval()

Compute WAR (word accuracy rate) and CER (character error rate), both case-insensitive, on the first 1000 samples listed in the WordArt dataset's label file:

In [None]:
from torchvision import transforms
from PIL import Image
import editdistance

# Evaluate the loaded PARSeq model on the WordArt test split.
#
# Each line of labels.txt is "<relative image path> <ground-truth text>". Every
# image is resized, normalised and decoded by the model; the prediction is then
# compared case-insensitively with the ground truth to accumulate
#   * WAR: share of images whose whole prediction matches exactly, and
#   * CER: total edit distance divided by total ground-truth characters.

img_transform = transforms.Compose([
    # 32x128 (H x W) is the input size of parseq small/tiny. Bicubic resampling
    # matches the preprocessing shipped with PARSeq itself
    # (SceneTextDataModule.get_transform); the torchvision default is bilinear.
    transforms.Resize((32, 128), interpolation=transforms.InterpolationMode.BICUBIC),
    transforms.ToTensor(),
    # Maps pixel values from [0, 1] to [-1, 1]; the single mean/std value is
    # applied to all three channels.
    transforms.Normalize(mean=[0.5], std=[0.5]),
])

dataset_path = "dataset_wordart_test/"
label_file = dataset_path + "labels.txt"

total_images = 0
war_correct_predictions = 0
cer_error = 0
cer_char_count = 0

# Maximum number of label lines to attempt (failed attempts count as well).
limit = 1000

# Make sure dropout is disabled even if the model was loaded without .eval().
model.eval()

# open label file; gradients are never needed for evaluation, so disable
# autograd for the whole loop to save memory and time
with open(label_file, "r", encoding="utf-8") as f, torch.no_grad():
    for line in f:
        # stop after N inferences
        if limit <= 0:
            break

        parts = line.strip().split()
        # skip invalid lines (need at least a path and one label token)
        if len(parts) < 2:
            continue

        img_path = dataset_path + parts[0]
        ground_truth = " ".join(parts[1:])
        # Normalise once so WAR, the edit distance and the CER denominator all
        # operate on exactly the same string.
        target = ground_truth.strip().lower()

        try:
            # get image; the context manager closes the file handle as soon as
            # the pixels have been converted
            with Image.open(img_path) as raw_img:
                img = raw_img.convert("RGB")
            img = img_transform(img).unsqueeze(0)  # add batch dimension

            # inference
            logits = model(img)
            pred = logits.softmax(-1)
            predicted_label, _ = model.tokenizer.decode(pred)
            prediction = predicted_label[0].strip().lower()

            # compare prediction with gt
            if prediction == target:
                war_correct_predictions += 1
            cer_error += editdistance.eval(prediction, target)
            cer_char_count += len(target)
            total_images += 1

        except Exception as e:
            # Best effort: report the failing sample and continue with the next.
            print(f"Error processing {img_path}: {e}")

        limit -= 1


# Guard against division by zero when nothing could be evaluated.
war = (war_correct_predictions / total_images * 100) if total_images != 0 else 0
cer = (cer_error / cer_char_count) if cer_char_count != 0 else 0
print(f"\nWord accuracy rate: {war:.4f} %")
print(f"Character error rate: {cer:.4f}")


Word accuracy rate: 79.9000 %
Character error rate: 0.1008
