In [1]:
from jiwer import wer, cer
import pandas as pd
import re
from tqdm import tqdm

In [2]:
def pmr(gt, pred):
    gt_words = gt.split()
    pred_words = pred.split()
    length = min(len(gt_words), len(pred_words))
    matches = sum(1 for i in range(length) if gt_words[i] == pred_words[i])
    
    if(length == 0): 
        print('length 0')
        return 0
    return matches / length 

In [3]:
YOLO_DIR = '../../yolo/yolo_res'
GT_DIR = '../../data/raw/ground_truth'
BASELINE_DIR = '../../data/raw/ocr_result'

In [4]:
with open('../eval_list.txt', 'r') as file:
    content = file.read()

test_files = []
for file in content.split('\n'):
    test_files.append(file.split('.')[0])
len(test_files)

100

In [5]:
# baseline
baseline_cer, baseline_wer, baseline_pmr  = [], [], []
# yolo
yolo_cer, yolo_wer, yolo_pmr  = [], [], []

In [6]:
def read_file(path):
    try:
        return open(path, 'r', encoding='utf-8').read()
    except UnicodeDecodeError:
        return open(path, 'r', encoding='utf-8', errors='ignore').read()

In [7]:
for filename in tqdm(test_files):
    baseline = read_file(f'{BASELINE_DIR}/ocr_{filename}.txt')
    gt = read_file(f'{GT_DIR}/gt_{filename}.txt')
    yolo_str = read_file(f'{YOLO_DIR}/res_{filename}.txt')

    baseline = re.sub(r"\s+", " ", baseline.replace("\n", " ")).strip().lower()
    gt = re.sub(r"\s+", " ", gt.replace("\n", " ")).strip().lower()
    yolo_str = re.sub(r"\s+", " ", yolo_str.replace("\n", " ")).strip().lower()

    if(len(gt) == 0): print(filename)

    baseline_wer.append(wer(gt, baseline))
    baseline_cer.append(cer(gt, baseline))
    baseline_pmr.append(pmr(gt, baseline))

    yolo_wer.append(wer(gt, yolo_str))
    yolo_cer.append(cer(gt, yolo_str))
    yolo_pmr.append(pmr(gt, yolo_str))
    

  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 100/100 [00:01<00:00, 89.53it/s]


In [9]:
data = {
  "name": test_files,
  "baseline_wer": baseline_wer,
  "baseline_cer" : baseline_cer,
  "baseline_pmr" : baseline_pmr,
  "yolo_wer": yolo_wer,
  "yolo_cer" : yolo_cer,
  "yolo_pmr" : yolo_pmr,
}

df = pd.DataFrame(data)
df.head()

Unnamed: 0,name,baseline_wer,baseline_cer,baseline_pmr,yolo_wer,yolo_cer,yolo_pmr
0,522,0.16129,0.113295,0.290323,0.056452,0.030058,0.297521
1,479,21.230769,16.631868,0.0,1.0,0.813187,0.0
2,528,1.166667,0.885895,0.362745,0.127451,0.123613,0.677083
3,365,0.362694,0.294331,0.005181,0.056995,0.059593,0.005181
4,478,1.12782,1.413115,0.0,0.007519,0.001093,0.992481


In [10]:
df.describe()

Unnamed: 0,baseline_wer,baseline_cer,baseline_pmr,yolo_wer,yolo_cer,yolo_pmr
count,100.0,100.0,100.0,100.0,100.0,100.0
mean,0.432745,0.319161,0.172997,0.16843,0.120514,0.394061
std,2.128334,1.668447,0.28395,0.308024,0.248285,0.405142
min,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.042914,0.023549,0.007589,0.014489,0.002504,0.015687
50%,0.078184,0.041788,0.017484,0.043764,0.014668,0.232961
75%,0.21019,0.163096,0.241223,0.119462,0.070707,0.877111
max,21.230769,16.631868,1.0,1.0,0.990772,1.0


In [15]:
df.to_csv('yolo_final_result.csv',index=False)

In [12]:
from scipy.stats import ttest_rel

print(ttest_rel(df["baseline_wer"], df["yolo_wer"]).pvalue)
print(ttest_rel(df["baseline_cer"], df["yolo_cer"]).pvalue)
print(ttest_rel(df["baseline_pmr"], df["yolo_pmr"]).pvalue)

0.19868446859056393
0.21986245013932545
6.448513293024843e-08


In [13]:
from scipy.stats import wilcoxon

print(wilcoxon(df["baseline_wer"], df["yolo_wer"]).pvalue)
print(wilcoxon(df["baseline_cer"], df["yolo_cer"]).pvalue)
print(wilcoxon(df["baseline_pmr"], df["yolo_pmr"]).pvalue)

7.865480512508471e-11
5.083730236452544e-09
3.3423661308153497e-07


In [14]:
import numpy as np

diff = df["baseline_wer"] - df["yolo_wer"]
cohen_d = diff.mean() / diff.std()
cohen_d

np.float64(0.1293975442631026)