In [1]:
from jiwer import wer, cer
import pandas as pd
import re
from tqdm import tqdm
from bert_score import score
import torch
from transformers import AutoTokenizer
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from nltk.translate.meteor_score import meteor_score
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import textdistance

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def pmr(gt, pred):
    gt_words = gt.split()
    pred_words = pred.split()
    length = min(len(gt_words), len(pred_words))
    matches = sum(1 for i in range(length) if gt_words[i] == pred_words[i])
    
    if(length == 0): 
        print('length 0')
        return 0
    return matches / length 

def compute_jaro_winkler(reference, prediction):
    return textdistance.jaro_winkler(reference, prediction)

In [3]:
def compute_bleu(reference, prediction):
    ref_tokens = [reference.split()]
    pred_tokens = prediction.split()
    smoothie = SmoothingFunction().method4
    return sentence_bleu(ref_tokens, pred_tokens, smoothing_function=smoothie)

def compute_cosine_similarity(reference, prediction):
    vectorizer = TfidfVectorizer().fit([reference, prediction])
    vectors = vectorizer.transform([reference, prediction])
    return cosine_similarity(vectors[0], vectors[1])[0][0]

In [4]:
tokenizer_id = "indobenchmark/indobert-large-p1"
tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)

def truncate_to_512(text):
    tokens = tokenizer.encode(text, add_special_tokens=True)
    if len(tokens) > 512:
        tokens = tokens[:511] + [tokenizer.sep_token_id]  
    return tokenizer.decode(tokens, skip_special_tokens=True)

def compute_indobert_large_score(refs, preds, batch_size=8):
    device = "cuda" if torch.cuda.is_available() else "cpu"

    refs = [truncate_to_512(r) for r in refs]
    preds = [truncate_to_512(p) for p in preds]

    P, R, F1 = score(
        preds,
        refs,
        model_type=tokenizer_id,
        num_layers=24,
        lang="id",
        device=device,
        batch_size=batch_size,
        verbose=False,
    )
    return float(P.mean()), float(R.mean()), float(F1.mean())

In [5]:
YOLO_DIR = '../../yolo/yolo_res'
GT_DIR = '../../data/raw/ground_truth'
BASELINE_DIR = '../../data/raw/ocr_result'

In [6]:
with open('../eval_list.txt', 'r') as file:
    content = file.read()

test_files = []
for file in content.split('\n'):
    test_files.append(file.split('.')[0])
len(test_files)

100

In [7]:
baseline_cer, baseline_wer, baseline_pmr, baseline_bleu, baseline_cosine, baseline_indobert, baseline_jw  = [], [], [], [], [], [], []
yolo_cer, yolo_wer, yolo_pmr, yolo_bleu, yolo_cosine, yolo_indobert, yolo_jw  = [], [], [], [], [], [], []

In [8]:
def read_file(path):
    try:
        return open(path, 'r', encoding='utf-8').read()
    except UnicodeDecodeError:
        return open(path, 'r', encoding='utf-8', errors='ignore').read()

def clean_text(x):
    return re.sub(r"\s+", " ", x.replace("\n", " ")).strip().lower()

In [9]:
for filename in tqdm(test_files):
    baseline = clean_text(read_file(f'{BASELINE_DIR}/ocr_{filename}.txt'))
    gt = clean_text(read_file(f'{GT_DIR}/gt_{filename}.txt'))
    yolo = clean_text(read_file(f'{YOLO_DIR}/res_{filename}.txt'))

    if(len(gt) == 0): print(filename)

    baseline_wer.append(wer(gt, baseline))
    baseline_cer.append(cer(gt, baseline))
    baseline_pmr.append(pmr(gt, baseline))
    baseline_bleu.append(compute_bleu(gt, baseline))
    baseline_cosine.append(compute_cosine_similarity(gt, baseline))
    baseline_indobert.append(compute_indobert_large_score([gt],[baseline]))
    baseline_jw.append(compute_jaro_winkler(gt, baseline))

    yolo_wer.append(wer(gt, yolo))
    yolo_cer.append(cer(gt, yolo))
    yolo_pmr.append(pmr(gt, yolo))
    yolo_bleu.append(compute_bleu(gt, yolo))
    yolo_cosine.append(compute_cosine_similarity(gt, yolo))
    yolo_indobert.append(compute_indobert_large_score([gt],[yolo]))
    yolo_jw.append(compute_jaro_winkler(gt, yolo))
    

100%|██████████| 100/100 [42:39<00:00, 25.60s/it]


In [10]:
data = {
  "name": test_files,
  "baseline_wer": baseline_wer,
  "baseline_cer" : baseline_cer,
  "baseline_pmr" : baseline_pmr,
  "baseline_bleu": baseline_bleu,
  "baseline_cosine" : baseline_cosine,
  "baseline_indobert" : baseline_indobert,
  "baseline_jw" : baseline_jw,

  "yolo_wer": yolo_wer,
  "yolo_cer" : yolo_cer,
  "yolo_pmr" : yolo_pmr,
  "yolo_bleu": yolo_bleu,
  "yolo_cosine" : yolo_cosine,
  "yolo_indobert" : yolo_indobert,
  "yolo_jw" : yolo_jw,
}

df = pd.DataFrame(data)
df.head()

Unnamed: 0,name,baseline_wer,baseline_cer,baseline_pmr,baseline_bleu,baseline_cosine,baseline_indobert,baseline_jw,yolo_wer,yolo_cer,yolo_pmr,yolo_bleu,yolo_cosine,yolo_indobert,yolo_jw
0,522,0.16129,0.113295,0.290323,0.838709,0.928364,"(0.9131332635879517, 0.9442889094352722, 0.928...",0.920078,0.056452,0.030058,0.297521,0.919825,0.978303,"(0.9716838002204895, 0.9828124642372131, 0.977...",0.928412
1,479,21.230769,16.631868,0.0,0.001806,0.012009,"(0.3306380808353424, 0.42230814695358276, 0.37...",0.513429,1.0,0.813187,0.0,0.0,0.0,"(0.36276501417160034, 0.36647772789001465, 0.3...",0.596005
2,528,1.166667,0.885895,0.362745,0.407504,0.709366,"(0.5526434183120728, 0.788463294506073, 0.6498...",0.841161,0.127451,0.123613,0.677083,0.839401,0.938608,"(0.9472967386245728, 0.9506863355636597, 0.948...",0.916212
3,365,0.362694,0.294331,0.005181,0.710469,0.9312,"(0.8656865954399109, 0.9295729398727417, 0.896...",0.771601,0.056995,0.059593,0.005181,0.928273,0.987054,"(0.9405235052108765, 0.9671791791915894, 0.953...",0.831194
4,478,1.12782,1.413115,0.0,0.448918,0.62955,"(0.6963430047035217, 0.7169560790061951, 0.706...",0.642682,0.007519,0.001093,0.992481,0.980877,0.995194,"(0.9955625534057617, 0.9964922666549683, 0.996...",0.991904


In [12]:
df[['baseline_indobert_P', 
    'baseline_indobert_R', 
    'baseline_indobert_F1']] = pd.DataFrame(df['baseline_indobert'].tolist(), index=df.index)

df[['yolo_indobert_P', 
    'yolo_indobert_R', 
    'yolo_indobert_F1']] = pd.DataFrame(df['yolo_indobert'].tolist(), index=df.index)

df.head()

Unnamed: 0,name,baseline_wer,baseline_cer,baseline_pmr,baseline_bleu,baseline_cosine,baseline_indobert,baseline_jw,yolo_wer,yolo_cer,...,yolo_bleu,yolo_cosine,yolo_indobert,yolo_jw,baseline_indobert_P,baseline_indobert_R,baseline_indobert_F1,yolo_indobert_P,yolo_indobert_R,yolo_indobert_F1
0,522,0.16129,0.113295,0.290323,0.838709,0.928364,"(0.9131332635879517, 0.9442889094352722, 0.928...",0.920078,0.056452,0.030058,...,0.919825,0.978303,"(0.9716838002204895, 0.9828124642372131, 0.977...",0.928412,0.913133,0.944289,0.92845,0.971684,0.982812,0.977216
1,479,21.230769,16.631868,0.0,0.001806,0.012009,"(0.3306380808353424, 0.42230814695358276, 0.37...",0.513429,1.0,0.813187,...,0.0,0.0,"(0.36276501417160034, 0.36647772789001465, 0.3...",0.596005,0.330638,0.422308,0.370893,0.362765,0.366478,0.364612
2,528,1.166667,0.885895,0.362745,0.407504,0.709366,"(0.5526434183120728, 0.788463294506073, 0.6498...",0.841161,0.127451,0.123613,...,0.839401,0.938608,"(0.9472967386245728, 0.9506863355636597, 0.948...",0.916212,0.552643,0.788463,0.64982,0.947297,0.950686,0.948988
3,365,0.362694,0.294331,0.005181,0.710469,0.9312,"(0.8656865954399109, 0.9295729398727417, 0.896...",0.771601,0.056995,0.059593,...,0.928273,0.987054,"(0.9405235052108765, 0.9671791791915894, 0.953...",0.831194,0.865687,0.929573,0.896493,0.940524,0.967179,0.953665
4,478,1.12782,1.413115,0.0,0.448918,0.62955,"(0.6963430047035217, 0.7169560790061951, 0.706...",0.642682,0.007519,0.001093,...,0.980877,0.995194,"(0.9955625534057617, 0.9964922666549683, 0.996...",0.991904,0.696343,0.716956,0.706499,0.995563,0.996492,0.996027


In [13]:
df.describe()

Unnamed: 0,baseline_wer,baseline_cer,baseline_pmr,baseline_bleu,baseline_cosine,baseline_jw,yolo_wer,yolo_cer,yolo_pmr,yolo_bleu,yolo_cosine,yolo_jw,baseline_indobert_P,baseline_indobert_R,baseline_indobert_F1,yolo_indobert_P,yolo_indobert_R,yolo_indobert_F1
count,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0
mean,0.432745,0.319161,0.172997,0.796237,0.883098,0.846417,0.16843,0.120514,0.394061,0.814645,0.871534,0.886929,0.918677,0.937881,0.927779,0.937319,0.934492,0.935334
std,2.128334,1.668447,0.28395,0.26613,0.263253,0.091307,0.308024,0.248285,0.405142,0.310095,0.305163,0.115084,0.104062,0.086416,0.095525,0.13374,0.151013,0.143713
min,0.0,0.0,0.0,0.0,0.0,0.513429,0.0,0.0,0.0,0.0,0.0,0.508825,0.330638,0.422308,0.370893,0.362765,0.259289,0.312846
25%,0.042914,0.023549,0.007589,0.800474,0.954217,0.811311,0.014489,0.002504,0.015687,0.852085,0.973253,0.840494,0.91421,0.940922,0.926669,0.959044,0.960545,0.958535
50%,0.078184,0.041788,0.017484,0.895241,0.982927,0.838156,0.043764,0.014668,0.232961,0.934593,0.992529,0.917964,0.950369,0.963137,0.956041,0.978369,0.982097,0.978863
75%,0.21019,0.163096,0.241223,0.94625,0.992754,0.91611,0.119462,0.070707,0.877111,0.973279,0.998722,0.95844,0.969418,0.977181,0.972817,0.993008,0.993031,0.992626
max,21.230769,16.631868,1.0,1.0,1.0,1.0,1.0,0.990772,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [14]:
df.to_csv('yolo_final_result.csv',index=False)