# EGRA evaluation

Herman Kamper, 2025

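Requirements:

    conda install anaconda::jiwer
    conda install scikit-learn

The `dp_align` module imported below must also be importable from this notebook.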

In [1]:
# Load IPython's autoreload extension and use mode 2, so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from sklearn import metrics
import jiwer

import dp_align

## Example

In [3]:
# Toy example: three whitespace-separated token sequences, each turned into a
# list of tokens.
#   can -- canonical sequence
#   ref -- reference sequence
#   hyp -- hypothesis sequence
can = "ga u la e ka ha".split()
ref = "ga ga u e ka hi hi".split()
hyp = "ga u la i ka hi ho".split()

# Longer alternative example; uncomment these three lines to use it instead of
# the toy example above.
# can = "ga u la e ka ha ka na ya cha a ja da ra sa i za pa ma sa fa ba za o wa na pa fa ta da la ma ya ja ta va ha ra ga wa".split()
# ref = "ga ga u la e ka ha ku na ya cha a ji da ra si i zi bi di em si ef bi zi o wai eni pi efu ti di i em ya ya ti mbi hi ara g dablyu".split()
# hyp = "u la ee a a a rac im sf bbo waf b".split()

**Align reference to canonical:**

In [4]:
# Align the reference tokens against the canonical tokens. The first return
# value is the error-count object queried in the next cells (get_wer, n_match,
# n_total); the second is the alignment itself.
# NOTE(review): judging by the displayed alignment, each entry looks like
# (token from the 2nd argument, token from the 1st argument, op), with "-" as
# the gap symbol and op one of "c", "s", "i", "d" -- confirm against dp_align.
ref_to_can_errors, ref_to_can_alignment = dp_align.dp_align(can, ref, output_align=True)

In [5]:
ref_to_can_alignment

[('ga', '-', 'i'),
 ('ga', 'ga', 'c'),
 ('u', 'u', 'c'),
 ('-', 'la', 'd'),
 ('e', 'e', 'c'),
 ('ka', 'ka', 'c'),
 ('hi', '-', 'i'),
 ('hi', 'ha', 's')]

In [6]:
# Scores for the reference measured against the canonical sequence: the WER
# from dp_align, the same WER from JiWER as a cross-check, and the ratio of
# matches to the total count.
can_ref_scores = [
    ("wer_can_ref:", ref_to_can_errors.get_wer()),
    ("wer_can_ref with JiWER:", jiwer.wer(" ".join(can), " ".join(ref))),
    ("acc_can_ref:", ref_to_can_errors.n_match / ref_to_can_errors.n_total),
]
for label, value in can_ref_scores:
    print(label, value)

wer_can_ref: 0.6666666666666666
wer_can_ref with JiWER: 0.6666666666666666
acc_can_ref: 0.6666666666666666


**Align hypothesis to canonical:**

In [7]:
# Align the hypothesis tokens against the canonical tokens, using the same call
# as for the reference; returns the error counts and the alignment.
hyp_to_can_errors, hyp_to_can_alignment = dp_align.dp_align(can, hyp, output_align=True)

In [8]:
hyp_to_can_alignment

[('ga', 'ga', 'c'),
 ('u', 'u', 'c'),
 ('la', 'la', 'c'),
 ('i', 'e', 's'),
 ('ka', 'ka', 'c'),
 ('hi', '-', 'i'),
 ('ho', 'ha', 's')]

In [9]:
# Ratio of n_match to n_total for the hypothesis-to-canonical alignment (the
# same accuracy measure as acc_can_ref).
print("acc_hyp_can:", hyp_to_can_errors.n_match / hyp_to_can_errors.n_total)

acc_hyp_can: 0.6666666666666666


**Align hypothesis to reference:**

In [10]:
# WER of the hypothesis measured directly against the reference (the reference
# is passed first, i.e. in JiWER's ground-truth position). The label names the
# pair actually compared; it previously repeated "wer_can_ref" by mistake.
print("wer_ref_hyp with JiWER:", jiwer.wer(" ".join(ref), " ".join(hyp)))

wer_can_ref with JiWER: 0.5714285714285714


In [11]:
# Reduce each alignment to its sequence of operation labels, i.e. the last
# element of every alignment entry.
ref_to_can_alignment_sequence = [entry[-1] for entry in ref_to_can_alignment]
hyp_to_can_alignment_sequence = [entry[-1] for entry in hyp_to_can_alignment]

# Align the two label sequences with each other; the reference-derived labels
# are passed first.
mer_errors, mer_alignment = dp_align.dp_align(
    ref_to_can_alignment_sequence,
    hyp_to_can_alignment_sequence,
    output_align=True,
)

In [12]:
mer_alignment

[('-', 'i', 'd'),
 ('c', 'c', 'c'),
 ('c', 'c', 'c'),
 ('-', 'd', 'd'),
 ('c', 'c', 'c'),
 ('s', '-', 'i'),
 ('c', 'c', 'c'),
 ('i', 'i', 'c'),
 ('s', 's', 'c')]

In [13]:
# Treat the two operation-label sequences as space-separated "sentences" and
# compute JiWER's WER between them, with the reference-derived labels in the
# ground-truth position; this value is reported as the MER.
print("MER with JiWER:", jiwer.wer(" ".join(ref_to_can_alignment_sequence), " ".join(hyp_to_can_alignment_sequence)))

MER with JiWER: 0.375


In [14]:
# Split the label-level alignment into two equally long label lists. Element 1
# of each entry carries the reference-derived label (used as ground truth) and
# element 0 the hypothesis-derived label (used as prediction); "-" marks a gap.
true_msid = [entry[1] for entry in mer_alignment]
pred_msid = [entry[0] for entry in mer_alignment]

In [15]:
true_msid

['i', 'c', 'c', 'd', 'c', '-', 'c', 'i', 's']

In [16]:
pred_msid

['-', 'c', 'c', '-', 'c', 's', 'c', 'i', 's']

In [17]:
# Per-class precision/recall/F1 over the label classes, with the gap symbol "-"
# counted as a class of its own. zero_division=0 reports 0 (without a warning)
# for scores whose denominator is zero.
print(metrics.classification_report(true_msid, pred_msid, zero_division=0))

              precision    recall  f1-score   support

           -       0.00      0.00      0.00         1
           c       1.00      1.00      1.00         4
           d       0.00      0.00      0.00         1
           i       1.00      0.50      0.67         2
           s       0.50      1.00      0.67         1

    accuracy                           0.67         9
   macro avg       0.50      0.50      0.47         9
weighted avg       0.72      0.67      0.67         9



In [18]:
def _collapse_error_labels(labels):
    """Return a new list with every "i", "d" and "s" label replaced by "x".

    All other labels (such as "c" and the gap symbol "-") are kept unchanged,
    and the order and length of the input are preserved.
    """
    return ["x" if label in ("i", "d", "s") else label for label in labels]


# Merge the three error types into one "mistake" class for both label lists.
true_mistakes = _collapse_error_labels(true_msid)
pred_mistakes = _collapse_error_labels(pred_msid)

In [19]:
# Same report as before, now with "i", "d" and "s" merged into the single
# class "x"; zero_division=0 again reports 0 for undefined scores.
print(metrics.classification_report(true_mistakes, pred_mistakes, zero_division=0))

              precision    recall  f1-score   support

           -       0.00      0.00      0.00         1
           c       1.00      1.00      1.00         4
           x       0.67      0.50      0.57         4

    accuracy                           0.67         9
   macro avg       0.56      0.50      0.52         9
weighted avg       0.74      0.67      0.70         9

