#### from : https://martin-thoma.com/word-error-rate-calculation/

In [1]:
def wer(r, h):
    """
    Count word errors between two token sequences (Levenshtein distance).

    The result is the minimum number of substitutions, insertions and
    deletions needed to turn ``r`` into ``h``. It is a raw edit count and
    is not divided by the length of the reference.

    The distance table uses 64-bit integers, so the input length is only
    limited by available memory.
    O(nm) time and space complexity.

    Parameters
    ----------
    r : list
        Reference tokens. Must support ``len()`` and integer indexing.
    h : list
        Hypothesis tokens. Must support ``len()`` and integer indexing.

    Returns
    -------
    int

    Examples
    --------
    >>> wer("who is there".split(), "is there".split())
    1
    >>> wer("who is there".split(), "".split())
    3
    >>> wer("".split(), "who is there".split())
    3
    """
    import numpy

    n_ref, n_hyp = len(r), len(h)

    # initialisation
    # A uint8 table wraps around once a cell exceeds 255, which corrupts the
    # result for long inputs; int64 removes that limit.
    d = numpy.zeros((n_ref + 1, n_hyp + 1), dtype=numpy.int64)
    d[0, :] = numpy.arange(n_hyp + 1)  # empty reference: j insertions
    d[:, 0] = numpy.arange(n_ref + 1)  # empty hypothesis: i deletions

    # computation
    for i in range(1, n_ref + 1):
        for j in range(1, n_hyp + 1):
            if r[i - 1] == h[j - 1]:
                # Matching tokens cost nothing.
                d[i, j] = d[i - 1, j - 1]
            else:
                substitution = d[i - 1, j - 1] + 1
                insertion = d[i, j - 1] + 1
                deletion = d[i - 1, j] + 1
                d[i, j] = min(substitution, insertion, deletion)

    # Convert the numpy scalar so callers get the documented plain int.
    return int(d[n_ref, n_hyp])

In [2]:
# Run wer() on the three examples from its docstring.
print(wer("who is there".split(), "is there".split()))  # one word missing
print(wer("who is there".split(), "".split()))          # empty hypothesis
print(wer("".split(), "who is there".split()))          # empty reference

1
3
3


In [3]:
import numpy as np

In [4]:
# Read the first transcript file (used as the reference side below):
# strip each line, then split it on single spaces into a token list.
with open('../data/processed/IEMOCAP/four_category/FC_trans.txt') as f:
    list_ref = [line.strip().split(' ') for line in f]

# Show the number of utterances and the first tokenised one.
print(len(list_ref))
print(list_ref[0])

5531
['Excuse', 'me.']


In [5]:
# Read the second transcript file (used as the hypothesis side below):
# strip each line, then split it on single spaces into a token list.
with open('../data/processed/IEMOCAP/four_category_G/FC_trans.txt') as f:
    list_target = [line.strip().split(' ') for line in f]

# Show the number of utterances and the first tokenised one.
print(len(list_target))
print(list_target[0])

5531
['give', 'me']


In [6]:
# Edit distance for every reference/target pair at the same index,
# followed by the average over all pairs.
list_wer = [
    wer(list_ref[idx], list_target[idx])
    for idx in range(len(list_ref))
]
print(np.mean(list_wer))

5.5367926234


In [7]:
# Same comparison, but case-insensitive: lower-case every token on both
# sides before computing the distances.
list_ref_lower = [[tok.lower() for tok in sent] for sent in list_ref]
list_target_lower = [[tok.lower() for tok in sent] for sent in list_target]

list_wer_lower = [
    wer(list_ref_lower[idx], list_target_lower[idx])
    for idx in range(len(list_ref_lower))
]
print(np.mean(list_wer_lower))

4.92442596276


In [8]:
# Sanity check: each reference compared with itself, then the mean
# of those distances.
list_wer_check = [wer(sent, sent) for sent in list_ref]
print(np.mean(list_wer_check))

0.0
