In [5]:
!pip install --upgrade pip



In [6]:
!pip install cython



In [12]:
%load_ext Cython

In [14]:
%%cython

def levenshtein_distance(y_true: str, y_pred: str, normalize: bool = False):
    """
    Computes the Levenshtein Edit Distance between two strings.
    :param str y_true: ground_truth/inference string
    :param str y_pred: predicted/reference string
    :param bool normalize: Boolean for normalizing the distance by length of longer string
    :returns:
        distance (float): The computed Levenshtein Edit Distance between y_true and y_pred
                        (normalized distance between 0.0 and 1.0 will be returned if normalize=True)
    """
#     cdef int i1, int i2
    s1 = y_true
    s2 = y_pred
    if len(s1) > len(s2):
        s1, s2 = s2, s1

    distances = range(len(s2) + 1)
    for i2, c2 in enumerate(s2):
        distances_ = [i2 + 1]
        for i1, c1 in enumerate(s1):
            if c1 == c2:
                distances_.append(distances[i1])
            else:
                distances_.append(1 + min((distances[i1], distances[i1 + 1], distances_[-1])))
        distances = distances_
    if normalize:
        eps = 1e-10
        return 1 - distances[-1] / (len(s2) + eps)
    return distances[-1]



In [15]:
import timeit

In [18]:

def levenshtein_distance2(y_true: str, y_pred: str, normalize: bool = False):
    """
    Computes the Levenshtein Edit Distance between two strings.
    :param str y_true: ground_truth/inference string
    :param str y_pred: predicted/reference string
    :param bool normalize: Boolean for normalizing the distance by length of longer string
    :returns:
        distance (float): The computed Levenshtein Edit Distance between y_true and y_pred
                        (normalized distance between 0.0 and 1.0 will be returned if normalize=True)
    """
#     cdef int i1, int i2
    s1 = y_true
    s2 = y_pred
    if len(s1) > len(s2):
        s1, s2 = s2, s1

    distances = range(len(s2) + 1)
    for i2, c2 in enumerate(s2):
        distances_ = [i2 + 1]
        for i1, c1 in enumerate(s1):
            if c1 == c2:
                distances_.append(distances[i1])
            else:
                distances_.append(1 + min((distances[i1], distances[i1 + 1], distances_[-1])))
        distances = distances_
    if normalize:
        eps = 1e-10
        return 1 - distances[-1] / (len(s2) + eps)
    return distances[-1]

In [23]:
%%cython

def levenshtein_distance3(y_true: str, y_pred: str, normalize: bool = False):
    """
    Computes the Levenshtein Edit Distance between two strings.
    :param str y_true: ground_truth/inference string
    :param str y_pred: predicted/reference string
    :param bool normalize: Boolean for normalizing the distance by length of longer string
    :returns:
        distance (float): The computed Levenshtein Edit Distance between y_true and y_pred
                        (normalized distance between 0.0 and 1.0 will be returned if normalize=True)
    """
    %cython cdef int i1, int i2
    s1 = y_true
    s2 = y_pred
    if len(s1) > len(s2):
        s1, s2 = s2, s1

    distances = range(len(s2) + 1)
    for i2, c2 in enumerate(s2):
        distances_ = [i2 + 1]
        for i1, c1 in enumerate(s1):
            if c1 == c2:
                distances_.append(distances[i1])
            else:
                distances_.append(1 + min((distances[i1], distances[i1 + 1], distances_[-1])))
        distances = distances_
    if normalize:
        eps = 1e-10
        return 1 - distances[-1] / (len(s2) + eps)
    return distances[-1]




Error compiling Cython file:
------------------------------------------------------------
...
    :param bool normalize: Boolean for normalizing the distance by length of longer string
    :returns:
        distance (float): The computed Levenshtein Edit Distance between y_true and y_pred
                        (normalized distance between 0.0 and 1.0 will be returned if normalize=True)
    """
    %cython cdef int i1, int i2
   ^
------------------------------------------------------------

C:\Users\sophros\.ipython\cython\_cython_magic_32e433b9a5ec26d080d3e23d0eb2a168.pyx:12:4: Expected an identifier or literal


In [21]:
%%timeit
levenshtein_distance("abcdefghijklmnoprstuvw"*10, "abcdefghijklnopstuw"*10)

5.64 ms ± 377 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [20]:
%%timeit
levenshtein_distance2("abcdefghijklmnoprstuvw"*10, "abcdefghijklnopstuw"*10)

18.4 ms ± 867 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
