In [7]:
"""
http://www.dreamxu.com/books/dsa/dp/edit-distance.html
https://en.wikipedia.org/wiki/Levenshtein_distance
https://www.quora.com/How-do-I-figure-out-how-to-iterate-over-the-parameters-and-write-bottom-up-solutions-to-dynamic-programming-related-problems/answer/Michal-Danil%C3%A1k?srid=3OBi&share=1

编辑距离(Levenshtein distance 莱文斯坦距离)
给定 2 个字符串 a, b. 编辑距离是将 a 转换为 b 的最少操作次数，操作只允许如下 3 种：

1. 插入一个字符，例如：fj -> fxj
2. 删除一个字符，例如：fxj -> fj
3. 替换一个字符，例如：fxj -> fyj
"""

def levenshtein_distance_recursice(str_a, str_b):
    """Return the Levenshtein (edit) distance between ``str_a`` and ``str_b``.

    This is the textbook top-down recursion without memoization: every call
    on two non-empty inputs spawns three further calls, so the running time
    grows exponentially with the input lengths. Use it only on short inputs.

    Args:
        str_a: Source sequence.
        str_b: Target sequence.

    Returns:
        The minimum number of single-element insertions, deletions and
        substitutions needed to turn ``str_a`` into ``str_b``.
    """
    # Base case: when one side is empty, the distance is the length of the
    # other side (insert or delete every remaining element).
    if min(len(str_a), len(str_b)) == 0:
        return max(len(str_a), len(str_b))

    prefix_a = str_a[:-1]
    prefix_b = str_b[:-1]

    # Substituting the last element is free when both already match.
    substitution_cost = int(str_a[-1] != str_b[-1])

    candidates = (
        # Drop the last element of str_a.
        levenshtein_distance_recursice(prefix_a, str_b) + 1,
        # Drop the last element of str_b.
        levenshtein_distance_recursice(str_a, prefix_b) + 1,
        # Align the two last elements (match or substitution).
        levenshtein_distance_recursice(prefix_a, prefix_b) + substitution_cost,
    )
    return min(candidates)

import time
# Time the naive recursive implementation on two 9-character inputs.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations; time.time() is wall-clock time and may jump.
start = time.perf_counter()
levenshtein_distance_recursice('abc'*3, 'bcd'*3)
print(time.perf_counter() - start)

1.1841719150543213


In [8]:
"""
Iterative with full matrix
"""

import numpy as np

def levenshtein_distance_matrix(src, dst):
    """Compute the Levenshtein distance between ``src`` and ``dst`` bottom-up.

    A full ``(len(dst) + 1) x (len(src) + 1)`` table is filled in, where
    ``table[row, col]`` is the edit distance between the first ``row``
    elements of ``dst`` and the first ``col`` elements of ``src``. The extra
    row and column represent the empty prefix.

    The function prints the table right after its borders are initialised
    and prints the final distance before returning it.

    Args:
        src: Source sequence.
        dst: Target sequence.

    Returns:
        The bottom-right table entry, i.e. the edit distance, as a NumPy
        float (the table is created with ``np.zeros``' default dtype).
    """
    src_len, dst_len = len(src), len(dst)

    # Rows follow dst, columns follow src.
    table = np.zeros((dst_len + 1, src_len + 1))

    # Against an empty prefix the distance equals the other prefix's length.
    table[1:, 0] = np.arange(1, dst_len + 1)
    table[0, 1:] = np.arange(1, src_len + 1)

    print(table)
    for col, src_item in enumerate(src, start=1):
        for row, dst_item in enumerate(dst, start=1):
            substitution_cost = 0 if src_item == dst_item else 1
            table[row, col] = min(
                table[row - 1, col] + 1,
                table[row, col - 1] + 1,
                table[row - 1, col - 1] + substitution_cost,
            )

    distance = table[-1][-1]
    print(distance)
    return distance

import time
# Check the matrix implementation against known distances while timing it.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations; time.time() is wall-clock time and may jump.
start = time.perf_counter()
assert levenshtein_distance_matrix("kitten", "sitting") == 3
assert levenshtein_distance_matrix('ebab', 'abcd') == 3
# An empty target: every source character has to be deleted.
assert levenshtein_distance_matrix('1234', '') == 4
assert levenshtein_distance_matrix('kilo', 'kilogram') == 4
print(time.perf_counter() - start)

[[0. 1. 2. 3. 4. 5. 6.]
 [1. 0. 0. 0. 0. 0. 0.]
 [2. 0. 0. 0. 0. 0. 0.]
 [3. 0. 0. 0. 0. 0. 0.]
 [4. 0. 0. 0. 0. 0. 0.]
 [5. 0. 0. 0. 0. 0. 0.]
 [6. 0. 0. 0. 0. 0. 0.]
 [7. 0. 0. 0. 0. 0. 0.]]
3.0
[[0. 1. 2. 3. 4.]
 [1. 0. 0. 0. 0.]
 [2. 0. 0. 0. 0.]
 [3. 0. 0. 0. 0.]
 [4. 0. 0. 0. 0.]]
3.0
[[0. 1. 2. 3. 4.]]
4.0
[[0. 1. 2. 3. 4.]
 [1. 0. 0. 0. 0.]
 [2. 0. 0. 0. 0.]
 [3. 0. 0. 0. 0.]
 [4. 0. 0. 0. 0.]
 [5. 0. 0. 0. 0.]
 [6. 0. 0. 0. 0.]
 [7. 0. 0. 0. 0.]
 [8. 0. 0. 0. 0.]]
4.0
0.0034019947052001953
