# Mục 1: Edit distance

In [None]:
import numpy as np

def find_minimum_edit_distance(source_string, target_string):
    """Compute the minimum edit distance and one optimal edit script.

    Insertions and deletions cost 1 each; a substitution costs 2.

    Args:
        source_string: The sequence to be transformed.
        target_string: The sequence to transform ``source_string`` into.

    Returns:
        A tuple ``(distance, operations)``. ``operations`` lists the edits in
        left-to-right order, each being ``('INSERT', target_char)``,
        ``('DELETE', source_char)`` or
        ``('SUBSTITUTE', source_char, target_char)``.
    """
    n_source = len(source_string)
    n_target = len(target_string)

    # Cost table with (len(target) + 1) rows and (len(source) + 1) columns:
    # table[r][c] is the cost of turning source[:c] into target[:r].
    table = [[0] * (n_source + 1) for _ in range(n_target + 1)]

    # First column: build a target prefix from nothing (insertions only).
    for r in range(1, n_target + 1):
        table[r][0] = table[r - 1][0] + 1
    # First row: reduce a source prefix to nothing (deletions only).
    for c in range(1, n_source + 1):
        table[0][c] = table[0][c - 1] + 1

    # Fill the rest of the table row by row.
    for r, target_char in enumerate(target_string, start=1):
        above = table[r - 1]
        current = table[r]
        for c, source_char in enumerate(source_string, start=1):
            if target_char == source_char:
                # Matching characters cost nothing extra.
                current[c] = above[c - 1]
            else:
                current[c] = min(
                    above[c] + 1,        # insert target_char
                    current[c - 1] + 1,  # delete source_char
                    above[c - 1] + 2,    # substitute
                )

    # Walk back from the bottom-right cell, recording edits last-to-first.
    edits = []
    r, c = n_target, n_source
    while r != 0 and c != 0:
        target_char = target_string[r - 1]
        source_char = source_string[c - 1]
        if target_char == source_char:
            r -= 1
            c -= 1
            continue

        here = table[r][c]
        if here == table[r - 1][c - 1] + 2:
            edits.append(('SUBSTITUTE', source_char, target_char))
            r -= 1
            c -= 1
        elif here == table[r - 1][c] + 1:
            edits.append(('INSERT', target_char))
            r -= 1
        else:
            edits.append(('DELETE', source_char))
            c -= 1

    # Top row reached: whatever is left of the source must be deleted.
    edits.extend(('DELETE', source_string[k]) for k in range(c - 1, -1, -1))
    # Left column reached: whatever is left of the target must be inserted.
    edits.extend(('INSERT', target_string[k]) for k in range(r - 1, -1, -1))

    # Edits were collected while backtracking, so flip them into forward order.
    return table[n_target][n_source], edits[::-1]

if __name__ == "__main__":
    # Read the source and target strings. The prompt is handed to input() so
    # it is displayed BEFORE the program waits for the user; putting the
    # input() call inside print(...) evaluates input() first and only shows
    # the prompt after the user has already typed.
    source_string = input("Enter the source string: ").strip()
    target_string = input("Enter the target string: ").strip()

    # Find the minimum edit distance and the operations that achieve it
    distance, operations_performed = find_minimum_edit_distance(source_string, target_string)

    # Count how many operations of each kind were performed
    kinds = [op[0] for op in operations_performed]
    insertions = kinds.count('INSERT')
    deletions = kinds.count('DELETE')
    substitutions = kinds.count('SUBSTITUTE')

    # Print the summary
    print(f"\nMinimum edit distance: {distance}")
    print(f"Number of insertions: {insertions}")
    print(f"Number of deletions: {deletions}")
    print(f"Number of substitutions: {substitutions}")
    print(f"Total number of operations: {insertions + deletions + substitutions}")

    # Print the edit script, numbered from 1
    print("Actual Operations:")
    for idx, op in enumerate(operations_performed, 1):
        if op[0] in ('INSERT', 'DELETE'):
            # Both carry a single character: (kind, char)
            print(f"({idx}) {op[0]}: {op[1]}")
        else:
            # Substitutions carry (kind, source_char, target_char)
            print(f"({idx}) SUBSTITUTE: {op[1]} -> {op[2]}")


Enter the source string:  intention
Enter the target string:  execution

Minimum edit distance: 8
Number of insertions: 1
Number of deletions: 1
Number of substitutions: 3
Total number of operations: 5
Actual Operations:
(1) DELETE: i
(2) SUBSTITUTE: n -> e
(3) SUBSTITUTE: t -> x
(4) INSERT: c
(5) SUBSTITUTE: n -> u


# Mục 2: LCSS distance

In [10]:
def LCSS(X, Y):
    """Build the longest-common-subsequence length table for X and Y.

    Args:
        X: First sequence (indexes the rows of the table).
        Y: Second sequence (indexes the columns of the table).

    Returns:
        A list of ``len(X) + 1`` rows, each with ``len(Y) + 1`` entries, where
        entry ``[i][j]`` is the length of the longest common subsequence of
        ``X[:i]`` and ``Y[:j]``. The bottom-right entry is the value for the
        full sequences.
    """
    n_rows, n_cols = len(X) + 1, len(Y) + 1
    # Row 0 and column 0 stay at zero: an empty prefix shares nothing.
    table = [[0] * n_cols for _ in range(n_rows)]

    for r, x_item in enumerate(X, start=1):
        prev_row = table[r - 1]
        cur_row = table[r]
        for c, y_item in enumerate(Y, start=1):
            if x_item == y_item:
                # Extend the common subsequence of both shorter prefixes.
                cur_row[c] = prev_row[c - 1] + 1
            else:
                # Otherwise carry over the better of "drop x" / "drop y".
                cur_row[c] = max(prev_row[c], cur_row[c - 1])

    return table

# Demo inputs for the LCSS table
X = "ACADB"
Y = "CBDA"

# Build the table
dp = LCSS(X, Y)

# Print the value table: a header line with the characters of Y, then one
# line per table row labelled with the matching character of X (the first
# row, for the empty prefix, gets a blank label).
print("Bảng giá trị LCSS:")
header_cells = [" " + ch for ch in Y]
print("     " + "  ".join(header_cells))
for label, values in zip(" " + X, dp):
    cells = [f"{value:2}" for value in values[:len(Y) + 1]]
    print(f"{label}  " + "  ".join(cells))
# The bottom-right cell holds the result for the full strings
print("Khoảng cách dãy con chung dài nhất là: ", dp[-1][-1])

Bảng giá trị LCSS:
      C   B   D   A
    0   0   0   0   0
A   0   0   0   0   1
C   0   1   1   1   1
A   0   1   1   1   2
D   0   1   1   2   2
B   0   1   2   2   2
Khoảng cách dãy con chung dài nhất là:  2


# Mục 3: DTW distance

In [15]:
def dtw_recursive_with_path(s1, s2):
    """Compute the DTW cost table of two numeric sequences and an optimal path.

    The local cost of aligning ``s1[i]`` with ``s2[j]`` is
    ``abs(s1[i] - s2[j])``; each cell adds the cheapest of its upper, left and
    upper-left neighbours. The recurrence is evaluated bottom-up rather than
    by recursive calls, so long inputs cannot hit Python's recursion limit;
    the resulting table and path are the same as the top-down evaluation.

    Args:
        s1: First sequence of numbers (indexes the rows of the table).
        s2: Second sequence of numbers (indexes the columns of the table).

    Returns:
        A tuple ``(dp, path_coords)``. ``dp`` is a ``len(s1)`` x ``len(s2)``
        list of lists of accumulated costs, with ``dp[-1][-1]`` the DTW
        distance. ``path_coords`` lists the ``(i, j)`` cells of an optimal
        warping path, from the last cell back to ``(0, 0)``. If either
        sequence is empty no alignment exists: ``dp`` is the (empty) table
        and ``path_coords`` is ``[]``.
    """
    n, m = len(s1), len(s2)
    dp = [[0] * m for _ in range(n)]        # accumulated DTW cost table
    path = [[None] * m for _ in range(n)]   # predecessor of each cell

    # Nothing to align: return the empty table instead of failing on indexing.
    if n == 0 or m == 0:
        return dp, []

    inf = float('inf')
    for i in range(n):
        for j in range(m):
            cost = abs(s1[i] - s2[j])
            if i == 0 and j == 0:
                # Origin cell: no predecessor.
                dp[0][0] = cost
                continue

            # Candidates in a fixed order (up, left, diagonal); min() keeps
            # the first one on ties. Cells outside the table count as
            # infinitely expensive.
            options = [
                (dp[i - 1][j] if i > 0 else inf, (i - 1, j)),
                (dp[i][j - 1] if j > 0 else inf, (i, j - 1)),
                (dp[i - 1][j - 1] if i > 0 and j > 0 else inf, (i - 1, j - 1)),
            ]
            min_cost, prev_pos = min(options, key=lambda opt: opt[0])
            dp[i][j] = cost + min_cost
            path[i][j] = prev_pos

    # Trace the path from the last cell back to the origin.
    path_coords = []
    pos = (n - 1, m - 1)
    while pos is not None:
        path_coords.append(pos)
        pos = path[pos[0]][pos[1]]

    return dp, path_coords

# Demo inputs for DTW
sequence_1 = [1, 7, 4, 8, 2, 9, 6, 5, 2, 0]
sequence_2 = [1, 2, 8, 5, 5, 1, 9, 4, 6, 5]

dp_table, optimal_path = dtw_recursive_with_path(sequence_1, sequence_2)

# Print the DTW cost table, one row per line
print("DTW Cost Table:")
print("\n".join(str(row) for row in dp_table))

# Print the DTW distance (bottom-right cell of the table)
dtw_distance = dp_table[-1][-1]
print("\nDTW Distance:", dtw_distance)

# Print the accumulated cost at each cell of the optimal path, listed from
# the last cell back to the first. Despite its name, path_coords holds the
# table values along the path, not the coordinates.
path_coords = [dp_table[r][c] for r, c in optimal_path]
print("Giá trị trên đường đi:", path_coords)


DTW Cost Table:
[0, 1, 8, 12, 16, 16, 24, 27, 32, 36]
[6, 5, 2, 4, 6, 12, 14, 17, 18, 20]
[9, 7, 6, 3, 4, 7, 12, 12, 14, 15]
[16, 13, 6, 6, 6, 11, 8, 12, 14, 17]
[17, 13, 12, 9, 9, 7, 14, 10, 14, 17]
[25, 20, 13, 13, 13, 15, 7, 12, 13, 17]
[30, 24, 15, 14, 14, 18, 10, 9, 9, 10]
[34, 27, 18, 14, 14, 18, 14, 10, 10, 9]
[35, 27, 24, 17, 17, 15, 21, 12, 14, 12]
[36, 29, 32, 22, 22, 16, 24, 16, 18, 17]

DTW Distance: 17
Giá trị trên đường đi: [17, 12, 9, 9, 9, 7, 7, 6, 3, 2, 1, 0]
