Before you turn this problem in, make sure everything runs as expected. First, **restart the kernel** (in the menubar, select Kernel$\rightarrow$Restart) and then **run all cells** (in the menubar, select Cell$\rightarrow$Run All).

Make sure you fill in any place that says `YOUR CODE HERE` or "YOUR ANSWER HERE", as well as your name and collaborators below:

In [11]:
NAME = "311352004童政瑜"
COLLABORATORS = ""

# Dynamic Programming - Key Concept
<h4>A problem that can be solved efficiently by DP often has the following properties:</h4>
<ul>
<li>Optimal Substructure(allows recursion): </li>
<ul>
An optimal solution to the problem contains optimal solutions to its subproblems.
</ul>
</ul>
<ul>
<li>Overlapping Subproblems(allows speed up): </li>
<ul>
A recursive algorithm revisits the same subproblem over and over again.
</ul>
</ul>


###  <h2>Exercise 1: Longest Common Subsequence</h2>
<h4> Find the length of the longest common subsequence of two input sequences</h4>
<h4> Example: Input Sequences “ABCDGH” and “AEDFHR” </h4>
<ul>
<li>Output: 3 (Subsequence “ADH”)</li>
</ul>
<h4> Example: Input Sequences “AGGTAB” and “GXTXAYB” </h4>
<ul>
<li>Output: 4 (Subsequence “GTAB”)</li>
</ul>

In [12]:
def longest_common_subsequence(
    seq_1: str, seq_2: str, seq_1_len: int, seq_2_len: int
) -> int:
    '''
    longest_common_subsequence():
    Compute the length of the longest common subsequence (LCS) of two
    sequences with a bottom-up dynamic-programming table.
    Input argument:
        seq_1, seq_2: Two input sequences (type: str)
        seq_1_len, seq_2_len: Length of two input sequences (type: int);
            only the first seq_1_len / seq_2_len characters are compared
    Return:
        Longest Common Subsequence length between seq_1 and seq_2 (type: int)
    '''
    # table[i][j] is the LCS length of seq_1[:i] and seq_2[:j]. Row 0 and
    # column 0 stay at 0 because an empty prefix shares nothing with anything.
    # Each row is built independently so that rows never alias one another.
    table = [[0] * (seq_2_len + 1) for _ in range(seq_1_len + 1)]

    for i in range(1, seq_1_len + 1):
        for j in range(1, seq_2_len + 1):
            if seq_1[i - 1] == seq_2[j - 1]:
                # Matching characters extend the LCS of both shorter prefixes.
                table[i][j] = table[i - 1][j - 1] + 1
            else:
                # Otherwise drop the last character of one prefix or the
                # other and keep whichever choice gives the longer LCS.
                table[i][j] = max(table[i - 1][j], table[i][j - 1])

    return table[seq_1_len][seq_2_len]

# seq_X = "AGGTAB"
# seq_Y = "GXTXAYB"
# longest_common_subsequence(seq_X , seq_Y, len(seq_X), len(seq_Y))

In [13]:
seq_X = "AGGTAB"
seq_Y = "GXTXAYB"

In [14]:
# Check the exercise's worked example: the expected LCS length is 4.
assert longest_common_subsequence(seq_X , seq_Y, len(seq_X), len(seq_Y)) == 4

In [15]:
#Further Testing
test_seq_X = "ABCDGH"
test_seq_Y = "AEDFHR"

In [16]:
# Second check on the further-testing pair: the expected LCS length is 3.
assert longest_common_subsequence(test_seq_X, test_seq_Y, len(test_seq_X), len(test_seq_Y)) == 3

###  <h2>Exercise 2: Levenshtein Distance</h2>
<h4>Given two sequences, the first sequence can be edited in three ways:</h4>
<ul>
<li>Insert a character ("SIP" -> "SHIP")</li>
</ul>
<ul>
<li>Remove a character ("SHIP" -> "HIP")</li>
</ul>
<ul>
<li>Replace a character ("CAN" -> "BAN")</li>
</ul>
<h4> Find the minimum number of operations required to convert the first sequence into the second one</h4>

<h4> Example: Input sequences "GOAT" and "GET" </h4>
<ul>
<li>Output: 2 (Remove "O" + Replace "A" to "E")</li>
</ul>
<h4> Example: Input sequences "CAT" and "CUT" </h4>
<ul>
<li>Output: 1 (Replace "A" to "U")</li>
</ul>

In [17]:
def levenshtein_distance(seq_1, seq_2, seq_1_len, seq_2_len):
    '''
    levenshtein_distance():
    Compute the edit distance between two sequences, where one edit is an
    insertion, a removal or a replacement of a single character.
    Input argument:
        seq_1, seq_2: Two input sequences (type: str)
        seq_1_len, seq_2_len: Length of two input sequences (type: int)
    Return:
        minimum number of operations required to convert seq_1 into seq_2
    '''
    width = seq_2_len + 1

    # grid[row][col] is the cost of turning seq_1[:row] into seq_2[:col].
    # The first row costs `col` insertions, the first column costs `row`
    # removals, and every interior cell starts at 0 until it is filled below.
    grid = [
        [col if row == 0 else (row if col == 0 else 0) for col in range(width)]
        for row in range(seq_1_len + 1)
    ]

    for row in range(1, seq_1_len + 1):
        for col in range(1, width):
            if seq_1[row - 1] == seq_2[col - 1]:
                # Same character on both sides: no extra edit is needed.
                grid[row][col] = grid[row - 1][col - 1]
                continue

            insert_cost = grid[row][col - 1]
            remove_cost = grid[row - 1][col]
            replace_cost = grid[row - 1][col - 1]
            # Pay one edit on top of the cheapest way to reach this cell.
            grid[row][col] = 1 + min(insert_cost, remove_cost, replace_cost)

    return grid[seq_1_len][seq_2_len]

# Quick demo in the same cell: the notebook displays the value of the last
# expression, i.e. the edit distance between the two strings below.
seq_X = "SUNDAY"
seq_Y = "SATURDAY"
levenshtein_distance(seq_X , seq_Y, len(seq_X), len(seq_Y))

3

In [18]:
seq_X = "SUNDAY"
seq_Y = "SATURDAY"

In [19]:
# Check the SUNDAY -> SATURDAY pair: the expected edit distance is 3.
assert levenshtein_distance(seq_X , seq_Y, len(seq_X), len(seq_Y)) == 3

In [20]:
# Worked example from the exercise text: remove "O", then replace "A" with "E".
test_seq_1 = "GOAT"
test_seq_2 = "GET"
assert levenshtein_distance(test_seq_1, test_seq_2, len(test_seq_1), len(test_seq_2)) == 2

# Worked example from the exercise text: a single replacement ("A" -> "U").
test_seq_3 = "CAT"
test_seq_4 = "CUT"
assert levenshtein_distance(test_seq_3, test_seq_4, len(test_seq_3), len(test_seq_4)) == 1