In [53]:
import numpy as np

def _edit_distance_matrix(seq1, seq2):
    """Fill the unit-cost edit-distance table for every pair of prefixes.

    Entry ``[i, j]`` is the minimal cost of aligning ``seq1[:i]`` with
    ``seq2[:j]`` where a match costs 0 and a mismatch or gap costs 1.
    """
    n1, n2 = len(seq1), len(seq2)
    dp = np.empty((n1 + 1, n2 + 1), dtype=int)

    # Aligning a prefix against the empty prefix needs one gap per symbol.
    dp[:, 0] = np.arange(n1 + 1)
    dp[0, :] = np.arange(n2 + 1)

    for i in range(1, n1 + 1):
        for j in range(1, n2 + 1):
            substitution = dp[i - 1, j - 1] + int(seq1[i - 1] != seq2[j - 1])
            dp[i, j] = min(substitution, dp[i - 1, j] + 1, dp[i, j - 1] + 1)

    return dp


def _print_matrix(dp, seq1, seq2):
    """Print the DP table with seq2 as column headers and seq1 as row labels."""
    print("\t\t\tDP matrix:\n")

    print('\t')
    # Two empty header fields: one for the row-label column and one for the
    # dp column that corresponds to the empty prefix of seq2.
    print("\t".join(['', ''] + list(seq2)))

    row_labels = [''] + list(seq1)
    for label, row in zip(row_labels, dp):
        print(label, '\t', '\t'.join(row.astype(str)))


def _trace_back(dp, seq1, seq2):
    """Walk from the bottom-right cell to (0, 0) and return the path in forward order."""
    i, j = len(seq1), len(seq2)
    path = [(i, j)]

    while i > 0 or j > 0:
        # Tie-breaking order: diagonal first, then a step up, then a step left.
        if (i > 0 and j > 0
                and dp[i - 1, j - 1] + int(seq1[i - 1] != seq2[j - 1]) == dp[i, j]):
            i, j = i - 1, j - 1
        elif i > 0 and (j == 0 or dp[i - 1, j] + 1 == dp[i, j]):
            # Also covers the left border, where only upward moves remain.
            i -= 1
        else:
            # Also covers the top border, where only leftward moves remain.
            j -= 1
        path.append((i, j))

    path.reverse()
    return path


def _alignment_from_path(path, seq1, seq2):
    """Turn a (0, 0) -> (n1, n2) path into two gapped strings of equal length."""
    aligned1, aligned2 = [], []
    for (prev_i, prev_j), (i, j) in zip(path, path[1:]):
        # An index that did not advance on this step means a gap in that sequence.
        aligned1.append(seq1[i - 1] if i != prev_i else '-')
        aligned2.append(seq2[j - 1] if j != prev_j else '-')
    return ''.join(aligned1), ''.join(aligned2)


def alignProteins(seq1, seq2, printMatrix=True):
    """Align two sequences end to end using unit-cost dynamic programming.

    A match costs 0; a mismatch and a gap each cost 1. When several optimal
    alignments exist, the traceback prefers a diagonal step, then a gap in
    ``seq2``, then a gap in ``seq1``.

    Parameters
    ----------
    seq1, seq2 : str
        Sequences to align (either may be empty).
    printMatrix : bool, optional
        When True (the default), print the DP table and the traceback path.

    Returns
    -------
    tuple of (str, str, int)
        ``seq1`` and ``seq2`` with ``'-'`` inserted at gaps (both strings have
        the same length), and the total alignment cost as a Python ``int``.
    """
    dp = _edit_distance_matrix(seq1, seq2)
    if printMatrix:
        _print_matrix(dp, seq1, seq2)

    path = _trace_back(dp, seq1, seq2)
    if printMatrix:
        print("\n\t\t\tPath:\n")
        print(path)

    seq1_aligned, seq2_aligned = _alignment_from_path(path, seq1, seq2)

    # Convert the NumPy scalar so callers get a plain int (NumPy >= 2 would
    # otherwise display it as ``np.int64(...)`` inside the returned tuple).
    return seq1_aligned, seq2_aligned, int(dp[-1, -1])

In [54]:
# Example 1: seq1 (8 symbols) is longer than seq2 (6 symbols).
seq1, seq2 = "AGCTTGCA", "TGCAAG"

# Default printMatrix=True, so the DP table and path are printed by the call.
(seq1_aligned, seq2_aligned, weight) = alignProteins(seq1, seq2)

print("\n\t\t\tAlignment:\n")
print(seq1_aligned, seq2_aligned, sep="\n")
print("\nWeight =", weight)

			DP matrix:

	
		T	G	C	A	A	G
 	 0	1	2	3	4	5	6
A 	 1	1	2	3	3	4	5
G 	 2	2	1	2	3	4	4
C 	 3	3	2	1	2	3	4
T 	 4	3	3	2	2	3	4
T 	 5	4	4	3	3	3	4
G 	 6	5	4	4	4	4	3
C 	 7	6	5	4	5	5	4
A 	 8	7	6	5	4	5	5

			Path:

[(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 6), (8, 6)]

			Alignment:

AGCTTGCA
TGCAAG--

Weight = 5


In [55]:
# Example 2: seq1 (6 symbols) is longer than seq2 (4 symbols).
seq1, seq2 = "AAAGGG", "GAGC"

# Default printMatrix=True, so the DP table and path are printed by the call.
(seq1_aligned, seq2_aligned, weight) = alignProteins(seq1, seq2)

print("\n\t\t\tAlignment:\n")
print(seq1_aligned, seq2_aligned, sep="\n")
print("\nWeight =", weight)

			DP matrix:

	
		G	A	G	C
 	 0	1	2	3	4
A 	 1	1	1	2	3
A 	 2	2	1	2	3
A 	 3	3	2	2	3
G 	 4	3	3	2	3
G 	 5	4	4	3	3
G 	 6	5	5	4	4

			Path:

[(0, 0), (1, 0), (2, 1), (3, 2), (4, 2), (5, 3), (6, 4)]

			Alignment:

AAAGGG
-GA-GC

Weight = 4


In [56]:
# Example 3: seq1 (4 symbols) is shorter than seq2 (6 symbols).
seq1, seq2 = "GATC", "TGCAAG"

# Default printMatrix=True, so the DP table and path are printed by the call.
(seq1_aligned, seq2_aligned, weight) = alignProteins(seq1, seq2)

print("\n\t\t\tAlignment:\n")
print(seq1_aligned, seq2_aligned, sep="\n")
print("\nWeight =", weight)

			DP matrix:

	
		T	G	C	A	A	G
 	 0	1	2	3	4	5	6
G 	 1	1	1	2	3	4	5
A 	 2	2	2	2	2	3	4
T 	 3	2	3	3	3	3	4
C 	 4	3	3	3	4	4	4

			Path:

[(0, 0), (0, 1), (1, 2), (1, 3), (2, 4), (3, 5), (4, 6)]

			Alignment:

-G-ATC
TGCAAG

Weight = 4
