Building the scoring matrix with given sequences and scoring scheme.

In [1]:
import numpy as np
import pandas as pd
import random

# Input sequences and linear scoring scheme.
# `x` labels the rows of the matrix and `y` labels its columns.
y = "GGCAGTA"
x = "GATGCGCAG"
match = 2        # score added when two residues are identical
mismatch = -1    # score added when two residues differ
gap = -3         # score added (already negative) for each gap position


def build_scoring_matrix(x, y, match, mismatch, gap):
    """Fill the local-alignment (Smith-Waterman style) scoring matrix.

    Parameters
    ----------
    x, y : str
        Sequences to align. ``x`` indexes the rows, ``y`` the columns.
    match, mismatch, gap : number
        Scores added for a match, a mismatch and a single gap position.
        ``gap`` is added as-is, so pass a negative value to penalise gaps.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(x) + 1, len(y) + 1)``. Row 0 and
        column 0 stay at zero, and no cell is ever negative.
    """
    nx, ny = len(x), len(y)

    # np.zeros already gives the all-zero first row and column that a local
    # alignment needs (an alignment may start anywhere at no cost).
    F = np.zeros((nx + 1, ny + 1))

    for i in range(1, nx + 1):
        for j in range(1, ny + 1):
            substitution = match if x[i - 1] == y[j - 1] else mismatch
            candidates = (
                0.0,                            # start a fresh alignment here
                F[i - 1, j - 1] + substitution, # align x[i-1] with y[j-1]
                F[i - 1, j] + gap,              # x[i-1] against a gap
                F[i, j - 1] + gap,              # y[j-1] against a gap
            )
            # np.max instead of the builtin max: the cell then keeps working
            # even if the name `max` has been rebound in the notebook
            # namespace.
            F[i, j] = np.max(candidates)

    return F


nx = len(x)      # number of rows (excluding the leading zero row)
ny = len(y)      # number of columns (excluding the leading zero column)

F = build_scoring_matrix(x, y, match, mismatch, gap)

# For a local alignment the score is the largest cell anywhere in the matrix,
# not the bottom-right corner (that convention belongs to global alignment).
Score = F.max()

# Print the scoring matrix with the residues as row/column labels.
print("Scoring Matrix:")
df = pd.DataFrame(F, index=['-'] + list(x), columns=['-'] + list(y))
print(df)

Scoring Matrix:
     -    G    G    C    A    G    T    A
-  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0
G  0.0  2.0  2.0  0.0  0.0  2.0  0.0  0.0
A  0.0  0.0  1.0  1.0  2.0  0.0  1.0  2.0
T  0.0  0.0  0.0  0.0  0.0  1.0  2.0  0.0
G  0.0  2.0  2.0  0.0  0.0  2.0  0.0  1.0
C  0.0  0.0  1.0  4.0  1.0  0.0  1.0  0.0
G  0.0  2.0  2.0  1.0  3.0  3.0  0.0  0.0
C  0.0  0.0  1.0  4.0  1.0  2.0  2.0  0.0
A  0.0  0.0  0.0  1.0  6.0  3.0  1.0  4.0
G  0.0  2.0  2.0  0.0  3.0  8.0  5.0  2.0


Using Dynamic Programming to find the optimal local alignment.

In [2]:
# Traceback: recover one optimal local alignment from the scoring matrix.

def traceback_local_alignment(F, x, y, match, mismatch, gap):
    """Trace one best local alignment back through scoring matrix ``F``.

    Parameters
    ----------
    F : numpy.ndarray
        Scoring matrix of shape ``(len(x) + 1, len(y) + 1)`` whose first row
        and column are zero; rows follow ``x`` and columns follow ``y``.
    x, y : str
        The two aligned sequences.
    match, mismatch, gap : number
        The same scores that were used to fill ``F``.

    Returns
    -------
    tuple
        ``(aligned_x, aligned_y, score, end_cell)`` where the aligned strings
        use ``"-"`` for gaps, ``score`` is the largest value in ``F`` and
        ``end_cell`` is the ``[row, column]`` of that value.

    Raises
    ------
    ValueError
        If a cell on the path cannot be explained by any neighbour, which
        means ``F`` was not built with the given scores.
    """
    # A local alignment may end at any cell, so search the whole matrix
    # (including the bottom-right corner) for the maximum.
    row, col = np.unravel_index(np.argmax(F), F.shape)
    i, j = int(row), int(col)
    end_cell = [i, j]
    score = F[i, j]

    aligned_x = []
    aligned_y = []

    # Walk back until the alignment's starting point: a zero cell or a border.
    while i > 0 and j > 0 and F[i, j] > 0:
        substitution = match if x[i - 1] == y[j - 1] else mismatch

        if F[i, j] == F[i - 1, j - 1] + substitution:
            # Diagonal move: x[i-1] is aligned with y[j-1] (match or mismatch).
            aligned_x.append(x[i - 1])
            aligned_y.append(y[j - 1])
            i -= 1
            j -= 1
        elif F[i, j] == F[i, j - 1] + gap:
            # Leftward move (same row): y[j-1] is aligned with a gap in x.
            aligned_x.append("-")
            aligned_y.append(y[j - 1])
            j -= 1
        elif F[i, j] == F[i - 1, j] + gap:
            # Upward move (same column): x[i-1] is aligned with a gap in y.
            aligned_x.append(x[i - 1])
            aligned_y.append("-")
            i -= 1
        else:
            # Without this branch the loop would never terminate.
            raise ValueError(
                "Cell ({}, {}) is not reachable from any neighbour; "
                "F does not match the given scores.".format(i, j)
            )

    # The path was collected end-to-start, so reverse it.
    return (
        "".join(reversed(aligned_x)),
        "".join(reversed(aligned_y)),
        score,
        end_cell,
    )


rx, ry, alignment_score, index = traceback_local_alignment(
    F, x, y, match, mismatch, gap
)

# Build the middle line from the alignment itself: "|" under identical
# residues, blank under mismatches and gaps. This keeps the output correct
# for any alignment length and identical on every run.
markers = "".join("|" if a == b else " " for a, b in zip(rx, ry))

# Printing the best alignment in the required form.
print("\nThe Required Local Alignment is : \n" + rx + "\n" + markers + "\n" + ry)
print("\nAlignment Score : ", alignment_score)


The Required Local Alignment is : 
-----GCAG---
     ||||
-----GCAG---

Alignment Score :  8.0
