In [1]:
import numpy
import pandas


def readBLOSUM62(path="blosum62.txt"):
    """Load the BLOSUM62 substitution matrix from a tab-separated text file.

    Line 0 of the file and field 0 of every following line are skipped
    (presumably the residue labels -- confirm against the data file); the
    next 23 lines x 23 fields are parsed as numeric scores.

    Parameters
    ----------
    path : str, optional
        Location of the matrix file. Defaults to "blosum62.txt" in the
        current working directory, which is what the original code read.

    Returns
    -------
    pandas.DataFrame
        A 23 x 23 frame of float scores whose index and columns are the
        residue letters listed in ``names`` below.

    Raises
    ------
    OSError
        If the file cannot be opened.
    IndexError, ValueError
        If the file has fewer rows/fields than expected or a field is not
        numeric.
    """
    # The first 20 rows/columns are amino acids and the last 3 represent unknowns
    names = ["A","R","N","D","C","Q","E","G","H","I","L","K","M","F","P","S","T","W","Y","V","B","Z","X"]
    size = len(names)

    # The context manager closes the handle even when parsing fails later;
    # the original left the file open.
    with open(path, "r") as text_file:
        lines = text_file.read().split("\n")

    blosum62 = numpy.zeros([size, size])
    for i in range(1, size + 1):
        fields = lines[i].split("\t")
        for j in range(1, size + 1):
            blosum62[i - 1, j - 1] = float(fields[j])

    return pandas.DataFrame(blosum62, index=names, columns=names)



# Load the substitution matrix from blosum62.txt once; later code looks
# scores up in this table.
BLOSUM62 = readBLOSUM62()

# Look up the score for the pair [F,P] (column "F", row "P").
BLOSUM62.loc["P", "F"]

# c.

def matrix1(x, y, mt, gp):
    """Fill the dynamic-programming score table for aligning x against y.

    Parameters
    ----------
    x, y : sequence of residue labels
        x indexes the rows of the table and y the columns.
    mt : mapping of mappings
        Substitution scores, looked up as ``mt[x_residue][y_residue]``.
    gp : number
        Score added for stepping down or right (a gap).

    Returns
    -------
    list of list
        A ``(len(x) + 1)`` by ``(len(y) + 1)`` table. Row 0 and column 0 are
        left at zero; every other cell holds the best of the diagonal,
        from-above and from-left moves.
    """
    n_rows = len(x) + 1
    n_cols = len(y) + 1

    # Borders stay at zero, so gaps before the start of either sequence are free.
    table = [[0] * n_cols for _ in range(n_rows)]

    for row, residue_x in enumerate(x, start=1):
        above = table[row - 1]
        current = table[row]
        for col, residue_y in enumerate(y, start=1):
            diagonal = above[col - 1] + mt[residue_x][residue_y]
            from_above = above[col] + gp
            from_left = current[col - 1] + gp
            current[col] = max(diagonal, from_above, from_left)

    return table

def alignment(x, y, matrix, gp, mt=None):
    """Trace back through a filled score table and build the two aligned strings.

    Parameters
    ----------
    x, y : str
        The sequences that index the rows (x) and columns (y) of ``matrix``.
    matrix : list of list
        Score table with ``len(x) + 1`` rows and ``len(y) + 1`` columns, as
        produced by ``matrix1``.
    gp : number
        Gap score that was used when the table was filled.
    mt : mapping of mappings, optional
        Substitution scores, looked up as ``mt[x_residue][y_residue]``. It
        should be the same matrix that filled ``matrix``. When omitted, the
        notebook-level ``BLOSUM62`` is used, which is what the original code
        always did.

    Returns
    -------
    tuple
        ``(align_x, align_y, score)``: the gapped versions of x and y from the
        start of both sequences up to the highest-scoring cell, and that
        cell's score. If no cell scores above 0, both strings are empty and
        the score is 0.

    Notes
    -----
    NOTE(review): the traceback starts at the maximum over the *whole* table,
    not only the last row/column, and residues after that cell are not
    emitted. This is kept as-is so existing results do not change -- confirm
    it is the intended definition of the alignment.
    """
    if mt is None:
        # Backward-compatible default: the matrix the original hard-coded.
        mt = BLOSUM62

    # Locate the best-scoring cell; the first occurrence wins on ties.
    best = 0
    r = 0
    c = 0
    for i in range(len(x) + 1):
        row_best = max(matrix[i])
        if row_best > best:
            best = row_best
            r = i
            c = matrix[i].index(row_best)

    # Collected back-to-front and reversed once at the end.
    rev_x = []
    rev_y = []
    while r > 0 or c > 0:
        if r > 0 and c > 0 and matrix[r][c] == matrix[r-1][c-1] + mt[x[r-1]][y[c-1]]:
            # Diagonal move: both residues are aligned to each other.
            rev_x.append(x[r-1])
            rev_y.append(y[c-1])
            r -= 1
            c -= 1
        elif r > 0 and (c == 0 or matrix[r][c] == matrix[r-1][c] + gp):
            # Move up: x residue against a gap. On the left border (c == 0)
            # this is the only legal move; the border is zero-filled, so the
            # score test alone would miss it, and the original then read
            # y[-1] and pushed c negative.
            rev_x.append(x[r-1])
            rev_y.append("-")
            r -= 1
        else:
            # Move left: y residue against a gap. c > 0 is guaranteed here.
            rev_x.append("-")
            rev_y.append(y[c-1])
            c -= 1

    align_x = "".join(reversed(rev_x))
    align_y = "".join(reversed(rev_y))
    return align_x, align_y, best



a. The initialization of the F matrix has to change from F(i,0) <- d * i to F(i,0) = 0, and likewise F(0,j) = 0. The other change is to the starting values of i and j in the backtrack/form-alignment section: instead of starting from the full lengths of A and B, the traceback starts from the indices of the largest value in the matrix. 

b. It would be different due to overlap alignments being local alignments. 

d. 

N1J540:
MHPAQLSKLLSIPDSHAQEILDYAATLSKDEAVNHFREFLGNSLEMRNFISAYELQREGSMTSQNTGAPANVTRISNKNTASHKSMSKGVPATSTSKISQKKMTQNDYTAASKKTQFGTSTHSSNPSGPIAHPSNLYPGLNQSKEKNARNGSPAFPNKVKIAISGGQSMHGTSTTISEIEAAIRSLEISTNSSLSSQDPSKRACNCIATQHPLLTAAPNCLSCGKVICVKEGFGPCTYCGEPLLSAVEVQKMISVLREDCGREKMLANDQRQKHATASSNSKPFPQSQPINTQISRAELEARTHRDKLLAFQAQNAKRTTVRDEVAEVNVDLAAVERDMIWATPVERARALKKQQKLLQEQEWNARPEYERKRMVVSLNVVGGKVVKNIGRTERRPQADLIAAAEKSSLEKEEMEPVAQEQGHSTQVFRRNPLELQQQKKPAYTWRRVQDNQDDNESYILDGGLKGREVD

H0Y8P2:
XILESPEHLPSNFLAQPVNDSAPHPESDATCQQPVKQMRLKKAIHLKKLNFLKSQKYAEQVSEPKSDDGLTKRLESASKNTLEKASSQSAEEKESEEVVSCENFNCISETERPEDPAALEDQSQTLQSQRQYACELCGKPFKHPSNLELHKRSHTGKCFGGSGDLRRHVRTHTG


e.

In [2]:
# Align the zinc-finger motif against each candidate protein and report the
# aligned strings and the score. The motif is passed as the first sequence
# (table rows) and the protein as the second (table columns), so x_align is
# the gapped motif and y_align is the protein side of the alignment.
zinc_finger = "YECENCAKVFTDPSNLQ"
candidates = [
    (
        "N1J540 and zinc-finger",
        "MHPAQLSKLLSIPDSHAQEILDYAATLSKDEAVNHFREFLGNSLEMRNFISAYELQREGSMTSQNTGAPANVTRISNKNTASHKSMSKGVPATSTSKISQKKMTQNDYTAASKKTQFGTSTHSSNPSGPIAHPSNLYPGLNQSKEKNARNGSPAFPNKVKIAISGGQSMHGTSTTISEIEAAIRSLEISTNSSLSSQDPSKRACNCIATQHPLLTAAPNCLSCGKVICVKEGFGPCTYCGEPLLSAVEVQKMISVLREDCGREKMLANDQRQKHATASSNSKPFPQSQPINTQISRAELEARTHRDKLLAFQAQNAKRTTVRDEVAEVNVDLAAVERDMIWATPVERARALKKQQKLLQEQEWNARPEYERKRMVVSLNVVGGKVVKNIGRTERRPQADLIAAAEKSSLEKEEMEPVAQEQGHSTQVFRRNPLELQQQKKPAYTWRRVQDNQDDNESYILDGGLKGREVD",
    ),
    (
        "H0Y8P2 and zinc-finger",
        "XILESPEHLPSNFLAQPVNDSAPHPESDATCQQPVKQMRLKKAIHLKKLNFLKSQKYAEQVSEPKSDDGLTKRLESASKNTLEKASSQSAEEKESEEVVSCENFNCISETERPEDPAALEDQSQTLQSQRQYACELCGKPFKHPSNLELHKRSHTGKCFGGSGDLRRHVRTHTG",
    ),
]

for position, (heading, x) in enumerate(candidates):
    y = zinc_finger
    if position > 0:
        # Blank line separating consecutive reports.
        print()
    print(heading)

    matrix = matrix1(y, x, BLOSUM62, -8)
    x_align, y_align, score = alignment(y, x, matrix, -8)

    print(x_align)
    print()
    print(y_align)
    print("Score = {:.0f}".format(score))

N1J540 and zinc-finger
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------YECENCAKV

MHPAQLSKLLSIPDSHAQEILDYAATLSKDEAVNHFREFLGNSLEMRNFISAYELQREGSMTSQNTGAPANVTRISNKNTASHKSMSKGVPATSTSKISQKKMTQNDYTAASKKTQFGTSTHSSNPSGPIAHPSNLYPGLNQSKEKNARNGSPAFPNKVKIAISGGQSMHGTSTTISEIEAAIRSLEISTNSSLSSQDPSKRACNCIATQHPLLTAAPNCLSCGKV
Score = 22

H0Y8P2 and zinc-finger
-----------------------------------------------------------------------------------------------------------------------------------YECENCAKVFTDPSNLQ

XILESPEHLPSNFLAQPVNDSAPHPESDATCQQPVKQMRLKKAIHLKKLNFLKSQKYAEQVSEPKSDDGLTKRLESASKNTLEKASSQSAEEKESEEVVSCENFNCISETERPEDPAALEDQSQTLQSQRQYACELCGKPFKHPSNLE
Score = 56


f. Based on the scores of each alignment, I believe that H0Y8P2 has a zinc-finger. This is because the alignment score is higher for H0Y8P2 (score = 56) than for N1J540 (score = 22). The residues that form the zinc-finger in H0Y8P2 are YACELCGKPFKHPSNLE, which is quite similar to the motif YECENCAKVFTDPSNLQ. 