In [2]:
import numpy as np
import matplotlib.pyplot as plt
import biotite.sequence as seq
import biotite.sequence.align as align
import biotite.sequence.phylo as phylo
import biotite.sequence.graphics as graphics

# Obtain BLOSUM62 via biotite's standard protein substitution matrix.
# In the printed table the last four columns are the symbols B, Z, X and *,
# which come after the 20 single-letter amino acid codes A..Y.
matrix = align.SubstitutionMatrix.std_protein_matrix()
print(matrix)

    A   C   D   E   F   G   H   I   K   L   M   N   P   Q   R   S   T   V   W   Y   B   Z   X   *
A   4   0  -2  -1  -2   0  -2  -1  -1  -1  -1  -2  -1  -1  -1   1   0   0  -3  -2  -2  -1   0  -4
C   0   9  -3  -4  -2  -3  -3  -1  -3  -1  -1  -3  -3  -3  -3  -1  -1  -1  -2  -2  -3  -3  -2  -4
D  -2  -3   6   2  -3  -1  -1  -3  -1  -4  -3   1  -1   0  -2   0  -1  -3  -4  -3   4   1  -1  -4
E  -1  -4   2   5  -3  -2   0  -3   1  -3  -2   0  -1   2   0   0  -1  -2  -3  -2   1   4  -1  -4
F  -2  -2  -3  -3   6  -3  -1   0  -3   0   0  -3  -4  -3  -3  -2  -2  -1   1   3  -3  -3  -1  -4
G   0  -3  -1  -2  -3   6  -2  -4  -2  -4  -3   0  -2  -2  -2   0  -2  -3  -2  -3  -1  -2  -1  -4
H  -2  -3  -1   0  -1  -2   8  -3  -1  -3  -2   1  -2   0   0  -1  -2  -3  -2   2   0   0  -1  -4
I  -1  -1  -3  -3   0  -4  -3   4  -3   2   1  -3  -3  -3  -3  -2  -1   3  -3  -1  -3  -3  -1  -4
K  -1  -3  -1   1  -3  -2  -1  -3   5  -2  -1   0  -1   1   2   0  -1  -2  -3  -2   0   1  -1  -4
L  -1  -1  -4  -3   

In [3]:
# Restrict BLOSUM62 to its first 20 symbols (A..Y) by dropping the last four
# symbols (B, Z, X, *) from both alphabets and from the score table.
#
# The trimmed matrix is built from a freshly obtained standard matrix instead
# of from `matrix` itself: rebinding `matrix` from its own previous value
# would strip another four symbols every time this cell is re-run.
N_TRAILING_SYMBOLS = 4  # B, Z, X and * at the end of the standard alphabet
full_matrix = align.SubstitutionMatrix.std_protein_matrix()
matrix = align.SubstitutionMatrix(
    seq.Alphabet(full_matrix.get_alphabet1().get_symbols()[:-N_TRAILING_SYMBOLS]),
    seq.Alphabet(full_matrix.get_alphabet2().get_symbols()[:-N_TRAILING_SYMBOLS]),
    full_matrix.score_matrix()[:-N_TRAILING_SYMBOLS, :-N_TRAILING_SYMBOLS],
)
# Raw score table of the trimmed matrix; used below to derive distances.
similarities = matrix.score_matrix()
print(matrix)

    A   C   D   E   F   G   H   I   K   L   M   N   P   Q   R   S   T   V   W   Y
A   4   0  -2  -1  -2   0  -2  -1  -1  -1  -1  -2  -1  -1  -1   1   0   0  -3  -2
C   0   9  -3  -4  -2  -3  -3  -1  -3  -1  -1  -3  -3  -3  -3  -1  -1  -1  -2  -2
D  -2  -3   6   2  -3  -1  -1  -3  -1  -4  -3   1  -1   0  -2   0  -1  -3  -4  -3
E  -1  -4   2   5  -3  -2   0  -3   1  -3  -2   0  -1   2   0   0  -1  -2  -3  -2
F  -2  -2  -3  -3   6  -3  -1   0  -3   0   0  -3  -4  -3  -3  -2  -2  -1   1   3
G   0  -3  -1  -2  -3   6  -2  -4  -2  -4  -3   0  -2  -2  -2   0  -2  -3  -2  -3
H  -2  -3  -1   0  -1  -2   8  -3  -1  -3  -2   1  -2   0   0  -1  -2  -3  -2   2
I  -1  -1  -3  -3   0  -4  -3   4  -3   2   1  -3  -3  -3  -3  -2  -1   3  -3  -1
K  -1  -3  -1   1  -3  -2  -1  -3   5  -2  -1   0  -1   1   2   0  -1  -2  -3  -2
L  -1  -1  -4  -3   0  -4  -3   2  -2   4   2  -3  -3  -2  -2  -2  -1   1  -2  -1
M  -1  -1  -3  -2   0  -3  -2   1  -1   2   5  -2  -2   0  -1  -1  -1   1  -1  -1
N  -2  -3   1   

In [4]:
def get_distance(similarities, i, j):
    """
    Turn a similarity score into a distance between symbols `i` and `j`.

    The distance is the mean of the two self-similarities (the diagonal
    entries for `i` and `j`) minus the similarity between `i` and `j`.
    For ``i == j`` the result is therefore zero.

    Parameters
    ----------
    similarities
        Two-dimensional score table, indexed as ``similarities[row, col]``.
    i, j
        Indices of the two symbols to compare.

    Returns
    -------
    The distance ``(s[i, i] + s[j, j]) / 2 - s[i, j]``.
    """
    self_score_i = similarities[i, i]
    self_score_j = similarities[j, j]
    mean_self_score = (self_score_i + self_score_j) / 2
    pair_score = similarities[i, j]
    return mean_self_score - pair_score

# Convert the similarity scores into pairwise distances in one vectorized step.
# This applies the same formula as get_distance() to every (i, j) pair:
#     d[i, j] = (s[i, i] + s[j, j]) / 2 - s[i, j]
# Broadcasting the diagonal once as a column and once as a row yields the
# summed self-similarities for every pair, so no Python-level double loop
# is needed. The division by 2 makes the result a float array.
self_scores = np.diagonal(similarities)
distances = (
    (self_scores[:, np.newaxis] + self_scores[np.newaxis, :]) / 2
    - similarities
)

In [5]:
# Display the full matrix of pairwise distances computed from the scores.
print(distances)

[[ 0.   6.5  7.   5.5  7.   5.   8.   5.   5.5  5.   5.5  7.   6.5  5.5
   5.5  3.   4.5  4.  10.5  7.5]
 [ 6.5  0.  10.5 11.   9.5 10.5 11.5  7.5 10.   7.5  8.  10.5 11.  10.
  10.   7.5  8.   7.5 12.  10. ]
 [ 7.  10.5  0.   3.5  9.   7.   8.   8.   6.5  9.   8.5  5.   7.5  5.5
   7.5  5.   6.5  8.  12.5  9.5]
 [ 5.5 11.   3.5  0.   8.5  7.5  6.5  7.5  4.   7.5  7.   5.5  7.   3.
   5.   4.5  6.   6.5 11.   8. ]
 [ 7.   9.5  9.   8.5  0.   9.   8.   5.   8.5  5.   5.5  9.  10.5  8.5
   8.5  7.   7.5  6.   7.5  3.5]
 [ 5.  10.5  7.   7.5  9.   0.   9.   9.   7.5  9.   8.5  6.   8.5  7.5
   7.5  5.   7.5  8.  10.5  9.5]
 [ 8.  11.5  8.   6.5  8.   9.   0.   9.   7.5  9.   8.5  6.   9.5  6.5
   6.5  7.   8.5  9.  11.5  5.5]
 [ 5.   7.5  8.   7.5  5.   9.   9.   0.   7.5  2.   3.5  8.   8.5  7.5
   7.5  6.   5.5  1.  10.5  6.5]
 [ 5.5 10.   6.5  4.   8.5  7.5  7.5  7.5  0.   6.5  6.   5.5  7.   4.
   3.   4.5  6.   6.5 11.   8. ]
 [ 5.   7.5  9.   7.5  5.   9.   9.   2.   6.5  0.   2.5  

In [11]:
# Spot check: show the raw similarity scores and the shape of the distance
# matrix, then recompute a single distance directly.
print(similarities)
print(distances.shape)
# Distance between symbols 0 and 1 (A and C): (4 + 9) / 2 - 0 = 6.5
get_distance(similarities, 0, 1)

[[ 4  0 -2 -1 -2  0 -2 -1 -1 -1 -1 -2 -1 -1 -1  1  0  0 -3 -2]
 [ 0  9 -3 -4 -2 -3 -3 -1 -3 -1 -1 -3 -3 -3 -3 -1 -1 -1 -2 -2]
 [-2 -3  6  2 -3 -1 -1 -3 -1 -4 -3  1 -1  0 -2  0 -1 -3 -4 -3]
 [-1 -4  2  5 -3 -2  0 -3  1 -3 -2  0 -1  2  0  0 -1 -2 -3 -2]
 [-2 -2 -3 -3  6 -3 -1  0 -3  0  0 -3 -4 -3 -3 -2 -2 -1  1  3]
 [ 0 -3 -1 -2 -3  6 -2 -4 -2 -4 -3  0 -2 -2 -2  0 -2 -3 -2 -3]
 [-2 -3 -1  0 -1 -2  8 -3 -1 -3 -2  1 -2  0  0 -1 -2 -3 -2  2]
 [-1 -1 -3 -3  0 -4 -3  4 -3  2  1 -3 -3 -3 -3 -2 -1  3 -3 -1]
 [-1 -3 -1  1 -3 -2 -1 -3  5 -2 -1  0 -1  1  2  0 -1 -2 -3 -2]
 [-1 -1 -4 -3  0 -4 -3  2 -2  4  2 -3 -3 -2 -2 -2 -1  1 -2 -1]
 [-1 -1 -3 -2  0 -3 -2  1 -1  2  5 -2 -2  0 -1 -1 -1  1 -1 -1]
 [-2 -3  1  0 -3  0  1 -3  0 -3 -2  6 -2  0  0  1  0 -3 -4 -2]
 [-1 -3 -1 -1 -4 -2 -2 -3 -1 -3 -2 -2  7 -1 -2 -1 -1 -2 -4 -3]
 [-1 -3  0  2 -3 -2  0 -3  1 -2  0  0 -1  5  1  0 -1 -2 -2 -1]
 [-1 -3 -2  0 -3 -2  0 -3  2 -2 -1  0 -2  1  5 -1 -1 -3 -3 -2]
 [ 1 -1  0  0 -2  0 -1 -2  0 -2 -1  1 -1  0 -1  4  1 -2

6.5