In [29]:
import numpy as np

In [30]:
sequence_length = 3
number_states = 4
mutation_rate = 0.3

In [31]:
# create every possible sequence
sequences = np.array(np.meshgrid(*[np.arange(number_states)]*sequence_length)).T.reshape(-1, sequence_length)
print(sequences)

[[0 0 0]
 [0 1 0]
 [0 2 0]
 [0 3 0]
 [1 0 0]
 [1 1 0]
 [1 2 0]
 [1 3 0]
 [2 0 0]
 [2 1 0]
 [2 2 0]
 [2 3 0]
 [3 0 0]
 [3 1 0]
 [3 2 0]
 [3 3 0]
 [0 0 1]
 [0 1 1]
 [0 2 1]
 [0 3 1]
 [1 0 1]
 [1 1 1]
 [1 2 1]
 [1 3 1]
 [2 0 1]
 [2 1 1]
 [2 2 1]
 [2 3 1]
 [3 0 1]
 [3 1 1]
 [3 2 1]
 [3 3 1]
 [0 0 2]
 [0 1 2]
 [0 2 2]
 [0 3 2]
 [1 0 2]
 [1 1 2]
 [1 2 2]
 [1 3 2]
 [2 0 2]
 [2 1 2]
 [2 2 2]
 [2 3 2]
 [3 0 2]
 [3 1 2]
 [3 2 2]
 [3 3 2]
 [0 0 3]
 [0 1 3]
 [0 2 3]
 [0 3 3]
 [1 0 3]
 [1 1 3]
 [1 2 3]
 [1 3 3]
 [2 0 3]
 [2 1 3]
 [2 2 3]
 [2 3 3]
 [3 0 3]
 [3 1 3]
 [3 2 3]
 [3 3 3]]


In [32]:
sequences = np.array([[0, 0, 0], [0,0,2], [1,3,0], [2,1,1]])

In [33]:
# initial frequencies is vector of zeros and 1 at the first sequence
initial_frequencies = np.zeros(sequences.shape[0])
initial_frequencies[0] = 1
print(initial_frequencies)

[1. 0. 0. 0.]


In [34]:
# initial transition matrix
transition_matrix = np.zeros((sequences.shape[0], sequences.shape[0]))
# fill the transition matrix with the number of different elements in the sequences
for i in range(sequences.shape[0]):
    for j in range(sequences.shape[0]):
        transition_matrix[i, j] = np.sum(sequences[i] != sequences[j])

print(transition_matrix)

[[0. 1. 2. 3.]
 [1. 0. 3. 3.]
 [2. 3. 0. 3.]
 [3. 3. 3. 0.]]


In [35]:
# element-wise apply (mu^c)*(1-mu)^(l-c) to the transition matrix
transition_matrix = ((mutation_rate/(number_states-1))**transition_matrix)*(1-mutation_rate)**(sequence_length-transition_matrix)
print(transition_matrix)
# check sums of rows
print(np.sum(transition_matrix, axis=0))

[[0.343 0.049 0.007 0.001]
 [0.049 0.343 0.001 0.001]
 [0.007 0.001 0.343 0.001]
 [0.001 0.001 0.001 0.343]]
[0.4   0.394 0.352 0.346]


In [36]:
# normalize the rows of the transition matrix
transition_matrix = transition_matrix/np.sum(transition_matrix, axis=0)
print(transition_matrix)
# check sums of rows
print(np.sum(transition_matrix, axis=0))

[[0.8575     0.12436548 0.01988636 0.00289017]
 [0.1225     0.87055838 0.00284091 0.00289017]
 [0.0175     0.00253807 0.97443182 0.00289017]
 [0.0025     0.00253807 0.00284091 0.99132948]]
[1. 1. 1. 1.]


In [37]:
frequencies_2 = np.dot(initial_frequencies.T, transition_matrix)
print(frequencies_2)
# check sum of frequencies
print(np.sum(frequencies_2))

[0.8575     0.12436548 0.01988636 0.00289017]
1.0046420192802705


In [39]:
frequencies_3 = np.dot(frequencies_2.T, transition_matrix)
print(frequencies_3)
# check sum of frequencies
print(np.sum(frequencies_3))

[0.75089626 0.21496862 0.03679198 0.00576035]
1.0084172147483697
