In [431]:
import numpy as np

In [432]:
# Model parameters read by the cells below.
mutation_rate = 0.2    # probability that a single site changes state
number_states = 2      # number of distinct states a site can take
sequence_length = 3    # number of sites in every sequence

In [433]:
# Enumerate every possible sequence: one axis of state labels per site, the
# Cartesian grid over those axes, then flattened so each row is one sequence.
state_axes = [np.arange(number_states) for _ in range(sequence_length)]
state_grid = np.meshgrid(*state_axes)
sequences = np.array(state_grid).T.reshape(-1, sequence_length)
print(sequences)

[[0 0 0]
 [0 1 0]
 [1 0 0]
 [1 1 0]
 [0 0 1]
 [0 1 1]
 [1 0 1]
 [1 1 1]]


In [434]:
# Replace the full enumeration with a hand-picked subset of four sequences
# (one sequence per row).
sequences = np.array(
    [
        [0, 0, 0],
        [0, 0, 1],
        [1, 0, 0],
        [1, 1, 0],
    ]
)

In [435]:
# Starting distribution: one entry per sequence, with all of the mass placed
# on the first sequence.
initial_frequencies = np.zeros(len(sequences))
initial_frequencies[0] = 1.0
print(initial_frequencies)

[1. 0. 0. 0.]


In [436]:
# Pairwise difference counts: entry [i, j] is the number of sites at which
# sequence i and sequence j differ.  Broadcasting compares every pair of rows
# at once; the cast keeps the matrix floating point, as the later in-place
# conversion to probabilities expects.
site_differs = sequences[:, np.newaxis, :] != sequences[np.newaxis, :, :]
transition_matrix = site_differs.sum(axis=-1).astype(float)

print(transition_matrix)

[[0. 1. 1. 2.]
 [1. 0. 2. 3.]
 [1. 2. 0. 1.]
 [2. 3. 1. 0.]]


In [437]:
# Convert each difference count c into a probability, element-wise:
#     (mu / (k - 1))**c * (1 - mu)**(l - c)
# with mu = mutation_rate, k = number_states and l = sequence_length.
# NOTE: transition_matrix is overwritten in place, so re-running this cell
# without re-running the previous one applies the formula to probabilities.
per_state_rate = mutation_rate / (number_states - 1)
changed_sites = transition_matrix
unchanged_sites = sequence_length - changed_sites
transition_matrix = per_state_rate**changed_sites * (1 - mutation_rate)**unchanged_sites
print(transition_matrix)
# row totals
print(transition_matrix.sum(axis=1))

[[0.512 0.128 0.128 0.032]
 [0.128 0.512 0.032 0.008]
 [0.128 0.032 0.512 0.128]
 [0.032 0.008 0.128 0.512]]
[0.8  0.68 0.8  0.68]


In [438]:
# Row normalisation is currently disabled, so this cell only re-displays the
# matrix and its row totals.
# NOTE(review): the disabled statement below divides by np.sum(..., axis=0),
# i.e. by the column sums; normalising rows would instead divide by
# transition_matrix.sum(axis=1, keepdims=True).
# transition_matrix = transition_matrix/np.sum(transition_matrix, axis=0)
print(transition_matrix)
# row totals
print(transition_matrix.sum(axis=1))

[[0.512 0.128 0.128 0.032]
 [0.128 0.512 0.032 0.008]
 [0.128 0.032 0.512 0.128]
 [0.032 0.008 0.128 0.512]]
[0.8  0.68 0.8  0.68]


In [439]:
# One step forward: the frequency row vector times the transition matrix.
frequencies_2 = initial_frequencies.dot(transition_matrix)
print(frequencies_2)
# total of the propagated frequencies (it need not be 1 while the rows of
# transition_matrix do not sum to 1)
print(frequencies_2.sum())

[0.512 0.128 0.128 0.032]
0.8000000000000002


In [440]:
# A further step: propagate the previous frequencies through the same matrix.
frequencies_3 = frequencies_2.dot(transition_matrix)
print(frequencies_3)
# total of the propagated frequencies (it need not be 1 while the rows of
# transition_matrix do not sum to 1)
print(frequencies_3.sum())

[0.295936 0.135424 0.139264 0.050176]
0.6208000000000002


In [441]:
# Random state frequencies of shape (number_states, sequence_length).
# The previous zero-filled initialisation was overwritten immediately and has
# been dropped; a seeded generator makes the draw reproducible after a kernel
# restart.
rng = np.random.default_rng(0)
frequencies = rng.random((number_states, sequence_length))
# normalise every column so the state frequencies at each site sum to 1
frequencies = frequencies / frequencies.sum(axis=0)
print(frequencies)

[[0.3362897  0.97635517 0.26447004]
 [0.6637103  0.02364483 0.73552996]]


In [442]:
# Replace the random draw with fixed frequencies: row m holds the frequency of
# state m at each of the three sites (0.3 for state 0, 0.7 for state 1).
frequencies = np.array(
    [
        [0.3, 0.3, 0.3],
        [0.7, 0.7, 0.7],
    ]
)

In [443]:
# For every sequence, compute the probability of reaching it from the per-site
# state frequencies:
#   * each frequency is weighted by (1 - mutation_rate) when its state matches
#     the sequence at that site, otherwise by mutation_rate / (number_states - 1);
#   * the weighted frequencies are summed over states for each site;
#   * the per-site totals are multiplied together.
# The weights do not depend on the sequence, so they are computed once, and
# the per-site totals are computed once per sequence and reused.
# (The spelling of `transistion_probabilities` is kept because a later cell
# reads that name.)
keep_weight = 1 - mutation_rate
switch_weight = mutation_rate / (number_states - 1)
# column vector of state labels; comparing it with a sequence broadcasts to
# shape (number_states, sequence_length)
state_labels = np.arange(number_states).reshape(-1, 1)

transistion_probabilities = []
for target in sequences:
    is_target_state = state_labels == target
    probability_matrix = frequencies * np.where(is_target_state, keep_weight, switch_weight)
    site_totals = np.sum(probability_matrix, axis=0)
    sequence_probability = np.prod(site_totals)
    print(probability_matrix)
    print(site_totals)
    print(sequence_probability)
    transistion_probabilities.append(sequence_probability)

print(transistion_probabilities)

[[0.24 0.24 0.24]
 [0.14 0.14 0.14]]
[0.38 0.38 0.38]
0.054872000000000004
[[0.24 0.24 0.06]
 [0.14 0.14 0.56]]
[0.38 0.38 0.62]
0.08952799999999998
[[0.06 0.24 0.24]
 [0.56 0.14 0.14]]
[0.62 0.38 0.38]
0.08952799999999998
[[0.06 0.06 0.24]
 [0.56 0.56 0.14]]
[0.62 0.62 0.38]
0.14607199999999995
[0.054872000000000004, 0.08952799999999998, 0.08952799999999998, 0.14607199999999995]


In [444]:
# Append the frequency-derived probabilities as an extra bottom row.
# NOTE: transition_matrix is rebound to the taller matrix, so running this
# cell a second time stacks another copy of the row.
transition_matrix = np.vstack([transition_matrix, transistion_probabilities])
print(transition_matrix.round(3))

[[0.512 0.128 0.128 0.032]
 [0.128 0.512 0.032 0.008]
 [0.128 0.032 0.512 0.128]
 [0.032 0.008 0.128 0.512]
 [0.055 0.09  0.09  0.146]]


In [445]:
# Append a last column holding 1 - (row total), so that every row sums to 1.
row_remainder = 1 - transition_matrix.sum(axis=1)
transition_matrix = np.hstack((transition_matrix, row_remainder[:, np.newaxis]))
print(transition_matrix.round(3))

# row totals after adding the remainder column
print(transition_matrix.sum(axis=1))

[[0.512 0.128 0.128 0.032 0.2  ]
 [0.128 0.512 0.032 0.008 0.32 ]
 [0.128 0.032 0.512 0.128 0.2  ]
 [0.032 0.008 0.128 0.512 0.32 ]
 [0.055 0.09  0.09  0.146 0.62 ]]
[1. 1. 1. 1. 1.]


In [447]:
# Distribution after two steps when starting entirely in the first state:
# (start . T) . T, evaluated left to right.
start_state = np.array([1, 0, 0, 0, 0])
after_one_step = start_state.dot(transition_matrix)
after_one_step.dot(transition_matrix)

array([0.3069104, 0.1533296, 0.1571696, 0.0793904, 0.3032   ])