In [19]:
import numpy as np

In [20]:
# Initial distribution over the hidden states, in the same order as `states`.
start_probs = [0.5, 0.5]

# transition[i, j]: probability of going from hidden state i to hidden state j.
transition = np.array([
    [0.8, 0.2],
    [0.2, 0.8],
])

# emission[i, s]: probability that hidden state i emits the symbol with index s.
emission = np.array([
    [0.1, 0.2, 0.7],
    [0.7, 0.2, 0.1],
])

# Hidden state labels and the observable alphabet, each with a label -> index map.
states = ['H', 'C']
states_dic = {label: position for position, label in enumerate(states)}
sequence = ['1', '2', '3']
sequence_syms = {symbol: position for position, symbol in enumerate(sequence)}

# Observed sequence, stored as a list of single-character symbols.
test_sequence = list('3311')

In [21]:
def forward_probs():
    """Run the forward pass over the module-level ``test_sequence``.

    Reads the globals ``states``, ``test_sequence``, ``sequence_syms``,
    ``start_probs``, ``emission`` and ``transition``.

    Returns:
        tuple: ``(alpha, total)`` where ``alpha`` is a
        ``(len(states), len(test_sequence))`` array holding the forward value
        of every state at every position, and ``total`` is the sum of the
        last column of ``alpha``.
    """
    n_states = len(states)
    alpha = np.zeros((n_states, len(test_sequence)))

    for t, symbol in enumerate(test_sequence):
        for cur in range(n_states):
            obs = sequence_syms[symbol]

            # First position: start probability times emission of the symbol.
            if t == 0:
                alpha[cur, t] = start_probs[cur] * emission[cur, obs]
                continue

            # Later positions: accumulate over every possible previous state.
            contributions = [
                alpha[prev, t - 1] * emission[cur, obs] * transition[prev, cur]
                for prev in range(n_states)
            ]
            alpha[cur, t] = sum(contributions)

    return alpha, sum(alpha[:, -1])

In [22]:
def backward_probs():
    """Run the backward pass over the module-level ``test_sequence``.

    Reads the globals ``states``, ``test_sequence``, ``sequence_syms``,
    ``start_probs``, ``emission`` and ``transition``.

    Returns:
        tuple: ``(beta, total)`` where ``beta`` is a
        ``(len(states), len(test_sequence))`` array of backward values (last
        column fixed to 1), and ``total`` is the sum over states of
        ``beta[m, 0] * emission[m, first symbol] * start_probs[m]``.
    """
    n_states = len(states)
    n_obs = len(test_sequence)
    beta = np.zeros((n_states, n_obs))
    last = n_obs - 1

    # Fill the columns from the last position back to the first.
    for t in range(last, -1, -1):
        for cur in range(n_states):
            if t == last:
                beta[cur, t] = 1
                continue

            next_obs = sequence_syms[test_sequence[t + 1]]
            beta[cur, t] = sum(
                beta[nxt, t + 1] * emission[nxt, next_obs] * transition[cur, nxt]
                for nxt in range(n_states)
            )

    # Fold the first observation and the start distribution into column 0.
    first_column = [
        beta[m, 0] * emission[m, sequence_syms[test_sequence[0]]]
        for m in range(n_states)
    ]
    weighted = np.multiply(first_column, start_probs)
    return beta, sum(weighted)

In [23]:
def si_probs(forward, backward, forward_val):
    """Compute the pairwise (state, next state) values for each step.

    Reads the globals ``states``, ``test_sequence``, ``sequence_syms``,
    ``transition`` and ``emission``.

    Args:
        forward: 2-D array indexed as ``[state, position]``.
        backward: 2-D array indexed as ``[state, position]``.
        forward_val: scalar every entry is divided by.

    Returns:
        Array of shape ``(len(states), len(test_sequence) - 1, len(states))``
        where entry ``[j, t, k]`` equals
        ``forward[j, t] * backward[k, t + 1] * transition[j, k]
        * emission[k, symbol at t + 1] / forward_val``.
    """
    n_states = len(states)
    xi = np.zeros((n_states, len(test_sequence) - 1, n_states))

    # Each entry is independent, so visit every (source, step, target) index.
    for src, t, dst in np.ndindex(xi.shape):
        next_obs = sequence_syms[test_sequence[t + 1]]
        joint = forward[src, t] * backward[dst, t + 1] * transition[src, dst] * emission[dst, next_obs]
        xi[src, t, dst] = joint / forward_val

    return xi

def gamma_probs(forward, backward, forward_val):
    """Compute the per-position state values from forward and backward arrays.

    Reads the globals ``states`` and ``test_sequence`` for the output shape.

    Args:
        forward: 2-D array indexed as ``[state, position]``.
        backward: 2-D array indexed as ``[state, position]``.
        forward_val: scalar every entry is divided by.

    Returns:
        Array of shape ``(len(states), len(test_sequence))`` whose entry
        ``[j, t]`` is ``forward[j, t] * backward[j, t] / forward_val``.
    """
    gamma = np.zeros((len(states), len(test_sequence)))

    for state, t in np.ndindex(gamma.shape):
        gamma[state, t] = (forward[state, t] * backward[state, t]) / forward_val

    return gamma

In [24]:
# Iterative re-estimation of `transition` and `emission` from `test_sequence`.
# Each pass computes forward/backward values, derives the pairwise and
# per-position state values, rebuilds both matrices from them, and stops once
# the forward probability of the sequence changes by less than the tolerance.
MAX_ITERATIONS = 100          # hard cap on the number of passes
CONVERGENCE_TOL = 0.0000001   # stop when |old - new forward probability| is below this

for iteration in range(MAX_ITERATIONS):

    # Calling probability functions to calculate all probabilities
    fwd_probs, fwd_val = forward_probs()
    bwd_probs, bwd_val = backward_probs()
    si_probabilities = si_probs(fwd_probs, bwd_probs, fwd_val)
    gamma_probabilities = gamma_probs(fwd_probs, bwd_probs, fwd_val)
    print('Gamma:')
    print(gamma_probabilities)
    print()
    print('Shi Matrix:')
    print(si_probabilities)
    print()

    # Re-estimated transition ('a') and emission ('b') matrices
    a = np.zeros((len(states), len(states)))
    b = np.zeros((len(states), len(sequence_syms)))

    # 'a' matrix: a[j, i] = sum_t si[j, t, i] / sum_t sum_k si[j, t, k]
    for j in range(len(states)):
        # The denominator only depends on the source state j, so it is
        # computed once per row instead of once per (j, i) pair.
        denomenator_a = sum(
            si_probabilities[j, t_x, i_x]
            for t_x in range(len(test_sequence) - 1)
            for i_x in range(len(states))
        )

        for i in range(len(states)):
            for t in range(len(test_sequence)-1):
                a[j,i] = a[j,i] + si_probabilities[j,t,i]

            # Guard against dividing by zero; such rows are left at 0.
            if (denomenator_a == 0):
                a[j,i] = 0
            else:
                a[j,i] = a[j,i]/denomenator_a

    # 'b' matrix: b[j, i] = sum of gamma[j, t] over positions t showing
    # symbol i, divided by the sum of gamma[j, t] over all positions.
    # Positions of each symbol depend only on the symbol, so find them once.
    symbol_positions = [
        [idx for idx, val in enumerate(test_sequence) if val == symbol]
        for symbol in sequence
    ]

    for j in range(len(states)): #states
        # The denominator only depends on the state j.
        denomenator_b = sum( gamma_probabilities[j,:] )

        for i in range(len(sequence)): #seq
            indices = symbol_positions[i]
            numerator_b = sum( gamma_probabilities[j,indices] )

            if (denomenator_b == 0):
                b[j,i] = 0
            else:
                b[j, i] = numerator_b / denomenator_b


    # Plain arrays print exactly like 2-D np.matrix objects, so the
    # no-longer-recommended np.matrix wrapper is not needed here.
    print('\nMatrix a:\n')
    print(a.round(decimals=4))
    print('\nMatrix b:\n')
    print(b.round(decimals=4))

    # Install the new parameters; forward_probs() reads these globals.
    transition = a
    emission = b

    # Compare the sequence probability before and after the update.
    new_fwd_temp, new_fwd_temp_val = forward_probs()
    print('New forward probability: ', new_fwd_temp_val)
    diff =  np.abs(fwd_val - new_fwd_temp_val)
    print('Difference in forward probability: ', diff)

    if (diff < CONVERGENCE_TOL):
        break


# NOTE(review): `c` is not read anywhere in the visible cells - presumably a
# leftover debugging anchor; confirm it is unused elsewhere before removing.
c = 1

Gamma:
[[0.91517721 0.8470622  0.1529378  0.08482279]
 [0.08482279 0.1529378  0.8470622  0.91517721]]

Shi Matrix:
[[[0.81785316 0.09732405]
  [0.15089367 0.69616853]
  [0.05561374 0.09732405]]

 [[0.02920904 0.05561374]
  [0.00204413 0.15089367]
  [0.02920904 0.81785316]]]


Matrix a:

[[0.5349 0.4651]
 [0.0557 0.9443]]

Matrix b:

[[0.1189 0.     0.8811]
 [0.8811 0.     0.1189]]
New forward probability:  0.10045509699911193
Difference in forward probability:  0.07677749699911192
Gamma:
[[0.94258319 0.78347514 0.06406184 0.01599853]
 [0.05741681 0.21652486 0.93593816 0.98400147]]

Shi Matrix:
[[[0.77261293 0.16997026]
  [0.06309279 0.72038235]
  [0.00860403 0.0554578 ]]

 [[0.01086221 0.0465546 ]
  [0.00096904 0.21555582]
  [0.00739449 0.92854367]]]


Matrix a:

[[0.4716 0.5284]
 [0.0159 0.9841]]

Matrix b:

[[0.0443 0.     0.9557]
 [0.8751 0.     0.1249]]
New forward probability:  0.11751065896362725
Difference in forward probability:  0.017055561964515326
Gamma:
[[0.94821697 0.75228