In [1]:
import pandas as pd
import numpy as np

## Baum -Welch algorithm

source http://www.adeveloperdiary.com/data-science/machine-learning/derivation-and-implementation-of-baum-welch-algorithm-for-hidden-markov-model/

In [2]:
def forward(V, a, b, initial_distribution):
    alpha = np.zeros((V.shape[0], a.shape[0]))
    alpha[0, :] = initial_distribution * b[:, V[0]]
 
    for t in range(1, V.shape[0]):
        for j in range(a.shape[0]):
            # Matrix Computation Steps
            #                  ((1x2) . (1x2))      *     (1)
            #                        (1)            *     (1)
            alpha[t, j] = alpha[t - 1] @ a[:, j] * b[j, V[t]]
 
    return alpha

In [3]:
def backward(V, a, b):
    beta = np.zeros((V.shape[0], a.shape[0]))
 
    # setting beta(T) = 1
    beta[V.shape[0] - 1] = np.ones((a.shape[0]))
 
    # Loop in backward way from T-1 to
    # Due to python indexing the actual loop will be T-2 to 0
    for t in range(V.shape[0] - 2, -1, -1):
        for j in range(a.shape[0]):
            beta[t, j] = (beta[t + 1] * b[:, V[t + 1]]) @ a[j, :]
 
    return beta

In [4]:
def baum_welch(V, a, b, initial_distribution, n_iter=100):
    M = a.shape[0]
    T = len(V)

    for n in range(n_iter):
        ###estimation step
        alpha = forward(V, a, b, initial_distribution)
        beta = backward(V, a, b)

        xi = np.zeros((M, M, T - 1))
        for t in range(T - 1):
            # joint probab of observed data up to time t @ transition prob * emisssion prob as t+1 @
            # joint probab of observed data from time t+1
            denominator = (alpha[t, :].T @ a * b[:, V[t + 1]].T) @ beta[t + 1, :]
            for i in range(M):
                numerator = alpha[t, i] * a[i, :] * b[:, V[t + 1]].T * beta[t + 1, :].T
                xi[i, :, t] = numerator / denominator

        gamma = np.sum(xi, axis=1)
        ### maximization step
        a = np.sum(xi, 2) / np.sum(gamma, axis=1).reshape((-1, 1))

        # Add additional T'th element in gamma
        gamma = np.hstack((gamma, np.sum(xi[:, :, T - 2], axis=0).reshape((-1, 1))))

        K = b.shape[1]
        denominator = np.sum(gamma, axis=1)
        for l in range(K):
            b[:, l] = np.sum(gamma[:, V == l], axis=1)

        b = np.divide(b, denominator.reshape((-1, 1)))

    return a, b

## Usage

In [5]:
data = pd.read_csv('data_python.csv.txt')
 
V = data['Visible'].values
 
# Transition Probabilities
a = np.ones((2, 2))
a = a / np.sum(a, axis=1)
 
# Emission Probabilities
b = np.array(((1, 3, 5), (2, 4, 6)))
b = b / np.sum(b, axis=1).reshape((-1, 1))
 
# Equal Probabilities for the initial distribution
initial_distribution = np.array((0.5, 0.5))
 
print(baum_welch(V, a, b, initial_distribution, n_iter=100))

(array([[0.53816345, 0.46183655],
       [0.48664443, 0.51335557]]), array([[0.16277513, 0.26258073, 0.57464414],
       [0.2514996 , 0.27780971, 0.47069069]]))


In [6]:
a

array([[0.5, 0.5],
       [0.5, 0.5]])

In [7]:
b

array([[ 41.2       ,  67.5       , 137.89473684],
       [ 61.8       ,  67.5       , 124.10526316]])

In [8]:
data

Unnamed: 0,Hidden,Visible
0,B,0
1,B,1
2,B,2
3,B,2
4,B,2
...,...,...
495,A,1
496,A,2
497,B,2
498,A,1
