In [1]:
import numpy as np
import scipy.io
def TrainDOHMMBaumWelch_Single_Seq_variables(pi_init, A, B, O):
    M, N = B.shape[:]
    T = len(O)
    
    #alpha
    alpha = np.zeros((T, N))
    alpha[0] = B[O[0]].T*pi_init
    c = np.zeros(T)
    c[0] = 1./np.sum(alpha[0])
    alpha[0] = c[0]*alpha[0]
    
    for t in range(T - 1):
        for j in range(N):
            alpha[t + 1, j] = B[O[t + 1], j]*np.sum(alpha[t]*A[:, j].T)
        c[t+1] = 1./np.sum(alpha[t+1])
        alpha[t+1] = c[t+1]*alpha[t+1]
    P = -np.sum(np.log10(c))
    
    beta = np.zeros((T, N))
    beta[-1, :] = 1
    beta[-1] = beta[-1]*c[-1]
    for t in np.arange(T-2, -1, -1):
        for i in range(N):
            beta[t, i] = np.sum(A[i]*B[O[t + 1]]*beta[t+1])
        beta[t] = c[t]*beta[t]
    
    #ksi
    ksi = np.zeros((T - 1, N, N))
    for t in range(T-1):
        denum = 0
        for i in range(N):
            for j in range(N):
                denum = denum + alpha[t, i]*A[i, j]*B[O[t+1], j]*beta[t+1, j]
        for i in range(N):
            for j in range(N):
                ksi[t, i, j] = (alpha[t, i]*A[i, j]*B[O[t+1], j]*beta[t+1, j])/denum
    #gamma
    gamma = np.zeros((T - 1, N))
    for t in range(T - 1):
        for i in range(N):
            gamma[t, i] = 0
            for j in range(N):
                gamma[t, i] = gamma[t, i] + ksi[t, i, j]
    return alpha, beta, c, P, ksi, gamma

def TrainDOHMMBaumWelch_Single_Seq_reest_variables(M,N,T,O,gamma,ksi):
    Adenum = np.zeros(N)
    Anum = np.zeros((N, N))
    for i in range(N):
        for j in range(N):
            Adenum[i] = 0
            Anum[i, j] = 0
            for t in range(T-1):
                Adenum[i] = Adenum[i] + gamma[t, i]
                Anum[i, j] = Anum[i, j] + ksi[t, i, j]
    
    Bdenum = np.zeros(N)
    Bnum = np.zeros((M, N))
    for j in range(N):
        for k in range(M):
            Bdenum[j] = 0
            Bnum[k,j] = 0
            for t in range(T-1):
                Bdenum[j] = Bdenum[j] + gamma[t, j]
                if(O[t] == k):
                    Bnum[k, j] = Bnum[k, j] + gamma[t, j]
    return Anum, Adenum, Bnum, Bdenum
            
def MultSeqTrainDoHMMBWsc(pi_init, A, B, NumericData, maxEpoch):
    #M: state of data, N: state, K: number of data, T: data length
    M, N = B.shape[:]
    K = len(NumericData)
    epoch  = 1
    curProb = -10000
    AllProb = []
    
    alpha = []; beta = []; c = []; P = []; ksi = []; gamma = []; Anum = []; Adenum = []; Bnum = []; Bdenum = []
    
    while epoch <= maxEpoch:
        for k in range(K):
            O = NumericData[k]
            T = len(O)
            alpha_, beta_, c_, P_, ksi_, gamma_ = TrainDOHMMBaumWelch_Single_Seq_variables(pi_init, A, B, O)
            Anum_, Adenum_, Bnum_, Bdenum_ =TrainDOHMMBaumWelch_Single_Seq_reest_variables(M,N,T,O,gamma_,ksi_)
            alpha.append(alpha_); beta.append(beta_); P.append(P_); ksi.append(ksi_); gamma.append(gamma_)
            Anum.append(Anum_); Adenum.append(Adenum_); Bnum.append(Bnum_); Bdenum.append(Bdenum_)
        
        pi_init_hat = np.zeros(N)
        for i in range(N):
            pi_init_hat[i] = 0
            for k in range(K):
                pi_init_hat[i] = pi_init_hat[i] + gamma[k][0, i]
        pi_init_hat = pi_init_hat/np.sum(pi_init_hat)
        
        A_hat_num = np.zeros((N, N))
        A_hat_denum = np.zeros((N, N))
        A_hat = np.zeros((N, N))
        for i in range(N):
            for j in range(N):
                A_hat_num[i, j] = 0
                A_hat_denum[i, j] = 0
                for k in range(K):
                    A_hat_num[i, j] = A_hat_num[i, j] + (1./P[k])*Anum[k][i, j]
                    A_hat_denum[i, j] = A_hat_denum[i, j] + (1./P[k])*Adenum[k][i]
                A_hat[i, j] = A_hat_num[i, j]/A_hat_denum[i, j]
    
        
        B_hat_num = np.zeros((M, N))
        B_hat_denum = np.zeros((M, N))
        B_hat = np.zeros((N, N))
        for i in range(M):
            for j in range(N):
                B_hat_num[i, j] = 0
                B_hat_denum[i, j] = 0
                for k in range(K):
                    B_hat_num[i, j] = B_hat_num[i, j] + (1/P[k])*Bnum[k][i, j]
                    B_hat_denum[i, j] = B_hat_denum[i, j] + (1/P[k])*Bdenum[k][j]
                B_hat[i, j] = B_hat_num[i, j]/B_hat_denum[i, j]
            
        sumP = np.sum(P)
        if(sumP <= curProb):
            break
        else:
            AllProb.append(sumP)
            curProb = sumP
        
        pi_init=pi_init_hat
        A=A_hat
        B=B_hat
        epoch=epoch+1
        
        

    return pi_init, A, B
        

In [2]:
data = scipy.io.loadmat("DOHMMTrainingData.mat")
TrainingData = data["TrainingData"][0]
Train = []
for data in TrainingData:
    s = data[0]
    num_data = []
    for c in s:
        if c == 'H':
            num_data.append(0)
        else:
            num_data.append(1)
    Train.append(num_data)
            

In [3]:
pi_init = np.array([0.5, 0.5]).T
A_init = np.array([[0.6, 0.4], [0, 1]])
B_init = np.array([[0.6, 0], [0.4, 1]])
pi_init, A, B = MultSeqTrainDoHMMBWsc(pi_init, A_init, B_init, Train[:70], 1000)
print("pi ", pi_init)
print("A ", A)
print("B ", B)

pi  [1. 0.]
A  [[ 0.9940788  0.0059212]
 [-0.         1.       ]]
B  [[ 0.63271165 -0.        ]
 [ 0.36728835  1.        ]]
