In [1]:
import numpy as np

In [2]:
# Tag labels for the hidden states. Position i in this tuple corresponds to
# row i of both A and B below (and to column i of A).
states = ('NNP', 'MD', 'VB', 'JJ', 'NN', 'RB', 'DT')

# Initial distribution: pi[i] is the probability of starting in states[i].
# NOTE(review): these values (and the rows of A) sum to less than 1,
# presumably because only a subset of the full tag set is listed -- confirm.
pi = (0.2767, 0.0006, 0.0031, 0.0453, 0.0449, 0.0510, 0.2026)

# Transition matrix: A[i, j] is the probability of moving from states[i]
# to states[j]. Column order matches `states`.
A = np.array([
    [0.3777, 0.0110, 0.0009, 0.0084, 0.0584, 0.0090, 0.0025],  # from NNP
    [0.0008, 0.0002, 0.7968, 0.0005, 0.0008, 0.1698, 0.0041],  # from MD
    [0.0322, 0.0005, 0.0050, 0.0837, 0.0615, 0.0514, 0.2231],  # from VB
    [0.0366, 0.0004, 0.0001, 0.0733, 0.4509, 0.0036, 0.0036],  # from JJ
    [0.0096, 0.0176, 0.0014, 0.0086, 0.1216, 0.0177, 0.0068],  # from NN
    [0.0068, 0.0102, 0.1011, 0.1012, 0.0120, 0.0728, 0.0479],  # from RB
    [0.1147, 0.0021, 0.0002, 0.2157, 0.4744, 0.0102, 0.0017],  # from DT
])

# Emission matrix: B[i, k] is the probability that states[i] emits
# observation k. The five columns are indexed by token position in the
# example sentence decoded further down (Janet, will, back, the, bill).
# NOTE(review): the DT row repeats RB's 0.010446 -- confirm against the
# data source that this is intended and not a copy-paste slip.
B = np.array([
    [0.000032, 0.0, 0.0, 0.000048, 0.0],       # NNP
    [0.0, 0.308431, 0.0, 0.0, 0.0],            # MD
    [0.0, 0.000028, 0.000672, 0.0, 0.000028],  # VB
    [0.0, 0.0, 0.000340, 0.0, 0.0],            # JJ
    [0.0, 0.000200, 0.000223, 0.0, 0.002337],  # NN
    [0.0, 0.0, 0.010446, 0.0, 0.0],            # RB
    [0.0, 0.0, 0.0, 0.010446, 0.0],            # DT
])
print(B.shape)

(7, 5)


In [3]:
def viterbi(observations, A, B, pi, state_names=None):
    """Decode the most likely hidden-state path of an HMM (Viterbi algorithm).

    Parameters
    ----------
    observations : sequence of int
        Observation indices, one per time step; each selects a column of B.
    A : array_like, shape (N, N)
        Transition matrix; A[i, j] is the probability of going from state i
        to state j.
    B : array_like, shape (N, M)
        Emission matrix; B[i, k] is the probability that state i emits
        observation k.
    pi : array_like, shape (N,)
        Initial state probabilities.
    state_names : sequence, optional
        Label for each state index. When omitted, the module-level
        ``states`` sequence is used, which keeps existing four-argument
        calls working.

    Returns
    -------
    path_idx : list of int
        State index chosen at every time step (empty for empty input).
    path_labels : list
        ``state_names[i]`` for every index in ``path_idx``.

    Notes
    -----
    Two tables are filled in internally:

    V : numpy.ndarray, shape (N, T)
        V[s][t] is the maximum probability of any state sequence that
        explains the first t+1 observations and ends in state s.
    prev : numpy.ndarray, shape (T - 1, N)
        prev[t-1][s] is the state at time t-1 that maximises V[s][t].

    V corresponds to delta and prev to psi in the usual HMM notation.
    Probabilities are multiplied directly (no log-space), so very long
    sequences can underflow to zero.
    """
    obs = list(observations)
    T = len(obs)
    if T == 0:
        # Nothing to decode; also avoids allocating a (-1, N) pointer table.
        return [], []

    A = np.asarray(A, dtype=float)
    B = np.asarray(B, dtype=float)
    pi = np.asarray(pi, dtype=float)
    N = A.shape[0]

    # DP matrix containing max likelihood of each state at a given time.
    V = np.zeros((N, T))
    prev = np.zeros((T - 1, N), dtype=int)

    # Initialise from the function's own argument, not from a global.
    V[:, 0] = pi * B[:, obs[0]]

    for t in range(1, T):
        # scores[i, j] = V[i, t-1] * A[i, j] * B[j, obs[t]]
        scores = V[:, t - 1, np.newaxis] * A * B[:, obs[t]]
        prev[t - 1] = np.argmax(scores, axis=0)
        V[:, t] = np.max(scores, axis=0)

    # Backtrack from the best final state through the stored pointers.
    path_idx = [int(np.argmax(V[:, -1]))]
    for ptrs in prev[::-1]:
        path_idx.append(int(ptrs[path_idx[-1]]))
    path_idx.reverse()

    if state_names is None:
        state_names = states  # module-level default, as in the original code

    # path_idx is a real list, so it can be both returned and iterated here.
    return path_idx, [state_names[i] for i in path_idx]

In [4]:
# Example sentence as word/TAG tokens (the tag suffix is not parsed here).
sentence = 'Janet/NNP will/MD back/VB the/DT bill/NN'
observations = sentence.split()

# Each token is represented by its position in the sentence, which is the
# column of B holding its emission probabilities.
obs_seq = range(len(observations))

# x receives the state-index path, y the matching tag labels.
x, y = viterbi(observations=obs_seq, A=A, B=B, pi=pi)
print(*y, sep=', ')

NNP, MD, VB, DT, NN
