In [1]:
from __future__ import print_function
import tensorflow as tf
import numpy as np

In [1]:
class HMM(object):
    """
    A class for Hidden Markov Models.

    The model attributes are:
    - K :: the number of states
    - P :: the K by K transition matrix (from state i to state j,
        (i, j) in [1..K])
    - p0 :: the initial distribution (defaults to starting in state 0)
    """

    def __init__(self, P, p0=None):
        self.K = P.shape[0]

        self.P = P
        self.logP = np.log(self.P)

        if p0 is None:
            self.p0 = np.ones(self.K)
            self.p0 /= sum(self.p0)
        elif len(p0) != self.K:
            raise ValueError(
                'dimensions of p0 {} must match P[0] {}'.format(
                    p0.shape, P.shape[0]))
        else:
            self.p0 = p0
        self.logp0 = np.log(self.p0)


class HMMNumpy(HMM):

    def forward_backward(self, y):
        # set up
        nT = y.shape[0]
        posterior = np.zeros((nT, self.K))
        forward = np.zeros((nT + 1, self.K))
        backward = np.zeros((nT + 1, self.K))

        # forward pass
        forward[0, :] = 1.0 / self.K
        for t in range(nT):
            tmp = np.multiply(
                np.matmul(forward[t, :], self.P),
                y[t]
            )

            forward[t + 1, :] = tmp / np.sum(tmp)

        # backward pass
        backward[-1, :] = 1.0
        for t in range(nT, 0, -1):
            tmp = np.matmul(
                np.matmul(
                    self.P, np.diag(y[t - 1])
                ),
                backward[t, :].transpose()
            ).transpose()

            backward[t - 1, :] = tmp / np.sum(tmp)

        # remove initial/final probabilities
        forward = forward[1:, :]
        backward = backward[:-1, :]

        # combine and normalize
        posterior = np.array(forward) * np.array(backward)
        # [:,None] expands sum to be correct size
        posterior = posterior / np.sum(posterior, 1)[:, None]

        return posterior, forward, backward

    def _viterbi_partial_forward(self, scores):
        tmpMat = np.zeros((self.K, self.K))
        for i in range(self.K):
            for j in range(self.K):
                tmpMat[i, j] = scores[i] + self.logP[i, j]
        return tmpMat

    def viterbi_decode(self, y):
        y = np.array(y)

        nT = y.shape[0]

        pathStates = np.zeros((nT, self.K), dtype=np.int)
        pathScores = np.zeros((nT, self.K))

        # initialize
        pathScores[0] = self.logp0 + np.log(y[0])

        for t, yy in enumerate(y[1:]):
            # propagate forward
            tmpMat = self._viterbi_partial_forward(pathScores[t])

            # the inferred state
            pathStates[t + 1] = np.argmax(tmpMat, 0)
            pathScores[t + 1] = np.max(tmpMat, 0) + np.log(yy)

        # now backtrack viterbi to find states
        s = np.zeros(nT, dtype=np.int)
        s[-1] = np.argmax(pathScores[-1])
        for t in range(nT - 1, 0, -1):
            s[t - 1] = pathStates[t, s[t]]

        return s, pathScores


class HMMTensorflow(HMM):

    def forward_backward(self, y):
        """
        runs forward backward algorithm on state probabilities y

        Arguments
        ---------
        y : np.array : shape (T, K) where T is number of timesteps and
            K is the number of states

        Returns
        -------
        (posterior, forward, backward)
        posterior : list of length T of tensorflow graph nodes representing
            the posterior probability of each state at each time step
        forward : list of length T of tensorflow graph nodes representing
            the forward probability of each state at each time step
        backward : list of length T of tensorflow graph nodes representing
            the backward probability of each state at each time step
        """
        # set up
        nT = y.shape[0]

        posterior = np.zeros((nT, self.K))
        forward = []
        backward = np.zeros((nT + 1, self.K))

        # forward pass
        forward.append(
            tf.ones((1, self.K), dtype=tf.float64) * (1.0 / self.K)
        )
        for t in range(nT):
            # NOTE: np.matrix expands forward[t, :] into 2d and causes * to be
            # matrix multiplies instead of element wise that an array would be
            tmp = tf.mul(
                tf.matmul(forward[t], self.P),
                y[t]
            )
            tmp = tf.squeeze(tmp)
            forward.append(tmp / tf.reduce_sum(tmp))

        # backward pass
        backward = [None] * (nT + 1)
        backward[-1] = tf.ones((1, self.K), dtype=tf.float64) * (1.0 / self.K)
        for t in range(nT, 0, -1):
            tmp = tf.transpose(
                tf.matmul(
                    tf.matmul(self.P, tf.diag(y[t - 1])),
                    tf.transpose(backward[t])
                )
            )
            tmp = tf.squeeze(tmp)
            backward[t - 1] = tmp / tf.reduce_sum(tmp)

        # remove initial/final probabilities
        forward = forward[1:]
        backward = backward[:-1]

        # combine and normalize
        posterior = [f * b for f, b in zip(forward, backward)]
        posterior = [p / tf.reduce_sum(p) for p in posterior]

        return posterior, forward, backward

    def _viterbi_partial_forward(self, scores):
        # first convert scores into shape [K, 1]
        # then concatenate K of them into shape [K, K]
        expanded_scores = tf.concat(
            1, [tf.expand_dims(scores, 1)] * self.K
        )
#         print(expanded_scores.eval(session = tf.Session()))
#         print()
#         print(self.logP)
#         print()
        return expanded_scores + self.logP

    def viterbi_decode(self, y, nT):
        """
        Runs viterbi decode on state probabilies y.

        Arguments
        ---------
        y : np.array : shape (T, K) where T is number of timesteps and
            K is the number of states
        nT : int : number of timesteps in y

        Returns
        -------
        (s, pathScores)
        s : list of length T of tensorflow ints : represents the most likely
            state at each time step.
        pathScores : list of length T of tensorflow tensor of length K
            each value at (t, k) is the log likliehood score in state k at
            time t.  sum(pathScores[t, :]) will not necessary == 1
        """

        # pathStates and pathScores wil be of type tf.Tensor.  They
        # are lists since tensorflow doesn't allow indexing, and the
        # list and order are only really necessary to build the unrolled
        # graph.  We never do any computation across all of time at once
        pathStates = []
        pathScores = []

        # initialize
        pathStates.append(None)
        pathScores.append(self.logp0 + np.log(y[0]))

        for t, yy in enumerate(y[1:]):
            # propagate forward
            tmpMat = self._viterbi_partial_forward(pathScores[t])
            print(tmpMat.eval(session = tf.Session()))
            # the inferred state
            pathStates.append(tf.argmax(tmpMat, 0))
            pathScores.append(tf.reduce_max(tmpMat, 0) + np.log(yy))

        # now backtrack viterbi to find states
        s = [0] * nT
        s[-1] = tf.argmax(pathScores[-1], 0)
        for t in range(nT - 1, 0, -1):
            s[t - 1] = tf.gather(pathStates[t], s[t])

        return s, pathScores

In [2]:
p0 = np.array([0.6, 0.4])

emi = np.array([[0.5, 0.1],
                [0.4, 0.3],
                [0.1, 0.6]])

trans = np.array([[0.7, 0.3],
                  [0.4, 0.6]])

states = {0:'Healthy', 1:'Fever'}
obs = {0:'normal', 1:'cold', 2:'dizzy'}

obs_seq = np.array([0, 1, 2])


In [3]:
def dptable(V):
    print(" ".join(("%10d" % i) for i in range(V.shape[0])))
    for i, y in enumerate(V.T):
        print("%.7s: " % states[i] +" ".join("%.7s" % ("%f" % yy) for yy in y))

In [4]:
tf_model = HMMTensorflow(trans, p0)

y = emi[obs_seq]
tf_s_graph, tf_scores_graph = tf_model.viterbi_decode(y, len(y))
tf_s = tf.Session().run(tf_s_graph)
print("Most likely States: ", [obs[s] for s in tf_s])

tf_scores = [tf_scores_graph[0]]
tf_scores.extend([tf.Session().run(g) for g in tf_scores_graph[1:]])
pathScores = np.array(np.exp(tf_scores))
dptable(pathScores)

[[-1.56064775 -2.40794561]
 [-4.13516656 -3.72970145]]
[[-2.83361342 -3.68091128]
 [-4.52820914 -4.12274404]]
Most likely States:  ['normal', 'normal', 'cold']
         0          1          2
Healthy: 0.30000 0.08400 0.00588
Fever: 0.04000 0.02700 0.01512


In [5]:
np_model = HMMNumpy(trans, p0)

y = emi[obs_seq]
np_states, np_scores = np_model.viterbi_decode(y)
print("Most likely States: ",[obs[s] for s in np_states])
pathScores = np.array(np.exp(np_scores))
dptable(pathScores)

Most likely States:  ['normal', 'normal', 'cold']
         0          1          2
Healthy: 0.30000 0.08400 0.00588
Fever: 0.04000 0.02700 0.01512


In [9]:
import tensorflow as tf
import numpy as np


class HMM(object):
    """
    A class for Hidden Markov Models.

    The model attributes are:
    - K :: the number of states
    - P :: the K by K transition matrix (from state i to state j,
        (i, j) in [1..K])
    - p0 :: the initial distribution (defaults to starting in state 0)
    """

    def __init__(self, P, p0=None):
        self.K = P.shape[0]

        self.P = P
        self.logP = np.log(self.P)

        if p0 is None:
            self.p0 = np.ones(self.K)
            self.p0 /= sum(self.p0)
        elif len(p0) != self.K:
            raise ValueError(
                'dimensions of p0 {} must match P[0] {}'.format(
                    p0.shape, P.shape[0]))
        else:
            self.p0 = p0
        self.logp0 = np.log(self.p0)


class HMMTensorflow(HMM):

    def forward_backward(self, y):
        """
        runs forward backward algorithm on state probabilities y

        Arguments
        ---------
        y : np.array : shape (T, K) where T is number of timesteps and
            K is the number of states

        Returns
        -------
        (posterior, forward, backward)
        posterior : list of length T of tensorflow graph nodes representing
            the posterior probability of each state at each time step
        forward : list of length T of tensorflow graph nodes representing
            the forward probability of each state at each time step
        backward : list of length T of tensorflow graph nodes representing
            the backward probability of each state at each time step
        """
        # set up
        nT = y.shape[0]

        posterior = np.zeros((nT, self.K))
        forward = []
        backward = np.zeros((nT + 1, self.K))

        # forward pass
        forward.append(
            tf.ones((1, self.K), dtype=tf.float64) * (1.0 / self.K)
        )
        for t in range(nT):
            # NOTE: np.matrix expands forward[t, :] into 2d and causes * to be
            # matrix multiplies instead of element wise that an array would be
            tmp = tf.mul(
                tf.matmul(forward[t], self.P),
                y[t]
            )

            forward.append(tmp / tf.reduce_sum(tmp))

        # backward pass
        backward = [None] * (nT + 1)
        backward[-1] = tf.ones((1, self.K), dtype=tf.float64) * (1.0 / self.K)
        for t in range(nT, 0, -1):
            tmp = tf.transpose(
                tf.matmul(
                    tf.matmul(self.P, tf.diag(y[t - 1])),
                    tf.transpose(backward[t])
                )
            )
            backward[t - 1] = tmp / tf.reduce_sum(tmp)

        # remove initial/final probabilities
        forward = forward[1:]
        backward = backward[:-1]

        # combine and normalize
        posterior = [f * b for f, b in zip(forward, backward)]
        posterior = [p / tf.reduce_sum(p) for p in posterior]

        return posterior, forward, backward

    def _viterbi_partial_forward(self, scores):
        # first convert scores into shape [K, 1]
        # then concatenate K of them into shape [K, K]
        expanded_scores = tf.concat(
            1, [tf.expand_dims(scores, 1)] * self.K
        )
        return expanded_scores + self.logP

    def viterbi_decode(self, y, nT):
        """
        Runs viterbi decode on state probabilies y.

        Arguments
        ---------
        y : np.array : shape (T, K) where T is number of timesteps and
            K is the number of states
        nT : int : number of timesteps in y

        Returns
        -------
        (s, pathScores)
        s : list of length T of tensorflow ints : represents the most likely
            state at each time step.
        pathScores : list of length T of tensorflow tensor of length K
            each value at (t, k) is the log likliehood score in state k at
            time t.  sum(pathScores[t, :]) will not necessary == 1
        """

        # pathStates and pathScores wil be of type tf.Tensor.  They
        # are lists since tensorflow doesn't allow indexing, and the
        # list and order are only really necessary to build the unrolled
        # graph.  We never do any computation across all of time at once
        pathStates = []
        pathScores = []

        # initialize
        pathStates.append(None)
        pathScores.append(self.logp0 + np.log(y[0]))

        for t, yy in enumerate(y[1:]):
            # propagate forward
            tmpMat = self._viterbi_partial_forward(pathScores[t])

            # the inferred state
            pathStates.append(tf.argmax(tmpMat, 0))
            pathScores.append(tf.reduce_max(tmpMat, 0) + np.log(yy))

        # now backtrack viterbi to find states
        s = [0] * nT
        s[-1] = tf.argmax(pathScores[-1], 0)
        for t in range(nT - 1, 0, -1):
            s[t - 1] = tf.gather(pathStates[t], s[t])

        return s, pathScores

In [3]:
p0 = np.array([0.5, 0.5])

emi = np.array([[0.9, 0.2],
                [0.1, 0.8]])

trans = np.array([[0.7, 0.3],
                  [0.3, 0.7]])

states = {0:'rain', 1:'no_rain'}
obs = {0:'umbrella', 1:'no_umbrella'}

obs_seq = np.array([0, 0, 1, 0, 0])

In [78]:
# Number of states
S = trans.shape[0]
# Emission probability
E = tf.constant(emi, name='emission_matrix')
# Transition matrix
T = tf.constant(trans, name='transition_matrix')
# Initial state vector
T0 = tf.constant(p0, name='inital_state_vector')
        
# length of observed sequence
N = len(obs_seq)

# shape path Variables
shape = [N, S]
shape_ext = [N+1, S]
# observed sequence
x = tf.constant(obs_seq, name='observation_sequence')

forward = tf.Variable(tf.zeros(shape_ext, dtype=tf.float64), name='forward')
backward = tf.Variable(tf.zeros(shape_ext, dtype=tf.float64), name='backward')
posteriror = tf.Variable(tf.zeros(shape, dtype=tf.float64), name='posteriror')

# forward pass
forward = tf.scatter_update(forward, 0, T0)

for t in range(N):
    # NOTE: np.matrix expands forward[t, :] into 2d and causes * to be
    # matrix multiplies instead of element wise that an array would be
    tmp = tf.mul(
        tf.matmul(forward[t], self.P),
        y[t]
    )

    forward.append(tmp / tf.reduce_sum(tmp))

# backward pass
backward = [None] * (nT + 1)
backward[-1] = tf.ones((1, self.K), dtype=tf.float64) * (1.0 / self.K)
for t in range(nT, 0, -1):
    tmp = tf.transpose(
        tf.matmul(
            tf.matmul(self.P, tf.diag(y[t - 1])),
            tf.transpose(backward[t])
        )
    )
    backward[t - 1] = tmp / tf.reduce_sum(tmp)

# remove initial/final probabilities
forward = forward[1:]
backward = backward[:-1]

# combine and normalize
posterior = tf.mul(forward, backward)
# [:,None] expands sum to be correct size
#posterior = posterior / np.sum(posterior, 1)[:, None]

#posterior = [p / tf.reduce_sum(p) for p in posterior]



ValueError: Shape (6, 2) must have rank 1

In [6]:
sess = tf.InteractiveSession()

In [13]:
# Number of states
S = trans.shape[0]
# Emission probability
E = tf.constant(emi, name='emission_matrix')
# Transition matrix
T = tf.constant(trans, name='transition_matrix')
# Initial state vector
T0 = tf.constant(p0, name='inital_state_vector')
        
# length of observed sequence
N = len(obs_seq)

# shape path Variables
shape = [N, S]
shape_ext = [N+1, S]
# observed sequence
x = tf.constant(obs_seq, dtype=tf.int32, name='observation_sequence')

forward = tf.Variable(tf.zeros(shape_ext, dtype=tf.float64), name='forward')
backward = tf.Variable(tf.zeros(shape_ext, dtype=tf.float64), name='backward')
posterior = tf.Variable(tf.zeros(shape, dtype=tf.float64), name='posteriror')

obs_prob_seq = tf.gather(E, x)

# forward pass
forward = tf.scatter_update(forward, 0, T0)

for step in range(N):
    prev_prob = tf.reshape(forward[step, :], [1, -1])
    prior_prob = tf.matmul(prev_prob, T)
    forward_score = tf.mul(prior_prob, tf.cast(obs_prob_seq[step, :], tf.float64))
    # Normalize score into a probability
    forward_prob = tf.reshape(forward_score / tf.reduce_sum(forward_score), [-1])
    # Update forward matrix
    forward = tf.scatter_update(forward, step + 1, forward_prob)

# backward pass
backward = tf.scatter_update(backward, N, tf.ones([S], dtype=tf.float64)) 

for step in range(N, 0, -1):
    next_prob = tf.reshape(backward[step, :], [-1, 1])
    obs_prob = tf.diag(obs_prob_seq[step - 1, :])
    prior_prob = tf.matmul(T, obs_prob)
    backward_score = tf.matmul(prior_prob, next_prob)
    backward_prob = tf.reshape(backward_score / tf.reduce_sum(backward_score), [-1])
    
    # Update backward matrix
    backward = tf.scatter_update(backward, step - 1, backward_prob)

forward = tf.slice(forward, [1,0], [5,2])
backward = tf.slice(backward, [0,0], [5,2])

# combine and normalize
posterior = tf.mul(forward, backward)
a= tf.reduce_sum(posterior, 1)
posterior = posterior / tf.reshape(a, [-1, 1])

In [15]:
sess.run(tf.initialize_all_variables())
obs_prob_seq.eval()

array([[ 0.9,  0.2],
       [ 0.9,  0.2],
       [ 0.1,  0.8],
       [ 0.9,  0.2],
       [ 0.9,  0.2]])

In [22]:
tf_model = HMMTensorflow(trans, p0)

y = emi[obs_seq]
p, f, b = tf_model.forward_backward(y)
[tf.Session().run(g) for g in p]

[array([[ 0.89183984,  0.10816016]]),
 array([[ 0.91668737,  0.08331263]]),
 array([[ 0.12443362,  0.87556638]]),
 array([[ 0.83650094,  0.16349906]]),
 array([[ 0.91668737,  0.08331263]])]

In [23]:
[tf.Session().run(g) for g in f]

[array([[ 0.81818182,  0.18181818]]),
 array([[ 0.88335704,  0.11664296]]),
 array([[ 0.19066794,  0.80933206]]),
 array([[ 0.730794,  0.269206]]),
 array([[ 0.86733889,  0.13266111]])]

In [24]:
[tf.Session().run(g) for g in b]

[array([[ 0.64693556,  0.35306444]]),
 array([[ 0.5923176,  0.4076824]]),
 array([[ 0.37626718,  0.62373282]]),
 array([[ 0.65334282,  0.34665718]]),
 array([[ 0.62727273,  0.37272727]])]

In [7]:
np_model = HMMNumpy(trans, p0)

y = emi[obs_seq]
results = np_model.forward_backward(y)
for pathScores in results:
    np_states = np.argmax(pathScores, axis=1)
    print("Most likely States: ",[obs[s] for s in np_states])
    print()
    dptable(pathScores)
    print()

Most likely States:  ['umbrella', 'umbrella', 'no_umbrella', 'umbrella', 'umbrella']

         0          1          2          3          4
rain: 0.89184 0.91668 0.12443 0.83650 0.91668
no_rain: 0.10816 0.08331 0.87556 0.16349 0.08331

Most likely States:  ['umbrella', 'umbrella', 'no_umbrella', 'umbrella', 'umbrella']

         0          1          2          3          4
rain: 0.81818 0.88335 0.19066 0.73079 0.86733
no_rain: 0.18181 0.11664 0.80933 0.26920 0.13266

Most likely States:  ['umbrella', 'umbrella', 'no_umbrella', 'umbrella', 'umbrella']

         0          1          2          3          4
rain: 0.64693 0.59231 0.37626 0.65334 0.62727
no_rain: 0.35306 0.40768 0.62373 0.34665 0.37272

