In [1]:
import tensorflow as tf
import numpy as np
from __future__ import print_function

In [2]:
def dptable(state_prob):
    print(" ".join(("%10d" % i) for i in range(state_prob.shape[0])))
    for i, prob in enumerate(state_prob.T):
        print("%.7s: " % states[i] +" ".join("%.7s" % ("%f" % p) for p in prob))

In [3]:
class HiddenMarkovModel(object):

    """
    Hidden Markov Model Class

    Parameters:
    -----------
    
    - S: Number of states.
    - T: Transition matrix of size S by S
         stores probability from state i to state j.
    - E: Emission matrix of size S by N (number of observations)
         stores the probability of observing  O_j  from state  S_i. 
    - T0: Initial state probabilities of size S.
    """

    def __init__(self, T, E, T0):
        # Number of states
        self.S = tf.constant(T.shape[0], name='num_states')
        
        # Emission probability
        self.E = tf.constant(E, name='emission_matrix')

        # Transition matrix
        self.T = tf.constant(T,name='transition_matrix')

        # Initial state vector
        self.T0 = tf.constant(T0, name='inital_state_vector')
    
    def _forward(self, y):
        # forward pass
        # TensorFlow doesn't support indexing. List of Tensors will be used instead
        forward = []
        
        # initialize with state probabilities T0
        forward.append(tf.constant(self.T0, shape=(1, self.S), dtype=tf.float64))
        #forward.append(tf.ones((1, self.S), dtype=tf.float64) * self.T0)
        
        # forward belief propagation
        for step in range(y.shape[0]):

            forward_score = tf.mul(tf.matmul(forward[step], self.T), y[step])
            
            # normalize scores into probabilities
            forward.append(forward_score / tf.reduce_sum(forward_score))

        return forward[1:] # remove initial value
        

    def _backward(self, y):
        
        N = y.shape[0]
        backward = np.zeros((nT + 1, self.K))

        # backward pass
        backward = [None] * (nT + 1)
        backward[-1] = tf.ones((1, self.K), dtype=tf.float64) * (1.0 / self.K)
        for t in range(nT, 0, -1):
            backward_score = tf.transpose(
                tf.matmul(
                    tf.matmul(self.P, tf.diag(y[t - 1])),
                    tf.transpose(backward[t])
                )
            )
            backward[t - 1] = tmp / tf.reduce_sum(tmp)      
        
    def forward_backward(self, y):
        """
        runs forward backward algorithm on state probabilities y

        Arguments
        ---------
        y : np.array : shape (T, K) where T is number of timesteps and
            K is the number of states

        Returns
        -------
        (posterior, forward, backward)
        posterior : list of length T of tensorflow graph nodes representing
            the posterior probability of each state at each time step
        forward : list of length T of tensorflow graph nodes representing
            the forward probability of each state at each time step
        backward : list of length T of tensorflow graph nodes representing
            the backward probability of each state at each time step
        """
        # set up
        nT = y.shape[0]

        posterior = np.zeros((nT, self.K))
        


        # remove initial/final probabilities
        forward = forward[1:]
        backward = backward[:-1]

        # combine and normalize
        posterior = [f * b for f, b in zip(forward, backward)]
        posterior = [p / tf.reduce_sum(p) for p in posterior]

        return posterior, forward, backward

    def _viterbi_partial_forward(self, scores):
        # first convert scores into shape [K, 1]
        # then concatenate K of them into shape [K, K]
        
        scores_reshape = tf.reshape(scores, (-1,1))
        return tf.add(scores_reshape, tf.log(self.T))


    def viterbi_decode(self, obs_seq):

        pathStates = []
        pathScores = []
        
        N = len(obs_seq)
        x = tf.constant(obs_seq, name='observation_sequence')
        
        obs_prob_seq = tf.log(tf.gather(self.E, x))
        obs_prob_list = tf.split(0, N, obs_prob_seq)

        # initialize
        pathStates.append(None)
        
        pathScores.append(tf.log(self.T0) + tf.squeeze(obs_prob_list[0]))
            
        
        for step, obs_prob in enumerate(obs_prob_list[1:]):
            # propagate forward
            tmpMat = self._viterbi_partial_forward(pathScores[step])

            # the inferred state
            pathStates.append(tf.argmax(tmpMat, 0))
            pathScores.append(tf.reduce_max(tmpMat, 0) + tf.squeeze(obs_prob))

        # now backtrack viterbi to find states
        s = [0] * N
        s[-1] = tf.argmax(pathScores[-1], 0)
        for step in range(N - 1, 0, -1):
            s[step - 1] = tf.gather(pathStates[step], s[step])

        return s, pathScores
    
    def run_viterbi_decode(self, obs_seq):
        with tf.Session() as sess:
            
            state_graph, state_prob = self.viterbi_decode(obs_seq)
            states_seq = sess.run(state_graph)
            state_prob = [sess.run(tf.exp(g)) for g in state_prob]

        return states_seq, np.array(state_prob)

In [297]:
class HiddenMarkovModel(object):

    """
    Hidden Markov Model Class

    Parameters:
    -----------
    
    - S: Number of states.
    - T: Transition matrix of size S by S
         stores probability from state i to state j.
    - E: Emission matrix of size S by N (number of observations)
         stores the probability of observing  O_j  from state  S_i. 
    - T0: Initial state probabilities of size S.
    """

    def __init__(self, T, E, T0):
        # Number of states
        self.S = tf.constant(T.shape[0], name='num_states')
        
        # Emission probability
        self.E = tf.constant(E, name='emission_matrix')

        # Transition matrix
        self.T = tf.constant(T,name='transition_matrix')

        # Initial state vector
        self.T0 = tf.constant(T0, name='inital_state_vector')
    
    def _viterbi_partial_forward(self, scores):
        # first convert scores into shape [K, 1]
        # then concatenate K of them into shape [K, K]
        
        scores_reshape = tf.reshape(scores, (-1,1))
        return tf.add(scores_reshape, tf.log(self.T))


    def viterbi_decode(self, obs_seq):

        N = len(obs_seq)
        shape = tf.pack([N, self.S])
        pathStates = tf.Variable(tf.zeros(shape, dtype=tf.int64), validate_shape=False, name='States')
        pathScores = tf.Variable(tf.zeros(shape, dtype=tf.float64),  validate_shape=False, name='Probabilities')        
        
        x = tf.constant(obs_seq, name='observation_sequence')
        
        obs_prob_seq = tf.log(tf.gather(self.E, x))
        obs_prob_list = tf.split(0, N, obs_prob_seq)

        tf.scatter_update(pathScores, 0, tf.log(self.T0) + tf.squeeze(obs_prob_list[0]))
            
        
        for step, obs_prob in enumerate(obs_prob_list[1:]):
            # propagate forward
            
            tmpMat = self._viterbi_partial_forward(tf.gather(pathScores, step))

            # the inferred state
            tf.scatter_update(pathStates, step + 1, tf.argmax(tmpMat, 0))
            tf.scatter_update(pathScores, step + 1, tf.reduce_max(tmpMat, 0) + tf.squeeze(obs_prob))

        # now backtrack viterbi to find states
        
        states_seq = tf.Variable(tf.zeros([N], dtype=tf.int64))
        print(tf.argmax(tf.gather(pathScores, -1), 0).get_shape())
        tf.scatter_update(states_seq, -1, tf.argmax(tf.gather(pathScores, -1), 0))
        
        for step in range(N - 1, 0, -1):
            
            state = tf.gather(states_seq, step)
            idx = tf.reshape(tf.pack([step, state]), (1,2))
            state_prob = tf.gather_nd(pathStates, idx)
            print(state_prob.get_shape())
            print(states_seq.get_shape())
            print(tf.gather(states_seq, step - 1).get_shape())
#             state_prob.set_shape((1,-1))
            tf.scatter_update(states_seq, step - 1,  state_prob)

        return states_seq, pathScores
    
    def run_viterbi_decode(self, obs_seq):
        with tf.Session() as sess:
            init_op = tf.initialize_all_variables()
            state_graph, state_prob_graph = self.viterbi_decode(obs_seq)
            _, states_seq, state_prob = sess.run([init_op, state_graph, state_prob_graph])
#             state_prob = [sess.run(tf.exp(g)) for g in state_prob_graph]

        return states_seq, np.array(state_prob)

In [298]:
p0 = np.array([0.6, 0.4])

emi = np.array([[0.5, 0.1],
                [0.4, 0.3],
                [0.1, 0.6]])

trans = np.array([[0.7, 0.3],
                  [0.4, 0.6]])

states = {0:'Healthy', 1:'Fever'}
obs = {0:'normal', 1:'cold', 2:'dizzy'}

obs_seq = np.array([0, 1, 2])

In [299]:
# flags = tf.app.flags
# FLAGS = flags.FLAGS
# flags.DEFINE_string('summaries_dir', 'logs', 'Summaries directory')

In [300]:
model =  HiddenMarkovModel(trans, emi, p0)
states_seq, state_prob = model.run_viterbi_decode(obs_seq)
# dptable(state_prob)
print(states_seq)
print(state_prob)
# print("Most likely States: ", [obs[s] for s in states_seq])

<unknown>
(1,)
(3,)
()


ValueError: Shapes (1,) and () are not compatible

In [219]:
a = tf.constant([[1,2],[2,3]])


In [80]:
s = tf.gather_nd(a,[[0, 0]])
s.eval(session=tf.Session())

array([1], dtype=int32)

In [96]:
tf.slice(a, [1, 1], [0, 1]).eval(session=tf.Session())

array([], shape=(0, 1), dtype=int32)

In [141]:
np.array([[2,3]]).shape

(1, 2)

In [161]:
a.set_shape()

TensorShape([Dimension(2), Dimension(2)])