In [1]:
from __future__ import print_function
import tensorflow as tf
import numpy as np

In [61]:
class HiddenMarkovModel(object):

    """
    Hidden Markov Model Class

    Parameters:
    -----------
    
    - S: Number of states.
    - T: Transition matrix of size S by S
         stores probability from state i to state j.
    - E: Emission matrix of size S by N (number of observations)
         stores the probability of observing  O_j  from state  S_i. 
    - T0: Initial state probabilities of size S.
    """

    def __init__(self, T, E, T0):
        # Number of possible states
        self.S = T.shape[0]
        
        # Number of possible observations
        self.O = E.shape[0]
        
        # Emission probability
        self.E = tf.Variable(E, dtype=tf.float64, name='emission_matrix')

        # Transition matrix
        self.T = tf.Variable(T, dtype=tf.float64, name='transition_matrix')

        # Initial state vector
        self.T0 = tf.constant(T0, dtype=tf.float64, name='inital_state_vector')
    
    def initialize_variables(self, shape):
        self.forward = tf.Variable(tf.zeros(shape, dtype=tf.float64), name='forward')
        self.backward = tf.Variable(tf.zeros(shape, dtype=tf.float64), name='backward')
        self.posterior = tf.Variable(tf.zeros(shape, dtype=tf.float64), name='posteriror')


    def _forward(self, obs_prob_list):
        
        self.scale = tf.Variable(tf.zeros([self.N], tf.float64)) #scale factors
        
        # initialize with state starting priors
        init_prob = tf.mul(self.T0, tf.squeeze(obs_prob_list[0]))
        
        # scaling factor at t=0
        self.scale = tf.scatter_update(self.scale, 0, 1.0 / tf.reduce_sum(init_prob))

        # scaled belief at t=0
        self.forward = tf.scatter_update(self.forward, 0, self.scale[0] * init_prob)

        # propagate belief
        for step, obs_prob in enumerate(obs_prob_list[1:]):
            # previous state probability
            prev_prob = tf.expand_dims(self.forward[step, :], 0)
            # transition prior
            prior_prob = tf.matmul(prev_prob, self.T)
            # forward belief propagation
            forward_score = tf.mul(prior_prob, tf.squeeze(obs_prob))

            forward_prob = tf.squeeze(forward_score)
            # scaling factor
            self.scale = tf.scatter_update(self.scale, step+1, 1.0 / tf.reduce_sum(forward_prob))
            # Update forward matrix
            self.forward = tf.scatter_update(self.forward, step+1, self.scale[step+1] * forward_prob)
        

    def _backward(self, obs_prob_list):
        # initialize with state ending priors
        self.backward = tf.scatter_update(self.backward, 0, self.scale[self.N-1] * tf.ones([self.S], dtype=tf.float64)) 

        # propagate belief
        for step, obs_prob in enumerate(obs_prob_list[:-1]):
            # next state probability
            next_prob = tf.expand_dims(self.backward[step, :], 1)
            # observation emission probabilities
            obs_prob_d = tf.diag(tf.squeeze(obs_prob))
            # transition prior
            prior_prob = tf.matmul(self.T, obs_prob_d)
            # backward belief propagation
            backward_score = tf.matmul(prior_prob, next_prob)

            backward_prob = tf.squeeze(backward_score)

            # Update backward matrix
            self.backward = tf.scatter_update(self.backward, step+1, self.scale[self.N-2-step] * backward_prob)
        
        self.backward = tf.reverse(self.backward, [True, False])
        # remove final probability
        #self.backward = tf.slice(self.backward, [0,0], [self.N, self.S])
        
    def _posterior(self):
        # posterior score
        self.posterior = tf.mul(self.forward, self.backward)
        
        # marginal per timestep
        #marginal = tf.reduce_sum(self.forward[self.N-1, :], 0)
        marginal = tf.reduce_sum(self.posterior, 1)
#         init_prob = tf.mul(self.T0, tf.squeeze(obs_prob_list[0])) #2x1
        
        # Normalize porsterior into probabilities
        self.posterior = self.posterior / tf.expand_dims(marginal, 1)       
        
        
    def re_estimate_emission(self, x):
        
        states_marginal = tf.reduce_sum(self.posterior, 0)
        seq_one_hot = tf.one_hot(tf.cast(x, tf.int64), self.O, 1, 0)
        emission_score = tf.matmul(tf.cast(seq_one_hot, tf.float64), self.posterior, transpose_a=True)
        return emission_score / states_marginal
    
    def re_estimate_transition(self, x):
        
        self.M = tf.Variable(tf.zeros((self.N-1, self.S, self.S), tf.float64))
        
        for t in range(self.N - 1):
            tmp_0 = tf.matmul(tf.expand_dims(self.forward[t, :], 0), self.T)
            tmp_1 = tf.mul(tmp_0, tf.expand_dims(tf.gather(self.E, x[t+1]), 0))
            denom = tf.squeeze(tf.matmul(tmp_1, tf.expand_dims(self.backward[t+1, :], 1)))

            trans_re_estimate = tf.Variable(tf.zeros((self.S, self.S), tf.float64))
            for i in range(self.S):
                numer = self.forward[t, i] * self.T[:, i] * tf.gather(self.E, x[t+1]) * self.backward[t+1, :]
                trans_re_estimate = tf.scatter_update(trans_re_estimate, i, numer / denom)

            self.M = tf.scatter_update(self.M, t, trans_re_estimate)
#             return tf.reduce_sum(self.M, 0) / tf.expand_dims(tf.reduce_sum(self.posterior, 0), 0)
        
    def forward_backward(self, obs_seq):
        """
        runs forward backward algorithm on observation sequence

        Arguments
        ---------
        - obs_seq : matrix of size N by S, where N is number of timesteps and
            S is the number of states

        Returns
        -------
        - forward : matrix of size N by S representing
            the forward probability of each state at each time step
        - backward : matrix of size N by S representing
            the backward probability of each state at each time step
        - posterior : matrix of size N by S representing
            the posterior probability of each state at each time step
        """

        # length of observed sequence
        self.N = len(obs_seq)

        # shape of Variables
        shape = [self.N, self.S]
        
        # observed sequence
        x = tf.constant(obs_seq, dtype=tf.int32, name='observation_sequence')
        
        # initialize variables
        self.initialize_variables(shape)
        
        # probability of emission sequence
        obs_prob_seq = tf.gather(self.E, x)
        
        obs_prob_list_for = tf.split(0, self.N, obs_prob_seq)
        
        # forward belief propagation
        self._forward(obs_prob_list_for)
        
        obs_prob_seq_rev = tf.reverse(obs_prob_seq, [True, False])
        obs_prob_list_back = tf.split(0, self.N, obs_prob_seq_rev)
        
        # backward belief propagation
        self._backward(obs_prob_list_back)

        # apply smoothing
#         self._posterior()
        
#         new_emission = self.re_estimate_emission(x)
#         new_transition = self.re_estimate_transition(x)
        self.re_estimate_transition(x)

#         self.E = new_emission
#         self.T = new_transition
        return self.forward, self.backward, self.M
    
    def run_forward_backward(self, obs_seq):
        with tf.Session() as sess:
            
            forward, backward, M = self.forward_backward(obs_seq)
            sess.run(tf.initialize_all_variables())
            return sess.run([forward, backward, M])

In [62]:
True_pi = np.array([0.3, 0.7])

True_T = np.array([[0.15, 0.85],
                  [0.88, 0.12]])

True_E = np.array([[0.6, 0.4],
                   [0.2, 0.3],
                   [0.2, 1.3]])

obs_seq = [ 0,  2,  2,  1]

In [63]:
model =  HiddenMarkovModel(True_T, True_E, True_pi)

results = model.run_forward_backward(obs_seq)
for i in results:
    print(i)
    print()

[[ 0.39130435  0.60869565]
 [ 0.18394671  0.81605329]
 [ 0.31090456  0.68909544]
 [ 0.55649826  0.44350174]]

[[ 2.96575316  1.66487297]
 [ 3.33736818  1.1440049 ]
 [ 2.53140162  1.88300752]
 [ 4.26083044  4.26083044]]

[[[ 0.0391778   0.51211628]
  [ 0.34534506  0.10863073]]

 [[ 0.01396929  0.39625076]
  [ 0.35117896  0.23971498]]

 [[ 0.03974135  0.34972386]
  [ 0.4991402   0.10570028]]]



In [47]:
sess = tf.InteractiveSession()

Exception AssertionError: AssertionError() in <bound method InteractiveSession.__del__ of <tensorflow.python.client.session.InteractiveSession object at 0x10e65fa50>> ignored


In [48]:
sess.run(tf.initialize_all_variables())
model.scale.eval()

array([ 2.17391304,  1.54746686,  2.0845962 ,  4.26083044])

In [None]:
[ 2.17391304,  1.54746686,  2.0845962 ,  4.26083044])