In [1]:
import numpy as np

In [2]:
def load_index(filename):
    index = 0
    word_to_index, index_to_word = {}, {}
    with open(filename) as f:
        for line in f.readlines():
            index += 1 
            word = line.strip('\n')
            word_to_index[word] = index
            index_to_word[index] = word
            
    return word_to_index, index_to_word

In [3]:
def load_hmm_prob(filename):
    hmm_prob = []
    with open(filename) as f:
        lines = f.readlines()
    
    for line in lines:
        line = line.strip(' \n')
        nums = line.split(' ')
        hmm_prob.append([float(num) for num in nums])
    return np.array(hmm_prob)

In [4]:
test_input = 'testwords.txt'
index_to_word_file = 'index_to_word.txt'
index_to_tag_file = 'index_to_tag.txt'
hmmprior = 'hmmprior.txt'
hmmemit = 'hmmemit.txt'
hmmtrans = 'hmmtrans.txt'
predicted_file = 'predicted.txt'
metric_file = 'metrics.txt'

In [5]:
content = []
with open(test_input) as f:
    for line in f.readlines():
        content.append(line.strip('\n'))
        
word_to_index,index_to_word = load_index(index_to_word_file)
tag_to_index, index_to_tag = load_index(index_to_tag_file)

In [10]:
num_states = len(index_to_tag)
num_words = len(index_to_word)

In [11]:
prior = []
with open(hmmprior) as f:
    for _ in range(num_states):
        line = f.readline()
        num = line.strip('\n')
        prior.append( float(num))
prior = np.array(prior).reshape((num_states, 1))

In [12]:
A = load_hmm_prob(hmmtrans)
B = load_hmm_prob(hmmemit)

In [14]:
def word_tag_separation( line ):
    word_to_tag = line.split(' ')
    words, tags = [], []
    for combination in word_to_tag:
        word, tag = combination.split('_')
        words.append(word)
        tags.append(tag)
    return words, tags
    

In [105]:
words, tags = word_tag_separation( content[0] )

In [106]:
alpha, pt = forward( words, word_to_index, prior, A, B, num_states)
a = predict(alpha,pt,index_to_tag)

In [107]:
alpha[:][-1].reshape((11,-1))

array([[-11.24557695],
       [-17.57360755],
       [-22.0927526 ],
       [-26.61189765],
       [-31.13104269],
       [-43.33680908],
       [-55.54257546],
       [-65.92880062],
       [-66.26720109],
       [-70.78634613],
       [-80.59421724]])

In [90]:
def forward(words,word_to_index, prior, A, B,num_states):
    nt = len(words)
    alpha = np.zeros( (num_states, nt) )    # alpha table from forward propagation
    pt = np.zeros( (num_states, nt) )
    
    word = words[0]
    for i in range(num_states):
    
        alpha[i,0] = np.log( prior[i]) + np.log( B[i, word_to_index[word]] )
        pt[i,0] = i+1
        
    for t in range(1,nt):
        word = words[t]
    
        for j in range(num_states):
            lw = np.log( B[j, word_to_index[word]] ) + \
            np.log( A[:, j].reshape((num_states, -1)) ) + \
            alpha[:,t-1].reshape( (num_states,-1))
        
            alpha[j,t] = max(lw)
            pt[j,t] = np.argmax(lw) + 1
            
    return alpha, pt

def predict(alpha,pt, index_to_tag):
    predicted_tag = []
    index = np.argmax( alpha[:,-1] )
    predicted_tag.append( index_to_tag[index + 1] )
    
    for t in range( len(alpha[0]) - 1, 0, -1):
        index = pt[int(index) - 1, t]
        predicted_tag.append( index_to_tag[index])
    
    return list(reversed(predicted_tag) )   
    

In [104]:
def forward(words,word_to_index, prior, A, B,num_states):
    nt = len(words)
    alpha = np.zeros( (num_states, nt) )    # alpha table from forward propagation
    pt = np.zeros( (num_states, nt) )
    
    word = words[0]
    for i in range(num_states):
    
        alpha[i,0] = np.log( prior[i]) + np.log( B[i, word_to_index[word]] )
        pt[i,0] = i+1
        
    for t in range(1,nt):
        word = words[t]
    
        for j in range(num_states):
            lw = []
            for k in range(num_states):
                log_likelihood = np.log( B[j][ word_to_index[word] ]) + \
                np.log( A[k][j]) + alpha[k][t-1]
                lw.append( log_likelihood )
            
            alpha[j][t] = max(lw)
            pt[j][t] = lw.index( max(lw) ) + 1
    return alpha, pt

def predict(alpha,pt, index_to_tag):
    predicted_tag = []
    index = np.argmax( alpha[:,-1] )
    predicted_tag.append( index_to_tag[index + 1] )
    
    for t in range( len(alpha[0]) - 1, 0, -1):
        index = pt[int(index) - 1, t]
        predicted_tag.append( index_to_tag[index])
    
    return list(reversed(predicted_tag) )   
    

In [21]:
nt = len(words)
alpha = np.zeros( (num_states, nt) )    # alpha table from forward propagation
pt = np.zeros( (num_states, nt) )
for i in range(num_states):
    word = words[0]
    alpha[i,0] = np.log( prior[i]*B[i, word_to_index[word]] )
    pt[i,0] = i+1

for t in range(1,nt):
    word = words[t]
    
    for j in range(num_states):
        lw = np.log( B[j, word_to_index[word]] ) + \
        np.log( A[:, j].reshape((num_states, -1)) ) + \
        alpha[:,t-1].reshape( (num_states,-1))
        
        alpha[j,t] = max(lw)
        pt[j,t] = np.argmax(lw) + 1
        
        

In [76]:
def forward(words,word_to_index, prior, A, B,num_states):
    nt = len(words)
    alpha = np.zeros( (num_states, nt) )    # alpha table from forward propagation
    pt = np.zeros( (num_states, nt) )
    
    for i in range(num_states):
        word = words[0]
        alpha[i,0] = np.log( prior[i]*B[i, word_to_index[word]] )
        pt[i,0] = i+1
        
    for t in range(1,nt):
        word = words[t]
    
        for j in range(num_states):
            lw = np.log( B[j, word_to_index[word]] ) + \
            np.log( A[:, j].reshape((num_states, -1)) ) + \
            alpha[:,t-1].reshape( (num_states,-1))
        
            alpha[j,t] = max(lw)
            pt[j,t] = np.argmax(lw) + 1
            
    return alpha, pt
    

In [74]:
def predict(alpha,pt, index_to_tag):
    predicted_tag = []
    index = np.argmax( alpha[:,-1] )
    predicted_tag.append( index_to_tag[index + 1] )
    
    for t in range( len(alpha[0]) - 1, 0, -1):
        index = pt[int(index) - 1, t]
        predicted_tag.append( index_to_tag[index])
    
    return list(reversed(predicted_tag) )   
    