In [None]:
import numpy as np

def viterbi_decode(sentence, emission_matrix, transition_matrix):
    """Return the most probable state sequence for ``sentence`` under an HMM.

    Scores are accumulated as log-probabilities, so long sentences do not
    underflow to zero the way a running product of probabilities does.

    Args:
        sentence: Text to decode; it is tokenised with ``str.split()``.
        emission_matrix: Mapping ``state -> {word: probability}``. A word
            missing from a state's row falls back to that row's ``'<UNK>'``
            entry, and to probability 0 if there is no such entry.
        transition_matrix: Mapping ``previous state -> {state: probability}``.
            The row keyed ``'<s>'`` supplies the start probabilities; a
            missing target state counts as probability 0.

    Returns:
        A list with one ``int`` per token. Each value is the index of the
        chosen state in ``list(emission_matrix.keys())``. An empty or
        whitespace-only sentence yields ``[]``. If every path has
        probability 0, index 0 is returned for every token.

    Raises:
        ValueError: If the sentence has tokens but ``emission_matrix`` has
            no states.
        KeyError: If ``transition_matrix`` lacks the ``'<s>'`` row, or
            lacks a row for some state when the sentence has more than
            one token.
    """
    states = list(emission_matrix.keys())
    words = sentence.split()

    if not words:
        return []
    if not states:
        raise ValueError("emission_matrix must define at least one state")

    def log_prob(prob):
        # log(0) is -inf; the guard avoids NumPy's divide-by-zero warning.
        return np.log(prob) if prob > 0 else -np.inf

    def log_emission(state, word):
        row = emission_matrix[state]
        return log_prob(row.get(word, row.get('<UNK>', 0)))

    num_states = len(states)
    num_words = len(words)

    # viterbi[s, t]: best log-probability of any path ending in state s at t.
    # backpointer[s, t]: predecessor state index on that best path.
    viterbi = np.full((num_states, num_words), -np.inf)
    backpointer = np.zeros((num_states, num_words), dtype=int)

    # Initialisation: start transition plus emission of the first word.
    start_row = transition_matrix['<s>']
    for index, state in enumerate(states):
        viterbi[index, 0] = (
            log_prob(start_row.get(state, 0)) + log_emission(state, words[0])
        )

    if num_words > 1:
        # Transition scores do not depend on t, so build them once.
        # Rows are previous states, columns are current states.
        log_trans = np.array([
            [log_prob(transition_matrix[prev].get(cur, 0)) for cur in states]
            for prev in states
        ])

    # Recursion: scores[p, s] is the score of reaching s at t via p at t-1.
    for t in range(1, num_words):
        log_emit = np.array([log_emission(state, words[t]) for state in states])
        scores = viterbi[:, t - 1][:, np.newaxis] + log_trans + log_emit
        # argmax keeps the first maximum, so ties (including an all -inf
        # column) resolve to the lowest previous-state index.
        backpointer[:, t] = np.argmax(scores, axis=0)
        viterbi[:, t] = np.max(scores, axis=0)

    # Backtrack from the best final state; build reversed, then flip.
    current = int(np.argmax(viterbi[:, num_words - 1]))
    best_seq = [current]
    for t in range(num_words - 1, 0, -1):
        current = int(backpointer[current, t])
        best_seq.append(current)
    best_seq.reverse()

    return best_seq

# Example usage: decode a three-word sentence with a toy two-tag model.

# Emission probabilities, keyed by tag and then by word.
emission_matrix = {
    'Noun': {
        'the': 0.7,
        'cat': 0.2,
        'jumps': 0.1,
    },
    'Verb': {
        'the': 0.1,
        'cat': 0.7,
        'jumps': 0.2,
    },
}

# Transition probabilities, keyed by previous tag and then by next tag.
# The '<s>' row holds the start-of-sentence probabilities.
transition_matrix = {
    '<s>': {
        'Noun': 0.4,
        'Verb': 0.6,
    },
    'Noun': {
        'Noun': 0.2,
        'Verb': 0.8,
    },
    'Verb': {
        'Noun': 0.5,
        'Verb': 0.5,
    },
}

sentence = "the cat jumps"

# Each returned value is an index into the tag order of emission_matrix.
tag_sequence_indices = viterbi_decode(
    sentence=sentence,
    emission_matrix=emission_matrix,
    transition_matrix=transition_matrix,
)
print("POS tag sequence indices:", tag_sequence_indices)


POS tag sequence indices: [0, 1, 1]
