In [2]:
import numpy as np

class HMM:
    """Discrete hidden Markov model with Viterbi decoding.

    Attributes:
        states: Sequence of state labels; position ``i`` is state index ``i``.
        emissions: Sequence of observable symbols; position ``k`` is symbol
            index ``k``.
        start_probs: Length-N array-like; ``start_probs[i]`` is the
            probability of starting in state ``i``.
        trans_probs: N x N array-like; ``trans_probs[i, j]`` is the
            probability of moving from state ``i`` to state ``j``.
        emit_probs: N x M array-like; ``emit_probs[i, k]`` is the probability
            that state ``i`` emits symbol ``k``.
    """

    def __init__(self, states, emissions, start_probs, trans_probs, emit_probs):
        """Store the model parameters as given (no copying or validation)."""
        self.states = states
        self.emissions = emissions
        self.start_probs = start_probs
        self.trans_probs = trans_probs
        self.emit_probs = emit_probs

    def viterbi(self, obs):
        """Return the most likely state-index sequence for ``obs``.

        The recursion runs in log space, so long observation sequences do not
        underflow to zero the way a product of raw probabilities does.

        Args:
            obs: Sequence of observed symbols, each present in
                ``self.emissions``.

        Returns:
            Integer ``numpy.ndarray`` of length ``len(obs)`` holding state
            indices (empty when ``obs`` is empty).

        Raises:
            ValueError: If an observation is not found in ``self.emissions``.
        """
        T = len(obs)
        N = len(self.states)

        # Nothing to decode: return an empty path instead of failing on obs[0].
        if T == 0:
            return np.zeros(0, dtype=int)

        # Resolve each observation to its symbol index once, rather than once
        # per state inside the recursion. list.index raises ValueError for an
        # unknown symbol.
        obs_idx = [self.emissions.index(symbol) for symbol in obs]

        # log(0) = -inf is the correct score for an impossible event, so the
        # divide-by-zero warning is deliberately silenced here.
        with np.errstate(divide="ignore"):
            log_start = np.log(np.asarray(self.start_probs, dtype=float))
            log_trans = np.log(np.asarray(self.trans_probs, dtype=float))
            log_emit = np.log(np.asarray(self.emit_probs, dtype=float))

        # log_v[t, s]: best log-probability of any path ending in state s at t.
        # backptr[t, s]: predecessor state on that best path.
        log_v = np.empty((T, N))
        backptr = np.zeros((T, N), dtype=int)
        log_v[0] = log_start + log_emit[:, obs_idx[0]]

        for t in range(1, T):
            # scores[i, j] = best score ending in i at t-1, then moving i -> j.
            scores = log_v[t - 1][:, np.newaxis] + log_trans
            backptr[t] = np.argmax(scores, axis=0)
            log_v[t] = np.max(scores, axis=0) + log_emit[:, obs_idx[t]]

        # Follow the stored backpointers from the best final state.
        path = np.zeros(T, dtype=int)
        path[T - 1] = np.argmax(log_v[T - 1])
        for t in range(T - 2, -1, -1):
            path[t] = backptr[t + 1, path[t + 1]]

        return path

    def tag(self, obs):
        """Return the most likely state labels for ``obs`` as a list."""
        path = self.viterbi(obs)
        return [self.states[s] for s in path]

# Example: tag a four-word sentence with a toy part-of-speech HMM.

# Hidden states (POS tags) and the observable vocabulary.
states = ["NN", "VB", "DT", "JJ"]
emissions = ["dog", "run", "the", "fast"]

# Probability of each tag opening the sentence, in the order of `states`.
start_probs = np.array([0.2, 0.4, 0.2, 0.2])

# Row = current tag, column = next tag (both in the order of `states`).
trans_probs = np.array([
    [0.5, 0.2, 0.2, 0.1],
    [0.3, 0.5, 0.1, 0.1],
    [0.1, 0.1, 0.6, 0.2],
    [0.1, 0.1, 0.2, 0.6],
])

# Row = tag (order of `states`), column = word (order of `emissions`).
emit_probs = np.array([
    [0.8, 0.1, 0.05, 0.05],
    [0.05, 0.8, 0.05, 0.1],
    [0.05, 0.05, 0.8, 0.1],
    [0.05, 0.05, 0.1, 0.8],
])

hmm = HMM(
    states=states,
    emissions=emissions,
    start_probs=start_probs,
    trans_probs=trans_probs,
    emit_probs=emit_probs,
)

obs = "the dog run fast".split()
tags = hmm.tag(obs)

print(tags)


['DT', 'NN', 'VB', 'JJ']


In [None]:
The output shows that the model tagged the words "the", "dog", "run", and "fast" as "DT" (determiner),
"NN" (noun), "VB" (verb), and "JJ" (adjective), respectively.