<a href="https://colab.research.google.com/github/Gousepasha789/Natural-Language-Processing/blob/main/POS_using_HMM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np

# Hidden states of the HMM: the part-of-speech tags.
states = ('Noun', 'Verb', 'Adjective')

# Observable symbols: the words of the example sentence.
observations = ('dog', 'barks', 'loud')

# Initial distribution over tags, listed in the order of `states`.
start_prob = dict(zip(states, (0.5, 0.3, 0.2)))

# Transition matrix: outer key is the previous tag; each row lists the
# probability of the next tag, in the order of `states`.
trans_prob = {
    'Noun': dict(zip(states, (0.1, 0.6, 0.3))),
    'Verb': dict(zip(states, (0.4, 0.2, 0.4))),
    'Adjective': dict(zip(states, (0.5, 0.3, 0.2))),
}

# Emission matrix: outer key is the tag; each row lists the probability of
# producing each word, in the order of `observations`.
emit_prob = {
    'Noun': dict(zip(observations, (0.6, 0.1, 0.3))),
    'Verb': dict(zip(observations, (0.2, 0.7, 0.1))),
    'Adjective': dict(zip(observations, (0.1, 0.2, 0.7))),
}

def viterbi(obs, states, start_p, trans_p, emit_p):
    """Return the most probable hidden-state sequence for ``obs``.

    Args:
        obs: Sequence of observations; must support ``len()`` and indexing.
        states: Collection of hidden states. It is iterated several times,
            so it must be re-iterable (tuple, list, ...).
        start_p: Mapping ``state -> initial probability``.
        trans_p: Mapping ``prev_state -> {next_state: probability}``.
            A missing ``next_state`` entry is treated as probability 0.
        emit_p: Mapping ``state -> {observation: probability}``.
            A missing observation entry is treated as probability 0.

    Returns:
        A ``(path, prob)`` tuple: ``path`` is a list with one state per
        observation and ``prob`` is the joint probability of that path and
        ``obs``. An empty ``obs`` yields ``([], 1.0)``.

    Ties between equally probable candidates are resolved by comparing
    the states themselves (the larger state wins), because candidates are
    ranked as ``(probability, state)`` tuples.
    """
    # Nothing to decode: the only explanation is the empty path, whose
    # probability is the empty product. Without this guard obs[0] below
    # would raise IndexError.
    if len(obs) == 0:
        return [], 1.0

    # scores[t][s]: probability of the best path that ends in s at time t.
    scores = [{s: start_p[s] * emit_p[s].get(obs[0], 0) for s in states}]
    # back[t][s]: predecessor of s on that best path (unused for t == 0).
    # Storing one pointer per cell avoids copying a whole path list for
    # every state at every step.
    back = [{}]

    for t in range(1, len(obs)):
        prev_scores = scores[-1]
        step_scores = {}
        step_back = {}

        for curr_state in states:
            # The emission term does not depend on the predecessor.
            emission = emit_p[curr_state].get(obs[t], 0)
            prob, prev_state = max(
                (
                    prev_scores[cand] * trans_p[cand].get(curr_state, 0) * emission,
                    cand,
                )
                for cand in states
            )
            step_scores[curr_state] = prob
            step_back[curr_state] = prev_state

        scores.append(step_scores)
        back.append(step_back)

    # Pick the best final state, then follow the back-pointers to t == 0.
    prob, best_final_state = max((scores[-1][s], s) for s in states)
    best_path = [best_final_state]
    for t in range(len(obs) - 1, 0, -1):
        best_path.append(back[t][best_path[-1]])
    best_path.reverse()

    return best_path, prob

# Decode the example sentence with the toy HMM defined above and report
# the winning tag sequence together with its joint probability.
best_sequence, best_prob = viterbi(
    obs=observations,
    states=states,
    start_p=start_prob,
    trans_p=trans_prob,
    emit_p=emit_prob,
)
print("Best sequence of POS tags:", best_sequence)
print("Probability of the sequence:", best_prob)


Best sequence of POS tags: ['Noun', 'Verb', 'Adjective']
Probability of the sequence: 0.03528
