# HMM POS Tagger: Final Evaluation & Analysis
This notebook presents the final results of the Hidden Markov Model POS Tagger, including comparisons against a baseline and detailed error analysis.

In [None]:
import sys
import os
sys.path.append(os.path.abspath('../src'))

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

from utils import load_pos_data, get_vocab_and_tags, download_nltk_data
from train_hmm import train_hmm
from evaluate import viterbi, evaluate_models, train_mft_baseline

In [None]:
# 1. Setup Data
# Frequency threshold handed to get_vocab_and_tags. A value of 2 is used to
# make UNK emission probabilities more robust.
MIN_FREQ = 2

download_nltk_data()
train_data, test_data = load_pos_data()
vocab, tags = get_vocab_and_tags(train_data, min_freq=MIN_FREQ)

print(f"Training sentences: {len(train_data)}")
# Report the threshold that was actually used so the label cannot drift from
# the call above when the value is tuned.
print(f"Vocabulary size (min_freq={MIN_FREQ}): {len(vocab)}")

In [None]:
# 2. Train Models
# Fit the HMM tagger on the training sentences, using the vocabulary and tag
# set produced by get_vocab_and_tags in the setup cell.
hmm_model = train_hmm(train_data, vocab, tags)
# Fit the baseline the HMM is compared against (shown as "Baseline (MFT)" in
# the results table). Presumably a most-frequent-tag tagger — confirm in the
# `evaluate` module.
mft_model = train_mft_baseline(train_data, tags)
print("Training complete.")

In [None]:
# 3. Run Quantitative Evaluation
# Number of leading test sentences to score. Every metric below (accuracy and
# the confusion matrix) is computed on this subset only. Set to None to
# evaluate on the entire test set (slicing with [:None] keeps every element).
EVAL_SENTENCES = 300

results = evaluate_models(test_data[:EVAL_SENTENCES], hmm_model, mft_model)

# One row per model, so the two accuracies can be compared side by side.
metrics = {
    "Model": ["HMM (Viterbi)", "Baseline (MFT)"],
    "Accuracy": [results['hmm_accuracy'], results['mft_accuracy']]
}
display(pd.DataFrame(metrics))

In [None]:
# 4. Error Analysis: Confusion Matrix
cm = results['confusion_matrix']

# Row-normalize so each cell is the fraction (0-1, not a percentage) of a true
# tag's tokens that received each predicted tag. The evaluated slice may hold
# no tokens for some tag, which leaves an all-zero row; dividing only where the
# row total is non-zero keeps such rows at 0 instead of producing NaN cells and
# a divide-by-zero RuntimeWarning.
row_totals = cm.sum(axis=1, keepdims=True)
cm_norm = np.divide(
    cm,
    row_totals,
    out=np.zeros(cm.shape, dtype=float),
    where=row_totals != 0,
)

# Explicit figure/axes objects rather than the implicit pyplot state machine.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(cm_norm, annot=True, fmt=".2f", cmap="Blues",
            xticklabels=results['tags'], yticklabels=results['tags'], ax=ax)
ax.set_title("Normalized HMM Confusion Matrix (True vs Predicted)")
ax.set_ylabel("True Tag")
ax.set_xlabel("Predicted Tag")
plt.show()

In [None]:
# 5. Top 10 Most Common Errors
# Collect every off-diagonal confusion-matrix cell as a
# (true tag, predicted tag, count) triple; the diagonal holds correct
# predictions and is skipped.
tag_names = results['tags']
errors = [
    (true_tag, pred_tag, cm[row, col])
    for row, true_tag in enumerate(tag_names)
    for col, pred_tag in enumerate(tag_names)
    if row != col
]

# Largest counts first; the sort is stable, so ties keep matrix order.
errors.sort(key=lambda entry: entry[2], reverse=True)

print("Top 10 Tagging Errors (True -> Predicted):")
for true_tag, pred_tag, n_errors in errors[:10]:
    print(f"{true_tag:5} -> {pred_tag:5} : {n_errors} occurrences")

## Interactive Tagging
Edit `user_input` in the cell below (or uncomment the `input()` line) and run it to tag your own sentences!

In [None]:
# Sentence to tag; replace the text to try a different one.
user_input = "The quick brown fox jumps over the lazy dog"
# user_input = input("Enter a sentence to tag: ") # Uncomment in traditional Jupyter environments

# Whitespace tokenization, then decode a tag sequence with the trained HMM.
words = user_input.split()
preds = viterbi(words, hmm_model)

# Header: the raw sentence followed by a 30-character rule.
print(f"Sentence: {user_input}", "-" * 30, sep="\n")
# One "word | tag" row per token, with the word padded to 15 columns.
for token, tag in zip(words, preds):
    print(f"{token:15} | {tag}")