In [None]:
import numpy as np

# Hidden states of the HMM: the part-of-speech tags.
states = ["Noun", "Verb", "Preposition", "Determiner"]

# Observed sequence: the words of the sentence to be tagged.
observations = "Time flies like an arrow".split()

# Transition scores: transition_prob[prev_tag][next_tag].
# Each row lists the scores for moving to the tags in `states` order.
# NOTE: these toy values are not normalized (e.g. the "Verb" row sums to 1.1).
transition_prob = {
    "Noun": dict(zip(states, (0.1, 0.3, 0.1, 0.0))),
    "Verb": dict(zip(states, (0.4, 0.2, 0.5, 0.0))),
    "Preposition": dict(zip(states, (0.7, 0.0, 0.1, 0.2))),
    "Determiner": dict(zip(states, (0.9, 0.0, 0.0, 0.0))),
}

# Emission scores: emission_prob[tag][word].
# Each row lists the scores for the words in `observations` order.
# NOTE: rows are not normalized either (e.g. the "Noun" row sums to 1.8).
emission_prob = {
    "Noun": dict(zip(observations, (0.6, 0.2, 0.1, 0.0, 0.9))),
    "Verb": dict(zip(observations, (0.1, 0.8, 0.2, 0.0, 0.1))),
    "Preposition": dict(zip(observations, (0.0, 0.0, 0.7, 0.0, 0.0))),
    "Determiner": dict(zip(observations, (0.0, 0.0, 0.0, 1.0, 0.0))),
}

# Start distribution: score of each tag labelling the first word.
initial_prob = dict(zip(states, (0.5, 0.2, 0.1, 0.2)))

# Viterbi Algorithm Implementation
def viterbi_algorithm(observations, states, initial_prob, transition_prob, emission_prob):
    """Decode the highest-scoring hidden-state sequence for ``observations``.

    Args:
        observations: Sequence of observed symbols (here: words).
        states: Sequence of hidden-state names (here: POS tags).
        initial_prob: Mapping ``state -> score`` for the first position.
            Must contain every entry of ``states``.
        transition_prob: Mapping ``prev_state -> {next_state: score}``.
            Must have an outer key for every state; a missing inner key is
            treated as score 0.
        emission_prob: Mapping ``state -> {observation: score}``.
            Must have an outer key for every state; an observation missing
            from the inner mapping is treated as score 0.

    Returns:
        A list with one state name per observation. An empty ``observations``
        sequence yields an empty list.

    Raises:
        ValueError: If ``states`` is empty while ``observations`` is not.
        KeyError: If a state is missing from ``initial_prob`` or from the
            outer level of ``transition_prob`` / ``emission_prob``.

    Note:
        Scores are multiplied directly (no log-space), so very long sequences
        may underflow to 0.
    """
    n = len(observations)
    m = len(states)

    # Nothing to decode. Without this guard, observations[0] below raises IndexError.
    if n == 0:
        return []
    # With no states there is no path; fail clearly instead of inside np.argmax.
    if m == 0:
        raise ValueError("states must contain at least one state")

    # viterbi[i, t]: best score of any path ending in state i at position t.
    # backpointer[i, t]: index of the predecessor state on that best path.
    viterbi = np.zeros((m, n))
    backpointer = np.zeros((m, n), dtype=int)

    # First column: start score times emission of the first observation.
    for i, state in enumerate(states):
        viterbi[i, 0] = initial_prob[state] * emission_prob[state].get(observations[0], 0)

    # Fill the remaining columns left to right.
    for t in range(1, n):
        current_obs = observations[t]
        for i, state in enumerate(states):
            # The emission score does not depend on the predecessor, so look it up once.
            emit = emission_prob[state].get(current_obs, 0)
            # Comparing (score, index) tuples means ties on score resolve to
            # the predecessor with the largest index.
            max_prob, max_state = max(
                (viterbi[j, t - 1] * transition_prob[prev_state].get(state, 0) * emit, j)
                for j, prev_state in enumerate(states)
            )
            viterbi[i, t] = max_prob
            backpointer[i, t] = max_state

    # Backtrace from the best final state. Collect in reverse and flip once,
    # which avoids repeated front-insertion into the list.
    current = int(np.argmax(viterbi[:, n - 1]))
    reversed_path = [current]
    for t in range(n - 1, 0, -1):
        current = int(backpointer[current, t])
        reversed_path.append(current)
    reversed_path.reverse()

    # Convert state indices back to state names.
    return [states[i] for i in reversed_path]

# Decode the most likely tag sequence for the example sentence.
best_tags = viterbi_algorithm(
    observations=observations,
    states=states,
    initial_prob=initial_prob,
    transition_prob=transition_prob,
    emission_prob=emission_prob,
)

# Show the sentence followed by its predicted tags, one labelled line each.
for heading, tokens in (("Sentence:", observations), ("POS Tags:", best_tags)):
    print(heading, " ".join(tokens))


Sentence: Time flies like an arrow
POS Tags: Noun Verb Preposition Determiner Noun


In [None]:
from sklearn.metrics import confusion_matrix, classification_report
import numpy as np

# Ground-truth POS tags for the example sentence, one per word.
true_tags = ["Noun", "Verb", "Preposition", "Determiner", "Noun"]  # Ground truth

# Evaluate what the HMM actually predicted (best_tags from the decoding cell)
# rather than a hand-copied list that would always score 100%.
predicted_tags = list(best_tags)
assert len(true_tags) == len(predicted_tags), "need one predicted tag per true tag"

# Fixed label order shared by the confusion matrix and the report.
labels = ["Noun", "Verb", "Preposition", "Determiner"]

# Rows are true tags and columns are predicted tags, both in `labels` order.
cm = confusion_matrix(true_tags, predicted_tags, labels=labels)

# Print confusion matrix
print("Confusion Matrix:")
print(cm)

# Generate a classification report.
# `labels=labels` is required here: without it scikit-learn orders the classes
# alphabetically and pairs `target_names` with the wrong rows (e.g. "Noun" is
# printed with the Determiner statistics).
print("\nClassification Report:")
print(classification_report(true_tags, predicted_tags, labels=labels, target_names=labels))


Confusion Matrix:
[[2 0 0 0]
 [0 1 0 0]
 [0 0 1 0]
 [0 0 0 1]]

Classification Report:
              precision    recall  f1-score   support

        Noun       1.00      1.00      1.00         1
        Verb       1.00      1.00      1.00         2
 Preposition       1.00      1.00      1.00         1
  Determiner       1.00      1.00      1.00         1

    accuracy                           1.00         5
   macro avg       1.00      1.00      1.00         5
weighted avg       1.00      1.00      1.00         5



In [None]:
def compute_accuracy(true_tags, predicted_tags):
    """Return the fraction of positions where the predicted tag equals the true tag.

    Args:
        true_tags: Sequence of reference tags.
        predicted_tags: Sequence of predicted tags, aligned with ``true_tags``.

    Returns:
        A float in ``[0, 1]``. Two empty sequences give ``0.0``, since there
        is nothing to score.

    Raises:
        ValueError: If the two sequences have different lengths. ``zip`` would
            otherwise drop the extra items silently and give a misleading score.
    """
    total = len(true_tags)
    if total != len(predicted_tags):
        raise ValueError(
            f"length mismatch: {total} true tags vs {len(predicted_tags)} predicted tags"
        )
    # Guard the division: without it an empty input raises ZeroDivisionError.
    if total == 0:
        return 0.0

    correct_predictions = sum(t1 == t2 for t1, t2 in zip(true_tags, predicted_tags))
    return correct_predictions / total

# Score the predicted tags against the reference tags.
accuracy = compute_accuracy(true_tags=true_tags, predicted_tags=predicted_tags)

# The leading newline puts a blank line before the percentage.
print(f"\nState Accuracy: {accuracy:.2%}")



State Accuracy: 100.00%
