In [1]:
from collections import defaultdict
import math

In [None]:
# Training Data
# A tiny hand-tagged corpus. Each sentence is a whitespace-separated run of
# tokens written as word_TAG; the tags that appear are DET, NOUN, VERB and ADV.
train_sentences = [
    "The_DET cat_NOUN sleeps_VERB",
    "A_DET dog_NOUN barks_VERB",
    "The_DET dog_NOUN sleeps_VERB",
    "My_DET dog_NOUN runs_VERB fast_ADV",
    "A_DET cat_NOUN meows_VERB loudly_ADV",
    "Your_DET cat_NOUN runs_VERB",
    "The_DET bird_NOUN sings_VERB sweetly_ADV",
    "A_DET bird_NOUN chirps_VERB"
]

In [None]:
# Count Transitions and Emissions
# One pass over the corpus fills three tallies:
#   transition_counts[prev_tag][tag] -- times `tag` directly follows `prev_tag`
#   emission_counts[tag][word]       -- times `word` (lowercased) carries `tag`
#   tag_counts[tag]                  -- total occurrences of `tag`
tag_counts = defaultdict(int)
emission_counts = defaultdict(lambda: defaultdict(int))
transition_counts = defaultdict(lambda: defaultdict(int))

for sentence in train_sentences:
    # Every sentence is entered through the synthetic 'START' state.
    previous_tag = 'START'

    for wt in sentence.split():
        # Split on the last underscore only, so the tag is whatever follows it.
        word, tag = wt.rsplit('_', 1)
        word = word.lower()

        # Emission side: which word was produced under this tag.
        tag_counts[tag] += 1
        emission_counts[tag][word] += 1

        # Transition side: step from the previous tag to this one.
        transition_counts[previous_tag][tag] += 1
        previous_tag = tag

    # Close the sentence: its last tag (or 'START' if it was empty) leads to 'END'.
    transition_counts[previous_tag]['END'] += 1

In [None]:

# Compute Probabilities
def compute_log_probs(counts):
    """Turn a nested count table into conditional natural-log probabilities.

    Each row ``counts[outer]`` is normalised by its own total, so the result
    holds ``log(count / row_total)``, i.e. ``log P(inner | outer)``.

    Args:
        counts: Mapping ``outer -> {inner: count}``.

    Returns:
        A ``defaultdict(dict)`` mapping ``outer -> {inner: log_prob}``.
        Entries whose count is zero get ``-inf``. An outer key whose row is
        empty does not appear in the result. Because the result is a
        ``defaultdict``, indexing it with an unknown outer key yields (and
        stores) an empty dict rather than raising ``KeyError``.
    """
    probs = defaultdict(dict)
    for outer, row in counts.items():
        total = sum(row.values())
        for inner, count in row.items():
            if count > 0:
                probs[outer][inner] = math.log(count / total)
            else:
                # A zero count can appear when a nested defaultdict(int) was
                # merely read. math.log(0) would raise (and an all-zero row
                # would divide by zero), so record the event as impossible.
                probs[outer][inner] = -math.inf
    return probs

# Normalise both count tables (transitions first, then emissions) into
# log-probability tables.
transition_probs, emission_probs = map(
    compute_log_probs, (transition_counts, emission_counts)
)

In [None]:
# Show Probabilities
# Both tables are printed row by row as "log P(inner | outer)", rounded to
# three decimals.
print("=== Transition Log Probabilities ===")
for prev_tag, next_tag_scores in transition_probs.items():
    for curr_tag, log_prob in next_tag_scores.items():
        print(f"log P({curr_tag} | {prev_tag}) = {log_prob:.3f}")

print("\n=== Emission Log Probabilities ===")
for tag, word_scores in emission_probs.items():
    for word, log_prob in word_scores.items():
        print(f"log P({word} | {tag}) = {log_prob:.3f}")

=== Transition Log Probabilities ===
log P(DET | START) = 0.000
log P(NOUN | DET) = 0.000
log P(VERB | NOUN) = 0.000
log P(END | VERB) = -0.470
log P(ADV | VERB) = -0.981
log P(END | ADV) = 0.000

=== Emission Log Probabilities ===
log P(the | DET) = -0.981
log P(a | DET) = -0.981
log P(my | DET) = -2.079
log P(your | DET) = -2.079
log P(cat | NOUN) = -0.981
log P(dog | NOUN) = -0.981
log P(bird | NOUN) = -1.386
log P(sleeps | VERB) = -1.386
log P(barks | VERB) = -2.079
log P(runs | VERB) = -1.386
log P(meows | VERB) = -2.079
log P(sings | VERB) = -2.079
log P(chirps | VERB) = -2.079
log P(fast | ADV) = -1.099
log P(loudly | ADV) = -1.099
log P(sweetly | ADV) = -1.099
