In [1]:
import nltk
from nltk.corpus import treebank
from nltk.tag import HiddenMarkovModelTagger
from sklearn.model_selection import train_test_split
import time

In [2]:
#Importing the corpus, simplifying tags and preprocessing the data
tagged_sentences = treebank.tagged_sents()[:1000]

def simplify_tag(tag):
    if tag.startswith('N'): return "NOUN"
    elif tag.startswith('V'): return "VERB"
    elif tag.startswith('J'): return "ADJ"
    else: return "OTHER"

def preprocess_sentence(tagged_sentence):
    words, tags = zip(*tagged_sentence)
    words = [word.lower() for word in words]
    tags = [simplify_tag(tag) for tag in tags]
    return list(zip(words, tags))

In [3]:
#Applying the preprocessing and splitting the dataset
tagged_sentences = [preprocess_sentence(sentence) for sentence in tagged_sentences]

train_data, test_data = train_test_split(tagged_sentences, test_size=0.2, random_state=42)


In [4]:
#Solving the HMM problem
start_time = time.time()
hmm_tagger = HiddenMarkovModelTagger.train(train_data)
end_time = time.time()
print(f'Training time: {end_time - start_time} sec')


Training time: 0.03914356231689453 sec


In [5]:
#Accuracy Calculation
start_time = time.time()
accuracy = hmm_tagger.evaluate(test_data)
end_time = time.time()
print(f'Testing time: {end_time - start_time} sec')
print('Accuracy: ', accuracy)


Testing time: 0.19546222686767578 sec
Accuracy:  0.8950136755733221


  Function evaluate() has been deprecated.  Use accuracy(gold)
  instead.
  accuracy = hmm_tagger.evaluate(test_data)


In [6]:
#Printing example sentences
for i in range(3): 
    print("\nExample", i+1)
    test_sentence, test_tags = zip(*test_data[i])
    predicted_tags = hmm_tagger.tag(test_sentence)

    print("Sentence:", " ".join(test_sentence))
    print("True labels:", " ".join(test_tags))
    print("Predicted labels:", " ".join(tag for word, tag in predicted_tags))



Example 1
Sentence: *-2 filmed *-1 in lovely black and white by bill dill , the new york streets of `` sidewalk stories '' seem benign .
True labels: OTHER VERB OTHER OTHER ADJ NOUN OTHER NOUN OTHER NOUN NOUN OTHER OTHER NOUN NOUN NOUN OTHER OTHER NOUN NOUN OTHER VERB ADJ OTHER
Predicted labels: OTHER VERB OTHER OTHER ADJ NOUN OTHER NOUN OTHER NOUN VERB OTHER OTHER ADJ NOUN NOUN OTHER OTHER NOUN NOUN OTHER VERB VERB OTHER

Example 2
Sentence: in 1986-87 and 1987-88 , she applied for *rnr*-1 and won *rnr*-1 bonus pay under the reform law .
True labels: OTHER OTHER OTHER OTHER OTHER OTHER VERB OTHER OTHER OTHER VERB OTHER NOUN NOUN OTHER OTHER NOUN NOUN OTHER
Predicted labels: OTHER VERB OTHER VERB OTHER OTHER VERB OTHER OTHER OTHER VERB OTHER NOUN VERB OTHER OTHER NOUN NOUN OTHER

Example 3
Sentence: `` she was an inspirational lady ; she had it all together , '' says *t*-1 laura dobson , a freshman at the university of south carolina who *t*-90 had mrs. yeargin in the teacher-cadet cl