In [1]:
!pip install -U nltk

import nltk
from nltk.corpus import treebank
from nltk.tag import hmm
from nltk.classify import MaxentClassifier

nltk.download('treebank')
nltk.download('maxent_treebank_pos_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')
nltk.download('averaged_perceptron_tagger')

corpus = list(treebank.tagged_sents())

train_data = corpus[:int(0.8 * len(corpus))]
test_data = corpus[int(0.8 * len(corpus)):]

hmm_tagger = hmm.HiddenMarkovModelTrainer().train(train_data)

hmm_accuracy = hmm_tagger.evaluate(test_data)
print(f"HMM Tagger Accuracy: {hmm_accuracy:.4f}")

maxent_tagger = nltk.MaxentClassifier.train(
    [(dict([((w, ), True)]), t) for sent in train_data for (w, t) in sent],
    algorithm='gis', trace=0, max_iter=10
)

correct, total = 0, 0
for sent in test_data:
    words, tags = zip(*sent)
    predicted = [maxent_tagger.classify(dict([((w,), True)])) for w in words]
    correct += sum(p == t for p, t in zip(predicted, tags))
    total += len(tags)
maxent_accuracy = correct / total
print(f"Maximum Entropy (Log-Linear) Tagger Accuracy: {maxent_accuracy:.4f}")

sentence = ["The", "quick", "brown", "fox", "jumps", "over", "the", "lazy", "dog"]

hmm_prediction = hmm_tagger.tag(sentence)
maxent_prediction = [(w, maxent_tagger.classify(dict([((w,), True)]))) for w in sentence]

print("HMM Prediction:", hmm_prediction)
print("MaxEnt Prediction:", maxent_prediction)



[nltk_data] Downloading package treebank to /root/nltk_data...
[nltk_data]   Unzipping corpora/treebank.zip.
[nltk_data] Downloading package maxent_treebank_pos_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/maxent_treebank_pos_tagger.zip.
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping chunkers/maxent_ne_chunker.zip.
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Unzipping corpora/words.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
  Function evaluate() has been deprecated.  Use accuracy(gold)
  instead.
  hmm_accuracy = hmm_tagger.evaluate(test_data)
  X[i, j] = self._transitions[si].logprob(self._states[j])
  O[i, k] = self._output_logprob(si, self._symbols[k])
  P[i] = self._priors.logprob(si)
  O[i, k] = self._output_logprob(si, self._symbols[k

HMM Tagger Accuracy: 0.3647
Maximum Entropy (Log-Linear) Tagger Accuracy: 0.8625
HMM Prediction: [('The', 'DT'), ('quick', 'JJ'), ('brown', 'NNP'), ('fox', 'NNP'), ('jumps', 'NNP'), ('over', 'NNP'), ('the', 'NNP'), ('lazy', 'NNP'), ('dog', 'NNP')]
MaxEnt Prediction: [('The', 'DT'), ('quick', 'JJ'), ('brown', '``'), ('fox', '``'), ('jumps', '``'), ('over', 'IN'), ('the', 'DT'), ('lazy', '``'), ('dog', '``')]
