In [101]:
import nltk
from nltk.corpus import treebank

# Fetch the NLTK data this notebook relies on.
nltk.download('punkt')
nltk.download('treebank')
# nltk.pos_tag (used further down for the off-the-shelf comparison) loads a
# pretrained perceptron model that is not part of the packages above; without
# it the tagging cell fails with a LookupError on a fresh machine.
# NOTE(review): the resource id depends on the NLTK release (newer releases
# look for the '_eng' variant), so both ids are requested here.
nltk.download('averaged_perceptron_tagger')
nltk.download('averaged_perceptron_tagger_eng')

[nltk_data] Downloading package punkt to
[nltk_data]     /home/administrator/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package treebank to
[nltk_data]     /home/administrator/nltk_data...
[nltk_data]   Package treebank is already up-to-date!


True

In [102]:
# Split the tagged corpus: the first SPLIT_INDEX sentences are used for
# training, everything after that is held out for testing.
SPLIT_INDEX = 3500
tagged_sents = treebank.tagged_sents()
training_set, test_set = tagged_sents[:SPLIT_INDEX], tagged_sents[SPLIT_INDEX:]

In [103]:
# Sanity check of the split: sizes first, then one example sentence from each part.
train_size = len(training_set)
print("Length of training set: ", train_size)
test_size = len(test_set)
print("Length of test set: ", test_size)

first_train_sentence = training_set[0]
print("First sentence in training set: ", first_train_sentence)
first_test_sentence = test_set[0]
print("First sentence in test set: ", first_test_sentence)

Length of training set:  3500
Length of test set:  414
First sentence in training set:  [('Pierre', 'NNP'), ('Vinken', 'NNP'), (',', ','), ('61', 'CD'), ('years', 'NNS'), ('old', 'JJ'), (',', ','), ('will', 'MD'), ('join', 'VB'), ('the', 'DT'), ('board', 'NN'), ('as', 'IN'), ('a', 'DT'), ('nonexecutive', 'JJ'), ('director', 'NN'), ('Nov.', 'NNP'), ('29', 'CD'), ('.', '.')]
First sentence in test set:  [('About', 'IN'), ('30', 'CD'), ('%', 'NN'), ('of', 'IN'), ('Ratners', 'NNP'), ("'s", 'POS'), ('profit', 'NN'), ('already', 'RB'), ('is', 'VBZ'), ('derived', 'VBN'), ('*-1', '-NONE-'), ('from', 'IN'), ('the', 'DT'), ('U.S.', 'NNP'), ('.', '.')]


In [104]:
def train_tagger(model, train_set, backoff=None):
    """Build a tagger by calling ``model`` on the training data.

    Args:
        model: Callable (in this notebook an NLTK tagger class) that is
            invoked as ``model(train_set, backoff=backoff)``.
        train_set: Training data, passed as the first positional argument.
        backoff: Optional value forwarded unchanged as the ``backoff`` keyword.

    Returns:
        Whatever ``model`` returns.
    """
    trained_tagger = model(train_set, backoff=backoff)
    return trained_tagger

In [105]:
def train_many_taggers(models: list, train_set, backoff: list):
    """Train several taggers on the same training data.

    Args:
        models: Tagger classes (or other callables accepted by
            ``train_tagger``), one per tagger to build.
        train_set: Training data shared by all taggers.
        backoff: One backoff entry per model, in the same order as ``models``;
            use ``None`` for a model that should have no backoff.

    Returns:
        A tuple with the trained taggers, in the order of ``models``.

    Raises:
        ValueError: If ``models`` and ``backoff`` differ in length. ``zip``
            would otherwise silently drop the surplus entries.
    """
    if len(models) != len(backoff):
        raise ValueError(
            f"Expected one backoff entry per model, got {len(models)} models "
            f"and {len(backoff)} backoff entries")
    # The loop variable is deliberately not called `backoff`, so the parameter
    # is not shadowed while iterating over it.
    return tuple(
        train_tagger(model, train_set, backoff=fallback)
        for model, fallback in zip(models, backoff))

In [106]:
def evaluate_tagger(tagger, test_set):
    """Score a single tagger on the test data.

    Args:
        tagger: Object exposing an ``accuracy(test_set)`` method.
        test_set: Data handed to ``tagger.accuracy``.

    Returns:
        The value returned by ``tagger.accuracy(test_set)``.
    """
    score = tagger.accuracy(test_set)
    return score

In [107]:
def evaluate_many_taggers(taggers: list, test_set):
    """Score every tagger in ``taggers`` on the same test data.

    Args:
        taggers: Taggers to evaluate; each is passed to ``evaluate_tagger``.
        test_set: Test data shared by all evaluations.

    Returns:
        A tuple of scores in the same order as ``taggers``.
    """
    return tuple(evaluate_tagger(candidate, test_set) for candidate in taggers)

In [108]:
# Baseline: train each tagger type on its own, with no backoff tagger.
baseline_models = [nltk.AffixTagger, nltk.UnigramTagger, nltk.BigramTagger, nltk.TrigramTagger]
no_backoffs = [None] * len(baseline_models)
affix_tagger, unigram_tagger, bigram_tagger, trigram_tagger = train_many_taggers(
    baseline_models, training_set, no_backoffs)

In [109]:
# Score the four baseline taggers on the held-out sentences.
baseline_taggers = [affix_tagger, unigram_tagger, bigram_tagger, trigram_tagger]
affix_accuracy, unigram_accuracy, bigram_accuracy, trigram_accuracy = evaluate_many_taggers(
    baseline_taggers, test_set)

In [110]:
def print_evaluation_result(name, accuracy):
    """Print one ``<name> accuracy: <accuracy>`` line to standard output.

    Args:
        name: Label for the tagger.
        accuracy: Score to show; formatted with its default string form.
    """
    report_line = f"{name} accuracy: {accuracy}"
    print(report_line)

In [111]:
def print_many_evaluation_results(names: list, accuracies: list):
    """Print one result line per (name, accuracy) pair.

    Args:
        names: Labels, in the same order as ``accuracies``.
        accuracies: Scores to report.

    The two lists are paired with ``zip``, so surplus entries in the longer
    list are not printed.
    """
    labelled_scores = zip(names, accuracies)
    for label, score in labelled_scores:
        print_evaluation_result(label, score)

In [112]:
# Report the baseline (no backoff) accuracies.
baseline_names = ["Affix", "Unigram", "Bigram", "Trigram"]
baseline_accuracies = [affix_accuracy, unigram_accuracy, bigram_accuracy, trigram_accuracy]
print_many_evaluation_results(baseline_names, baseline_accuracies)

Affix accuracy: 0.2756317165262852
Unigram accuracy: 0.8607803272340013
Bigram accuracy: 0.13466937748087907
Trigram accuracy: 0.08064672281924679


In [113]:
# Build a backoff chain: each new tagger receives the previously built tagger
# as its backoff (trigram -> bigram -> unigram -> affix).
new_unigram_tagger = train_tagger(
    model=nltk.UnigramTagger, train_set=training_set, backoff=affix_tagger)
new_bigram_tagger = train_tagger(
    model=nltk.BigramTagger, train_set=training_set, backoff=new_unigram_tagger)
new_trigram_tagger = train_tagger(
    model=nltk.TrigramTagger, train_set=training_set, backoff=new_bigram_tagger)

In [114]:
# Score the taggers that were trained with a backoff chain.
chained_taggers = [new_unigram_tagger, new_bigram_tagger, new_trigram_tagger]
new_unigram_accuracy, new_bigram_accuracy, new_trigram_accuracy = evaluate_many_taggers(
    chained_taggers, test_set)

In [115]:
# Report the accuracies of the backoff-chained taggers.
chained_names = ["New Unigram", "New Bigram", "New Trigram"]
chained_accuracies = [new_unigram_accuracy, new_bigram_accuracy, new_trigram_accuracy]
print_many_evaluation_results(chained_names, chained_accuracies)

New Unigram accuracy: 0.8985380966211637
New Bigram accuracy: 0.9080259463646045
New Trigram accuracy: 0.9071546132249008


# The tagging accuracy for the individual taggers without a backoff model is (much) lower than the tagging accuracy for the corresponding taggers when using a backoff model. Explain why this is the case. In particular explain this for the case of the BigramTagger.

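The backoff model increases the accuracy because an n-gram tagger on its own can only tag a token whose context it has seen during training; for any unseen context it returns `None`. With a backoff, exactly these cases are handed to the backoff tagger, which was itself really accurate (here ultimately the unigram and affix taggers), so far fewer tokens are left without a correct tag. The BigramTagger suffers most without a backoff: its context is the current word together with the tag of the previous word, so as soon as one token is tagged `None`, the context of the next token contains a `None` tag that never occurred in training, and the rest of the sentence is tagged `None` as well. This data sparsity explains why the bigram accuracy drops to about 13% (and the trigram accuracy to about 8%) without a backoff, while it reaches about 91% with the backoff chain.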

In [116]:
# Tag the held-out sentences with NLTK's pretrained off-the-shelf tagger.
# The words are taken from test_set itself (tags stripped) rather than from a
# second hard-coded slice of the corpus, so predictions and gold tags always
# cover the same sentences. pos_tag_sents tags all sentences in one call
# instead of invoking pos_tag once per sentence.
test_sentences = [[word for word, _tag in tagged_sentence] for tagged_sentence in test_set]
off_the_shelf_tagger_tokenization_result = nltk.pos_tag_sents(test_sentences)

In [124]:
def evaluator(results: list[list[tuple[str, str]]], expected: list[list[tuple[str, str]]]):
    """Compute token-level tagging accuracy of ``results`` against ``expected``.

    Args:
        results: Predicted sentences, each a list of ``(token, tag)`` pairs.
        expected: Gold sentences in the same order and with the same tokens.

    Returns:
        The fraction of tokens whose predicted tag equals the gold tag, or
        ``0.0`` when there are no tokens at all.

    Raises:
        ValueError: If the number of sentences differs, if a sentence pair
            differs in length, or if the tokens at the same position differ.
    """
    # zip() would silently ignore surplus sentences or tokens, which would hide
    # misaligned inputs and distort the score, so lengths are checked first.
    if len(results) != len(expected):
        raise ValueError(
            f"Sentence count mismatch: {len(results)} results vs {len(expected)} expected")

    matching_tags = 0
    total_tags = 0
    for predicted_sentence, gold_sentence in zip(results, expected):
        if len(predicted_sentence) != len(gold_sentence):
            raise ValueError(
                f"Sentence length mismatch: {len(predicted_sentence)} vs {len(gold_sentence)} tokens")
        for predicted_token, gold_token in zip(predicted_sentence, gold_sentence):
            if predicted_token[0] != gold_token[0]:
                raise ValueError("Token mismatch")
            total_tags += 1
            if predicted_token[1] == gold_token[1]:
                matching_tags += 1

    # Avoid a ZeroDivisionError when there is nothing to score.
    if total_tags == 0:
        return 0.0
    return matching_tags / total_tags

In [125]:
# Compare the off-the-shelf predictions with the gold tags of the test set.
off_the_shelf_evaluation = evaluator(
    results=off_the_shelf_tagger_tokenization_result,
    expected=test_set,
)
print(f"Off the shelf tagger accuracy: {off_the_shelf_evaluation}")

Off the shelf tagger accuracy: 0.8914706167102333
