In [3]:
import nltk
from nltk import word_tokenize, pos_tag

# Download the 'punkt' resource
nltk.download('punkt')

# Download the 'averaged_perceptron_tagger' resource
nltk.download('averaged_perceptron_tagger') # Download the missing resource

# The sentence to analyze
sentence = "Machine learning models require large datasets for accurate predictions."

# Perform POS tagging
tokens = word_tokenize(sentence)
pos_tags = pos_tag(tokens)

# Dictionary of common POS tag descriptions
pos_descriptions = {
    'NN': 'Noun, singular',
    'NNS': 'Noun, plural',
    'VBP': 'Verb, non-3rd person singular present',
    'JJ': 'Adjective',
    'IN': 'Preposition',
    'DT': 'Determiner'
}

# Print detailed analysis
print("Detailed POS Tag Analysis:\n")
print(f"Original sentence: {sentence}\n")
print(f"{'Word':<15} {'POS Tag':<10} {'Description':<30} {'Explanation'}")
print("-" * 70)

for word, tag in pos_tags:
    description = pos_descriptions.get(tag, tag)

    # Provide specific explanations for each word
    explanation = {
        'Machine': "Part of compound noun 'machine learning'",
        'learning': "Part of compound noun 'machine learning'",
        'models': "Plural noun, subject of the sentence",
        'require': "Verb showing what the models need",
        'large': "Adjective describing 'datasets'",
        'datasets': "Plural noun, object of the verb",
        'for': "Preposition connecting to purpose",
        'accurate': "Adjective describing 'predictions'",
        'predictions': "Plural noun, indicating the outcome",
        '.': "Sentence-ending punctuation"
    }.get(word, "")

    print(f"{word:<15} {tag:<10} {description:<30} {explanation}")

# Summary statistics
tag_counts = {}
for _, tag in pos_tags:
    tag_counts[tag] = tag_counts.get(tag, 0) + 1

print("\nPOS Tag Distribution:")
for tag, count in tag_counts.items():
    print(f"{tag}: {count} occurrence(s)")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...


Detailed POS Tag Analysis:

Original sentence: Machine learning models require large datasets for accurate predictions.

Word            POS Tag    Description                    Explanation
----------------------------------------------------------------------
Machine         NN         Noun, singular                 Part of compound noun 'machine learning'
learning        NN         Noun, singular                 Part of compound noun 'machine learning'
models          NNS        Noun, plural                   Plural noun, subject of the sentence
require         VBP        Verb, non-3rd person singular present Verb showing what the models need
large           JJ         Adjective                      Adjective describing 'datasets'
datasets        NNS        Noun, plural                   Plural noun, object of the verb
for             IN         Preposition                    Preposition connecting to purpose
accurate        JJ         Adjective                      Adjective descri

[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
