In [1]:
# This is a method to tag parts of speech in sentences
# Make sure NLTK is installed (for example with "pip install nltk").
# If you get a LookupError, the message names the NLTK data resource that is missing;
# download it with nltk.download("<resource name>") and run the cell again.
# For the sentence "This is one simple example.", you should get the following output:
# [('This', 'DET'), ('is', 'VERB'), ('one', 'NUM'), ('simple', 'ADJ'), ('example', 'NOUN'), ('.', '.')]

import nltk
from nltk import word_tokenize
# Sentence to analyse.
text = "This is one simple example."

# Break the sentence into word and punctuation tokens.
tokens = word_tokenize(text)

# Tag every token, asking for labels from the "universal" tagset.
tags = nltk.pos_tag(tokens, tagset="universal")

# Show the resulting list of (token, tag) pairs.
print(tags)

[('This', 'DET'), ('is', 'VERB'), ('one', 'NUM'), ('simple', 'ADJ'), ('example', 'NOUN'), ('.', '.')]


In [2]:
# This does the same thing as above, but provides explanations for the POS
import spacy
from spacy import displacy

# Load the en_core_web_sm pipeline and run it over the example sentence.
nlp = spacy.load("en_core_web_sm")
about_text = "This is one simple example."
about_doc = nlp(about_text)

# For each token, print its text, its tag (padded to 10 characters) beside
# its part of speech, and spaCy's explanation of the tag.
for token in about_doc:
    print(f"""
         TOKEN: {token.text}
         ===== 
         TAG: {token.tag_:10} POS: {token.pos_}
         EXPLANATION: {spacy.explain(token.tag_)}""")


         TOKEN: This
         ===== 
         TAG: DT         POS: PRON
         EXPLANATION: determiner

         TOKEN: is
         ===== 
         TAG: VBZ        POS: AUX
         EXPLANATION: verb, 3rd person singular present

         TOKEN: one
         ===== 
         TAG: CD         POS: NUM
         EXPLANATION: cardinal number

         TOKEN: simple
         ===== 
         TAG: JJ         POS: ADJ
         EXPLANATION: adjective (English), other noun-modifier (Chinese)

         TOKEN: example
         ===== 
         TAG: NN         POS: NOUN
         EXPLANATION: noun, singular or mass

         TOKEN: .
         ===== 
         TAG: .          POS: PUNCT
         EXPLANATION: punctuation mark, sentence closer


In [3]:
# extracting different POS 
# For the sentence "This is one simple example.", you should get the following output:
    # nouns: [example]; adjectives: [simple]

# Collect the tokens whose part of speech is NOUN and those whose part of
# speech is ADJ, keeping them in sentence order.
nouns = [token for token in about_doc if token.pos_ == "NOUN"]
adjectives = [token for token in about_doc if token.pos_ == "ADJ"]

# Print each heading on its own line, followed by the matching tokens.
print("nouns: ", nouns, sep="\n")
print("adjectives: ", adjectives, sep="\n")

nouns: 
[example]
adjectives: 
[simple]


In [4]:
# Find dependencies between parts of a sentence
# If loading the model with spacy.load("en_core_web_sm") fails with the error:
    # "Can't find model 'en_core_web_sm'. It doesn't seem to be a Python package or a valid path to a data directory."
    # Then run "python -m spacy download en_core_web_sm" in your command prompt/terminal
# You should get a diagram showing the dependencies of different components of the sentence
displacy.render(
    about_doc,
    style="dep",  # dependency view of the sentence
    jupyter=True,
)

In [6]:
# Print the dependency label of every token, in sentence order
print([word.dep_ for word in about_doc])

['nsubj', 'ROOT', 'nummod', 'amod', 'attr', 'punct']


In [7]:
# Print, for every token, its tag, the text of its syntactic head, its
# dependency label, and spaCy's explanation of that label.
for token in about_doc:
    # The "{expr = }" form echoes the expression text followed by the repr of
    # its value (e.g. token.dep_ = 'nsubj'), so the loop variable name is part
    # of the printed output.
    print(f"""
        TOKEN: {token.text}
        =====
        {token.tag_ = }
        {token.head.text = }
        {token.dep_ = }
        EXPLANATION: {spacy.explain(token.dep_)}""")


        TOKEN: This
        =====
        token.tag_ = 'DT'
        token.head.text = 'is'
        token.dep_ = 'nsubj'
        EXPLANATION: nominal subject

        TOKEN: is
        =====
        token.tag_ = 'VBZ'
        token.head.text = 'is'
        token.dep_ = 'ROOT'
        EXPLANATION: root

        TOKEN: one
        =====
        token.tag_ = 'CD'
        token.head.text = 'example'
        token.dep_ = 'nummod'
        EXPLANATION: numeric modifier

        TOKEN: simple
        =====
        token.tag_ = 'JJ'
        token.head.text = 'example'
        token.dep_ = 'amod'
        EXPLANATION: adjectival modifier

        TOKEN: example
        =====
        token.tag_ = 'NN'
        token.head.text = 'is'
        token.dep_ = 'attr'
        EXPLANATION: attribute

        TOKEN: .
        =====
        token.tag_ = '.'
        token.head.text = 'is'
        token.dep_ = 'punct'
        EXPLANATION: punctuation
