In [1]:
import nltk

In [2]:
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


True

In [3]:
from nltk.chunk import RegexpParser

In [4]:
text = "The future belongs to those who believe in the beauty of their dreams."

In [5]:
tokenize = nltk.word_tokenize(text)
tagged_tk = nltk.pos_tag(tokenize)

In [6]:
print("POS tags: ")
for token, tag in tagged_tk:
  print(f"- {token}/{tag}")

POS tags: 
- The/DT
- future/JJ
- belongs/NNS
- to/TO
- those/DT
- who/WP
- believe/VBP
- in/IN
- the/DT
- beauty/NN
- of/IN
- their/PRP$
- dreams/NNS
- ./.


In [7]:
grammar = r"""
    NP: {<DT|JJ|NN.*>+}          # Chunk sequences of DT, JJ, NN
    PP: {<IN><NP>}                # Chunk prepositions followed by NP
    VP: {<VB.*><NP|PP|CLAUSE>+$}  # Chunk verbs and their arguments
    CLAUSE: {<NP><VP>}            # Chunk NP, VP pairs
"""

In [8]:
chunk_parser = RegexpParser(grammar)
chunked_sentence = chunk_parser.parse(tagged_tk)
print("Chunked : ",chunked_sentence)

Chunked :  (S
  (NP The/DT future/JJ belongs/NNS)
  to/TO
  (NP those/DT)
  who/WP
  believe/VBP
  (PP in/IN (NP the/DT beauty/NN))
  of/IN
  their/PRP$
  (NP dreams/NNS)
  ./.)
