In [1]:
import nltk
from nltk import RegexpParser
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag


# Fetch the tokenizer and POS-tagger models that word_tokenize / pos_tag rely on.
for nltk_resource in ('punkt', 'averaged_perceptron_tagger'):
    nltk.download(nltk_resource)

sentence = "The quick brown fox jumps over the lazy dog"

# Split the sentence into tokens, then pair every token with a POS tag.
tokens = word_tokenize(sentence)
tagged = pos_tag(tokens)

# Single chunk rule: optional <DT>, any number of <JJ>, then exactly one <NN>.
# NOTE(review): because only one <NN> is accepted, a run of NN-tagged tokens
# is split into separate chunks; <NN.*>+ would keep such runs together.
chunk_grammar = r"""
NP: {<DT>?<JJ>*<NN>}
"""
chunk_parser = RegexpParser(chunk_grammar)
chunks = chunk_parser.parse(tagged)

# Visit only the subtrees labelled NP and print each one in bracketed form.
for subtree in chunks.subtrees(filter=lambda node: node.label() == 'NP'):
    print(subtree)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


(NP The/DT quick/JJ brown/NN)
(NP fox/NN)
(NP the/DT lazy/JJ dog/NN)


In [2]:
import nltk
import os

# Add a system-wide directory to the locations NLTK searches for data files.
nltk.data.path.append("/usr/local/share/nltk_data")

# Fetch the tokenizer and POS-tagger models (no-op when already up to date).
for nltk_resource in ('punkt', 'averaged_perceptron_tagger'):
    nltk.download(nltk_resource)

text = "The quick brown fox jumps over the lazy dog."

# Tokenize the text and tag every token with its part of speech.
words = nltk.word_tokenize(text)
pos_tags = nltk.pos_tag(words)

# Single NP rule; the trailing '#' text inside the string is a grammar comment.
chunk_grammar = r"""
    NP: {<DT>?<JJ>*<NN>} # Chunk sequences of DT, JJ, NN
"""
chunk_parser = nltk.RegexpParser(chunk_grammar)
chunked_text = chunk_parser.parse(pos_tags)

# Collect each NP chunk as a plain space-joined string (tags dropped).
noun_phrases = []
for subtree in chunked_text.subtrees():
    if subtree.label() != 'NP':
        continue  # skip the root and any non-NP subtree
    leaf_words = [word for word, _tag in subtree.leaves()]
    noun_phrases.append(' '.join(leaf_words))

# Report the input followed by one bullet line per extracted phrase.
print("Original Text:", text)
print("Noun Phrases:")
for phrase in noun_phrases:
    print(f"- {phrase}")


Original Text: The quick brown fox jumps over the lazy dog.
Noun Phrases:
- The quick brown
- fox
- the lazy dog


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
