<a href="https://colab.research.google.com/github/MapariPrajwal/NLP/blob/main/Shallow_Parsing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import nltk
custom_grammar = r"""
    NP: {<DT>?<JJ>*<NN>}  # Noun Phrase
    ADJP: {<RB><JJ>}       # Adjective Phrase
    ADVP: {<RB>}           # Adverb Phrase
    VP: {<VB.*><NP|PP>*}   # Verb Phrase
"""
custom_chunk_parser = nltk.RegexpParser(custom_grammar)


In [10]:
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Unzipping corpora/words.zip.


True

In [6]:
sentence = "The quick brown fox jumps over the lazy dog."
tokens = nltk.word_tokenize(sentence)
pos_tags = nltk.pos_tag(tokens)
custom_parsed_tree = custom_chunk_parser.parse(pos_tags)
print("Customized Grammar Chunking:")
print(custom_parsed_tree)

Customized Grammar Chunking:
(S
  (NP The/DT quick/JJ brown/NN)
  (NP fox/NN)
  (VP jumps/VBZ)
  over/IN
  (NP the/DT lazy/JJ dog/NN)
  ./.)


In [11]:
inbuilt_parsed_tree = nltk.ne_chunk(pos_tags)
print("\nInbuilt Chunking:")
print(inbuilt_parsed_tree)


Inbuilt Chunking:
(S
  The/DT
  quick/JJ
  brown/NN
  fox/NN
  jumps/VBZ
  over/IN
  the/DT
  lazy/JJ
  dog/NN
  ./.)


In [12]:
def extract_chunks(tree, chunk_label):
    chunks = []
    for subtree in tree.subtrees(filter=lambda t: t.label() == chunk_label):
        chunks.append(" ".join([word for word, tag in subtree.leaves()]))
    return chunks
custom_np_chunks = extract_chunks(custom_parsed_tree, "NP")
custom_adjp_chunks = extract_chunks(custom_parsed_tree, "ADJP")
custom_advp_chunks = extract_chunks(custom_parsed_tree, "ADVP")
custom_vp_chunks = extract_chunks(custom_parsed_tree, "VP")
inbuilt_np_chunks = extract_chunks(inbuilt_parsed_tree, "NP")
inbuilt_vp_chunks = extract_chunks(inbuilt_parsed_tree, "VP")

print("\nCustomized NP Chunks:", custom_np_chunks)
print("Inbuilt NP Chunks:", inbuilt_np_chunks)
print("\nCustomized ADJP Chunks:", custom_adjp_chunks)
print("\nCustomized ADVP Chunks:", custom_advp_chunks)
print("\nCustomized VP Chunks:", custom_vp_chunks)
print("Inbuilt VP Chunks:", inbuilt_vp_chunks)


Customized NP Chunks: ['The quick brown', 'fox', 'the lazy dog']
Inbuilt NP Chunks: []

Customized ADJP Chunks: []

Customized ADVP Chunks: []

Customized VP Chunks: ['jumps']
Inbuilt VP Chunks: []
