In [None]:
import nltk
nltk.download('all')

1.	Write a Python program using NLTK to define a context-free grammar (CFG) that can parse simple sentences like "The cat sat on the mat." Use this grammar to generate the parse tree for the sentence.



In [29]:
import nltk
from nltk import CFG

# Define a context-free grammar (without comments)
grammar = CFG.fromstring("""
    S -> NP VP
    NP -> Det N
    VP -> V PP | V
    PP -> P NP
    Det -> 'the'
    N -> 'cat' | 'mat'
    V -> 'sat'
    P -> 'on'
""")

# Input sentence
sentence = "the cat sat on the mat"

# Tokenize the sentence into words
tokens = sentence.split()

# Create a parser using the defined grammar
parser = nltk.ChartParser(grammar)

# Parse the sentence and generate the parse tree
print("Parse Tree:")
for tree in parser.parse(tokens):
    print(tree)
    tree.pretty_print()

Parse Tree:
(S
  (NP (Det the) (N cat))
  (VP (V sat) (PP (P on) (NP (Det the) (N mat)))))
             S                     
      _______|_______               
     |               VP            
     |        _______|___           
     |       |           PP        
     |       |    _______|___       
     NP      |   |           NP    
  ___|___    |   |        ___|___   
Det      N   V   P      Det      N 
 |       |   |   |       |       |  
the     cat sat  on     the     mat



2.	Using NLTK, write a function that takes a sentence as input and returns all possible parse trees using a given CFG. Demonstrate this function with the sentence "I saw the man with the telescope."


In [5]:
import nltk
from nltk import CFG

# Define a CFG
grammar = CFG.fromstring("""
  S -> NP VP
  NP -> Pronoun | Det N | Det N PP
  VP -> V NP | V NP PP
  PP -> P NP
  Pronoun -> 'I' | 'he' | 'she'
  Det -> 'the' | 'a'
  N -> 'man' | 'telescope'
  V -> 'saw'
  P -> 'with'
""")

# Function to return all parse trees
def parse_all_trees(sentence):
    parser = nltk.ChartParser(grammar)
    trees = list(parser.parse(sentence))
    return trees

# Demonstrating with "I saw the man with the telescope."
sentence = ['I', 'saw', 'the', 'man', 'with', 'the', 'telescope']
trees = parse_all_trees(sentence)

# Display all parse trees
for tree in trees:
    tree.pretty_print()

         S                                    
    _____|___________                          
   |                 VP                       
   |      ___________|________                 
   |     |       |            PP              
   |     |       |        ____|___             
   NP    |       NP      |        NP          
   |     |    ___|___    |     ___|______      
Pronoun  V  Det      N   P   Det         N    
   |     |   |       |   |    |          |     
   I    saw the     man with the     telescope

         S                                
    _____|_______                          
   |             VP                       
   |      _______|___                      
   |     |           NP                   
   |     |    _______|____                 
   |     |   |   |        PP              
   |     |   |   |    ____|___             
   NP    |   |   |   |        NP          
   |     |   |   |   |     ___|______      
Pronoun  V  Det  N   P   Det         N    

3.	Write a Python program using NLTK to create a recursive descent parser for a given CFG. Parse the sentence "She eats a sandwich." and display the parse tree.


In [6]:
import nltk
from nltk import CFG

# Define a CFG
grammar = CFG.fromstring("""
  S -> NP VP
  NP -> Pronoun
  VP -> V NP
  Pronoun -> 'She'
  V -> 'eats'
  NP -> Det N
  Det -> 'a'
  N -> 'sandwich'
""")

# Recursive Descent Parser using NLTK's Top-Down ChartParser
parser = nltk.ChartParser(grammar)

# Sentence to parse
sentence = ['She', 'eats', 'a', 'sandwich']

# Parse and print the parse tree
for tree in parser.parse(sentence):
    tree.pretty_print()

              S                  
    __________|___                
   |              VP             
   |      ________|___            
   NP    |            NP         
   |     |         ___|_____      
Pronoun  V       Det        N    
   |     |        |         |     
  She   eats      a      sandwich



4.	Using NLTK, write a program to extract noun phrases from a sentence using a chunk grammar. Apply it to the sentence "The quick brown fox jumps over the lazy dog."


In [7]:
import nltk
from nltk import pos_tag, word_tokenize

# Define a chunk grammar for noun phrases (NP)
chunk_grammar = """
  NP: {<DT>?<JJ>*<NN.*>}   # Det, Adjective(s), Noun
"""
chunk_parser = nltk.RegexpParser(chunk_grammar)

# Sentence to process
sentence = "The quick brown fox jumps over the lazy dog."

# Tokenize and POS tag the sentence
tokens = word_tokenize(sentence)
tags = pos_tag(tokens)

# Apply the chunk grammar
tree = chunk_parser.parse(tags)

# Display the chunked tree
tree.pretty_print()

                                S                                          
     ___________________________|_______________________________            
    |        |     |            NP               NP             NP         
    |        |     |     _______|________        |       _______|______     
jumps/VBZ over/IN ./. The/DT quick/JJ brown/NN fox/NN the/DT lazy/JJ dog/NN



5.	Write a Python function using NLTK that takes a sentence as input and returns the verb phrases (VP) using a chunk grammar. Demonstrate this function with the sentence "The cat is sleeping on the mat."


In [8]:
import nltk
from nltk import pos_tag, word_tokenize

# Define a chunk grammar for verb phrases (VP)
chunk_grammar = """
  VP: {<VB.*><NP|PP|CLAUSE>*}    # Verb followed by NP, PP, or clauses
"""
chunk_parser = nltk.RegexpParser(chunk_grammar)

# Sentence to process
sentence = "The cat is sleeping on the mat."

# Tokenize and POS tag the sentence
tokens = word_tokenize(sentence)
tags = pos_tag(tokens)

# Apply the chunk grammar
tree = chunk_parser.parse(tags)

# Display the chunked tree
tree.pretty_print()

                      S                                  
   ___________________|___________________________        
  |      |      |     |      |     |    VP        VP     
  |      |      |     |      |     |    |         |       
The/DT cat/NN on/IN the/DT mat/NN ./. is/VBZ sleeping/VBG



6.	Write a Python program using NLTK to define a probabilistic context-free grammar (PCFG) and generate a parse tree for the sentence "The cat sleeps."


In [37]:
import nltk
from nltk import PCFG

# Define a Probabilistic Context-Free Grammar (PCFG)
pcfg = PCFG.fromstring("""
  S -> NP VP [1.0]
  NP -> Det N [1.0]
  VP -> V [1.0]
  Det -> 'The' [0.5] | 'a' [0.5]
  N -> 'cat' [1.0]
  V -> 'sleeps' [1.0]
""")

# Create a parser using the PCFG
parser = nltk.ChartParser(pcfg)

# Sentence to parse
sentence = ['The', 'cat', 'sleeps']

# Parse the sentence and print the tree
for tree in parser.parse(sentence):
    tree.pretty_print()

         S        
      ___|____     
     NP       VP  
  ___|___     |    
Det      N    V   
 |       |    |    
The     cat sleeps



7.	Write a Python program to visualize the chunk tree for a given sentence using a noun phrase chunking grammar.


In [10]:
import nltk
from nltk import pos_tag, word_tokenize
from nltk.chunk import RegexpParser

# Define a chunk grammar for noun phrases (NP)
chunk_grammar = """
  NP: {<DT>?<JJ>*<NN.*>}  # Determiner (optional), Adjectives (optional), Noun
"""
chunk_parser = RegexpParser(chunk_grammar)

# Sentence to process
sentence = "The quick brown fox jumped over the lazy dog."

# Tokenize and POS tag the sentence
tokens = word_tokenize(sentence)
tags = pos_tag(tokens)

# Apply the chunk grammar
tree = chunk_parser.parse(tags)

# Visualize the chunk tree
tree.pretty_print()

                                 S                                          
     ____________________________|_______________________________            
    |         |     |            NP               NP             NP         
    |         |     |     _______|________        |       _______|______     
jumped/VBD over/IN ./. The/DT quick/JJ brown/NN fox/NN the/DT lazy/JJ dog/NN



8.	Write a Python program that extracts prepositional phrases (PP) from a given text using a chunking grammar.


In [38]:
import nltk
from nltk import pos_tag, word_tokenize
from nltk.chunk import RegexpParser

# Download necessary NLTK data files
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

# Define a chunk grammar for prepositional phrases (PP)
chunk_grammar = r"""
    PP: {<IN><DT>?<JJ>*<NN>}  # Prepositional phrase: preposition + optional determiner + adjectives + noun
"""

# Create a chunk parser
chunk_parser = RegexpParser(chunk_grammar)

# Function to extract prepositional phrases
def extract_prepositional_phrases(text):
    # Tokenize and POS tag the text
    tokens = word_tokenize(text)
    pos_tags = pos_tag(tokens)

    # Parse the POS-tagged text using the chunk grammar
    tree = chunk_parser.parse(pos_tags)

    # Extract prepositional phrases
    prepositional_phrases = []
    for subtree in tree:
        if isinstance(subtree, nltk.Tree) and subtree.label() == 'PP':
            prepositional_phrases.append(" ".join(word for word, pos in subtree.leaves()))

    return prepositional_phrases

# Input text
text = "The cat sat on the mat. The dog barked at the stranger in the park."

# Extract prepositional phrases
prepositional_phrases = extract_prepositional_phrases(text)

# Print the extracted prepositional phrases
print("Prepositional Phrases:")
for pp in prepositional_phrases:
    print(pp)

Prepositional Phrases:
on the mat
at the stranger
in the park


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


9.	Write a Python program that extracts all adjective phrases (ADJP) from a given sentence using a chunking grammar.


In [12]:
import nltk
from nltk import pos_tag, word_tokenize
from nltk.chunk import RegexpParser

# Define a chunk grammar for adjective phrases (ADJP)
chunk_grammar = """
  ADJP: {<JJ.*>+}  # One or more adjectives
"""
chunk_parser = RegexpParser(chunk_grammar)

# Sentence to process
sentence = "The very quick brown fox jumped over the lazy dog."

# Tokenize and POS tag the sentence
tokens = word_tokenize(sentence)
tags = pos_tag(tokens)

# Apply the chunk grammar
tree = chunk_parser.parse(tags)

# Extract and print adjective phrases
for subtree in tree.subtrees(filter=lambda t: t.label() == 'ADJP'):
    print(subtree)

(ADJP quick/JJ)
(ADJP lazy/JJ)


10.	Write a Python program that extracts all verb phrases (VP) from a given sentence using a chunking grammar.


In [13]:
import nltk
from nltk import pos_tag, word_tokenize
from nltk.chunk import RegexpParser

# Define a chunk grammar for verb phrases (VP)
chunk_grammar = """
  VP: {<VB.*><NP|PP|CLAUSE>*}  # Verb followed by NP, PP, or clauses
"""
chunk_parser = RegexpParser(chunk_grammar)

# Sentence to process
sentence = "The cat is sleeping on the mat."

# Tokenize and POS tag the sentence
tokens = word_tokenize(sentence)
tags = pos_tag(tokens)

# Apply the chunk grammar
tree = chunk_parser.parse(tags)

# Extract and print verb phrases
for subtree in tree.subtrees(filter=lambda t: t.label() == 'VP'):
    print(subtree)

(VP is/VBZ)
(VP sleeping/VBG)


11.	Write a Python program to extract complex noun phrases (NP) containing nested structures using chunking grammar.


In [14]:
import nltk
from nltk import pos_tag, word_tokenize
from nltk.chunk import RegexpParser

# Define a chunk grammar for complex noun phrases (NP) with nested structures
chunk_grammar = """
  NP: {<DT>?<JJ>*<NN.*>+}  # Simple noun phrases
      {<DT><JJ>*<NN><PP>}  # Noun phrase with a prepositional phrase
"""
chunk_parser = RegexpParser(chunk_grammar)

# Sentence to process
sentence = "The quick brown fox with a shiny tail jumped over the lazy dog."

# Tokenize and POS tag the sentence
tokens = word_tokenize(sentence)
tags = pos_tag(tokens)

# Apply the chunk grammar
tree = chunk_parser.parse(tags)

# Extract and print complex noun phrases
for subtree in tree.subtrees(filter=lambda t: t.label() == 'NP'):
    print(subtree)

(NP The/DT quick/JJ brown/NN fox/NN)
(NP a/DT shiny/JJ tail/NN)
(NP the/DT lazy/JJ dog/NN)


12.	Write a Python program using NLTK to define and apply a probabilistic grammar to generate a parse tree for a given sentence.

In [21]:
import nltk
from nltk import PCFG

# Define a probabilistic context-free grammar (PCFG)
pcfg = PCFG.fromstring("""
  S -> NP VP [1.0]
  NP -> Det N [0.5] | Det Adj N [0.5]
  VP -> V NP [1.0]
  Det -> 'The' [0.5] | 'a' [0.5]
  N -> 'cat' [0.5] | 'dog' [0.5]
  V -> 'sleeps' [0.5] | 'eats' [0.5]
  Adj -> 'lazy' [1.0]
""")

# Create a parser
parser = nltk.ChartParser(pcfg)

# Sentence to parse
sentence = ['The', 'cat', 'sleeps']

# Parse the sentence and print the tree
for tree in parser.parse(sentence):
    tree.pretty_print()

(S
  (NP The/DT quick/JJ brown/NN fox/NN)
  with/IN
  (NP a/DT shiny/JJ tail/NN)
  jumped/VBD
  over/IN
  (NP the/DT lazy/JJ dog/NN)
  ./.)
