In [2]:
import nltk
from nltk.parse.generate import generate
import spacy
nltk.download("averaged_perceptron_tagger")
nltk.download("maxent_ne_chunker")
nltk.download("words")

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping chunkers/maxent_ne_chunker.zip.
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Unzipping corpora/words.zip.


True

In [3]:
# 1. Define a grammar and obtain the sentences from the grammar?
import nltk
from nltk.parse.generate import generate

In [4]:
grammar = nltk.CFG.fromstring("""
    S -> NP VP
    NP -> Det N | Det N PP
    VP -> V NP | VP PP
    PP -> P NP
    Det -> 'The' | 'a'
    N -> 'cat' | 'dog' | 'garden'
    V -> 'chased' | 'sat'
    P -> 'in' | 'on'
""")

In [5]:
for sentence in generate(grammar, n=10):
  print(' '.join(sentence))

The cat chased The cat
The cat chased The dog
The cat chased The garden
The cat chased a cat
The cat chased a dog
The cat chased a garden
The cat chased The cat in The cat
The cat chased The cat in The dog
The cat chased The cat in The garden
The cat chased The cat in a cat


In [23]:
# 2. Define a grammar and obtain a parse tree from the grammar by two approaches: a. Left Most Derivation b. Right Most Derivation.
import nltk
from nltk import CFG

# Define the grammar
grammar = CFG.fromstring("""
    S -> A | B
    A -> 'a' A | 'a'
    B -> 'b' B | 'b'
""")

# Create a parser
parser = nltk.ChartParser(grammar)

# Define a function for leftmost derivation
def leftmost_derivation(tree):
    if isinstance(tree, nltk.Tree):
        yield tree.label()
        for subtree in tree:
            yield from leftmost_derivation(subtree)
    else:
        yield tree

# Define a function for rightmost derivation
def rightmost_derivation(tree):
    if isinstance(tree, nltk.Tree):
        yield tree.label()
        for subtree in reversed(tree):
            yield from rightmost_derivation(subtree)
    else:
      yield tree


In [24]:

# Input string for parsing
input_string = 'aaa'

# Parse the input string
for tree in parser.parse(input_string):
    print("Parse Tree:", tree)

    # Leftmost Derivation
    print("Leftmost Derivation:", ' -> '.join(leftmost_derivation(tree)))

    # Rightmost Derivation
    print("Rightmost Derivation:", ' -> '.join(rightmost_derivation(tree)))


Parse Tree: (S (A a (A a (A a))))
Leftmost Derivation: S -> A -> a -> A -> a -> A -> a
Rightmost Derivation: S -> A -> A -> A -> a -> a -> a


In [14]:
# 3. Obtain the generated parse tree by Regex parser?
regex_grammar = r"""
  S: {<DT>?<JJ>*<NN>}
"""
parser = nltk.RegexpParser(regex_grammar)
tagged = nltk.pos_tag(tokens)
tree = parser.parse(tagged)
print("Regex Parse Tree:")
print(tree)


Regex Parse Tree:
(S Mary/NNP saw/VBD (S a/DT dog/NN) in/IN (S the/DT park/NN))


In [29]:
# 4. Implement Dependency Parser to generate the parse tree.
import spacy

# Load the English language model
nlp = spacy.load("en_core_web_sm")

# Example sentence
sentence = "The quick brown fox jumps over the lazy dog."

# Process the sentence with spaCy
doc = nlp(sentence)

# Print the dependency parse tree
for token in doc:
    print(f"{token.text} <--{token.dep_}-- {token.head.text}")

# Alternatively, you can visualize the dependency parse tree using displacy
from spacy import displacy

# Visualize the dependency parse tree
displacy.render(doc, style="dep", options={'distance':90})


The <--det-- fox
quick <--amod-- fox
brown <--amod-- fox
fox <--nsubj-- jumps
jumps <--ROOT-- jumps
over <--prep-- jumps
the <--det-- dog
lazy <--amod-- dog
dog <--pobj-- over
. <--punct-- jumps


In [25]:
# 5. Implement chunking using shallow parsing?
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [26]:
def shallow_parse(text):
  chunk_grammar = r'NP: {<DT>?<JJ>*<NN>}'
  chunk_parser = nltk.RegexpParser(chunk_grammar)
  word_token = nltk.word_tokenize(text)
  word_pos = nltk.pos_tag(word_token)
  chunked_sentence = chunk_parser.parse(word_pos)
  chunked_sentence.pretty_print()

In [27]:
text = 'The big red and yellow bird flew over the house'
shallow_parse(text)

                               S                                                      
   ____________________________|___________________________________________            
  |      |      |      |       |        |               NP                 NP         
  |      |      |      |       |        |         ______|_____        _____|_____      
The/DT big/JJ red/JJ and/CC flew/VBD over/IN yellow/JJ     bird/NN the/DT     house/NN



In [28]:
# 6. Obtain the Named entity relations in the document?
print("Named Entity Recognition:")
doc = nlp(sentence)
for ent in doc.ents:
    print(ent.text, ent.label_)


Named Entity Recognition:
Mary PERSON
