In [None]:
!pip install nltk



In [7]:
import nltk
from nltk.parse import ChartParser
from nltk import CFG

# Toy context-free grammar whose lexicon covers the words of the example
# sentences parsed below. The `#` lines inside the raw string are comments in
# the grammar text itself, not Python comments.
# "flying" is listed both as Vbg and as Adj, which is what yields the two
# readings of "flying planes".
# NOTE(review): VP lists `Cop AdjP` and `Vtrans NP` directly and also reaches
# them through `VPb`, so such a VP has two derivations with the same structure
# (extra parses that differ only by a VPb node) — confirm this is intended.
g = CFG.fromstring(r"""
# ----- Sentence Structure ---
S -> NP VP

# -- Verb Phrases?
VP -> MD VPb | Cop AdjP | Vtrans NP | Aux Vbg | VP CP | VPb
VPb -> Cop AdjP | Vtrans NP
CP -> 'more' 'than' NP


NP -> Det N | AdjP N | Vbg NP | NP PP | NP Conj NP | N
#  -> 'the groom' | 'dangerous flying planes'  |
PP -> P NP
AdjP -> Adj | Adj AdjP

# ----- Lexicon ---------
Det -> 'the'
Conj -> 'and'
P -> 'of'
MD -> 'can'
Cop -> 'be'
Aux -> 'were'
Vtrans -> 'loves'
Vbg -> 'flying'
Adj -> 'dangerous' | 'flying'
N -> 'planes' | 'parents' | 'bride' | 'groom'
""")

def toks(s):
    """Split a sentence into lowercase, whitespace-separated tokens.

    Surrounding whitespace and sentence-final punctuation (``.``, ``!``,
    ``?``) are removed before splitting, so the terminator never ends up
    attached to the last token.

    Args:
        s: The sentence as a plain string.

    Returns:
        A list of lowercase token strings; empty if ``s`` contains no tokens.
    """
    # Trim whitespace first: a trailing space or newline would otherwise hide
    # the final punctuation mark from rstrip and leave it on the last word.
    s = s.strip().rstrip(".!?").lower()
    return s.split()

### **2** -  Structure Trees

In [8]:
# Chart parser built from the grammar `g` defined earlier in the notebook.
parser = ChartParser(g)

# Example sentences; this list is reused by the dependency-parsing cell.
sentences = [
    "Flying planes can be dangerous.",
    "The parents of the bride and the groom were flying.",
    "The groom loves dangerous planes more than the bride.",
]

for sentence in sentences:
    print(sentence)
    # Materialise every parse so the total can be reported.
    trees = list(parser.parse(toks(sentence)))
    print("parses:", len(trees))
    # Draw at most the first three trees to keep the output readable.
    for tree in trees[:3]:
        tree.pretty_print()
    print()

Flying planes can be dangerous.
parses: 2
                   S                   
         __________|___                 
        |              VP              
        |           ___|___             
        NP         |      VPb          
   _____|____      |    ___|______      
 AdjP        |     |   |         AdjP  
  |          |     |   |          |     
 Adj         N     MD Cop        Adj   
  |          |     |   |          |     
flying     planes can  be     dangerous

                   S                   
         __________|___                 
        |              VP              
        |           ___|___             
        NP         |      VPb          
   _____|____      |    ___|______      
  |          NP    |   |         AdjP  
  |          |     |   |          |     
 Vbg         N     MD Cop        Adj   
  |          |     |   |          |     
flying     planes can  be     dangerous


The parents of the bride and the groom were flying.
parses: 2
   

### **3** -  Dependency Parser

In [9]:
import spacy
# Load the spaCy pipeline named "en_core_web_sm".
# NOTE(review): no visible cell installs spaCy or this model — confirm they
# are already available in the environment.
nlp = spacy.load("en_core_web_sm")


# For each example sentence, print one line per token:
# the token text, its dependency label, and the text of its head token.
for s in sentences:
    print(f"\n{s}")
    doc = nlp(s)
    for token in doc:
        # Fixed-width fields (10 and 12 characters) line the columns up.
        print(f"{token.text:10} -> {token.dep_:12} ({token.head.text})")


Flying planes can be dangerous.
Flying     -> amod         (planes)
planes     -> nsubj        (be)
can        -> aux          (be)
be         -> ROOT         (be)
dangerous  -> acomp        (be)
.          -> punct        (be)

The parents of the bride and the groom were flying.
The        -> det          (parents)
parents    -> nsubj        (flying)
of         -> prep         (parents)
the        -> det          (bride)
bride      -> pobj         (of)
and        -> cc           (parents)
the        -> det          (groom)
groom      -> conj         (parents)
were       -> aux          (flying)
flying     -> ROOT         (flying)
.          -> punct        (flying)

The groom loves dangerous planes more than the bride.
The        -> det          (groom)
groom      -> nsubj        (loves)
loves      -> ROOT         (loves)
dangerous  -> amod         (planes)
planes     -> dobj         (loves)
more       -> advmod       (loves)
than       -> prep         (more)
the        -> det       

Constituency (phrase-structure) and dependency parsing help a computer understand how the words in a sentence are structured: who does what, to whom, and how. For example, they show which word is the subject, which one is the object, and which words modify which.

In a question-answering system, this is very useful. If the question is “Who loves dangerous planes?”, the parser helps the program see that “the groom” is the one doing the action “loves” and that “dangerous planes” is what he loves (in the dependency output above, `groom` is the `nsubj` of `loves` and `planes` is its `dobj`). That way, the computer can find the right answer based on the relationships between words, not just by matching keywords.

In some ways, parsing with a hand-written grammar (as in the structure-tree section above) is also more deterministic, since the parser follows explicit grammar rules.