# Syntactic Analysis

Let's analyze the structure of sentences!

In [51]:
import spacy
from collections import Counter

In [2]:
nlp = spacy.load('en_core_web_lg')

In [4]:
dubliners = open('../Readings/content/texts/dubliners.md').read()

In [5]:
dublinersStories = dubliners.split('\n## ')

In [6]:
len(dublinersStories)

16

In [120]:
boardingHouse = dublinersStories[7]

In [121]:
boardingHouseDoc = nlp(boardingHouse)

In [122]:
boardingHouseSents = list(boardingHouseDoc.sents)

In [123]:
devil = boardingHouseSents[5]

In [124]:
devilW = devil[8]

In [125]:
type(devilW)

spacy.tokens.token.Token

In [126]:
devil

Mr. Mooney began to go to the devil.

Methods useful for syntactic analysis are: `ancestors`, `children`, `root`, and so on. 

In [127]:
spacy.displacy.render(devil)

In [128]:
fox = nlp('The quick brown fox jumped over the lazy dogs.')

In [129]:
spacy.displacy.render(fox)

In [130]:
"Mr. Darcy was a man who was certainly not as handsome and his father."

'Mr. Darcy was a man who was certainly not as handsome and his father.'

In [131]:
devilW

devil

In [132]:
devilW.sent

Mr. Mooney began to go to the devil.

In [133]:
list(devilW.ancestors)

[to, go, began]

In [134]:
list(devilW.children)

[the]

In [135]:
devilW.conjuncts

()

In [136]:
devil.root.

SyntaxError: invalid syntax (<ipython-input-136-67347a90e183>, line 1)

In [None]:
rootVerbs = [sent.root.lemma_ for sent in boardingHouseSents if sent.root.is_alpha]

In [None]:
Counter(rootVerbs).most_common(20)

In [137]:
"I may have gone to the store."

'I may have gone to the store.'

In [138]:
boardingHousePropns = [w for w in boardingHouseDoc if w.pos_ == 'PROPN']

In [139]:
people = [propn for propn in boardingHousePropns if propn.lower_ not in ['mrs', 'mr.', '', 'house', '.']]

In [140]:
peopleDict = {person.text: [] for person in people} 
for propn in people: 
    children = [word for word in list(propn.children) if word.pos_ == 'ADJ']
    for child in children: 
        peopleDict[propn.text].append(children)

In [141]:
peopleDict

{'MOONEY': [],
 'father': [],
 'Spring': [],
 'Gardens': [],
 'Mooney': [],
 'cleaver': [],
 'sheriff': [],
 'bailiff': [],
 'Mrs.': [],
 'Hardwicke': [],
 'Street': [],
 'Liverpool': [],
 'Isle': [],
 'Man': [],
 'Madam': [],
 'Jack': [],
 'Fleet': [],
 'mits': [],
 'Sunday': [],
 'Sheridan': [],
 'Polly': [],
 '\\[quote\\': [],
 '\\_\\_\\_\\_\\': [],
 '_': [],
 '\\[verse\\': [],
 'madonna': [[perverse]],
 'disreputable': [],
 'George': [],
 'Church': [],
 'Breakfast': [],
 'Mary': [],
 'Tuesday': [],
 'cavalier': [],
 'Doran': [],
 'Marlborough': [],
 'Meade': [],
 'Bantam': [],
 'Lyons': [],
 'Dublin': [],
 'Leonard': [[old]],
 'God': [],
 'Reynolds': [],
 'Newspaper': [],
 "had've": [],
 'Bob': [],
 'missus': [],
 'Bass': [],
 'Londoner': [[little, blond], [little, blond]],
 'cool': [],
 'iron': []}

In [149]:
[(w, list(w.ancestors)) for w in boardingHouseDoc if w.pos_ == 'ADJ' and w.lemma_ == 'small']

[(small, [hours, in, came]),
 (small, [mouth, hair, had]),
 (small, [city, is, knows])]

In [157]:
verbs = {}
for sent in boardingHouseSents: 
    root = sent.root
    children = list(root.children)
    if len(children) > 0: 
        #print(root, children[0])
        if root.text in verbs: 
            verbs[root.text].append(children[0])
        else: 
            verbs[root.text] = [children[0]]

In [158]:
verbs

{'HOUSE': [THE],
 'was': [MOONEY,
  She,
  was,
  He,
  Mooney,
  He,
  was,
  It,
  Breakfast,
  She,
  had,
  He,
  that,
  question,
  He,
  Doran,
  had,
  First,
  then,
  It,
  It,
  On,
  was,
  There],
 'married': [She],
 'But': [as],
 'began': [Mooney, Polly, ,],
 'drank': [He],
 'By': [fighting],
 'went': [night, She, Things, Then],
 'had': [and, house, Mooney, met, was, eyes, be, He],
 'lived': [After],
 'give': [She],
 'made': [population, She, She, He],
 'governed': [She],
 'knew': [house, Polly, She, she, He],
 'spoke': [men],
 'paid': [Mrs.],
 'shared': [They],
 'discussed': [They],
 'came': [was, sitting],
 'be': [On, daughter, There, Whereas, affair, Perhaps, It],
 'room': [Mrs., the],
 'oblige': [artistes],
 'played': [and],
 'sing': [Mooney],
 'sang': [She],
 "'m": [I],
 'need': [You],
 'know': [You],
 '_': [\_\_\_\_\],
 'sent': [Mooney, belfry],
 'taken': [,, He],
 'Besides': [,],
 'flirted': [Polly],
 'meant': [none],
 'watched': [She],
 'been': [There, were, Both,