In [1]:
import re
import nltk
from nltk import sem
from nltk.corpus import ieer
from nltk.sem import relextract

In [2]:
roles = r"""
... (.*(
... analyst|
... chair(wo)?man|
... commissioner|
... counsel|
... director|
... economist|
... editor|
... executive|
... foreman|
... governor|
... head|
... lawyer|
... leader|
... librarian).*)|
... manager|
... partner|
... president|
... producer|
... professor|
... researcher|
... spokes(wo)?man|
... spokesperson|
... writer|
... ,\sof\sthe?\s*  # "X, of (the) Y"
... """
ROLES = re.compile(roles, re.VERBOSE)

In [3]:
for fileid in ieer.fileids():
    for doc in ieer.parsed_docs(fileid):
        for rel in relextract.extract_rels('PER', 'ORG', doc, corpus='ieer', pattern = ROLES):
            print(relextract.rtuple(rel))

[PER: 'Kivutha Kibwana'] ', of the' [ORG: 'National Convention Assembly']
[PER: 'Boban Boskovic'] ', chief executive of the' [ORG: 'Plastika']
[PER: 'Annan'] ', the first sub-Saharan African to head the' [ORG: 'United Nations']
[PER: 'Kiriyenko'] 'became a foreman at the' [ORG: 'Krasnoye Sormovo']
[PER: 'Annan'] ', the first sub-Saharan African to head the' [ORG: 'United Nations']
[PER: 'Mike Godwin'] ', chief counsel for the' [ORG: 'Electronic Frontier Foundation']
[PER: 'Robert Mergess'] ', the co-director of the' [ORG: 'Berkeley Center for Law and Technology']
[PER: 'Jack Balkin'] ", director of the school's program. ``What happened at" [ORG: 'Yale']
[PER: 'William Gale'] ', an economist at the' [ORG: 'Brookings Institution']
[PER: 'Joel Slemrod'] ', an economist at the' [ORG: 'University of Michigan']
[PER: 'Alan Braverman'] ', Internet analyst at' [ORG: 'Credit Suisse First Boston']
[PER: 'Michael Coffey'] ', managing editor of' [ORG: 'Publishers Weekly']
[PER: 'Lorne Michaels'] "

In [37]:
grammar = r"""
    DT: {<DT>} 
    JJ: {<JJ>} 
    N: {<NN.*>}
    VP: {<VB.*><NP|PP|CLAUSE>+$}
    S: {<NP><VP>}
"""

In [38]:
parser = nltk.RegexpParser(grammar)
parser_deep = nltk.RegexpParser(grammar, loop = 2)

In [39]:
sent1 = "The quick brown fox jumps over the lazy dog."
tokens1 = nltk.word_tokenize(sent1)
sent2 = "The energetic young children, after devouring a plate of delicious chocolate chip cookies, eagerly raced outside."
tokens2 = nltk.word_tokenize(sent2)

In [40]:
tagg1 = nltk.pos_tag(tokens1)
tagg2 = nltk.pos_tag(tokens2)

In [41]:
chunks = parser.parse(tagg1)
print(chunks)

(S
  (DT The/DT)
  (JJ quick/JJ)
  (N brown/NN)
  (N fox/NN)
  jumps/VBZ
  over/IN
  (DT the/DT)
  (JJ lazy/JJ)
  (N dog/NN)
  ./.)


In [42]:
chunks_deep = parser.parse(tagg1)
print(chunks_deep)

(S
  (DT The/DT)
  (JJ quick/JJ)
  (N brown/NN)
  (N fox/NN)
  jumps/VBZ
  over/IN
  (DT the/DT)
  (JJ lazy/JJ)
  (N dog/NN)
  ./.)


In [43]:
chunks = parser.parse(tagg2)
print(chunks)

(S
  (DT The/DT)
  (JJ energetic/JJ)
  (JJ young/JJ)
  (N children/NNS)
  ,/,
  after/IN
  devouring/VBG
  (DT a/DT)
  (N plate/NN)
  of/IN
  (JJ delicious/JJ)
  (N chocolate/NN)
  (N chip/NN)
  (N cookies/NNS)
  ,/,
  eagerly/RB
  raced/VBD
  (JJ outside/JJ)
  ./.)


In [17]:
chunks_deep = parser.parse(tagg2)
print(chunks_deep)

(S
  (DT The/DT)
  (JJ energetic/JJ)
  (JJ young/JJ)
  (N children/NNS)
  ,/,
  after/IN
  devouring/VBG
  (DT a/DT)
  (N plate/NN)
  of/IN
  (JJ delicious/JJ)
  (N chocolate/NN)
  (N chip/NN)
  (N cookies/NNS)
  ,/,
  eagerly/RB
  raced/VBD
  outside/IN
  to/TO
  play/VB
  in/IN
  (DT the/DT)
  (N vibrant/NN)
  (JJ green/JJ)
  (N park/NN)
  across/IN
  (DT the/DT)
  (N street/NN)
  ./.)
