In [65]:
import nltk
from nltk.corpus import brown

In [66]:
# Sample paragraph used as tokenizer/tagger input; one string fragment per sentence.
raw = (
    "In 1650 he started his career as advisor in the ministerium of finances in Den Haag. "
    "After he became a minister he went back to Amsterdam, and took place as a sort of chairing mayor of this city. "
    "After the death of his brother Cornelis, De Graeff became the strong leader of the republicans. "
    "He held this position until the rampjaar."
)

In [67]:
# Split the paragraph into sentences, tokenize each sentence, then POS-tag
# every token list.
sents = list(map(nltk.word_tokenize, nltk.sent_tokenize(raw)))
tagged_sents = list(map(nltk.pos_tag, sents))

In [216]:
def parse_sent(sent):
    """Tokenize, POS-tag and chart-parse one sentence, printing the results.

    The list of (token, tag) pairs is printed first, followed by every parse
    tree the grammar yields, each followed by a blank line.  The phrase
    structure rules are fixed; the lexical rules (DT, I, N, PN, V, J, R, PR,
    CD, CC) are generated from the sentence's own tokens, grouped by the tag
    prefix that ``nltk.pos_tag`` assigned to them.

    Args:
        sent: The sentence to parse, as a single string.

    Returns:
        None.  All results are printed.

    Raises:
        ValueError: If a token that must become a grammar terminal contains
            both a single and a double quote, which the textual grammar
            format cannot express.
    """
    token_sent = nltk.word_tokenize(sent)
    tagged_sent = nltk.pos_tag(token_sent)

    print(tagged_sent)

    def words_tagged(*prefixes, exclude=()):
        # Tokens whose tag starts with one of `prefixes` but none of `exclude`.
        # str.startswith accepts a tuple; an empty tuple never matches.
        return [word for word, tag in tagged_sent
                if tag.startswith(prefixes) and not tag.startswith(exclude)]

    # Grammar category -> tokens of this sentence belonging to it.  A token
    # may appear in several categories (VBG counts as both V and J).
    lexicon = {
        'DT': words_tagged('DT'),
        'I': words_tagged('IN', 'TO'),
        'N': words_tagged('NN', exclude=('NNP',)),
        'PN': words_tagged('NNP'),
        'V': words_tagged('V'),
        'J': words_tagged('JJ', 'VBG'),
        'R': words_tagged('RB'),
        'PR': words_tagged('PR'),
        'CD': words_tagged('CD'),
        'CC': words_tagged('CC'),
    }

    def quote_terminal(word):
        # Terminals may be wrapped in either quote style; pick the one the
        # token does not contain so that tokens such as "n't" stay valid.
        if "'" not in word:
            return "'" + word + "'"
        if '"' not in word:
            return '"' + word + '"'
        raise ValueError(
            "cannot quote token {!r} as a grammar terminal".format(word))

    def list2str(l):
        # Right-hand side of a lexical rule: the distinct words as quoted
        # alternatives.  Sorted so the generated grammar text is the same on
        # every run instead of following set iteration order.
        if len(l) == 0:
            # Unquoted placeholder, so it is a symbol rather than a word and
            # an unused category matches no token.
            return 'N/A'
        return " | ".join(quote_terminal(word) for word in sorted(set(l)))

    phrase_rules = """
        S  -> NP VP '.' | PP ',' NP VP '.' | PR VP '.' | PP PR VP '.' | PP PR VP ',' CC VP '.'
        PP -> I NP | I CD | I clause
        NP -> PR N PN | DT N PP | PN PN | DT PN | DT J N | J N PP | DT N | PN | N PP | PR N PP
        VP -> V NP PP | V NP | V R I PN
        clause -> PR VP | PP PR VP
    """
    lexical_rules = "\n".join(
        "{} -> {}".format(category, list2str(words))
        for category, words in lexicon.items())

    grammar1 = nltk.CFG.fromstring(phrase_rules + "\n" + lexical_rules)

    parser = nltk.ChartParser(grammar1)
    for tree in parser.parse(token_sent):
        print(tree, end='\n\n')

In [217]:
# Pronoun subject with a verb, an object NP and a trailing PP; the PP can
# attach to the verb phrase or to the object NP, so two trees are printed.
parse_sent("He held this position until the rampjaar.")

[('He', 'PRP'), ('held', 'VBD'), ('this', 'DT'), ('position', 'NN'), ('until', 'IN'), ('the', 'DT'), ('rampjaar', 'NN'), ('.', '.')]
(S
  (PR He)
  (VP
    (V held)
    (NP (DT this) (N position))
    (PP (I until) (NP (DT the) (N rampjaar))))
  .)

(S
  (PR He)
  (VP
    (V held)
    (NP
      (DT this)
      (N position)
      (PP (I until) (NP (DT the) (N rampjaar)))))
  .)



In [218]:
# Sentence that opens with a PP set off by a comma and has a two-word
# proper-noun subject (the "PP ',' NP VP '.'" rule).
parse_sent("After the death of his brother Cornelis, De Graeff became the strong leader of the republicans.")

[('After', 'IN'), ('the', 'DT'), ('death', 'NN'), ('of', 'IN'), ('his', 'PRP$'), ('brother', 'NN'), ('Cornelis', 'NNP'), (',', ','), ('De', 'NNP'), ('Graeff', 'NNP'), ('became', 'VBD'), ('the', 'DT'), ('strong', 'JJ'), ('leader', 'NN'), ('of', 'IN'), ('the', 'DT'), ('republicans', 'NNPS'), ('.', '.')]
(S
  (PP
    (I After)
    (NP
      (DT the)
      (N death)
      (PP (I of) (NP (PR his) (N brother) (PN Cornelis)))))
  ,
  (NP (PN De) (PN Graeff))
  (VP
    (V became)
    (NP (DT the) (J strong) (N leader))
    (PP (I of) (NP (DT the) (PN republicans))))
  .)



In [219]:
# Sentence that opens with a PP over a number ("In 1650") followed by a
# pronoun subject; the chain of PPs after "career" gives more than one parse.
parse_sent("In 1650 he started his career as advisor in the ministerium of finances in Den Haag.")

[('In', 'IN'), ('1650', 'CD'), ('he', 'PRP'), ('started', 'VBD'), ('his', 'PRP$'), ('career', 'NN'), ('as', 'IN'), ('advisor', 'NN'), ('in', 'IN'), ('the', 'DT'), ('ministerium', 'NN'), ('of', 'IN'), ('finances', 'NNS'), ('in', 'IN'), ('Den', 'NNP'), ('Haag', 'NNP'), ('.', '.')]
(S
  (PP (I In) (CD 1650))
  (PR he)
  (VP
    (V started)
    (NP
      (PR his)
      (N career)
      (PP
        (I as)
        (NP (N advisor) (PP (I in) (NP (DT the) (N ministerium))))))
    (PP (I of) (NP (N finances) (PP (I in) (NP (PN Den) (PN Haag))))))
  .)

(S
  (PP (I In) (CD 1650))
  (PR he)
  (VP
    (V started)
    (NP
      (PR his)
      (N career)
      (PP
        (I as)
        (NP
          (N advisor)
          (PP
            (I in)
            (NP
              (DT the)
              (N ministerium)
              (PP
                (I of)
                (NP (N finances) (PP (I in) (NP (PN Den) (PN Haag)))))))))))
  .)



In [221]:
# Sentence with a subordinate clause after "After" and a second verb phrase
# joined by ", and" (the "PP PR VP ',' CC VP '.'" rule).
parse_sent("After he became a minister he went back to Amsterdam, and took place as a sort of chairing mayor of this city.")

[('After', 'IN'), ('he', 'PRP'), ('became', 'VBD'), ('a', 'DT'), ('minister', 'NN'), ('he', 'PRP'), ('went', 'VBD'), ('back', 'RB'), ('to', 'TO'), ('Amsterdam', 'NNP'), (',', ','), ('and', 'CC'), ('took', 'VBD'), ('place', 'NN'), ('as', 'IN'), ('a', 'DT'), ('sort', 'NN'), ('of', 'IN'), ('chairing', 'VBG'), ('mayor', 'NN'), ('of', 'IN'), ('this', 'DT'), ('city', 'NN'), ('.', '.')]
(S
  (PP
    (I After)
    (clause (PR he) (VP (V became) (NP (DT a) (N minister)))))
  (PR he)
  (VP (V went) (R back) (I to) (PN Amsterdam))
  ,
  (CC and)
  (VP
    (V took)
    (NP (N place) (PP (I as) (NP (DT a) (N sort))))
    (PP
      (I of)
      (NP (J chairing) (N mayor) (PP (I of) (NP (DT this) (N city))))))
  .)

(S
  (PP
    (I After)
    (clause (PR he) (VP (V became) (NP (DT a) (N minister)))))
  (PR he)
  (VP (V went) (R back) (I to) (PN Amsterdam))
  ,
  (CC and)
  (VP
    (V took)
    (NP
      (N place)
      (PP
        (I as)
        (NP
          (DT a)
          (N sort)
          (PP
 

In [204]:
# Toy grammar in which 'you' and 'hate' are each listed under both PR and V,
# so only their position in "VP -> V PR" tells the two categories apart.
small_grammar = nltk.CFG.fromstring("""
    S -> NP VP
    VP -> V PR
    NP -> 'I'
    
    PR -> 'you' | 'hate'
    V -> 'hate' | 'you'
""")

In [205]:
# Chart parser built over the toy grammar.
small_parser = nltk.ChartParser(small_grammar)

In [206]:
# Print every parse the toy grammar yields for the tokens 'I hate you'.
for tree in small_parser.parse(['I', 'hate', 'you']):
    print(tree)

(S (NP I) (VP (V hate) (PR you)))
