In [1]:
import nltk
from nltk import Tree
from nltk.corpus import treebank
from nltk import Nonterminal, Production
from nltk import induce_pcfg

In [8]:
print("Induce PCFG grammar from treebank data:")

productions = []
Leaves=set()
for item in treebank.fileids():
    for tree in treebank.parsed_sents(item):
        tree.collapse_unary(collapsePOS = False)# Remove branches A-B-C into A-B+C
        tree.chomsky_normal_form(horzMarkov = 2)# Remove A->(B,C,D) into A->B,C+D->D
        productions += tree.productions()
        for i in range(len(tree.leaves())):
            Leaves.add(tree.leaves()[i])

Induce PCFG grammar from treebank data:


In [9]:
Leaves=list(Leaves)

In [11]:
from nltk  import Nonterminal
S = Nonterminal('S')
grammar = induce_pcfg(S, productions)

In [14]:
from nltk.parse import ViterbiParser
from functools import reduce
sentence="I saw it"
tokens = sentence.split(' ')
parser = ViterbiParser(grammar)
parses = parser.parse_all(tokens)

if parses: 
    lp = len(parses)
    p = reduce(lambda a,b:a+b.prob(), parses, 0.0)
else:
    p = 0
for parse in parses:
    print(parse)

(S (NP-SBJ (PRP I)) (VP (VBD saw) (NP (PRP it)))) (p=5.31475e-10)


In [None]:
count = {}
rule_count = {}
rule_probs = {}
for rule in grammar:
    if rule[0] not in count:
        count[rule[0]] = 1
    else:
        count[rule[0]] += 1
for rule in grammar:
    if rule not in rule_count:
        rule_count[rule] = 1
    else:
        rule_count[rule] += 1
for entry in rule_count:
    rule_probs[entry] = float(rule_count[entry])/count[entry[0]]

In [14]:
grammar_rules=[]
for line in productions:
    grammar_rules.append(tuple(str(line).rstrip('\n').split(' -> ')))

In [15]:
count = {}
rule_count = {}
rule_probs = {}
for rule in grammar_rules:
    if rule[0] not in count:
        count[rule[0]] = 1
    else:
        count[rule[0]] += 1
for rule in grammar_rules:
    if rule not in rule_count:
        rule_count[rule] = 1
    else:
        rule_count[rule] += 1
for entry in rule_count:
    rule_probs[entry] = float(rule_count[entry])/count[entry[0]]

In [20]:
non_terms = set()
for rules in set(grammar_rules):
    non_terms.add(rules[0])
non_terms=list(non_terms)
sent="I saw John with my eyes"
sent=sent.split(" ")
print(sent)

['I', 'saw', 'John', 'with', 'my', 'eyes']


In [22]:
score=[[[0.0 for i in range(len(non_terms))] for j in range(len(sent)+1)] for k in range(len(sent)+1)]
back =[[[-1 for i in range(len(non_terms))] for j in range(len(sent)+1)] for k in range(len(sent)+1)]

In [32]:
rule_index = {}
from tqdm import tqdm_notebook as tqdm
for i,word in tqdm(enumerate(sent)):
    rules_used = []
    rules_not_used = []
    for j,A in tqdm(enumerate(non_terms)):
        r = A, '\'' + word + '\''
        if r in grammar_rules:
            score[i][i+1][j] = rule_probs[r]
            rules_used.append(j)
            rule_index[A] = j
        else:
            rules_not_used.append(j)
            rule_index[A] = j
    rules_used_temp = rules_used[:]
    rules_not_used_temp = rules_not_used[:]
    added = True
    while added:
        print(added)
        added = False
        for a in rules_not_used:
            for b in rules_used:
                r = non_terms[a], non_terms[b]
                if r in grammar_rules:
                    prob = rule_probs[r] * score[i][i+1][b]
                    if prob > score[i][i+1][a]:
                        score[i][i+1][a] = prob
                        back[i][i+1][a] = b
                        rules_used_temp.append(a)
                        try:
                            rules_not_used_temp.remove(a)
                            rules_used_temp.remove(b)
                        except ValueError:
                            pass
                        added = True

        rules_used = rules_used_temp[:]
        rules_not_used =rules_not_used_temp[:]

    

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

True


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

True


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

True


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

True
True


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

True
True


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

True
True


In [34]:
bin_set = set()
for rules in grammar_rules:
    if len(rules[1].split(' ')) == 2:
        b, c = rules[1].split(' ')
        bin_set.add((rules[0],b,c))
        bin_set.add((rules[0], c, b))

In [None]:
binary_rules =list(bin_set)
for span in tqdm(range(2,len(sent)+1)):
    print("span:"+str(span))
    for begin in tqdm(range(len(sent)+1-span)):
        rules_used = []
        end = begin + span
        print("span: "+str(span)+" begin: "+str(begin)+" end: "+str(end))
        for split in tqdm(range(begin+1, end)):
            print("span: "+str(span)+" begin: "+str(begin)+" end: "+str(end)+" split: "+str(split))
            for rule in tqdm(binary_rules):
                a, b, c = rule_index[rule[0]], rule_index[rule[1]], rule_index[rule[2]]
                concat_rule = rule[0], ' '.join((rule[1], rule[2]))
                if concat_rule in grammar_rules:
                    prob = score[begin][split][b] * score[split][end][c] * rule_probs[concat_rule]
                else:
                    continue
                if prob > score[begin][end][a]:
                    score[begin][end][a] = prob
                    back[begin][end][a] = split, b, c
                    rules_used.append(a)

            ### Handle Unaries
        added = True
        while added:
            added = False
            for a in range(len(non_terms)):
                for b in rules_used:
                    r = non_terms[a], non_terms[b]
                    if r in grammar_rules:
                        prob = rule_probs[r] * score[begin][end][b]
                        if prob > score[begin][end][a]:
                            score[begin][end][a] = prob
                            back[begin][end][a] = b
                            added = True

HBox(children=(IntProgress(value=0, max=5), HTML(value='')))

span:2


HBox(children=(IntProgress(value=0, max=5), HTML(value='')))

span: 2 begin: 0 end: 2


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

span: 2 begin: 0 end: 2 split: 1


HBox(children=(IntProgress(value=0, max=20802), HTML(value='')))

span: 2 begin: 1 end: 3


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

span: 2 begin: 1 end: 3 split: 2


HBox(children=(IntProgress(value=0, max=20802), HTML(value='')))

span: 2 begin: 2 end: 4


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

span: 2 begin: 2 end: 4 split: 3


HBox(children=(IntProgress(value=0, max=20802), HTML(value='')))

In [26]:
from collections import defaultdict
print(grammar.productions()[0].lhs())
source=defaultdict()
prob=defaultdict()
rhs_1=set()
lhs_1=set()
total=set()
for i in range(len(grammar.productions())):
    q=len(grammar.productions()[i].rhs())
    print(grammar.productions()[i].lhs())
    A=str(grammar.productions()[i]).split(' -> ')[0]
    print(A)
    BC=str(grammar.productions()[i]).split(' -> ')[1].split()[:-1]
    source[A]=BC
    lhs_1.add(A)
    total.add(A)
    p1=float(str(grammar.productions()[i]).split(' -> ')[1].split()[-1][1:-1])
    if len(BC)==1:
        prob[str(grammar.productions()[i]).split(' -> ')[0]+" -> "+str(BC[0])]=p1
        rhs_1.add(BC[0])
        total.add(BC[0])
    elif len(BC)==2:
        prob[str(grammar.productions()[i]).split(' -> ')[0]+" -> "+str(BC[0])+" "+str(BC[1])]=p1
        rhs_1.add(BC[0])
        rhs_1.add(BC[1])
        total.add(BC[0])
        total.add(BC[1])
rhs_dict=defaultdict()
lhs_dict=defaultdict()
rhs_dict_r=defaultdict()
lhs_dict_l=defaultdict()
for i, item in enumerate(lhs_1):
    lhs_dict[item]=i
    lhs_dict_l[i]=item
total_dict=defaultdict()
reverse_dict=defaultdict()
for i, item in enumerate(total):
    total_dict[item]=i
    reverse_dict[i]=item

S
S
S
NP-SBJ
NP-SBJ
NP
NP
NNP
NNP
NNP
NNP
NP-SBJ|<,-ADJP>
NP-SBJ|<,-ADJP>
,
,
NP-SBJ|<ADJP-,>
NP-SBJ|<ADJP-,>
ADJP
ADJP
NP
NP
CD
CD
NNS
NNS
JJ
JJ
S|<VP-.>
S|<VP-.>
VP
VP
MD
MD
VP
VP
VB
VB
VP|<NP-PP-CLR>
VP|<NP-PP-CLR>
NP
NP
DT
DT
NN
NN
VP|<PP-CLR-NP-TMP>
VP|<PP-CLR-NP-TMP>
PP-CLR
PP-CLR
IN
IN
NP
NP
DT
DT
NP|<JJ-NN>
NP|<JJ-NN>
JJ
JJ
NN
NN
NP-TMP
NP-TMP
NNP
NNP
CD
CD
.
.
NP-SBJ
NP-SBJ
NNP
NNP
VP
VP
VBZ
VBZ
NP-PRD
NP-PRD
NP
NP
NN
NN
PP
PP
IN
IN
NP
NP
NNP
NNP
NNP
NNP
NP|<,-NP>
NP|<,-NP>
NP
NP
NP|<NNP-VBG>
NP|<NNP-VBG>
NNP
NNP
NP|<VBG-NN>
NP|<VBG-NN>
VBG
VBG
NN
NN
S
S
NP-SBJ-1
NP-SBJ-1
NNP
NNP
NNP
NNP
NP-SBJ-1|<,-UCP>
NP-SBJ-1|<,-UCP>
NP-SBJ-1|<UCP-,>
NP-SBJ-1|<UCP-,>
UCP
UCP
CD
CD
UCP|<CC-NP>
UCP|<CC-NP>
CC
CC
NP
NP
NP
NP
JJ
JJ
NP
NP
NNP
NNP
NP|<NNP-NNP>
NP|<NNP-NNP>
NNP
NNP
NP|<NNP-NNP>
NP|<NNP-NNP>
NNP
NNP
NNP
NNP
VP
VP
VBD
VBD
VP
VP
VBN
VBN
S
S
NP-SBJ
NP-SBJ
-NONE-
-NONE-
NP
NP
DT
DT
NP|<JJ-JJ>
NP|<JJ-JJ>
JJ
JJ
JJ
JJ
NN
NN
S
S
S-TPC-1
S-TPC-1
NP-SBJ
NP-SBJ
DT
DT
NN
NN
NN
NN
RRC
RRC
ADVP

NP
NN
NN
NN
NN
NP-TMP
NP-TMP
NP-SBJ-1
NP-SBJ-1
NNS
NNS
NP
NP
JJ
JJ
JJ
JJ
NNS
NNS
VP
VP
VBD
VBD
VP|<S-,>
VP|<S-,>
VP
VP
VB
VB
VP|<,-PP-LOC>
VP|<,-PP-LOC>
IN
IN
NP
NP
NNS
NNS
NN
NN
VBP
VBP
JJ
JJ
NNS
NNS
NN
NN
NNS
NNS
JJ
JJ
NP|<JJ-NN>
NP|<JJ-NN>
JJ
JJ
NN
NN
NN
NN
NP
NP
NP|<CD-JJ>
NP|<CD-JJ>
CD
CD
JJ
JJ
VBN
VBN
NP-LGS
NP-LGS
NNP
NNP
NP-LGS|<NNP-NNP>
NP-LGS|<NNP-NNP>
NNP
NNP
NNP
NNP
NNP
NNP
VP
VP
VBD
VBD
VP|<NP-ADV-PP-DIR>
VP|<NP-ADV-PP-DIR>
NP-ADV
NP-ADV
NN
NN
NN
NN
VP|<PP-DIR-PP-DIR>
VP|<PP-DIR-PP-DIR>
NP
NP
CD
CD
NN
NN
CD
CD
NN
NN
VP
VP
VBD
VBD
NP-TMP
NP-TMP
NNP
NNP
NN
NN
NNS
NNS
VP
VP
VBP
VBP
UCP
UCP
NN
NN
NNS
NNS
UCP|<CC-SBAR>
UCP|<CC-SBAR>
NP-SBJ
NP-SBJ
NP-SBJ|<JJ-NN>
NP-SBJ|<JJ-NN>
JJ
JJ
VP
VP
VBZ
VBZ
JJ
JJ
NN
NN
NP
NP
NP
NP
NP|<NNS-POS>
NP|<NNS-POS>
POS
POS
NNS
NNS
VP
VP
VBD
VBD
VP|<PP-EXT-PP-DIR>
VP|<PP-EXT-PP-DIR>
PP-EXT
PP-EXT
NN
NN
VP|<PP-DIR-PP>
VP|<PP-DIR-PP>
CD
CD
NNS
NNS
NP
NP
JJS
JJS
IN
IN
NP
NP
NNP
NNP
NNP
NNP
NP-SBJ-1
NP-SBJ-1
JJR
JJR
NNS
NNS
VBN
VBN
VP
VP
VB
VB
VP|<NP-SB

NNP
NNP
NNP
NNP
VP
VP
VBD
VBD
VP|<ADVP-DIR-CLR-PP-CLR>
VP|<ADVP-DIR-CLR-PP-CLR>
ADVP-DIR-CLR
ADVP-DIR-CLR
IN
IN
VP|<PP-CLR-,>
VP|<PP-CLR-,>
NN
NN
NNP
NNP
NNP
NNP
NNP
NNP
VP
VP
VBG
VBG
S
S
NNS
NNS
VP
VP
ADJP-PRD-3
ADJP-PRD-3
RB
RB
ADJP-PRD-3|<JJ-S>
ADJP-PRD-3|<JJ-S>
-NONE-
-NONE-
S|<CC-S>
S|<CC-S>
S|<S-S-4>
S|<S-S-4>
S
S
NP-SBJ=2
NP-SBJ=2
NP-SBJ=2|<JJ-NN>
NP-SBJ=2|<JJ-NN>
JJ
JJ
NN
NN
ADVP-PRD-LOC=3
ADVP-PRD-LOC=3
ADVP-PRD-LOC=3|<RB-PP-LOC>
ADVP-PRD-LOC=3|<RB-PP-LOC>
ADVP-PRD-LOC=3|<PP-LOC-S>
ADVP-PRD-LOC=3|<PP-LOC-S>
NN
NN
S-4
S-4
VB
VB
NN
NN
NN
NN
VP
VP
VBZ
VBZ
NNP
NNP
NNP
NNP
NNP
NNP
NNP
NNP
NP
NP
NP|<VBG-JJ>
NP|<VBG-JJ>
JJ
JJ
NNS
NNS
NNP
NNP
RB
RB
VBN
VBN
NP
NP
JJ
JJ
NN
NN
NP|<PP-LOC-SBAR>
NP|<PP-LOC-SBAR>
NP
NP
NN
NN
CD
CD
NN
NN
NNS
NNS
SBAR
SBAR
WHPP-2
WHPP-2
WHNP
WHNP
VP
VP
JJ
JJ
PP-LOC
PP-LOC
S
S
NP-SBJ-4
NP-SBJ-4
NP-SBJ-4|<,-VP>
NP-SBJ-4|<,-VP>
NP-SBJ-4|<VP-,>
NP-SBJ-4|<VP-,>
NNP
NNP
VP
VP
VP|<NP-S-PRP>
VP|<NP-S-PRP>
CD
CD
VP|<S-PRP-,>
VP|<S-PRP-,>
S-PRP
S-PRP
VB
VB
VP|<,-PP-1>


NN
VB
VB
NN
NN
S
S
NP-SBJ-21
NP-SBJ-21
-NONE-
-NONE-
NN
NN
NNP
NNP
NNP
NNP
NNP
NNP
NP
NP
NNP
NNP
NP|<,-NP-LOC>
NP|<,-NP-LOC>
NNP
NNP
NP-PRD
NP-PRD
NP-PRD|<NN-NN>
NP-PRD|<NN-NN>
NN
NN
S
S
S|<S-CC>
S|<S-CC>
S
S
PRP
PRP
S|<RB-VP>
S|<RB-VP>
RB
RB
VBP
VBP
VB
VB
S|<S-.>
S|<S-.>
VP
VP
VBP
VBP
VP
VP
IN
IN
NN
NN
VBP
VBP
NP
NP
NP
NP
JJ
JJ
JJ
JJ
NN
NN
NN
NN
NP-SBJ
NP-SBJ
NP-SBJ|<NNP-NNS>
NP-SBJ|<NNP-NNS>
NNP
NNP
VP
VP
VBP
VBP
VP|<NP-2-PP-CLR>
VP|<NP-2-PP-CLR>
NP-2
NP-2
NNS
NNS
NP-2|<,-SBAR>
NP-2|<,-SBAR>
NP-2|<SBAR-,>
NP-2|<SBAR-,>
S
S
NP-SBJ-22
NP-SBJ-22
-NONE-
-NONE-
NN
NN
NN
NN
VP
VP
VBG
VBG
ADJP
ADJP
ADJP|<JJ-CC>
ADJP|<JJ-CC>
JJ
JJ
ADJP|<CC-JJ>
ADJP|<CC-JJ>
JJ
JJ
VB
VB
JJ
JJ
NN
NN
NN
NN
NN
NN
VB
VB
CD
CD
NNS
NNS
PRN
PRN
ADJP
ADJP
ADJP
ADJP
RB
RB
ADJP|<RB-JJ>
ADJP|<RB-JJ>
NP
NP
NP|<JJS-JJ>
NP|<JJS-JJ>
VB
VB
JJ
JJ
NP|<,-PP-DIR>
NP|<,-PP-DIR>
PP-DIR
PP-DIR
PP-DIR|<IN-NP>
PP-DIR|<IN-NP>
SBAR
SBAR
SBAR
SBAR
WHNP-23
WHNP-23
-NONE-
-NONE-
QP
QP
RB
RB
JJ
JJ
SBAR|<CC-SBAR>
SBAR|<CC-SBAR>
SBAR
SBAR
WHN

NN
NN
NN
NNS
NNS
NP
NP
NN
NN
NN
NN
VP
VP
VP|<ADVP-PRD-LOC-PP-TMP>
VP|<ADVP-PRD-LOC-PP-TMP>
ADVP-PRD-LOC
ADVP-PRD-LOC
S
S
S|<NP-SBJ-PRN>
S|<NP-SBJ-PRN>
NP-SBJ
NP-SBJ
NP-SBJ|<JJ-CD>
NP-SBJ|<JJ-CD>
PP
PP
JJ
JJ
NP
NP
JJ
JJ
NNS
NNS
NP
NP
NNP
NNP
NNP
NNP
NNP
NNP
NNS
NNS
VP
VP
VB
VB
NNS
NNS
JJ
JJ
ADJP-PRD
ADJP-PRD
JJR
JJR
NN
NN
NN
NN
ADJP
ADJP
QP
QP
JJR
JJR
NNS
NNS
NP-PRD
NP-PRD
NP-PRD|<NN-NN>
NP-PRD|<NN-NN>
NN
NN
NP-PRD|<NN-NNS>
NP-PRD|<NN-NNS>
NN
NN
NNS
NNS
S
S
NP-SBJ
NP-SBJ
NNP
NNP
NNP
NNP
NNP
NNP
S|<PP-TMP-VP>
S|<PP-TMP-VP>
CD
CD
VBD
VBD
JJ
JJ
NN
NN
VP
VP
VBD
VBD
VP|<NP-PRD-SBAR-TMP>
VP|<NP-PRD-SBAR-TMP>
NN
NN
SBAR-TMP
SBAR-TMP
NP-ADV
NP-ADV
SBAR-TMP|<IN-S>
SBAR-TMP|<IN-S>
NNP
NNP
VBD
VBD
NNS
NNS
CD
CD
NNP
NNP
NNP
NNP
NP
NP
NP|<NP-PP>
NP|<NP-PP>
NNP
NNP
NNP
NNP
SBAR
SBAR
WHNP-32
WHNP-32
-NONE-
-NONE-
NN
NN
NNS
NNS
NNP
NNP
NNP
NNP
NNP
NNP
NNP
NNP
NP
NP
NNP
NNP
NNS
NNS
NNS
NNS
NNS
NNS
SBAR
SBAR
WHNP-33
WHNP-33
-NONE-
-NONE-
VBP
VBP
VB
VB
IN
IN
NN
NN
NN
NN
VB
VB
VP|<NP-PP-TMP>
VP|<NP-PP-TMP>

NNS
NNS
SBAR
SBAR
WHNP-37
WHNP-37
-NONE-
-NONE-
VBP
VBP
NNS
NNS
JJ
JJ
NP-SBJ-2
NP-SBJ-2
NP
NP
QP
QP
RB
RB
QP|<JJR-IN>
QP|<JJR-IN>
CD
CD
VP|<CC-VP>
VP|<CC-VP>
VP|<VP-NP-TMP>
VP|<VP-NP-TMP>
VBN
VBN
NNS
NNS
VP|<NP-TMP-PRN>
VP|<NP-TMP-PRN>
VP|<PRN-PP>
VP|<PRN-PP>
PRN
PRN
PRN|<NP-1-,>
PRN|<NP-1-,>
NP-1
NP-1
NP
NP
QP
QP
RB
RB
NP|<DT-NN>
NP|<DT-NN>
NNP
NNP
NNP
NNP
NNP
NNP
JJ
JJ
NN
NN
VBN
VBN
PP-CLR
PP-CLR
NNP
NNP
PP-CLR|<PP-PP>
PP-CLR|<PP-PP>
NNP
NNP
NNP
NNP
S
S
JJ
JJ
S|<,-NP-SBJ-46>
S|<,-NP-SBJ-46>
S|<NP-SBJ-46-VP>
S|<NP-SBJ-46-VP>
NP-SBJ-46
NP-SBJ-46
NNP
NNP
VBN
VBN
-NONE-
-NONE-
NP-LGS
NP-LGS
NN
NN
NP
NP
JJ
JJ
NNP
NNP
NNP
NNP
NP
NP
NNP
NNP
NN
NN
VP
VP
VBN
VBN
VP|<PRT-NP>
VP|<PRT-NP>
VP|<NP-PP-LOC>
VP|<NP-PP-LOC>
VP|<PP-LOC-ADVP-LOC>
VP|<PP-LOC-ADVP-LOC>
NNP
NNP
NNP
NNP
VP|<ADVP-LOC-ADVP-TMP>
VP|<ADVP-LOC-ADVP-TMP>
RB
RB
FRAG
FRAG
NN
NN
.
.
SINV
SINV
SINV|<S-TPC-1-''>
SINV|<S-TPC-1-''>
S-TPC-1
S-TPC-1
NN
NN
S-TPC-1|<:-SBARQ>
S-TPC-1|<:-SBARQ>
SBARQ
SBARQ
WRB
WRB
SBARQ|<RP-FRAG+NP>
SBARQ|<RP

ADVP-PRD
ADVP-PRD
ADVP-PRD|<NP-PP>
ADVP-PRD|<NP-PP>
CD
CD
CD
CD
CD
CD
VBD
VBD
NP-SBJ
NP-SBJ
NNS
NNS
VP|<NP-EXT-PP-DIR>
VP|<NP-EXT-PP-DIR>
CD
CD
CD
CD
VP
VP
CD
CD
NNS
NNS
VP
VP
JJ
JJ
S
S
NNS
NNS
NNS
NNS
JJ
JJ
CD
CD
VP|<PP-TMP-PP-DIR>
VP|<PP-TMP-PP-DIR>
CD
CD
VBN
VBN
NP-LGS
NP-LGS
NN
NN
NP|<NN-NNS>
NP|<NN-NNS>
VBG
VBG
VBD
VBD
SBAR
SBAR
WHNP-50
WHNP-50
-NONE-
-NONE-
VBZ
VBZ
NP+QP
NP+QP
NN
NN
VP|<ADVP-PRD-PP-TMP>
VP|<ADVP-PRD-PP-TMP>
ADVP-PRD
ADVP-PRD
JJ
JJ
VP|<PP-TMP-PP-DIR-3>
VP|<PP-TMP-PP-DIR-3>
PP-DIR-3
PP-DIR-3
CD
CD
NNP
NNP
NNP
NNP
VBD
VBD
RB
RB
VP
VP
VB
VB
VP|<NP-PRN>
VP|<NP-PRN>
JJ
JJ
NNS
NNS
PRN
PRN
PP
PP
PP|<ADVP-RB>
PP|<ADVP-RB>
PP|<RB-PP-TMP>
PP|<RB-PP-TMP>
NN
NN
NP
NP
JJ
JJ
JJ
JJ
VP
VP
VP|<ADVP-PRD-PP>
VP|<ADVP-PRD-PP>
CD
CD
NNP
NNP
VP|<NP-EXT-PP-DIR>
VP|<NP-EXT-PP-DIR>
CD
CD
CD
CD
RB
RB
VBG
VBG
NP-SBJ-1
NP-SBJ-1
VB
VB
VP
VP
PP
PP
NP
NP
NP-SBJ|<NN-NN>
NP-SBJ|<NN-NN>
VBP
VBP
VBD
VBD
VP|<NP-PP-TMP>
VP|<NP-PP-TMP>
VP|<PP-TMP-PP>
VP|<PP-TMP-PP>
PP-TMP
PP-TMP
RBR
RBR
PP-TMP|<IN-NP>

NP-SBJ|<,-JJ>
NN
NN
NN
NN
ADJP-PRD
ADJP-PRD
RB
RB
ADJP-PRD|<VBG-PP>
ADJP-PRD|<VBG-PP>
VBG
VBG
SBAR
SBAR
WHNP-59
WHNP-59
-NONE-
-NONE-
VP
VP
NN
NN
JJ
JJ
VB
VB
NNP
NNP
RB
RB
NN
NN
VBD
VBD
VP|<NP-,>
VP|<NP-,>
VP|<,-SBAR-TMP>
VP|<,-SBAR-TMP>
VBD
VBD
JJ
JJ
NN
NN
NN
NN
NP|<JJ-NNP>
NP|<JJ-NNP>
JJ
JJ
NNP
NNP
NNP
NNP
VP
VP
VP|<NP-PRD-SBAR-1>
VP|<NP-PRD-SBAR-1>
SBAR-1
SBAR-1
RB
RB
VP
VP
RB
RB
VBD
VBD
NN
NN
VP|<ADVP-ADJP-PRD>
VP|<ADVP-ADJP-PRD>
VP|<ADJP-PRD-SBAR-1>
VP|<ADJP-PRD-SBAR-1>
VP
VP
NNP
NNP
NNP
NNP
NN
NN
NN
NN
NNS
NNS
VP
VP
ADJP
ADJP
ADJP|<CC-JJR>
ADJP|<CC-JJR>
NNS
NNS
NN
NN
VBG
VBG
NN
NN
NN
NN
NNP
NNP
VP|<PRT-NP>
VP|<PRT-NP>
VBG
VBG
NNS
NNS
NNS
NNS
NP
NP
NNS
NNS
NNS
NNS
ADJP
ADJP
ADJP|<CD--NONE->
ADJP|<CD--NONE->
NN
NN
NNS
NNS
NNS
NNS
JJ
JJ
S
S
NP-SBJ-60
NP-SBJ-60
NN
NN
-NONE-
-NONE-
VP|<PP-LOC-PP>
VP|<PP-LOC-PP>
NNS
NNS
IN
IN
NNS
NNS
VBZ
VBZ
NNS
NNS
NX|<,-NX>
NX|<,-NX>
NX|<NX-,>
NX|<NX-,>
NX
NX
NNS
NNS
NNS
NNS
NX
NX
NN
NN
NNP
NNP
NNP
NNP
NNP
NNP
NN
NN
JJ
JJ
VBD
VBD
NP
NP
NP|<``-NNP>
NP

NNS
VBD
VBD
JJ
JJ
NN
NN
NP|<PP-,>
NP|<PP-,>
NN
NN
SBAR
SBAR
WHNP-68
WHNP-68
-NONE-
-NONE-
NP
NP
NP|<JJ-NP>
NP|<JJ-NP>
NP|<NP-NN>
NP|<NP-NN>
NNS
NNS
NN
NN
SBAR
SBAR
WHNP-69
WHNP-69
-NONE-
-NONE-
VP
VP
VBZ
VBZ
JJ
JJ
JJ
JJ
NN
NN
VP
VP
VB
VB
VP|<ADVP-MNR-SBAR-ADV>
VP|<ADVP-MNR-SBAR-ADV>
RB
RB
NP
NP
NP|<JJ-DT>
NP|<JJ-DT>
NN
NN
VP
VP
IN
IN
NP-SBJ-1
NP-SBJ-1
JJ
JJ
VB
VB
NN
NN
NN
NN
VP
VP
VP|<NP-,>
VP|<NP-,>
VP|<,-PP-MNR>
VP|<,-PP-MNR>
VP
VP
VBZ
VBZ
NN
NN
NN
NN
VP|<RB-NP-PRD>
VP|<RB-NP-PRD>
NP-PRD
NP-PRD
ADJP
ADJP
JJ
JJ
JJ
JJ
JJ
JJ
JJ
JJ
NNS
NNS
JJ
JJ
NNP
NNP
NNP
NNP
VBN
VBN
JJ
JJ
NN
NN
NP
NP
NP|<``-NX-TTL>
NP|<``-NX-TTL>
NX-TTL
NX-TTL
NNP
NNP
NNP
NNP
JJ
JJ
JJ
JJ
NN
NN
NP|<PRN-PP>
NP|<PRN-PP>
SBAR
SBAR
WHNP-70
WHNP-70
-NONE-
-NONE-
JJ
JJ
NN
NN
SBAR
SBAR
SBAR|<,-S>
SBAR|<,-S>
S-ADV
S-ADV
VBN
VBN
JJ
JJ
NN
NN
S|<NP-SBJ-2-VP>
S|<NP-SBJ-2-VP>
VB
VB
JJ
JJ
S
S
RB
RB
NNP
NNP
NNP
NNP
JJ
JJ
NN
NN
PP
PP
PP|<PP-CC>
PP|<PP-CC>
VBN
VBN
NP|<NNP-NN>
NP|<NNP-NN>
NNP
NNP
NN
NN
VBD
VBD
VP|<PP-CLR-PP-TMP>
VP|<PP-

VP
VP
S-PRP
S-PRP
VP
VP
PRN
PRN
PRN|<ADJP-:>
PRN|<ADJP-:>
NN
NN
NP
NP
PDT
PDT
NN
NN
VB
VB
NN
NN
NN
NN
JJ
JJ
NN
NN
VBD
VBD
NNS
NNS
VBG
VBG
VBG
VBG
PRP
PRP
NNS
NNS
NN
NN
NP-HLN
NP-HLN
NP-HLN|<NNP-:>
NP-HLN|<NNP-:>
NNS
NNS
VP
VP
SBAR-TMP
SBAR-TMP
NNP
NNP
NNP
NNP
VBD
VBD
NNP
NNP
NN
NN
NN
NN
NNP
NNP
NNP
NNP
SINV
SINV
SINV|<S-TPC-1-.>
SINV|<S-TPC-1-.>
VB
VB
SINV|<.-''>
SINV|<.-''>
JJ
JJ
VB
VB
S|<ADVP-TMP-NP-SBJ>
S|<ADVP-TMP-NP-SBJ>
NNP
NNP
NP
NP
JJ
JJ
JJ
JJ
NNS
NNS
VBN
VBN
NNS
NNS
VBP
VBP
NN
NN
VBZ
VBZ
VP|<,-``>
VP|<,-``>
VP|<``-FRAG>
VP|<``-FRAG>
JJ
JJ
NN
NN
NP
NP
NP|<PRN-NN>
NP|<PRN-NN>
FRAG|<:-S>
FRAG|<:-S>
NNS
NNS
VBG
VBG
NN
NN
NN
NN
VBZ
VBZ
S
S
S-1
S-1
NP-SBJ
NP-SBJ
ADJP
ADJP
VBN
VBN
S-1|<,-PRN+SINV>
S-1|<,-PRN+SINV>
S-1|<PRN+SINV-,>
S-1|<PRN+SINV-,>
VBP
VBP
NNS
NNS
S-1|<,-VP>
S-1|<,-VP>
NP-PRD
NP-PRD
VP
VP
VBN
VBN
NP
NP
NNS
NNS
SINV
SINV
SINV|<SBARQ-TPC-2-.>
SINV|<SBARQ-TPC-2-.>
SBARQ-TPC-2
SBARQ-TPC-2
VBG
VBG
VBZ
VBZ
VP
VP
PRN
PRN
PRN|<''-S>
PRN|<''-S>
SBAR-PRP
SBAR-PRP
SBAR-PRP|<``-S

VBD
VBD
NN
NN
JJ
JJ
VP
VP
RB
RB
VP|<VBD-NP-PRD>
VP|<VBD-NP-PRD>
NP-PRD
NP-PRD
NP-PRD|<NN-JJ>
NP-PRD|<NN-JJ>
JJ
JJ
NP
NP
CD
CD
NP|<CC-CD>
NP|<CC-CD>
CD
CD
VP|<CC-VP>
VP|<CC-VP>
VP|<VP-NP-1>
VP|<VP-NP-1>
VP|<NP-1-PP-LOC>
VP|<NP-1-PP-LOC>
NP-1
NP-1
NN
NN
VP
VP
JJ
JJ
VBD
VBD
NP
NP
NP|<NN-``>
NP|<NN-``>
NP|<``-NN>
NP|<``-NN>
NN
NN
NN
NN
VB
VB
NN
NN
VP
VP
VP|<NP-PP-BNF>
VP|<NP-PP-BNF>
NN
NN
VP|<,-VP>
VP|<,-VP>
VP|<VP-,>
VP|<VP-,>
VBD
VBD
NNS
NNS
NN
NN
NN
NN
JJ
JJ
NN
NN
NN
NN
VP
VP
VBN
VBN
VP|<NP-PP-LGS>
VP|<NP-PP-LGS>
VP|<PP-LGS-PP-CLR>
VP|<PP-LGS-PP-CLR>
PP-LGS
PP-LGS
NP
NP
NNP
NNP
PP-CLR|<``-NP>
PP-CLR|<``-NP>
NN
NN
NN
NN
S-TPC-1
S-TPC-1
JJ
JJ
NN
NN
S-TPC-1|<:-S>
S-TPC-1|<:-S>
S
S
ADVP-PRD
ADVP-PRD
RB
RB
NNP
NNP
NNP
NNP
NP-SBJ|<,-NP>
NP-SBJ|<,-NP>
NP-SBJ|<NP-SBAR>
NP-SBJ|<NP-SBAR>
NN
NN
SBAR
SBAR
WHNP-90
WHNP-90
-NONE-
-NONE-
JJ
JJ
S
S
VP
VP
VBG
VBG
VB
VB
NNP
NNP
NNP
NNP
NP
NP
JJ
JJ
NN
NN
SINV
SINV
SINV|<S-2-,>
SINV|<S-2-,>
VBP
VBP
VBN
VBN
NNP
NNP
NNP
NNP
NN
NN
S-3
S-3
S-3|<PRN-NP-SBJ>
S-3

NN
NN
NN
NN
NN
NN
NN
NNS
NNS
VP
VP
NN
NN
NP
NP
NNS
NNS
VB
VB
NN
NN
NP-PRD
NP-PRD
NP-PRD|<CC-ADVP>
NP-PRD|<CC-ADVP>
NP-PRD|<ADVP-NP>
NP-PRD|<ADVP-NP>
NN
NN
S
S
S|<NP-TTL-SBJ-VP>
S|<NP-TTL-SBJ-VP>
NP-TTL-SBJ
NP-TTL-SBJ
QP
QP
CD
CD
CD
CD
S
S
CD
CD
NN
NN
JJ
JJ
JJ
JJ
NNS
NNS
S
S
VP
VP
VP|<PP-CLR-:>
VP|<PP-CLR-:>
WHNP-4
WHNP-4
VP
VP
NN
NN
NP-SBJ
NP-SBJ
VBZ
VBZ
QP|<JJS-DT>
QP|<JJS-DT>
QP|<DT-NN>
QP|<DT-NN>
NNS
NNS
NNS
NNS
JJ
JJ
NNS
NNS
NNP
NNP
NNP
NNP
PP
PP
NP-SBJ
NP-SBJ
NNPS
NNPS
VP
VP
VBG
VBG
NN
NN
NN
NN
S
S
VP
VP
RB
RB
VP|<VBP-''>
VP|<VBP-''>
VBP
VBP
VP|<''-S>
VP|<''-S>
VBG
VBG
VB
VB
S
S
VP|<ADVP-CLR-PP-TMP>
VP|<ADVP-CLR-PP-TMP>
NN
NN
NNP
NNP
JJ
JJ
NN
NN
S
S
VP
VP
VP|<NP-TTL-SBAR-PRP>
VP|<NP-TTL-SBAR-PRP>
VBD
VBD
JJ
JJ
NP|<CC-NNP>
NP|<CC-NNP>
NNP
NNP
SBAR
SBAR
WHNP-108
WHNP-108
-NONE-
-NONE-
NP
NP
SBAR
SBAR
WHNP-109
WHNP-109
-NONE-
-NONE-
ADVP-TMP
ADVP-TMP
JJ
JJ
NP
NP
NP
NP
NP|<PP-S-1>
NP|<PP-S-1>
VP
VP
VP|<CC-VB>
VP|<CC-VB>
VP|<VB-NP-TTL>
VP|<VB-NP-TTL>
VB
VB
NNP
NNP
NP|<PP-PP-DIR>
NP|<PP

NNP
NNPS
NNPS
VBD
VBD
VP|<NP-PP-TMP>
VP|<NP-PP-TMP>
NN
NN
VP|<PP-TMP-VP-1>
VP|<PP-TMP-VP-1>
VP-1
VP-1
VBG
VBG
NN
NN
SBAR
SBAR
WHNP-132
WHNP-132
-NONE-
-NONE-
NNP
NNP
JJ
JJ
VBN
VBN
VP|<PP-PP-CLR>
VP|<PP-PP-CLR>
NN
NN
NNS
NNS
VBG
VBG
PP
PP
UCP
UCP
VBZ
VBZ
UCP|<CC-S-PRP>
UCP|<CC-S-PRP>
VP
VP
VP|<NP-4-PP-CLR>
VP|<NP-4-PP-CLR>
NP-4
NP-4
VBG
VBG
VP
VP
VP|<PP-CLR-S-PRP>
VP|<PP-CLR-S-PRP>
NNP
NNP
SBAR
SBAR
WHNP-133
WHNP-133
-NONE-
-NONE-
NNS
NNS
VP|<S-PRP-NP-TMP>
VP|<S-PRP-NP-TMP>
S-1
S-1
VBD
VBD
NP-SBJ-2
NP-SBJ-2
VP|<NP-PP-DIR>
VP|<NP-PP-DIR>
NN
NN
VBD
VBD
JJ
JJ
NN
NN
S
S
NNP
NNP
NNP
NNP
NNP
NNP
SBAR
SBAR
SBAR|<``-S>
SBAR|<``-S>
S-NOM-SBJ
S-NOM-SBJ
VB
VB
S-NOM
S-NOM
NP-SBJ-2
NP-SBJ-2
NN
NN
NP-SBJ-2|<NN-NN>
NP-SBJ-2|<NN-NN>
VBN
VBN
VP
VP
VBG
VBG
VP|<ADVP-PP-DIR>
VP|<ADVP-PP-DIR>
RB
RB
VBD
VBD
NP-PRD
NP-PRD
NN
NN
NP-PRD|<VP-PP>
NP-PRD|<VP-PP>
VBN
VBN
NNS
NNS
SBAR
SBAR
VP|<NP-PP-DIR>
VP|<NP-PP-DIR>
NNS
NNS
JJ
JJ
VP
VP
NNS
NNS
NN
NN
VP
VP
VB
VB
VP|<PRT-PP-LOC-CLR>
VP|<PRT-PP-LOC-CLR>
VP
VP
VBP
VB

S|<NP-SBJ-4-VP>
S|<NP-SBJ-4-VP>
NP-SBJ-4
NP-SBJ-4
VP|<NP-TMP-SBAR>
VP|<NP-TMP-SBAR>
VB
VB
JJ
JJ
NN
NN
VB
VB
ADJP
ADJP
JJ
JJ
NNS
NNS
S-TPC-1
S-TPC-1
NN
NN
NNP
NNP
NNP
NNP
S
S
VBP
VBP
NP
NP
NP|<,-PP-TMP>
NP|<,-PP-TMP>
NN
NN
VP
VP
VB
VB
NP
NP
JJS
JJS
VP|<CC-``>
VP|<CC-``>
JJ
JJ
NP-SBJ
NP-SBJ
NP-SBJ|<ADJP-JJ>
NP-SBJ|<ADJP-JJ>
VBZ
VBZ
CD
CD
VBG
VBG
JJ
JJ
NN
NN
CD
CD
CD
CD
CD
CD
NP
NP
NP|<NN-PP-DIR>
NP|<NN-PP-DIR>
NN
NN
NP|<PP-DIR-PP-DIR>
NP|<PP-DIR-PP-DIR>
NP
NP
JJR
JJR
NP|<JJ-NX>
NP|<JJ-NX>
JJ
JJ
NX
NX
NP|<PP-DIR-NX-1>
NP|<PP-DIR-NX-1>
NP
NP
JJ
JJ
JJ
JJ
NX-1
NX-1
NN
NN
NNS
NNS
VB
VB
PP-MNR
PP-MNR
VBN
VBN
VBG
VBG
NP
NP
NNP
NNP
NNP
NNP
NNP
NNP
NN
NN
VB
VB
SBAR
SBAR
WHNP-146
WHNP-146
-NONE-
-NONE-
VP
VP
NNS
NNS
RB
RB
S
S
VP
VP
RB
RB
RB
RB
VBN
VBN
NNP
NNP
NNP
NNP
NNP
NNP
VP
VP
JJ
JJ
NN
NN
VBG
VBG
NP
NP
JJ
JJ
RB
RB
NN
NN
NNS
NNS
VP
VP
VP|<S-PP-PRP>
VP|<S-PP-PRP>
NNS
NNS
NP-HLN
NP-HLN
NNP
NNP
NP|<NNP-.>
NP|<NNP-.>
NP-HLN|<PRN-:>
NP-HLN|<PRN-:>
PRN
PRN
PRN|<NP-LOC--RRB->
PRN|<NP-LOC--RRB->
NNP
NN

VB
VB
VP|<NP-PP-CLR>
VP|<NP-PP-CLR>
VP|<PP-CLR-S-ADV>
VP|<PP-CLR-S-ADV>
JJ
JJ
NNS
NNS
JJ
JJ
NN
NN
S
S
NP-SBJ-89
NP-SBJ-89
-NONE-
-NONE-
JJ
JJ
S-1
S-1
S-1|<NP-SBJ-PRN>
S-1|<NP-SBJ-PRN>
NNP
NNP
NNP
NNP
VBZ
VBZ
NN
NN
JJ
JJ
VB
VB
NN
NN
NN
NN
NN
NN
NN
NN
VBN
VBN
S
S
NP-SBJ-90
NP-SBJ-90
NP-SBJ-90|<CC-NNS>
NP-SBJ-90|<CC-NNS>
VBN
VBN
-NONE-
-NONE-
NN
NN
NNS
NNS
S
S
NP-SBJ-91
NP-SBJ-91
-NONE-
-NONE-
NNP
NNP
VP|<RB-ADJP-PRD>
VP|<RB-ADJP-PRD>
JJ
JJ
NN
NN
VP
VP
VBG
VBG
VP|<NP-2-PP-TMP>
VP|<NP-2-PP-TMP>
JJ
JJ
SBAR
SBAR
WHNP-157
WHNP-157
-NONE-
-NONE-
NN
NN
VBN
VBN
VP
VP
RB
RB
NN
NN
VP
VP
RB
RB
NN
NN
NNP
NNP
S-TPC-1
S-TPC-1
S-TPC-1|<ADVP-TMP-,>
S-TPC-1|<ADVP-TMP-,>
S-TPC-1|<,-NP-SBJ-2>
S-TPC-1|<,-NP-SBJ-2>
S-TPC-1|<NP-SBJ-2-VP>
S-TPC-1|<NP-SBJ-2-VP>
NP-SBJ-2
NP-SBJ-2
NP-PRD
NP-PRD
NP-PRD|<RB-JJ>
NP-PRD|<RB-JJ>
RB
RB
NP-PRD|<JJ-NN>
NP-PRD|<JJ-NN>
JJ
JJ
VB
VB
NNP
NNP
NNP
NNP
NNS
NNS
NNP
NNP
NNP
NNP
VP
VP
NNP
NNP
NNP
NNP
PRN|<NP-,>
PRN|<NP-,>
PRN|<,-NP-LOC>
PRN|<,-NP-LOC>
NP-LOC
NP-LOC
NNP
NNP
VBD
VBD


NNP
NP
NP
NNPS
NNPS
NNP
NNP
NNS
NNS
SBAR
SBAR
WHNP-168
WHNP-168
-NONE-
-NONE-
VBP
VBP
VBZ
VBZ
RB
RB
SBAR
SBAR
WHNP-169
WHNP-169
-NONE-
-NONE-
VBP
VBP
VP
VP
JJ
JJ
VP
VP
VP|<RB-ADVP-LOC-PRD>
VP|<RB-ADVP-LOC-PRD>
ADVP-LOC-PRD
ADVP-LOC-PRD
NNP
NNP
NNP
NNP
NNP
NNP
FRAG
FRAG
FRAG|<CC-SBAR-ADV>
FRAG|<CC-SBAR-ADV>
CC
CC
FRAG|<SBAR-ADV-.>
FRAG|<SBAR-ADV-.>
S
S
NP-SBJ-99
NP-SBJ-99
VBN
VBN
-NONE-
-NONE-
RB
RB
FRAG|<.-''>
FRAG|<.-''>
S
S
S|<VP-,>
S|<VP-,>
NNP
NNP
NNP
NNP
NP
NP
NP|<$-CD>
NP|<$-CD>
CD
CD
S-PRP
S-PRP
S-PRP|<NP-SBJ-VP>
S-PRP|<NP-SBJ-VP>
NN
NN
NP
NP
NP|<JJ-NNPS>
NP|<JJ-NNPS>
NP|<NNPS-CC>
NP|<NNPS-CC>
NP|<CC-NNPS>
NP|<CC-NNPS>
NP|<CC-NNPS>
NP|<CC-NNPS>
NNPS
NNPS
NN
NN
SBAR
SBAR
WHNP-170
WHNP-170
-NONE-
-NONE-
SBAR
SBAR
WHNP-171
WHNP-171
-NONE-
-NONE-
S
S
NP-SBJ-100
NP-SBJ-100
VBN
VBN
-NONE-
-NONE-
S
S
VB
VB
NP
NP
DT
DT
S-ADV
S-ADV
S-ADV|<ADVP-VP>
S-ADV|<ADVP-VP>
VBG
VBG
S|<:-S>
S|<:-S>
NP-SBJ
NP-SBJ
CD
CD
JJ
JJ
S|<,-CC>
S|<,-CC>
S|<CC-S-1>
S|<CC-S-1>
S|<S-1-.>
S|<S-1-.>
NNS
NNS
VP
VP
VP

NNP
NNP
NNP
SBAR
SBAR
WHNP-193
WHNP-193
-NONE-
-NONE-
NN
NN
RB
RB
VBN
VBN
NNS
NNS
WHNP-1
WHNP-1
WHPP
WHPP
NN
NN
JJ
JJ
VP
VP
VBP
VBP
VP|<ADVP-LOC-PP-CLR>
VP|<ADVP-LOC-PP-CLR>
RB
RB
NP
NP
NNS
NNS
NP|<NN-CC>
NP|<NN-CC>
NP|<CC-VBG>
NP|<CC-VBG>
NNS
NNS
NP-SBJ
NP-SBJ
NP-SBJ|<DT-``>
NP-SBJ|<DT-``>
NP-SBJ|<``-NN>
NP-SBJ|<``-NN>
NP-SBJ|<NN-''>
NP-SBJ|<NN-''>
NN
NN
SINV|<''-VP>
SINV|<''-VP>
SINV|<VP-NP-SBJ-2>
SINV|<VP-NP-SBJ-2>
SINV|<NP-SBJ-2-S>
SINV|<NP-SBJ-2-S>
NNP
NNP
NNP
NNP
SINV|<S-.>
SINV|<S-.>
NP
NP
NP|<JJ-VBG>
NP|<JJ-VBG>
NNS
NNS
SINV
SINV
VBZ
VBZ
SINV|<NP-SBJ-:>
SINV|<NP-SBJ-:>
NNP
NNP
NNP
NNP
SINV|<:-``>
SINV|<:-``>
SINV|<S-1-.>
SINV|<S-1-.>
NNS
NNS
S
S
NP-SBJ-119
NP-SBJ-119
NNP
NNP
-NONE-
-NONE-
NN
NN
PRN
PRN
PRN|<PP-TMP-,>
PRN|<PP-TMP-,>
NN
NN
NP-SBJ
NP-SBJ
JJ
JJ
NNS
NNS
NP-SBJ|<,-RB>
NP-SBJ|<,-RB>
NP-SBJ|<RB-NP>
NP-SBJ|<RB-NP>
VBP
VBP
PP
PP
PP|<NP-,>
PP|<NP-,>
NP
NP
NN
NN
VP|<VBN-NP>
VP|<VBN-NP>
VBN
VBN
VP|<NP-PP-LGS>
VP|<NP-PP-LGS>
NN
NN
NN
NN
VBP
VBP
NN
NN
NNP
NNP
NNP
NNP
NNP
NNP


S-NOM
S-NOM
S-NOM|<VP-,>
S-NOM|<VP-,>
NNS
NNS
SINV
SINV
NNS
NNS
JJ
JJ
-NONE-
-NONE-
JJ
JJ
JJ
JJ
NNS
NNS
NN
NN
JJ
JJ
NN
NN
VBD
VBD
CD
CD
NP
NP
JJ
JJ
NN
NN
NP|<PRN-VP>
NP|<PRN-VP>
VBP
VBP
NN
NN
NN
NN
JJ
JJ
VBN
VBN
NNS
NNS
NP|<''-PP>
NP|<''-PP>
NP|<PP-:>
NP|<PP-:>
JJ
JJ
NN
NN
VBG
VBG
VP
VP
PRN
PRN
PRN|<PP-LOC-,>
PRN|<PP-LOC-,>
QP
QP
QP|<JJ-IN>
QP|<JJ-IN>
S
S
VBP
VBP
VBP
VBP
VBG
VBG
NN
NN
NP-SBJ|<NN-,>
NP-SBJ|<NN-,>
NN
NN
NP-SBJ|<,-CC>
NP-SBJ|<,-CC>
NP-SBJ|<CC-NN>
NP-SBJ|<CC-NN>
NP-SBJ|<NN-,>
NP-SBJ|<NN-,>
VBP
VBP
NNS
NNS
VBP
VBP
NNS
NNS
NP|<,-''>
NP|<,-''>
SBAR
SBAR
WHNP-224
WHNP-224
-NONE-
-NONE-
JJ
JJ
NNP
NNP
NNP
NNP
NNP
NNP
NNP
NNP
QP
QP
QP|<CC-RB>
QP|<CC-RB>
NP-PRD|<``-NN>
NP-PRD|<``-NN>
NNP
NNP
NNP
NNP
NP-SBJ
NP-SBJ
CD
CD
JJ
JJ
VP
VP
NN
NN
VP|<S-ADV-,>
VP|<S-ADV-,>
S-ADV
S-ADV
NN
NN
S-ADV|<ADVP-LOC-PRD-PP>
S-ADV|<ADVP-LOC-PRD-PP>
ADVP-LOC-PRD
ADVP-LOC-PRD
NN
NN
VBG
VBG
NN
NN
SBAR
SBAR
WHNP-225
WHNP-225
-NONE-
-NONE-
VBZ
VBZ
NN
NN
NN
NN
NN
NN
NP-SBJ
NP-SBJ
VBZ
VBZ
NP
NP
VBG
VBG
VP
VP


S-TPC-2
S-TPC-2
VB
VB
JJ
JJ
NN
NN
NNP
NNP
JJ
JJ
VBD
VBD
VP|<ADVP-CLR-PP-TMP>
VP|<ADVP-CLR-PP-TMP>
NP-1
NP-1
VB
VB
VB
VB
NN
NN
NNS
NNS
NNS
NNS
NNS
NNS
NNP
NNP
NN
NN
SBAR
SBAR
WHNP-245
WHNP-245
-NONE-
-NONE-
VB
VB
PRT
PRT
VP|<NP-S>
VP|<NP-S>
VP|<S-PP-LOC>
VP|<S-PP-LOC>
NP-PRD
NP-PRD
NP-PRD|<PRN-PP>
NP-PRD|<PRN-PP>
NN
NN
JJ
JJ
VBN
VBN
NN
NN
NNP
NNP
NNP
NNP
VBD
VBD
VB
VB
NNS
NNS
NNS
NNS
S
S
RB
RB
VBD
VBD
NN
NN
JJ
JJ
VP
VP
VP|<ADJP-PRD-:>
VP|<ADJP-PRD-:>
S
S
NNS
NNS
NN
NN
NN
NN
VBG
VBG
VBN
VBN
JJ
JJ
NNS
NNS
NP-SBJ
NP-SBJ
NP-SBJ|<NNS-RB>
NP-SBJ|<NNS-RB>
NNS
NNS
NN
NN
NNS
NNS
NNS
NNS
VP
VP
VP|<ADVP-LOC-CLR-PP-LOC>
VP|<ADVP-LOC-CLR-PP-LOC>
ADVP-LOC-CLR
ADVP-LOC-CLR
RB
RB
JJ
JJ
NP-SBJ-1
NP-SBJ-1
NP-SBJ-1|<CC-NNS>
NP-SBJ-1|<CC-NNS>
NNS
NNS
SBAR
SBAR
WHNP-246
WHNP-246
-NONE-
-NONE-
NNS
NNS
NP-TMP
NP-TMP
VBN
VBN
NNS
NNS
NNS
NNS
VBD
VBD
VBN
VBN
NP-SBJ
NP-SBJ
NP-SBJ|<``-S-TTL>
NP-SBJ|<``-S-TTL>
NP-SBJ|<S-TTL-''>
NP-SBJ|<S-TTL-''>
S-TTL
S-TTL
NP-SBJ
NP-SBJ
NNS
NNS
VBP
VBP
NN
NN
NNS
NNS
VBZ
VBZ
JJ
JJ


VBN
NN
NN
S
S
S|<NP-SBJ-NP-TMP>
S|<NP-SBJ-NP-TMP>
SBAR
SBAR
WHNP-255
WHNP-255
-NONE-
-NONE-
VBN
VBN
NNP
NNP
NNP
NNP
SBAR
SBAR
WHNP-256
WHNP-256
-NONE-
-NONE-
NNP
NNP
NNP
NNP
SBAR
SBAR
WHNP-257
WHNP-257
-NONE-
-NONE-
VBP
VBP
JJ
JJ
NNS
NNS
VP
VP
JJ
JJ
NP
NP
NP|<NNS-RB>
NP|<NNS-RB>
S
S
S|<ADVP-LOC-:>
S|<ADVP-LOC-:>
ADJP
ADJP
ADJP|<RB-SBAR>
ADJP|<RB-SBAR>
-NONE-
-NONE-
ADJP-PRD|<CC-ADJP>
ADJP-PRD|<CC-ADJP>
ADJP-PRD|<ADJP-SBAR-2>
ADJP-PRD|<ADJP-SBAR-2>
ADJP
ADJP
NN
NN
NN
NN
VP
VP
NN
NN
SBAR
SBAR
WHNP
WHNP
VP
VP
VBZ
VBZ
NN
NN
NN
NN
NNS
NNS
VP
VP
VP+VP
VP+VP
NN
NN
VP+VP
VP+VP
VP+VP|<RB-VP>
VP+VP|<RB-VP>
NP
NP
NP|<DT-NNP>
NP|<DT-NNP>
NNS
NNS
VP
VP
S-PRP-CLR
S-PRP-CLR
VB
VB
VP|<NP-SBAR-TMP>
VP|<NP-SBAR-TMP>
VP|<SBAR-TMP-PRN>
VP|<SBAR-TMP-PRN>
VBZ
VBZ
PRN
PRN
PRN|<CC-PP-CLR>
PRN|<CC-PP-CLR>
VBN
VBN
NN
NN
VP
VP
VP|<ADJP-PRD-,>
VP|<ADJP-PRD-,>
NN
NN
SINV
SINV
S-TPC-2
S-TPC-2
S-TPC-2|<,-``>
S-TPC-2|<,-``>
S-TPC-2|<``-NP-SBJ-161>
S-TPC-2|<``-NP-SBJ-161>
S-TPC-2|<NP-SBJ-161-VP>
S-TPC-2|<NP-SBJ-161-VP

S-TPC-1|<SBAR-TMP-,>
IN
IN
NP
NP
NN
NN
VP
VP
VBG
VBG
S
S
SBAR-ADV
SBAR-ADV
SBAR-ADV|<CC-WHADVP-1>
SBAR-ADV|<CC-WHADVP-1>
SBAR-ADV|<WHADVP-1-S>
SBAR-ADV|<WHADVP-1-S>
S
S
NP-SBJ-15
NP-SBJ-15
S|<,-NP-SBJ-16>
S|<,-NP-SBJ-16>
S|<NP-SBJ-16-VP>
S|<NP-SBJ-16-VP>
NP-SBJ-16
NP-SBJ-16
VBN
VBN
VBN
VBN
JJR
JJR
VP
VP
VBN
VBN
S
S
JJ
JJ
NN
NN
PP-CLR
PP-CLR
VBG
VBG
VBG
VBG
NNP
NNP
VB
VB
NX
NX
VP|<,-PP-LOC>
VP|<,-PP-LOC>
NNP
NNP
NP
NP
NP|<JJS-VBN>
NP|<JJS-VBN>
NNP
NNP
NN
NN
NNP
NNP
NNP
NNP
VBG
VBG
JJ
JJ
NN
NN
NN
NN
NP-SBJ
NP-SBJ
NP-SBJ|<PP-PP-LOC>
NP-SBJ|<PP-PP-LOC>
NN
NN
NNP
NNP
RB
RB
NNP
NNP
NP-PRD
NP-PRD
NP-PRD|<NNP-NNPS>
NP-PRD|<NNP-NNPS>
NNP
NNP
NP-PRD|<NNPS-NNP>
NP-PRD|<NNPS-NNP>
NNPS
NNPS
NP-PRD|<NNP-NNP>
NP-PRD|<NNP-NNP>
NP-SBJ
NP-SBJ
NP-SBJ|<``-NP-TTL>
NP-SBJ|<``-NP-TTL>
NP-SBJ|<NP-TTL-''>
NP-SBJ|<NP-TTL-''>
NNP
NNP
S
S
NP-SBJ-17
NP-SBJ-17
VBD
VBD
NN
NN
S-PRD
S-PRD
NP-SBJ-18
NP-SBJ-18
NNS
NNS
RB
RB
JJ
JJ
JJ
JJ
NP
NP
NP|<RB-JJ>
NP|<RB-JJ>
NNS
NNS
NP
NP
NNS
NNS
UCP-PRD
UCP-PRD
VP
VP
VBN
VBN
UCP-P

VBN
VBN
NNS
NNS
NNP
NNP
NP
NP
NP|<VBD-NNS>
NP|<VBD-NNS>
NNS
NNS
NNP
NNP
NNPS
NNPS
CD
CD
NP|<:-NP+NP>
NP|<:-NP+NP>
NP|<NP+NP-:>
NP|<NP+NP-:>
NP+NP
NP+NP
CD
CD
NNP
NNP
NNS
NNS
VBG
VBG
VP
VP
VP|<ADVP-PP-MNR>
VP|<ADVP-PP-MNR>
NN
NN
NNP
NNP
NNPS
NNPS
NP+NP
NP+NP
NNS
NNS
NP+NP|<PP-:>
NP+NP|<PP-:>
NAC-TMP
NAC-TMP
NAC-TMP|<,-NNP>
NAC-TMP|<,-NNP>
NAC-TMP|<NNP-CD>
NAC-TMP|<NNP-CD>
NP+NP|<:-NP>
NP+NP|<:-NP>
CD
CD
CD
CD
NP
NP
NNP
NNP
NNP
NNP
NP|<NNP-NNP>
NP|<NNP-NNP>
NNP
NNP
NNP
NNP
NNP
NNP
NNP
NNP
NP|<:-VP>
NP|<:-VP>
VBN
VBN
CD
CD
NNS
NNS
JJ
JJ
JJ
JJ
NNP
NNP
NNP
NNP
NNP
NNP
NP|<PP-PRN>
NP|<PP-PRN>
NP|<PRN-NP>
NP|<PRN-NP>
CD
CD
CD
CD
CD
CD
NNP
NNP
NNP
NNP
NNP
NNP
NNPS
NNPS
NNP
NNP
CD
CD
NP
NP
VBN
VBN
NP|<:-RB>
NP|<:-RB>
NP|<RB-NP>
NP|<RB-NP>
NN
NN
SBAR-ADV
SBAR-ADV
VBD
VBD
VBN
VBN
NNP
NNP
NNP
NNP
WHNP-2
WHNP-2
WHNP
WHNP
WHNP|<NN-NN>
WHNP|<NN-NN>
WHNP-2|<,-NP>
WHNP-2|<,-NP>
WHNP-2|<NP-,>
WHNP-2|<NP-,>
NNP
NNP
NNPS
NNPS
JJ
JJ
NNS
NNS
VP|<RB-ADJP-PRD>
VP|<RB-ADJP-PRD>
JJ
JJ
NN
NN
VBD
VBD
PP-CLR
PP-CL

JJ
JJ
NNP
NNP
WHNP-47
WHNP-47
VBD
VBD
NN
NN
NNP
NNP
NNP
NNP
JJ
JJ
NN
NN
NN
NN
JJ
JJ
NNP
NNP
NNP
NNP
S
S
RB
RB
NN
NN
VBZ
VBZ
S
S
CD
CD
S|<NP-SBJ-64-VP>
S|<NP-SBJ-64-VP>
NP-SBJ-64
NP-SBJ-64
PP
PP
PP|<NP-LGS+NP-SBAR>
PP|<NP-LGS+NP-SBAR>
NP-LGS+NP
NP-LGS+NP
NP-LGS+NP|<JJ-,>
NP-LGS+NP|<JJ-,>
JJ
JJ
NP-LGS+NP|<,-JJ>
NP-LGS+NP|<,-JJ>
NP-LGS+NP|<JJ-NN>
NP-LGS+NP|<JJ-NN>
JJ
JJ
CD
CD
S
S
RB
RB
VBD
VBD
NNS
NNS
VBD
VBD
NNP
NNP
WHNP-48
WHNP-48
VBD
VBD
NNP
NNP
VBD
VBD
VBG
VBG
NNS
NNS
JJ
JJ
S|<,-NP-SBJ>
S|<,-NP-SBJ>
S|<NP-SBJ-ADVP-MNR>
S|<NP-SBJ-ADVP-MNR>
S|<ADVP-MNR-VP>
S|<ADVP-MNR-VP>
RB
RB
VBD
VBD
JJ
JJ
NNS
NNS
NNP
NNP
NNS
NNS
NP-SBJ|<NNS-NN>
NP-SBJ|<NNS-NN>
NN
NN
NP-SBJ|<CC-NNS>
NP-SBJ|<CC-NNS>
NNS
NNS
NN
NN
NNP
NNP
NNP
NNP
VP|<PRT-PP-LOC>
VP|<PRT-PP-LOC>
NN
NN
VP
VP
VBN
VBN
VP
VP
CD
CD
PP-PUT
PP-PUT
JJ
JJ
PP-TMP
PP-TMP
NP-EXT
NP-EXT
NNS
NNS
NNS
NNS
VP
VP
VP|<PP-LOC-PRD-S-ADV>
VP|<PP-LOC-PRD-S-ADV>
VBG
VBG
JJ
JJ
NP-SBJ
NP-SBJ
VP
VP
VBG
VBG
S
S
S|<FRAG-TTL-SBJ-1-''>
S|<FRAG-TTL-SBJ-1-''>
FRAG-TTL-S

VBP
NNS
NNS
NP-SBJ|<CC-NNP>
NP-SBJ|<CC-NNP>
NNP
NNP
VB
VB
VB
VB
NNP
NNP
S
S
S|<,-NP-SBJ-84>
S|<,-NP-SBJ-84>
S|<NP-SBJ-84-VP>
S|<NP-SBJ-84-VP>
NP-SBJ-84
NP-SBJ-84
NNP
NNP
NNP
NNP
VP
VP
VBN
VBN
VP|<NP-NP-ADV>
VP|<NP-NP-ADV>
S
S
NP-SBJ-85
NP-SBJ-85
NP-SBJ-85|<PP-,>
NP-SBJ-85|<PP-,>
NP-SBJ-85|<,-SBAR>
NP-SBJ-85|<,-SBAR>
NP-SBJ-85|<SBAR-,>
NP-SBJ-85|<SBAR-,>
WHNP-1
WHNP-1
WHNP
WHNP
CD
CD
NP|<PP-PP>
NP|<PP-PP>
VBN
VBN
VBG
VBG
NNP
NNP
QP
QP
VB
VB
VBG
VBG
NNS
NNS
VBP
VBP
PP
PP
NP
NP
VB
VB
VP|<NP-TMP-,>
VP|<NP-TMP-,>
VP|<,-CC>
VP|<,-CC>
VP|<CC-NP-1>
VP|<CC-NP-1>
NP
NP
S|<S-TPC-2-,>
S|<S-TPC-2-,>
S-TPC-2
S-TPC-2
S-TPC-2|<,-S>
S-TPC-2|<,-S>
S-TPC-2|<S-,>
S-TPC-2|<S-,>
NN
NN
VP
VP
NP
NP
NP|<''-ADVP>
NP|<''-ADVP>
NNP
NNP
NNP
NNP
NP|<,-NP-LOC>
NP|<,-NP-LOC>
VB
VB
S
S
NNS
NNS
NP
NP
PDT
PDT
VP|<NP-NP-EXT>
VP|<NP-NP-EXT>
VP
VP
VP|<PRT-ADVP-MNR>
VP|<PRT-ADVP-MNR>
RB
RB
VP
VP
S
S
S|<PP-VP>
S|<PP-VP>
VBZ
VBZ
JJ
JJ
SBAR
SBAR
WHPP-3
WHPP-3
NN
NN
VB
VB
IN
IN
NNS
NNS
VP
VP
VBN
VBN
NN
NN
NN
NN
NNS
NNS
JJ
JJ
JJ

CD
NP|<VP-,>
NP|<VP-,>
CD
CD
PP
PP
NP|<,-PP>
NP|<,-PP>
NNP
NNP
NNP
NNP
NP|<CC-NNP>
NP|<CC-NNP>
JJ
JJ
NNS
NNS
NNP
NNP
NP
NP
NP
NP
VB
VB
NP|<NP-TMP-,>
NP|<NP-TMP-,>
NP|<,-PP>
NP|<,-PP>
NP|<PP-S>
NP|<PP-S>
NP|<VBN-CD>
NP|<VBN-CD>
NP|<S-.>
NP|<S-.>
CD
CD
NP-SBJ
NP-SBJ
NP-SBJ|<CD-JJ>
NP-SBJ|<CD-JJ>
NN
NN
VP
VP
VP|<ADJP-PRD-PP-TMP>
VP|<ADJP-PRD-PP-TMP>
VP|<PP-TMP-PP-CLR>
VP|<PP-TMP-PP-CLR>
VP
VP
VP|<S-ADVP-TMP-CLR>
VP|<S-ADVP-TMP-CLR>
ADVP-TMP-CLR
ADVP-TMP-CLR
NNP
NNP
NP
NP
NP
NP
NP|<PP-TMP-,>
NP|<PP-TMP-,>
CD
CD
CD
CD
VP|<,-PP>
VP|<,-PP>
NNP
NNP
NNP
NNP
NP
NP
CD
CD
NNP
NNP
NNP
NNP
NNP
NNP
NN
NN
NP-SBJ-1
NP-SBJ-1
NP-SBJ-1|<NNP-NNPS>
NP-SBJ-1|<NNP-NNPS>
NP-SBJ-1|<NNPS-NN>
NP-SBJ-1|<NNPS-NN>
NNPS
NNPS
NNP
NNP
JJ
JJ
NN
NN
NN
NN
NNP
NNP
FRAG
FRAG
NN
NN
FRAG|<,-SBAR-PRP>
FRAG|<,-SBAR-PRP>
FRAG|<SBAR-PRP-.>
FRAG|<SBAR-PRP-.>
NP-1
NP-1
NP-1|<NNP-NNS>
NP-1|<NNP-NNS>
VB
VB
VP
VP
VBZ
VBZ
NNP
NNP
VB
VB
NP-PRD
NP-PRD
JJ
JJ
NN
NN
NP
NP
NP|<CD-JJS>
NP|<CD-JJS>
VP|<PP-CLR-,>
VP|<PP-CLR-,>
NNS
NNS
VP|<,-VP-

VP-TPC-1|<NP-PP-CLR>
VP-TPC-1|<NP-PP-CLR>
SINV|<VP-NP-SBJ-2>
SINV|<VP-NP-SBJ-2>
SINV|<NP-SBJ-2-.>
SINV|<NP-SBJ-2-.>
NP-SBJ-2
NP-SBJ-2
NNP
NNP
NNP
NNP
NP-SBJ-2|<CC-NP>
NP-SBJ-2|<CC-NP>
NNP
NNP
NNS
NNS
PP-CLR
PP-CLR
JJ
JJ
NN
NN
NNS
NNS
VP
VP
VP|<NP-PP-TMP-CLR>
VP|<NP-PP-TMP-CLR>
NN
NN
VBD
VBD
JJ
JJ
NP-SBJ-1
NP-SBJ-1
NNP
NNP
NNP
NNP
NNP
NNP
NN
NN
JJ
JJ
NP-SBJ-1
NP-SBJ-1
NNP
NNP
NNP
NNP
CD
CD
RB
RB
NP|<VBN-CD>
NP|<VBN-CD>
VBN
VBN
CD
CD
CD
CD
CD
CD
CD
CD
VP
VP
VP|<SBAR-PP-1>
VP|<SBAR-PP-1>
JJ
JJ
VP|<ADJP-PRD-''>
VP|<ADJP-PRD-''>
CD
CD
CD
CD
VB
VB
NP-SBJ
NP-SBJ
JJR
JJR
CD
CD
CD
CD
CD
CD
CD
CD
CD
CD
CD
CD
S-TPC-2
S-TPC-2
VBN
VBN
VBP
VBP
JJ
JJ
NN
NN
NN
NN
RB
RB
S
S
NP-SBJ-2|<NN-NN>
NP-SBJ-2|<NN-NN>
NNS
NNS
VBN
VBN
VP|<PP-TMP-PP>
VP|<PP-TMP-PP>
NNP
NNP
NNP
NNP
VBG
VBG
S-NOM
S-NOM
S-NOM|<CC-S>
S-NOM|<CC-S>
NP-SBJ
NP-SBJ
CD
CD
NNS
NNS
CD
CD
NN
NN
VP
VP
RB
RB
PP-DIR
PP-DIR
CD
CD
PP-DIR|<PP-PP-1>
PP-DIR|<PP-PP-1>
CD
CD
CD
CD
CD
CD
NN
NN
JJ
JJ
NNP
NNP
NNP
NNP
S-TPC-1
S-TPC-1
JJ
JJ
NP|<,-UCP>
NP|<,-U

NN
NN
VP
VP
VP|<PP-,>
VP|<PP-,>
NP
NP
JJ
JJ
NP-SBJ-1
NP-SBJ-1
VP|<S-,>
VP|<S-,>
VB
VB
NNS
NNS
CD
CD
NNS
NNS
NNP
NNP
VP
VP
CD
CD
VBD
VBD
JJ
JJ
JJ
JJ
VP
VP
NNP
NNP
NNS
NNS
NNP
NNP
NNP
NNP
NP-1
NP-1
NN
NN
JJ
JJ
NNP
NNP
NN
NN
NN
NN
VP|<ADJP-PRD-PP-TMP>
VP|<ADJP-PRD-PP-TMP>
JJ
JJ
JJ
JJ
NNS
NNS
VB
VB
NNP
NNP
VP
VP
NP-CLR
NP-CLR
NNP
NNP
NNP
NNP
JJ
JJ
VB
VB
VP
VP
VP|<SBAR-NOM-PP-CLR>
VP|<SBAR-NOM-PP-CLR>
NP-PRD
NP-PRD
JJ
JJ
NNS
NNS
VBD
VBD
CD
CD
NNP
NNP
NNP
NNP
NNP
NNP
NNP
NNP
CD
CD
JJ
JJ
NP|<CD--NONE->
NP|<CD--NONE->
NP|<-NONE--NN>
NP|<-NONE--NN>
NN
NN
NP
NP
NP
NP
NP|<NP-ADV-PP>
NP|<NP-ADV-PP>
NNP
NNP
NNS
NNS
NN
NN
NP-SBJ-1
NP-SBJ-1
NP-SBJ-1|<VBN-NN>
NP-SBJ-1|<VBN-NN>
VB
VB
NP|<VBG-NN>
NP|<VBG-NN>
NNP
NNP
NNP
NNP
JJ
JJ
VBD
VBD
JJ
JJ
NP-SBJ
NP-SBJ
NP-SBJ|<,-UCP>
NP-SBJ|<,-UCP>
NP-SBJ|<UCP-,>
NP-SBJ|<UCP-,>
JJ
JJ
NP|<NNS-NN>
NP|<NNS-NN>
NNS
NNS
UCP|<NN-CC>
UCP|<NN-CC>
JJ
JJ
VBN
VBN
NP-PRD
NP-PRD
NP-PRD|<,-''>
NP-PRD|<,-''>
NP-PRD|<''-CC>
NP-PRD|<''-CC>
VBD
VBD
NN
NN
JJ
JJ
VP|<ADVP-TMP-PP-PRD>
V

VP
ADVP-EXT
ADVP-EXT
VP
VP
SBAR-PRP-PRD
SBAR-PRP-PRD
SBAR-PRP-PRD|<IN-S>
SBAR-PRP-PRD|<IN-S>
VBP
VBP
NN
NN
NNS
NNS
VBN
VBN
NNP
NNP
JJ
JJ
NP-TMP-CLR
NP-TMP-CLR
CD
CD
NP
NP
CD
CD
NP|<,-PRN>
NP|<,-PRN>
CD
CD
CD
CD
CD
CD
CD
CD
CD
CD
CD
CD
JJ
JJ
CD
CD
CD
CD
JJ
JJ
NP
NP
VP
VP
VBD
VBD
VP|<ADVP-DIR-NP>
VP|<ADVP-DIR-NP>
JJ
JJ
NNS
NNS
CD
CD
CD
CD
NN
NN
S
S
ADVP-PRP
ADVP-PRP
RB
RB
VB
VB
CD
CD
VBN
VBN
CD
CD
CD
CD
JJ
JJ
NN
NN
VBZ
VBZ
NNS
NNS
VBD
VBD
VP
VP
VP|<PP-CLR-ADVP-PRP>
VP|<PP-CLR-ADVP-PRP>
ADVP-PRP
ADVP-PRP
JJ
JJ
VP|<ADJP-PRD-ADVP>
VP|<ADJP-PRD-ADVP>
NP-ADV
NP-ADV
NP|<JJS-NN>
NP|<JJS-NN>
NP-ADV|<ADVP-SBAR-1>
NP-ADV|<ADVP-SBAR-1>
VP|<NP-EXT-,>
VP|<NP-EXT-,>
VP|<,-PP-DIR>
VP|<,-PP-DIR>
VP|<PP-DIR-,>
VP|<PP-DIR-,>
NNS
NNS
NP
NP
NNS
NNS
VP|<NP-PP-CLR>
VP|<NP-PP-CLR>
VP|<PP-CLR-PP-1>
VP|<PP-CLR-PP-1>
JJ
JJ
VBZ
VBZ
NN
NN
S
S
S|<ADJP-PRD-SBAR-1>
S|<ADJP-PRD-SBAR-1>
VBG
VBG
VBZ
VBZ
NP-SBJ
NP-SBJ
NN
NN
VB
VB
NP
NP
VP|<PP-DIR-NP-TMP>
VP|<PP-DIR-NP-TMP>
NP
NP
NNS
NNS
CD
CD
CD
CD
CD
CD
NNS
NNS
CD
CD
CD


In [6]:
Unary={}
Binary={}
for i in range(len(grammar.productions())):
    if len(grammar.productions()[i].rhs())==1:
        A=str(grammar.productions()[i]).split(' -> ')[0]
        BC=str(grammar.productions()[i]).split(' -> ')[1].split()[:-1]
        if A in Unary.keys():
            Unary[A].append(BC[0])
        else:
            Unary[A]=[]
            Unary[A].append(BC[0])
    else:
        A=str(grammar.productions()[i]).split(' -> ')[0]
        BC=str(grammar.productions()[i]).split(' -> ')[1].split()[:-1]
        if A in Binary.keys():
            Binary[A].append(BC)
        else:
            Binary[A]=[]
            Binary[A].append(BC)

In [7]:
Binary

{'S': [['NP-SBJ', 'S|<VP-.>'],
  ['NP-SBJ-1', 'S|<VP-.>'],
  ['NP-SBJ', 'NP-PRD'],
  ['S-TPC-1', 'S|<,-NP-SBJ>'],
  ['S-TPC-2', 'S|<,-NP-SBJ>'],
  ['NP-SBJ', 'VP'],
  ['SBAR-ADV', 'S|<,-NP-SBJ>'],
  ['NP-SBJ-2', 'VP'],
  ['NP-SBJ-1', 'VP'],
  ['PP-LOC', 'S|<,-NP-SBJ>'],
  ['NP-SBJ-4', 'VP'],
  ['NP-SBJ-5', 'VP'],
  ['PP-TMP', 'S|<,-NP-SBJ>'],
  ['PP-TMP', 'S|<,-NP-SBJ-6>'],
  ['NP-SBJ-7', 'S|<VP-.>'],
  ['NP-SBJ-8', 'VP'],
  ['``', 'S|<CC-NP-SBJ-1>'],
  ['NP-SBJ-9', 'S|<VP-.>'],
  ['NP-SBJ', 'S|<,-PP>'],
  ['PP', 'S|<,-NP-SBJ-1>'],
  ['ADVP', 'S|<,-NP-SBJ>'],
  ['S', 'S|<:-S>'],
  ['NP-SBJ-10', 'S|<VP-.>'],
  ['NP-SBJ-11', 'S|<VP-.>'],
  ['S', 'S|<,-CC>'],
  ['NP-SBJ-12', 'S|<VP-.>'],
  ['PP-LOC', 'S|<NP-SBJ-VP>'],
  ['ADVP-TMP', 'S|<,-NP-SBJ>'],
  ['SBAR-TMP', 'S|<,-NP-SBJ>'],
  ['CC', 'S|<NP-SBJ-1-VP>'],
  ['PP', 'S|<,-NP-SBJ>'],
  ['NP-TMP', 'S|<,-PP>'],
  ['S', 'S|<,-IN>'],
  ['ADVP-TMP', 'S|<,-PP>'],
  ['S-ADV', 'S|<,-NP-SBJ-1>'],
  ['NP-SBJ', 'S|<ADVP-VP>'],
  ['ADVP-LOC', 'S|<,-

In [8]:
total_dict['VP|<VP-PP>']
total1=list(total)

In [27]:
import numpy as np
from tqdm import tqdm_notebook as tqdm
sent="'I' 'saw' 'John' 'with' 'my' 'eyes'"
sent=sent.split(" ")
print(sent)
nonterms=['CC','CD','DT','EX','FW','IN','JJ','JJR','JJS','LS','MD','NN','NNS','NNP','NNPS','PDT','POS','PRP','PRP$','RB','RBR','RBS','RP','SYM','TO','UH','VBD','VB','VBG','VBN','VBP','VBZ','WDT','WP','WP$','WRB']
nonterm=len(lhs_1)
score = [ [ [0.0 for y in range(nonterm ) ] for x in range( len(sent) +1 ) ] for z in range(len(sent) +1)]
back = [ [ [None for y in range( nonterm) ] for x in range( len(sent) +1 ) ] for z in range(len(sent) +1)]
a=0
for i in tqdm(range(len(sent))):
    l=sent[i]
    pks=set()
    for A in tqdm(Unary.keys()):
        for pk in prob.keys():    
            if pk.split(' -> ')[1] in sent:
                pks.add(pk.split(' -> ')[0])
        if str(A+" -> "+l) in prob.keys():
            print(A)
            score[i][i+1][lhs_dict[A]] = float(prob[str(A+' -> '+l)])
            print(score[i][i+1][lhs_dict[A]],i)
            a+=1
    print(pks)
    added=1
    print("started")
    while added:
        added=0
        for k in tqdm(Unary.keys()):
            for item in Unary[k]:
                if item in lhs_dict.keys():
                    if score[i][i+1][lhs_dict[item]]>0:
                        p1=prob[str(k)+' -> '+item]*score[i][i+1][lhs_dict[item]]
                        if p1>score[i][i+1][lhs_dict[k]]:
                            score[i][i+1][lhs_dict[k]]=p1
                            back[i][i+1][lhs_dict[k]]=item
                            print(back[i][i+1][lhs_dict[k]])
                            added=1

["'I'", "'saw'", "'John'", "'with'", "'my'", "'eyes'"]


HBox(children=(IntProgress(value=0, max=6), HTML(value='')))

HBox(children=(IntProgress(value=0, max=465), HTML(value='')))

NNP
0.00021254 0
PRP
0.0658508 0
{'NNP', 'PRP$', 'NNS', 'PRP', 'VBD', 'IN'}
started


HBox(children=(IntProgress(value=0, max=465), HTML(value='')))

PRP
PRP
PRP
NNP
PRP
NNP
PRP
NNP
NNP
PRP
NNP
PRP
NNP
PRP
NNP
PRP
NNP
PRP
PRP
PRP
PRP
PRP
PRP
PRP
PRP
NNP
NNP
PRP
NNP
PRP
NNP
NNP
PRP
PRP
PRP
PRP
NNP
PRP
PRP
PRP
PRP
PRP
PRP
NNP
NNP
NNP


HBox(children=(IntProgress(value=0, max=465), HTML(value='')))

HBox(children=(IntProgress(value=0, max=465), HTML(value='')))

VBD
0.00164312 1
{'NNP', 'PRP$', 'NNS', 'PRP', 'VBD', 'IN'}
started


HBox(children=(IntProgress(value=0, max=465), HTML(value='')))

VBD
VBD


HBox(children=(IntProgress(value=0, max=465), HTML(value='')))

HBox(children=(IntProgress(value=0, max=465), HTML(value='')))

NNP
0.00361318 2
{'NNP', 'PRP$', 'NNS', 'PRP', 'VBD', 'IN'}
started


HBox(children=(IntProgress(value=0, max=465), HTML(value='')))

NNP
NNP
NNP
NNP
NNP
NNP
NNP
NNP
NNP
NNP
NNP
NNP
NNP
NNP
NNP
NNP
NNP
NNP
NNP
NNP
NNP
NNP
NNP
NNP
NNP


HBox(children=(IntProgress(value=0, max=465), HTML(value='')))

HBox(children=(IntProgress(value=0, max=465), HTML(value='')))

IN
0.0392614 3
{'NNP', 'PRP$', 'NNS', 'PRP', 'VBD', 'IN'}
started


HBox(children=(IntProgress(value=0, max=465), HTML(value='')))

IN
IN
IN
IN
IN
IN
IN
IN
IN
IN
IN
IN
IN
IN
IN
IN
IN
IN
IN
IN


HBox(children=(IntProgress(value=0, max=465), HTML(value='')))

HBox(children=(IntProgress(value=0, max=465), HTML(value='')))

PRP$
0.0234987 4
{'NNP', 'PRP$', 'NNS', 'PRP', 'VBD', 'IN'}
started


HBox(children=(IntProgress(value=0, max=465), HTML(value='')))

PRP$
PRP$


HBox(children=(IntProgress(value=0, max=465), HTML(value='')))

HBox(children=(IntProgress(value=0, max=465), HTML(value='')))

NNS
0.000330743 5
{'NNP', 'PRP$', 'NNS', 'PRP', 'VBD', 'IN'}
started


HBox(children=(IntProgress(value=0, max=465), HTML(value='')))

NNS
NNS
NNS
NNS
NNS
NNS
NNS
NNS
NNS
NNS
NNS
NNS
NNS
NNS
NNS
NNS
NNS


HBox(children=(IntProgress(value=0, max=465), HTML(value='')))

In [31]:
for i in range(7):
    print([j for j, e in enumerate(score[i][6]) if e != 0])

[]
[]
[]
[]
[]
[203, 595, 915, 1005, 1300, 1348, 1818, 1918, 2214, 2622, 2762, 2803, 2814, 2828, 3047, 3131, 3186, 3760]
[]


In [28]:
Binary['S']
lhs_dict['S']

3835

In [29]:
print(len(sent))
for span in tqdm(range(2,len(sent)+1)):
    print("span:"+str(span))
    for begin in tqdm(range(len(sent)-span+1)):
        end=begin+span
        print("span: "+str(span)+" begin: "+str(begin)+" end: "+str(end))
        for split in tqdm(range(begin+1,end)):
            print("span: "+str(span)+" begin: "+str(begin)+" end: "+str(end)+" split: "+str(split))
            print("Check in grammar")
            for k in Binary.keys():
                for item in Binary[k]:
                    if begin==0 and end==6:
                        
                        if item[0] in lhs_dict.keys() and item[1] in lhs_dict.keys():
                            
                            p1=score[begin][split][lhs_dict[item[0]]]*score[split][end][lhs_dict[item[1]]]*prob[str(k)+' -> '+item[0]+" "+item[1]]
                            if score[begin][split][lhs_dict[item[0]]]!=0 and score[split][end][lhs_dict[item[1]]]: 
                                print(score[begin][split][lhs_dict[item[0]]])
                                print(score[split][end][lhs_dict[item[1]]])
                            if p1>score[begin][end][lhs_dict[A]]:
                                print(p1)
                                score[begin][end][lhs_dict[A]]=p1
                                back[begin][end][lhs_dict[A]]=(split,item[0],item[1])
                                print(back[begin][end][lhs_dict[A]])
                                print("S: "+k)
                                print(begin,end)
        added=1
        while added:
            added=0
            for k in tqdm(Unary.keys()):
                for item in Unary[k]:
                    if item in lhs_dict.keys():
                        if score[begin][end][lhs_dict[item]]>0:
                            p1=prob[str(k)+' -> '+item]*score[begin][end][lhs_dict[item]]
                            print(p1)
                            if p1>score[begin][end][lhs_dict[k]]:
                                score[begin][end][lhs_dict[k]]=p1
                                back[begin][end][lhs_dict[k]]=item
                                added=1

6


HBox(children=(IntProgress(value=0, max=5), HTML(value='')))

span:2


HBox(children=(IntProgress(value=0, max=5), HTML(value='')))

span: 2 begin: 0 end: 2


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

span: 2 begin: 0 end: 2 split: 1
Check in grammar


HBox(children=(IntProgress(value=0, max=465), HTML(value='')))

span: 2 begin: 1 end: 3


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

span: 2 begin: 1 end: 3 split: 2
Check in grammar


HBox(children=(IntProgress(value=0, max=465), HTML(value='')))

span: 2 begin: 2 end: 4


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

span: 2 begin: 2 end: 4 split: 3
Check in grammar


HBox(children=(IntProgress(value=0, max=465), HTML(value='')))

span: 2 begin: 3 end: 5


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

span: 2 begin: 3 end: 5 split: 4
Check in grammar


HBox(children=(IntProgress(value=0, max=465), HTML(value='')))

span: 2 begin: 4 end: 6


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

span: 2 begin: 4 end: 6 split: 5
Check in grammar


HBox(children=(IntProgress(value=0, max=465), HTML(value='')))

span:3


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

span: 3 begin: 0 end: 3


HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

span: 3 begin: 0 end: 3 split: 1
Check in grammar
span: 3 begin: 0 end: 3 split: 2
Check in grammar


HBox(children=(IntProgress(value=0, max=465), HTML(value='')))

span: 3 begin: 1 end: 4


HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

span: 3 begin: 1 end: 4 split: 2
Check in grammar
span: 3 begin: 1 end: 4 split: 3
Check in grammar


HBox(children=(IntProgress(value=0, max=465), HTML(value='')))

span: 3 begin: 2 end: 5


HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

span: 3 begin: 2 end: 5 split: 3
Check in grammar
span: 3 begin: 2 end: 5 split: 4
Check in grammar


HBox(children=(IntProgress(value=0, max=465), HTML(value='')))

span: 3 begin: 3 end: 6


HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

span: 3 begin: 3 end: 6 split: 4
Check in grammar
span: 3 begin: 3 end: 6 split: 5
Check in grammar


HBox(children=(IntProgress(value=0, max=465), HTML(value='')))

span:4


HBox(children=(IntProgress(value=0, max=3), HTML(value='')))

span: 4 begin: 0 end: 4


HBox(children=(IntProgress(value=0, max=3), HTML(value='')))

span: 4 begin: 0 end: 4 split: 1
Check in grammar
span: 4 begin: 0 end: 4 split: 2
Check in grammar
span: 4 begin: 0 end: 4 split: 3
Check in grammar


HBox(children=(IntProgress(value=0, max=465), HTML(value='')))

span: 4 begin: 1 end: 5


HBox(children=(IntProgress(value=0, max=3), HTML(value='')))

span: 4 begin: 1 end: 5 split: 2
Check in grammar
span: 4 begin: 1 end: 5 split: 3
Check in grammar
span: 4 begin: 1 end: 5 split: 4
Check in grammar


HBox(children=(IntProgress(value=0, max=465), HTML(value='')))

span: 4 begin: 2 end: 6


HBox(children=(IntProgress(value=0, max=3), HTML(value='')))

span: 4 begin: 2 end: 6 split: 3
Check in grammar
span: 4 begin: 2 end: 6 split: 4
Check in grammar
span: 4 begin: 2 end: 6 split: 5
Check in grammar


HBox(children=(IntProgress(value=0, max=465), HTML(value='')))

span:5


HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

span: 5 begin: 0 end: 5


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

span: 5 begin: 0 end: 5 split: 1
Check in grammar
span: 5 begin: 0 end: 5 split: 2
Check in grammar
span: 5 begin: 0 end: 5 split: 3
Check in grammar
span: 5 begin: 0 end: 5 split: 4
Check in grammar


HBox(children=(IntProgress(value=0, max=465), HTML(value='')))

span: 5 begin: 1 end: 6


HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

span: 5 begin: 1 end: 6 split: 2
Check in grammar
span: 5 begin: 1 end: 6 split: 3
Check in grammar
span: 5 begin: 1 end: 6 split: 4
Check in grammar
span: 5 begin: 1 end: 6 split: 5
Check in grammar


HBox(children=(IntProgress(value=0, max=465), HTML(value='')))

span:6


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

span: 6 begin: 0 end: 6


HBox(children=(IntProgress(value=0, max=5), HTML(value='')))

span: 6 begin: 0 end: 6 split: 1
Check in grammar
span: 6 begin: 0 end: 6 split: 2
Check in grammar
span: 6 begin: 0 end: 6 split: 3
Check in grammar
span: 6 begin: 0 end: 6 split: 4
Check in grammar
span: 6 begin: 0 end: 6 split: 5
Check in grammar


HBox(children=(IntProgress(value=0, max=465), HTML(value='')))

In [13]:
class GrammarTree(object):
    '''
    Tree data structure used to represent the grammar tree output generated by the cky algorithm
    '''
    def __init__(self, data):
        self.data = data
        self.left = None
        self.right = None

    def insertLeft(self, new_node):
            self.left = new_node

    def insertRight(self, new_node):
            self.right = new_node

In [14]:
from collections import deque
def print_level_order(head, queue = deque()):
    if isinstance(head,str):
        print(head)
        return
    print(head.data)
    [queue.append(node) for node in [head.left, head.right] if node]
    if queue:
        print_level_order(queue.popleft(), queue)

In [15]:
def build_tree(start,end,idx,back,non_terms):
    '''
    build_tree() builds tree from the backpointer matrix obtained in the cky() function
    :param start: start index for tree
    :param end: end index for tree
    :param idx: index used to find non_terminal
    :param back: the backpointer matrix
    :param non_terms: a list of non-terminals
    :return:
    '''
    tree = GrammarTree(non_terms[idx])
    node = back[start][end][idx]
    if isinstance(node,tuple):
        split,left_rule,right_rule = node
        tree.insertLeft(build_tree(start,split,left_rule,back,non_terms))
        tree.insertRight(build_tree(split,end,right_rule,back,non_terms))
        return tree
"""
    else:
        if node>0:
            tree.insertLeft(GrammarTree(non_terms[node]))
        return tree
"""

'\n    else:\n        if node>0:\n            tree.insertLeft(GrammarTree(non_terms[node]))\n        return tree\n'

In [16]:
def get_parse_tree(score, back,non_terms):
    '''
    get_parse_tree() calls the build_tree() method
    :param score: score matrix
    :param back: backpointer matrix
    :param non_terms: list of non_terminals
    :return: GrammarTree the final parse tree
    '''
    root_index = score[0][len(score)-1].index(max(score[0][len(score)-1]))
    print(root_index)
    print(score[0][len(score)-1][root_index])
    tree = build_tree(0,len(score)-1,root_index,back,non_terms)
    print(tree)
    return tree

In [30]:
tree=get_parse_tree(score,back,reverse_dict)

0
0.0
None


In [18]:
reverse_dict[0]

'UCP|<CC-NNP>'