# CoDeLin Demo

Constituent and Dependency Linearization System usage as a python library.

## Constituent Parsing Linearization

### In this example we encode a tree in bracketing format into a sequence of labels using the Naive Absolute Encoding. After that, we decode the labels back into our constituent tree.

In [1]:
from src.models.const_tree import C_Tree
from src.models.linearized_tree import LinearizedTree
from src.utils.constants import C_STRAT_FIRST, C_STRAT_MAX
from src.encs.enc_const import *

print("\n[*] Original tree:")
original_tree = "(S (NP (DT The) (NNS owls)) (VP (VBP are) (RB not) (SBAR (WHNP (WP what)) (S (NP (PRP they)) (VP (VBP seem))))) (PUNCT .))"
c_tree = C_Tree.from_string(original_tree)
print(c_tree)

print("\n[*] Encoding:")
encoder = C_NaiveAbsoluteEncoding(separator="_", unary_joiner="+")
print(encoder)

print("\n[*] Linearized tree:")
lc_tree = encoder.encode(c_tree)
print(lc_tree)

print("\n[*] Decoded tree:")
c_tree = encoder.decode(lc_tree)
c_tree.postprocess_tree(conflict_strat=C_STRAT_FIRST,clean_nulls=True)
print(c_tree)


[*] Original tree:
(S (NP (DT The) (NNS owls)) (VP (VBP are) (RB not) (SBAR (WHNP (WP what)) (S (NP (PRP they)) (VP (VBP seem))))) (PUNCT .))

[*] Encoding:
Constituent Naive Absolute Encoding

[*] Linearized tree:
-BOS-	-BOS-	-BOS-
The	DT	2_NP
owls	NNS	1_S
are	VBP	2_VP
not	RB	2_VP
what	WP	3_SBAR_WHNP
they	PRP	4_S_NP
seem	VBP	1_S_VP
.	PUNCT	1_S
-EOS-	-EOS-	-EOS-


[*] Decoded tree:
(S-||-S-||-S (NP (DT The) (NNS owls)) (VP-||-VP (VBP are) (RB not) (SBAR (WHNP (WP what)) (S (NP (PRP they)) (VP (VBP seem))))) (PUNCT .))


### The following example deals with constituent trees with features embedded on the part of speech tags using Dynamic Encoding. The sample sentence is extracted from the SPMRL German treebank.

In [2]:
print("\n[*] Original tree:")
original_tree = "(PP (APPR-AC##lem=in|_## IM) (NN-NK##lem=Blick|case=dat|number=sg|gender=masc## BLICK))"
f_idx_dict={"lem":0,"case":1,"number":2,"gender":3}

c_tree = C_Tree.from_string(original_tree)
print(c_tree)

print("\n[*] Encoding:")
encoder = C_NaiveDynamicEncoding(separator="_", unary_joiner="+")
print(encoder)

print("\n[*] Linearized tree:")
lc_tree = encoder.encode(c_tree)
print(lc_tree.to_string(f_idx_dict))

print("\n[*] Decoded tree:")
c_tree = encoder.decode(lc_tree)
c_tree.postprocess_tree(conflict_strat=C_STRAT_FIRST,clean_nulls=True)
print(c_tree)


[*] Original tree:
(PP (APPR-AC##lem=in|_## IM) (NN-NK##lem=Blick|case=dat|number=sg|gender=masc## BLICK))

[*] Encoding:
Constituent Naive Dynamic Encoding

[*] Linearized tree:
-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-
IM	APPR-AC	in	_	_	_	_	1*_PP
BLICK	NN-NK	Blick	dat	sg	masc	_	0*_PP
-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-


[*] Decoded tree:
(PP-||-PP (APPR-AC IM) (NN-NK BLICK))


### Example of decoding labels from a string, binarize them and restore them

In [3]:
labels_str = "-BOS-	-BOS-	-BOS-"+'\n'+\
"``	``	1_S"+'\n'+\
"Managed	VBD	3_S+VP"+'\n'+\
"properly	RB	3_S+VP_ADVP"+'\n'+\
",	,	2_PP"+'\n'+\
"and	CC	2_PP"+'\n'+\
"with	IN	4_PP"+'\n'+\
"a	DT	5_NP"+'\n'+\
"long-term	JJ	5_NP"+'\n'+\
"outlook	NN	1_S"+'\n'+\
",	,	1_S"+'\n'+\
"these	DT	1_S_NP"+'\n'+\
"can	MD	2_VP"+'\n'+\
"become	VB	3_VP"+'\n'+\
"investment-grade	JJ	4_NP"+'\n'+\
"quality	NN	4_NP"+'\n'+\
"properties	NNS	1_S"+'\n'+\
".	.	1_S"+'\n'+\
"-EOS-	-EOS-	-EOS-"

lt = LinearizedTree.from_string(labels_str, mode="CONST")
enc = C_NaiveAbsoluteEncoding(separator="_", unary_joiner="+")
ct = enc.decode(lt)
ct = ct.postprocess_tree(conflict_strat=C_STRAT_MAX, clean_nulls=True)
# binarize
bt = C_Tree.to_binary(ct)
# decode
dt = C_Tree.restore_from_binary(bt)

print(ct==dt)

True


### The following block of code will run all the available encodings for CoDeLin.

In [4]:
print("\n[*] Original tree:")
original_tree = "(S (NP (DT The) (NNS owls)) (VP (VBP are) (RB not) (SBAR (WHNP (WP what)) (S (NP (PRP they)) (VP (VBP seem))))) (PUNCT .))"
c_tree = C_Tree.from_string(original_tree)
print(c_tree)

print("\n[*] Linearized tree with Absolute Encoding:")
a_encoder = C_NaiveAbsoluteEncoding(separator="_", unary_joiner="+")
a_lc_tree = a_encoder.encode(C_Tree.from_string(original_tree))
print(a_lc_tree)
a_d_tree = a_encoder.decode(a_lc_tree)
a_d_tree.postprocess_tree(conflict_strat=C_STRAT_FIRST,clean_nulls=True)
print(a_d_tree)
print("Do trees match?",(a_d_tree==c_tree))

print("\n[*] Linearized tree with Relative Encoding:")
r_encoder = C_NaiveRelativeEncoding(separator="_", unary_joiner="+")
r_lc_tree = r_encoder.encode(C_Tree.from_string(original_tree))
print(r_lc_tree)
r_d_tree = r_encoder.decode(r_lc_tree)
r_d_tree.postprocess_tree(conflict_strat=C_STRAT_FIRST,clean_nulls=True)
print(r_d_tree)
print("Do trees match?",(r_d_tree==c_tree))


print("\n[*] Linearized tree with Dynamic Encoding:")
d_encoder = C_NaiveDynamicEncoding(separator="_", unary_joiner="+")
d_lc_tree = d_encoder.encode(C_Tree.from_string(original_tree))
print(d_lc_tree)
d_d_tree = d_encoder.decode(d_lc_tree)
d_d_tree.postprocess_tree(conflict_strat=C_STRAT_FIRST,clean_nulls=True)
print(d_d_tree)
print("Do trees match?",(d_d_tree==c_tree))

print("\n[*] Linearized tree with Incremental Encoding:")
i_encoder = C_NaiveIncrementalEncoding(separator="_", unary_joiner="+")
i_lc_tree = i_encoder.encode(C_Tree.from_string(original_tree))
print(i_lc_tree)
i_d_tree = i_encoder.decode(i_lc_tree)
i_d_tree.postprocess_tree(conflict_strat=C_STRAT_FIRST,clean_nulls=True)
print(i_d_tree)
print("Do trees match?",(i_d_tree==c_tree))



[*] Original tree:
(S (NP (DT The) (NNS owls)) (VP (VBP are) (RB not) (SBAR (WHNP (WP what)) (S (NP (PRP they)) (VP (VBP seem))))) (PUNCT .))

[*] Linearized tree with Absolute Encoding:
-BOS-	-BOS-	-BOS-
The	DT	2_NP
owls	NNS	1_S
are	VBP	2_VP
not	RB	2_VP
what	WP	3_SBAR_WHNP
they	PRP	4_S_NP
seem	VBP	1_S_VP
.	PUNCT	1_S
-EOS-	-EOS-	-EOS-

(S-||-S-||-S (NP (DT The) (NNS owls)) (VP-||-VP (VBP are) (RB not) (SBAR (WHNP (WP what)) (S (NP (PRP they)) (VP (VBP seem))))) (PUNCT .))
Do trees match? False

[*] Linearized tree with Relative Encoding:
-BOS-	-BOS-	-BOS-
The	DT	2*_NP
owls	NNS	-1*_S
are	VBP	1*_VP
not	RB	0*_VP
what	WP	1*_SBAR_WHNP
they	PRP	1*_S_NP
seem	VBP	-3*_S_VP
.	PUNCT	0*_S
-EOS-	-EOS-	-EOS-

(S-||-S-||-S (NP (DT The) (NNS owls)) (VP-||-VP (VBP are) (RB not) (SBAR (WHNP (WP what)) (S (NP (PRP they)) (VP (VBP seem))))) (PUNCT .))
Do trees match? False

[*] Linearized tree with Dynamic Encoding:
-BOS-	-BOS-	-BOS-
The	DT	2*_NP
owls	NNS	-1*_S
are	VBP	1*_VP
not	RB	0*_VP
what	WP	1*_SBAR_

## Dependency Parsing Linearization

### Test with Naive Absolute Encoding

In [5]:
from src.models.deps_tree import D_Tree
from src.encs.enc_deps import *
from src.utils.constants import D_ROOT_HEAD

conllu_sample = "# sent_id = 1\n"+\
"# text = The owls are not what they seem.\n"+\
"1\tThe\tthe\tDET\tDT\tDefinite=Def|PronType=Art\t2\tdet\t_\t_\n"+\
"2\towls\towl\tNOUN\tNNS\tNumber=Plur\t3\tnsubj\t_\t_\n"+\
"3\tare\tbe\tAUX\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t0\troot\t_\t_\n"+\
"4\tnot\tnot\tPART\tRB\t_\t3\tadvmod\t_\t_\n"+\
"5\twhat\twhat\tPRON\tWP\tPronType=Int\t6\tnsubj\t_\t_\n"+\
"6\tthey\tthey\tPRON\tPRP\tCase=Nom|Number=Plur|Person=3|PronType=Prs\t3\tparataxis\t_\t_\n"+\
"7\tseem\tseem\tVERB\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t6\tccomp\t_\t_\n"+\
"8\t.\t.\tPUNCT\t.\t_\t3\tpunct\t_\t_"

f_idx_dict={"Number":0,"Mood":1,"PronType":2,"Tense":3,"VerbForm":4, "Person":5, "VerbForm":6, "Definite":7, "Case":8}

print("\n[*] Original tree:")
d_tree = D_Tree.from_string(conllu_sample)
print(d_tree)

print("\n[*] Encoding:")
encoder = D_NaiveAbsoluteEncoding(separator="_")
print(encoder)

print("\n[*] Linearized tree:")
ld_tree = encoder.encode(d_tree)
print(ld_tree.to_string(f_idx_dict))

print("\n[*] Decoded tree:")
dc_tree = encoder.decode(ld_tree)
dc_tree.postprocess_tree(search_root_strat=D_ROOT_HEAD, allow_multi_roots=False)
print(dc_tree)

# matching using heads
print("\n [*] Do trees match?",(dc_tree.get_heads()==d_tree.get_heads()))


[*] Original tree:
0	-ROOT-	_	-ROOT-	_	_	0	-NOREL-	_	_
1	The	the	DET	DT	['Definite=Def', 'PronType=Art']	2	det	_	_
2	owls	owl	NOUN	NNS	['Number=Plur']	3	nsubj	_	_
3	are	be	AUX	VBP	['Mood=Ind', 'Tense=Pres', 'VerbForm=Fin']	0	root	_	_
4	not	not	PART	RB	[None]	3	advmod	_	_
5	what	what	PRON	WP	['PronType=Int']	6	nsubj	_	_
6	they	they	PRON	PRP	['Case=Nom', 'Number=Plur', 'Person=3', 'PronType=Prs']	3	parataxis	_	_
7	seem	seem	VERB	VBP	['Mood=Ind', 'Tense=Pres', 'VerbForm=Fin']	6	ccomp	_	_
8	.	.	PUNCT	.	[None]	3	punct	_	_



[*] Encoding:
Dependency Naive Absolute Encoding

[*] Linearized tree:
-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-
The	DET	_	_	Art	_	_	_	_	Def	_	2_det
owls	NOUN	Plur	_	_	_	_	_	_	_	_	3_nsubj
are	AUX	_	Ind	_	Pres	_	_	Fin	_	_	0_root
not	PART	_	_	_	_	_	_	_	_	_	3_advmod
what	PRON	_	_	Int	_	_	_	_	_	_	6_nsubj
they	PRON	Plur	_	Prs	_	_	3	_	_	Nom	3_parataxis
seem	VERB	_	Ind	_	Pres	_	_	Fin	_	_	6_ccomp
.	PUNCT	_	_	_	_	_	_	_	_	_	3_punct
-EOS-	-EOS-	-EOS-

### Test of all Dependency Encodings

In [6]:
from src.models.deps_tree import D_Tree
from src.encs.enc_deps import *
from src.utils.constants import D_ROOT_HEAD

conllu_sample = "# sent_id = 1\n"+\
"# text = The owls are not what they seem.\n"+\
"1\tThe\tthe\tDET\tDT\tDefinite=Def|PronType=Art\t2\tdet\t_\t_\n"+\
"2\towls\towl\tNOUN\tNNS\tNumber=Plur\t3\tnsubj\t_\t_\n"+\
"3\tare\tbe\tAUX\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t0\troot\t_\t_\n"+\
"4\tnot\tnot\tPART\tRB\t_\t3\tadvmod\t_\t_\n"+\
"5\twhat\twhat\tPRON\tWP\tPronType=Int\t6\tnsubj\t_\t_\n"+\
"6\tthey\tthey\tPRON\tPRP\tCase=Nom|Number=Plur|Person=3|PronType=Prs\t3\tparataxis\t_\t_\n"+\
"7\tseem\tseem\tVERB\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t6\tccomp\t_\t_\n"+\
"8\t.\t.\tPUNCT\t.\t_\t3\tpunct\t_\t_"

f_idx_dict={"Number":0,"Mood":1,"PronType":2,"Tense":3,"VerbForm":4, "Person":5, "VerbForm":6, "Definite":7, "Case":8}

print("\n[*] Original tree:")
d_tree = D_Tree.from_string(conllu_sample)
print(d_tree)

print("\n[*] Encoding:")
encoder = D_NaiveRelativeEncoding(separator="_", hang_from_root=True)
print(encoder)

print("\n[*] Linearized tree:")
ld_tree = encoder.encode(d_tree)
print(ld_tree.to_string(f_idx_dict))

print("\n[*] Decoded tree:")
dc_tree = encoder.decode(ld_tree)
dc_tree.postprocess_tree(search_root_strat=D_ROOT_HEAD, allow_multi_roots=False)
print(dc_tree)

# matching using heads
print("\n [*] Do trees match?",(dc_tree.get_heads()==d_tree.get_heads()))


[*] Original tree:
0	-ROOT-	_	-ROOT-	_	_	0	-NOREL-	_	_
1	The	the	DET	DT	['Definite=Def', 'PronType=Art']	2	det	_	_
2	owls	owl	NOUN	NNS	['Number=Plur']	3	nsubj	_	_
3	are	be	AUX	VBP	['Mood=Ind', 'Tense=Pres', 'VerbForm=Fin']	0	root	_	_
4	not	not	PART	RB	[None]	3	advmod	_	_
5	what	what	PRON	WP	['PronType=Int']	6	nsubj	_	_
6	they	they	PRON	PRP	['Case=Nom', 'Number=Plur', 'Person=3', 'PronType=Prs']	3	parataxis	_	_
7	seem	seem	VERB	VBP	['Mood=Ind', 'Tense=Pres', 'VerbForm=Fin']	6	ccomp	_	_
8	.	.	PUNCT	.	[None]	3	punct	_	_



[*] Encoding:
Dependency Naive Relative Encoding

[*] Linearized tree:
-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-
The	DET	_	_	Art	_	_	_	_	Def	_	1_det
owls	NOUN	Plur	_	_	_	_	_	_	_	_	1_nsubj
are	AUX	_	Ind	_	Pres	_	_	Fin	_	_	-NONE-_root
not	PART	_	_	_	_	_	_	_	_	_	-1_advmod
what	PRON	_	_	Int	_	_	_	_	_	_	1_nsubj
they	PRON	Plur	_	Prs	_	_	3	_	_	Nom	-3_parataxis
seem	VERB	_	Ind	_	Pres	_	_	Fin	_	_	-1_ccomp
.	PUNCT	_	_	_	_	_	_	_	_	_	-5_punct
-EOS-	-E

In [7]:
from src.models.deps_tree import D_Tree
from src.encs.enc_deps import *
from src.utils.constants import D_ROOT_HEAD

conllu_sample = "# sent_id = 1\n"+\
"# text = The owls are not what they seem.\n"+\
"1\tThe\tthe\tDET\tDT\tDefinite=Def|PronType=Art\t2\tdet\t_\t_\n"+\
"2\towls\towl\tNOUN\tNNS\tNumber=Plur\t3\tnsubj\t_\t_\n"+\
"3\tare\tbe\tAUX\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t0\troot\t_\t_\n"+\
"4\tnot\tnot\tPART\tRB\t_\t3\tadvmod\t_\t_\n"+\
"5\twhat\twhat\tPRON\tWP\tPronType=Int\t6\tnsubj\t_\t_\n"+\
"6\tthey\tthey\tPRON\tPRP\tCase=Nom|Number=Plur|Person=3|PronType=Prs\t3\tparataxis\t_\t_\n"+\
"7\tseem\tseem\tVERB\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t6\tccomp\t_\t_\n"+\
"8\t.\t.\tPUNCT\t.\t_\t3\tpunct\t_\t_"

f_idx_dict={"Number":0,"Mood":1,"PronType":2,"Tense":3,"VerbForm":4, "Person":5, "VerbForm":6, "Definite":7, "Case":8}

print("\n[*] Original tree:")
d_tree = D_Tree.from_string(conllu_sample)
print(d_tree)

print("\n[*] Encoding:")
encoder = D_BrkBasedEncoding(separator="_", displacement=True)
print(encoder)

print("\n[*] Linearized tree:")
ld_tree = encoder.encode(d_tree)
print(ld_tree.to_string(f_idx_dict))

print("\n[*] Decoded tree:")
dc_tree = encoder.decode(ld_tree)
dc_tree.postprocess_tree(search_root_strat=D_ROOT_HEAD, allow_multi_roots=False)
print(dc_tree)

# matching using heads
print("\n [*] Do trees match?",(dc_tree.get_heads()==d_tree.get_heads()))


[*] Original tree:
0	-ROOT-	_	-ROOT-	_	_	0	-NOREL-	_	_
1	The	the	DET	DT	['Definite=Def', 'PronType=Art']	2	det	_	_
2	owls	owl	NOUN	NNS	['Number=Plur']	3	nsubj	_	_
3	are	be	AUX	VBP	['Mood=Ind', 'Tense=Pres', 'VerbForm=Fin']	0	root	_	_
4	not	not	PART	RB	[None]	3	advmod	_	_
5	what	what	PRON	WP	['PronType=Int']	6	nsubj	_	_
6	they	they	PRON	PRP	['Case=Nom', 'Number=Plur', 'Person=3', 'PronType=Prs']	3	parataxis	_	_
7	seem	seem	VERB	VBP	['Mood=Ind', 'Tense=Pres', 'VerbForm=Fin']	6	ccomp	_	_
8	.	.	PUNCT	.	[None]	3	punct	_	_



[*] Encoding:
Dependency Bracketing Based Encoding

[*] Linearized tree:
-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-
The	DET	_	_	Art	_	_	_	_	Def	_	_det
owls	NOUN	Plur	_	_	_	_	_	_	_	_	<\_nsubj
are	AUX	_	Ind	_	Pres	_	_	Fin	_	_	<\_root
not	PART	_	_	_	_	_	_	_	_	_	/>//_advmod
what	PRON	_	_	Int	_	_	_	_	_	_	_nsubj
they	PRON	Plur	_	Prs	_	_	3	_	_	Nom	<\>_parataxis
seem	VERB	_	Ind	_	Pres	_	_	Fin	_	_	/>_ccomp
.	PUNCT	_	_	_	_	_	_	_	_	_	>_punct
-EOS-	-EO

In [8]:
from src.models.deps_tree import D_Tree
from src.encs.enc_deps import *
from src.utils.constants import D_ROOT_HEAD

conllu_sample = "# sent_id = 1\n"+\
"# text = The owls are not what they seem.\n"+\
"1\tThe\tthe\tDET\tDT\tDefinite=Def|PronType=Art\t2\tdet\t_\t_\n"+\
"2\towls\towl\tNOUN\tNNS\tNumber=Plur\t3\tnsubj\t_\t_\n"+\
"3\tare\tbe\tAUX\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t0\troot\t_\t_\n"+\
"4\tnot\tnot\tPART\tRB\t_\t3\tadvmod\t_\t_\n"+\
"5\twhat\twhat\tPRON\tWP\tPronType=Int\t6\tnsubj\t_\t_\n"+\
"6\tthey\tthey\tPRON\tPRP\tCase=Nom|Number=Plur|Person=3|PronType=Prs\t3\tparataxis\t_\t_\n"+\
"7\tseem\tseem\tVERB\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t6\tccomp\t_\t_\n"+\
"8\t.\t.\tPUNCT\t.\t_\t3\tpunct\t_\t_"

f_idx_dict={"Number":0,"Mood":1,"PronType":2,"Tense":3,"VerbForm":4, "Person":5, "VerbForm":6, "Definite":7, "Case":8}

print("\n[*] Original tree:")
d_tree = D_Tree.from_string(conllu_sample)
print(d_tree)

print("\n[*] Encoding:")
encoder = D_PosBasedEncoding(separator="_")
print(encoder)

print("\n[*] Linearized tree:")
ld_tree = encoder.encode(d_tree)
print(ld_tree.to_string(f_idx_dict))

print("\n[*] Decoded tree:")
dc_tree = encoder.decode(ld_tree)
dc_tree.postprocess_tree(search_root_strat=D_ROOT_HEAD, allow_multi_roots=False)
print(dc_tree)

# matching using heads
print("\n [*] Do trees match?",(dc_tree.get_heads()==d_tree.get_heads()))


[*] Original tree:
0	-ROOT-	_	-ROOT-	_	_	0	-NOREL-	_	_
1	The	the	DET	DT	['Definite=Def', 'PronType=Art']	2	det	_	_
2	owls	owl	NOUN	NNS	['Number=Plur']	3	nsubj	_	_
3	are	be	AUX	VBP	['Mood=Ind', 'Tense=Pres', 'VerbForm=Fin']	0	root	_	_
4	not	not	PART	RB	[None]	3	advmod	_	_
5	what	what	PRON	WP	['PronType=Int']	6	nsubj	_	_
6	they	they	PRON	PRP	['Case=Nom', 'Number=Plur', 'Person=3', 'PronType=Prs']	3	parataxis	_	_
7	seem	seem	VERB	VBP	['Mood=Ind', 'Tense=Pres', 'VerbForm=Fin']	6	ccomp	_	_
8	.	.	PUNCT	.	[None]	3	punct	_	_



[*] Encoding:
Dependency Part-of-Speech Based Encoding

[*] Linearized tree:
-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-
The	DET	_	_	Art	_	_	_	_	Def	_	1--NOUN_det
owls	NOUN	Plur	_	_	_	_	_	_	_	_	1--AUX_nsubj
are	AUX	_	Ind	_	Pres	_	_	Fin	_	_	-1---ROOT-_root
not	PART	_	_	_	_	_	_	_	_	_	-1--AUX_advmod
what	PRON	_	_	Int	_	_	_	_	_	_	1--PRON_nsubj
they	PRON	Plur	_	Prs	_	_	3	_	_	Nom	-1--AUX_parataxis
seem	VERB	_	Ind	_	Pres	_	_	Fin	_	_	-1--PRON_ccomp


In [9]:
from src.models.deps_tree import D_Tree
from src.encs.enc_deps import *
from src.utils.constants import D_ROOT_HEAD, D_2P_GREED

conllu_sample = "# sent_id = 1\n"+\
"# text = The owls are not what they seem.\n"+\
"1\tThe\tthe\tDET\tDT\tDefinite=Def|PronType=Art\t2\tdet\t_\t_\n"+\
"2\towls\towl\tNOUN\tNNS\tNumber=Plur\t3\tnsubj\t_\t_\n"+\
"3\tare\tbe\tAUX\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t0\troot\t_\t_\n"+\
"4\tnot\tnot\tPART\tRB\t_\t3\tadvmod\t_\t_\n"+\
"5\twhat\twhat\tPRON\tWP\tPronType=Int\t6\tnsubj\t_\t_\n"+\
"6\tthey\tthey\tPRON\tPRP\tCase=Nom|Number=Plur|Person=3|PronType=Prs\t3\tparataxis\t_\t_\n"+\
"7\tseem\tseem\tVERB\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t6\tccomp\t_\t_\n"+\
"8\t.\t.\tPUNCT\t.\t_\t3\tpunct\t_\t_"

f_idx_dict={"Number":0,"Mood":1,"PronType":2,"Tense":3,"VerbForm":4, "Person":5, "VerbForm":6, "Definite":7, "Case":8}

print("\n[*] Original tree:")
d_tree = D_Tree.from_string(conllu_sample)
print(d_tree)
    
print("\n[*] Encoding:")
encoder = D_Brk2PBasedEncoding(separator="_", displacement=False, planar_alg=D_2P_GREED)
print(encoder)

print("\n[*] Linearized tree:")
ld_tree = encoder.encode(d_tree)
print(ld_tree.to_string(f_idx_dict))

print("\n[*] Decoded tree:")
dc_tree = encoder.decode(ld_tree)
dc_tree.postprocess_tree(search_root_strat=D_ROOT_HEAD, allow_multi_roots=False)
print(dc_tree)

# matching using heads
print("\n [*] Do trees match?",(dc_tree.get_heads()==d_tree.get_heads()))


[*] Original tree:
0	-ROOT-	_	-ROOT-	_	_	0	-NOREL-	_	_
1	The	the	DET	DT	['Definite=Def', 'PronType=Art']	2	det	_	_
2	owls	owl	NOUN	NNS	['Number=Plur']	3	nsubj	_	_
3	are	be	AUX	VBP	['Mood=Ind', 'Tense=Pres', 'VerbForm=Fin']	0	root	_	_
4	not	not	PART	RB	[None]	3	advmod	_	_
5	what	what	PRON	WP	['PronType=Int']	6	nsubj	_	_
6	they	they	PRON	PRP	['Case=Nom', 'Number=Plur', 'Person=3', 'PronType=Prs']	3	parataxis	_	_
7	seem	seem	VERB	VBP	['Mood=Ind', 'Tense=Pres', 'VerbForm=Fin']	6	ccomp	_	_
8	.	.	PUNCT	.	[None]	3	punct	_	_



[*] Encoding:
Dependency 2-Planar Bracketing Based Encoding

[*] Linearized tree:
-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-
The	DET	_	_	Art	_	_	_	_	Def	_	<_det
owls	NOUN	Plur	_	_	_	_	_	_	_	_	\<_nsubj
are	AUX	_	Ind	_	Pres	_	_	Fin	_	_	\///_root
not	PART	_	_	_	_	_	_	_	_	_	>_advmod
what	PRON	_	_	Int	_	_	_	_	_	_	<_nsubj
they	PRON	Plur	_	Prs	_	_	3	_	_	Nom	\>/_parataxis
seem	VERB	_	Ind	_	Pres	_	_	Fin	_	_	>_ccomp
.	PUNCT	_	_	_	_	_	_	_	_	_	>_punct
