# CoDeLin Demo

This notebook shows how to use CoDeLin, the Constituent and Dependency Linearization System, as a Python library.

## Constituent Parsing Linearization

### In this example we encode a tree in bracketing format into a sequence of labels using the Naive Absolute Encoding. After that, we decode the labels back into our constituent tree.

In [1]:
from codelin.models.const_tree import C_Tree
from codelin.models.linearized_tree import LinearizedTree
from codelin.utils.constants import C_STRAT_FIRST, C_STRAT_MAX
from codelin.encs.enc_const import *

# Sample sentence in bracketing format.
original_tree = "(S (NP (DT The) (NNS owls)) (VP (VBP are) (RB not) (SBAR (WHNP (WP what)) (S (NP (PRP they)) (VP (VBP seem))))) (PUNCT .))"

# Step 1: parse the bracketed string into a constituent tree and show it.
print("\n[*] Original tree:")
c_tree = C_Tree.from_string(original_tree)
print(c_tree)

# Step 2: build the encoder used for this demo and show its description.
print("\n[*] Encoding:")
encoder = C_NaiveAbsoluteEncoding(
    separator="_",
    unary_joiner="+",
    reverse=False,
    binary=False,
)
print(encoder)

# Step 3: turn the tree into its linearized (one label per word) form.
print("\n[*] Linearized tree:")
lc_tree = encoder.encode(c_tree)
print(lc_tree)

# Step 4: decode the labels and post-process the result; `c_tree` is rebound
# to the decoded tree, as in the rest of the notebook.
print("\n[*] Decoded tree:")
c_tree = encoder.decode(lc_tree).postprocess_tree(
    conflict_strat=C_STRAT_FIRST,
    clean_nulls=True,
)
print(c_tree)


[*] Original tree:
(S (NP (DT The) (NNS owls)) (VP (VBP are) (RB not) (SBAR (WHNP (WP what)) (S (NP (PRP they)) (VP (VBP seem))))) (PUNCT .))

[*] Encoding:
Constituent Naive Absolute Encoding

[*] Linearized tree:
-BOS-	-BOS-	-BOS-
The	DT	2_NP
owls	NNS	1_S
are	VBP	2_VP
not	RB	2_VP
what	WP	3_SBAR_WHNP
they	PRP	4_S_NP
seem	VBP	1_S_VP
.	PUNCT	1_S
-EOS-	-EOS-	-EOS-


[*] Decoded tree:
(S (NP (DT The) (NNS owls)) (VP (VBP are) (RB not) (SBAR (WHNP (WP what)) (S (NP (PRP they)) (VP (VBP seem))))) (PUNCT .))


### The following example encodes a constituent tree whose part-of-speech tags carry embedded features, using the Naive Dynamic Encoding. The sample sentence is taken from the SPMRL German treebank.

In [2]:
# Bracketed tree whose part-of-speech tags embed features between `##` markers.
original_tree = "(PP (APPR-AC##lem=in|_## IM) (NN-NK##lem=Blick|case=dat|number=sg|gender=masc## BLICK))"

# Feature name -> index, handed to `to_string` below
# (presumably the output column of each feature -- TODO confirm).
f_idx_dict = {
    feature: position
    for position, feature in enumerate(("lem", "case", "number", "gender"))
}

# Parse and display the input tree.
print("\n[*] Original tree:")
c_tree = C_Tree.from_string(original_tree)
print(c_tree)

# Build the encoder with the options used for this demo and display it.
print("\n[*] Encoding:")
encoder = C_NaiveDynamicEncoding(
    separator="_",
    unary_joiner="+",
    reverse=True,
    binary=True,
    binary_direction="R",
    binary_marker="[b]",
)
print(encoder)

# Linearize the tree; the feature index dict controls how features are printed.
print("\n[*] Linearized tree:")
lc_tree = encoder.encode(c_tree)
print(lc_tree.to_string(f_idx_dict))

# Decode the labels back into a tree and post-process it.
print("\n[*] Decoded tree:")
c_tree = encoder.decode(lc_tree).postprocess_tree(
    conflict_strat=C_STRAT_FIRST,
    clean_nulls=True,
)
print(c_tree)


[*] Original tree:
(PP (APPR-AC##lem=in|_## IM) (NN-NK##lem=Blick|case=dat|number=sg|gender=masc## BLICK))

[*] Encoding:
Constituent Naive Dynamic Encoding

[*] Linearized tree:
-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-
IM	APPR-AC	in	_	_	_	_	1_PP
BLICK	NN-NK	Blick	dat	sg	masc	_	1_PP
-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-


[*] Decoded tree:
(PP (APPR-AC IM) (NN-NK BLICK))


### Example of parsing a tree from a string, binarizing it, and restoring the original tree

In [3]:
# Parse the sample sentence and show it.
original_tree = "(S (NP (DT The) (NNS owls)) (VP (VBP are) (RB not) (SBAR (WHNP (WP what)) (S (NP (PRP they)) (VP (VBP seem))))) (PUNCT .))"
ct = C_Tree.from_string(original_tree)
print("[*] Original tree:", ct, sep="\n")

# Binarize the parsed tree with the right-branching binarization.
bt = C_Tree.to_binary_right(ct)
print("[*] Binarized tree:", bt, sep="\n")

# Undo the binarization.
dt = C_Tree.restore_from_binary(bt)
print("[*] Restored tree:", dt, sep="\n")

# The original and restored trees are distinct objects, so they are compared
# with `shallow_equals` rather than by identity.
print("[*] Are they equal?", ct.shallow_equals(dt))

[*] Original tree:
(S (NP (DT The) (NNS owls)) (VP (VBP are) (RB not) (SBAR (WHNP (WP what)) (S (NP (PRP they)) (VP (VBP seem))))) (PUNCT .))
[*] Binarized tree:
(S (NP (DT The) (NNS owls)) (S* (VP (VBP are) (VP* (RB not) (SBAR (WHNP (WP what)) (S (NP (PRP they)) (VP (VBP seem)))))) (PUNCT .)))
[*] Restored tree:
(S (NP (DT The) (NNS owls)) (VP (VBP are) (RB not) (SBAR (WHNP (WP what)) (S (NP (PRP they)) (VP (VBP seem))))) (PUNCT .))
[*] Are they equal? True


### Naive encodings for constituent parsing examples:

In [15]:
# Reference sentence for the naive-encoding comparison. `original_tree` and
# `c_tree` stay defined at notebook level because the following cells reuse them.
original_tree = "(S (NP (DT The) (NNS owls)) (VP (VBP are) (RB not) (SBAR (WHNP (WP what)) (S (NP (PRP they)) (VP (VBP seem))))) (PUNCT .))"
print("[*] Original tree:")
c_tree = C_Tree.from_string(original_tree)
print(c_tree)

# One (header, encoder class) pair per naive encoding; all three encoders are
# built with the same options, so a single loop replaces three copied blocks.
naive_encodings = [
    ("[*] Linearized tree with Absolute Encoding:", C_NaiveAbsoluteEncoding),
    ("[*] Linearized tree with Relative Encoding:", C_NaiveRelativeEncoding),
    ("[*] Linearized tree with Dynamic Encoding:", C_NaiveDynamicEncoding),
]

for title, encoding_cls in naive_encodings:
    print("=====================================")
    print(title)
    e = encoding_cls(separator="_", unary_joiner="+", reverse=False, binary=False)

    # Encode a freshly parsed copy of the sentence and show the labels.
    lt = e.encode(C_Tree.from_string(original_tree))
    print(lt)

    # Decode, post-process and compare against the reference tree.
    dt = e.decode(lt)
    dt = dt.postprocess_tree(conflict_strat=C_STRAT_FIRST, clean_nulls=True)
    print(dt)
    print("Do trees match?",c_tree.shallow_equals(dt))


[*] Original tree:
(S (NP (DT The) (NNS owls)) (VP (VBP are) (RB not) (SBAR (WHNP (WP what)) (S (NP (PRP they)) (VP (VBP seem))))) (PUNCT .))
[*] Linearized tree with Absolute Encoding:
-BOS-	-BOS-	-BOS-
The	DT	2_NP
owls	NNS	1_S
are	VBP	2_VP
not	RB	2_VP
what	WP	3_SBAR_WHNP
they	PRP	4_S_NP
seem	VBP	1_S_VP
.	PUNCT	1_S
-EOS-	-EOS-	-EOS-

(S (NP (DT The) (NNS owls)) (VP (VBP are) (RB not) (SBAR (WHNP (WP what)) (S (NP (PRP they)) (VP (VBP seem))))) (PUNCT .))
Do trees match? True
[*] Linearized tree with Relative Encoding:
-BOS-	-BOS-	-BOS-
The	DT	2*_NP
owls	NNS	-1*_S
are	VBP	1*_VP
not	RB	0*_VP
what	WP	1*_SBAR_WHNP
they	PRP	1*_S_NP
seem	VBP	-3*_S_VP
.	PUNCT	0*_S
-EOS-	-EOS-	-EOS-

(S (NP (DT The) (NNS owls)) (VP (VBP are) (RB not) (SBAR (WHNP (WP what)) (S (NP (PRP they)) (VP (VBP seem))))) (PUNCT .))
Do trees match? True
[*] Linearized tree with Dynamic Encoding:
-BOS-	-BOS-	-BOS-
The	DT	2_NP
owls	NNS	1_S
are	VBP	2_VP
not	RB	2_VP
what	WP	3_SBAR_WHNP
they	PRP	1*_S_NP
seem	VBP	-3*_S_VP
.	PU

In [16]:
# Same round trip as the plain naive encodings, but with `reverse=True`.
# Relies on `original_tree` and `c_tree` defined by an earlier cell.
# One (header, encoder class) pair per encoding; a loop replaces three copied blocks.
incremental_encodings = [
    ("[*] Linearized tree with Absolute Encoding and incremental parsing:", C_NaiveAbsoluteEncoding),
    ("[*] Linearized tree with Relative Encoding and incremental parsing:", C_NaiveRelativeEncoding),
    ("[*] Linearized tree with Dynamic Encoding and incremental parsing:", C_NaiveDynamicEncoding),
]

for title, encoding_cls in incremental_encodings:
    print("=====================================")
    print(title)
    e = encoding_cls(separator="_", unary_joiner="+", reverse=True, binary=False)

    # Encode a freshly parsed copy of the sentence and show the labels.
    lt = e.encode(C_Tree.from_string(original_tree))
    print(lt)

    # Decode, post-process and compare against the reference tree.
    dt = e.decode(lt)
    dt = dt.postprocess_tree(conflict_strat=C_STRAT_FIRST, clean_nulls=True)
    print(dt)
    print("Do trees match?",c_tree.shallow_equals(dt))

[*] Linearized tree with Absolute Encoding and incremental parsing:
-BOS-	-BOS-	-BOS-
The	DT	1_S
owls	NNS	2_NP
are	VBP	1_S
not	RB	2_VP
what	WP	2_VP_WHNP
they	PRP	3_SBAR_NP
seem	VBP	4_S_VP
.	PUNCT	1_S
-EOS-	-EOS-	-EOS-

(S (NP (DT The) (NNS owls)) (VP (VBP are) (RB not) (SBAR (WHNP (WP what)) (S (NP (PRP they)) (VP (VBP seem))))) (PUNCT .))
Do trees match? True
[*] Linearized tree with Relative Encoding and incremental parsing:
-BOS-	-BOS-	-BOS-
The	DT	1*_S
owls	NNS	1*_NP
are	VBP	-1*_S
not	RB	1*_VP
what	WP	0*_VP_WHNP
they	PRP	1*_SBAR_NP
seem	VBP	1*_S_VP
.	PUNCT	-3*_S
-EOS-	-EOS-	-EOS-

(S (NP (DT The) (NNS owls)) (VP (VBP are) (RB not) (SBAR (WHNP (WP what)) (S (NP (PRP they)) (VP (VBP seem))))) (PUNCT .))
Do trees match? True
[*] Linearized tree with Dynamic Encoding and incremental parsing:
-BOS-	-BOS-	-BOS-
The	DT	1_S
owls	NNS	2_NP
are	VBP	1_S
not	RB	2_VP
what	WP	2_VP_WHNP
they	PRP	3_SBAR_NP
seem	VBP	1*_S_VP
.	PUNCT	-3*_S
-EOS-	-EOS-	-EOS-

(S (NP (DT The) (NNS owls)) (VP (VBP are) (

In [17]:
# Same round trip as the plain naive encodings, but with binarization enabled
# (`binary=True`, direction "R", marker "[b]").
# Relies on `original_tree` and `c_tree` defined by an earlier cell.
# One (header, encoder class) pair per encoding; a loop replaces three copied blocks.
binarized_encodings = [
    ("[*] Linearized tree with Absolute Encoding and a priori right branch binarization:", C_NaiveAbsoluteEncoding),
    ("[*] Linearized tree with Relative Encoding and a priori right branch binarization:", C_NaiveRelativeEncoding),
    ("[*] Linearized tree with Dynamic Encoding and a priori right branch binarization:", C_NaiveDynamicEncoding),
]

for title, encoding_cls in binarized_encodings:
    print("=====================================")
    print(title)
    e = encoding_cls(
        separator="_",
        unary_joiner="+",
        reverse=False,
        binary=True,
        binary_direction="R",
        binary_marker="[b]",
    )

    # Encode a freshly parsed copy of the sentence and show the labels.
    lt = e.encode(C_Tree.from_string(original_tree))
    print(lt)

    # Decode, post-process and compare against the reference tree.
    dt = e.decode(lt)
    dt = dt.postprocess_tree(conflict_strat=C_STRAT_FIRST, clean_nulls=True)
    print(dt)
    print("Do trees match?",c_tree.shallow_equals(dt))

[*] Linearized tree with Absolute Encoding and a priori right branch binarization:
-BOS-	-BOS-	-BOS-
The	DT	2_NP
owls	NNS	1_S
are	VBP	3_VP
not	RB	4_VP[b]
what	WP	5_SBAR_WHNP
they	PRP	6_S_NP
seem	VBP	2_S[b]_VP
.	PUNCT	1_S
-EOS-	-EOS-	-EOS-

(S (NP (DT The) (NNS owls)) (VP (VBP are) (RB not) (SBAR (WHNP (WP what)) (S (NP (PRP they)) (VP (VBP seem))))) (PUNCT .))
Do trees match? True
[*] Linearized tree with Relative Encoding and a priori right branch binarization:
-BOS-	-BOS-	-BOS-
The	DT	2*_NP
owls	NNS	-1*_S
are	VBP	2*_VP
not	RB	1*_VP[b]
what	WP	1*_SBAR_WHNP
they	PRP	1*_S_NP
seem	VBP	-4*_S[b]_VP
.	PUNCT	-1*_S
-EOS-	-EOS-	-EOS-

(S (NP (DT The) (NNS owls)) (VP (VBP are) (RB not) (SBAR (WHNP (WP what)) (S (NP (PRP they)) (VP (VBP seem))))) (PUNCT .))
Do trees match? True
[*] Linearized tree with Dynamic Encoding and a priori right branch binarization:
-BOS-	-BOS-	-BOS-
The	DT	2_NP
owls	NNS	1_S
are	VBP	3_VP
not	RB	1*_VP[b]
what	WP	1*_SBAR_WHNP
they	PRP	1*_S_NP
seem	VBP	-4*_S[b]_VP
.	PUNCT	

### Tetratag encoding for constituent parsing examples:

In [5]:
# Tetratag round trip for each traversal mode.
# Relies on `original_tree` and `c_tree` defined by an earlier cell.
# One (header, mode) pair per traversal; a loop replaces three copied blocks.
tetratag_runs = [
    ("[*] Linearized tree with Tetratag encoding and preorder traversal:", "preorder"),
    ("[*] Linearized tree with Tetratag encoding and inorder traversal:", "inorder"),
    ("[*] Linearized tree with Tetratag encoding and postorder traversal:", "postorder"),
]

for title, traversal in tetratag_runs:
    print("=====================================")
    print(title)
    e = C_Tetratag(separator="_", unary_joiner="+", mode=traversal, binary_marker="[b]")

    # Encode a freshly parsed copy of the sentence and show the labels.
    lt = e.encode(C_Tree.from_string(original_tree))
    print(lt)

    # Decode, post-process and compare against the reference tree.
    dt = e.decode(lt)
    dt = dt.postprocess_tree(conflict_strat=C_STRAT_FIRST, clean_nulls=True)
    print(dt)
    print("Do trees match?",c_tree.shallow_equals(dt))

[*] Linearized tree with Tetratag encoding and preorder traversal:
-BOS-	-BOS-	-BOS-
The	DT	RRr_S>NP
owls	NNS	l_-NONE-
are	VBP	LRr_S[b]>VP
not	RB	Lr_VP[b]
what	WP	Lr_SBAR_WHNP
they	PRP	Lr_S_NP
seem	VBP	l_-NONE-_VP
.	PUNCT	l_-NONE-
-EOS-	-EOS-	-EOS-

(S (NP (DT The) (NNS owls)) (VP (VBP are) (RB not) (SBAR (WHNP (WP what)) (S (NP (PRP they)) (VP (VBP seem))))) (PUNCT .))
Do trees match? True
[*] Linearized tree with Tetratag encoding and inorder traversal:
-BOS-	-BOS-	-BOS-
The	DT	rR_NP
owls	NNS	lR_S
are	VBP	rR_VP
not	RB	rL_VP[b]
what	WP	rL_SBAR_WHNP
they	PRP	rL_S_NP
seem	VBP	lL_S[b]_VP
.	PUNCT	l_S[b]
-EOS-	-EOS-	-EOS-

(S (NP (DT The) (NNS owls)) (VP (VBP are) (RB not) (SBAR (WHNP (WP what)) (S (NP (PRP they)) (VP (VBP seem))))) (PUNCT .))
Do trees match? True
[*] Linearized tree with Tetratag encoding and postorder traversal:
-BOS-	-BOS-	-BOS-
The	DT	r_-NONE-
owls	NNS	l_-NONE-
are	VBP	Rr_NP
not	RB	l_-NONE-
what	WP	Rr_VP[b]_WHNP
they	PRP	r_-NONE-_NP
seem	VBP	l_-NONE-_VP
.	PUNCT	LLLRlR_

### Attach-Juxtapose encoding for constituent parsing example:

In [6]:
# Attach-Juxtapose round trip.
# Relies on `original_tree` and `c_tree` defined by an earlier cell.
print(
    "=====================================",
    "[*] Linearized tree with Attach-Juxtapose encoding:",
    sep="\n",
)
e = C_JuxtaposedEncoding(
    separator="_",
    unary_joiner="+",
    binary=True,
    binary_direction="R",
    binary_marker="[b]",
)

# Encode a freshly parsed copy of the sentence and show the labels.
lt = e.encode(C_Tree.from_string(original_tree))
print(lt)

# Decode and post-process in one expression, then compare with the reference tree.
dt = e.decode(lt).postprocess_tree(conflict_strat=C_STRAT_FIRST, clean_nulls=True)
print(dt)
print("Do trees match?",c_tree.shallow_equals(dt))

[*] Linearized tree with Attach-Juxtapose encoding:
-BOS-	-BOS-	-BOS-
The	DT	0_an=attach[;]pl=NP
owls	NNS	1_an=attach
are	VBP	1_an=juxtapose[;]pl=VP[;]nl=S
not	RB	2_an=attach[;]pl=VP[b]
what	WP	3_an=attach[;]pl=SBAR_WHNP
they	PRP	4_an=attach[;]pl=S_NP
seem	VBP	5_an=attach_VP
.	PUNCT	2_an=juxtapose[;]nl=S[b]
-EOS-	-EOS-	-EOS-

(S (NP (DT The) (NNS owls)) (VP (VBP are) (RB not) (SBAR (WHNP (WP what)) (S (NP (PRP they)) (VP (VBP seem))))) (PUNCT .))
Do trees match? True


## Dependency Parsing Linearization

### Test of encodings

In [7]:
from codelin.models.deps_tree import D_Tree
from codelin.encs.enc_deps import *
from codelin.utils.constants import D_ROOT_HEAD

# CoNLL-U sample: two comment lines followed by one tab-separated row per token.
conllu_sample = (
    "# sent_id = 1\n"
    "# text = The owls are not what they seem.\n"
    "1\tThe\tthe\tDET\tDT\tDefinite=Def|PronType=Art\t2\tdet\t_\t_\n"
    "2\towls\towl\tNOUN\tNNS\tNumber=Plur\t3\tnsubj\t_\t_\n"
    "3\tare\tbe\tAUX\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t0\troot\t_\t_\n"
    "4\tnot\tnot\tPART\tRB\t_\t3\tadvmod\t_\t_\n"
    "5\twhat\twhat\tPRON\tWP\tPronType=Int\t6\tnsubj\t_\t_\n"
    "6\tthey\tthey\tPRON\tPRP\tCase=Nom|Number=Plur|Person=3|PronType=Prs\t3\tparataxis\t_\t_\n"
    "7\tseem\tseem\tVERB\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t6\tccomp\t_\t_\n"
    "8\t.\t.\tPUNCT\t.\t_\t3\tpunct\t_\t_"
)

# Feature name -> index, handed to `to_string` below
# (presumably the output column of each feature -- TODO confirm).
# Each key appears exactly once: a repeated key in a dict literal is silently
# overridden by its last value, so "VerbForm" is listed only with the index (6)
# that was actually in effect. Index 4 is left unused on purpose so the printed
# columns do not change.
f_idx_dict = {
    "Number": 0,
    "Mood": 1,
    "PronType": 2,
    "Tense": 3,
    "VerbForm": 6,
    "Person": 5,
    "Definite": 7,
    "Case": 8,
}

# Parse the CoNLL-U text into a dependency tree and show it.
print("\n[*] Original tree:")
d_tree = D_Tree.from_string(conllu_sample)
print(d_tree)

# Encode with the naive absolute encoding and print the labels with features.
encoder = D_NaiveAbsoluteEncoding(separator="_")
print("\n[*] Linearized tree:")
ld_tree = encoder.encode(d_tree)
print(ld_tree.to_string(f_idx_dict))


# Decode the labels; `postprocess_tree` is called for its effect on `dc_tree`.
print("\n[*] Decoded tree:")
dc_tree = encoder.decode(ld_tree)
dc_tree.postprocess_tree(search_root_strat=D_ROOT_HEAD, allow_multi_roots=False)
print(dc_tree)

# compare the decoded tree with the original one via shallow_equals
print("\n [*] Do trees match?",(dc_tree.shallow_equals(d_tree)))


[*] Original tree:
0	-ROOT-	_	-ROOT-	_	_	0	-NOREL-	_	_
1	The	the	DET	DT	Definite=Def|PronType=Art	2	det	_	_
2	owls	owl	NOUN	NNS	Number=Plur	3	nsubj	_	_
3	are	be	AUX	VBP	Mood=Ind|Tense=Pres|VerbForm=Fin	0	root	_	_
4	not	not	PART	RB	_	3	advmod	_	_
5	what	what	PRON	WP	PronType=Int	6	nsubj	_	_
6	they	they	PRON	PRP	Case=Nom|Number=Plur|Person=3|PronType=Prs	3	parataxis	_	_
7	seem	seem	VERB	VBP	Mood=Ind|Tense=Pres|VerbForm=Fin	6	ccomp	_	_
8	.	.	PUNCT	.	_	3	punct	_	_



[*] Linearized tree:
-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-
The	DET	_	_	Art	_	_	_	_	Def	_	2_det
owls	NOUN	Plur	_	_	_	_	_	_	_	_	3_nsubj
are	AUX	_	Ind	_	Pres	_	_	Fin	_	_	0_root
not	PART	_	_	_	_	_	_	_	_	_	3_advmod
what	PRON	_	_	Int	_	_	_	_	_	_	6_nsubj
they	PRON	Plur	_	Prs	_	_	3	_	_	Nom	3_parataxis
seem	VERB	_	Ind	_	Pres	_	_	Fin	_	_	6_ccomp
.	PUNCT	_	_	_	_	_	_	_	_	_	3_punct
-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-


[*] Decoded tree:
1	The	_	DET	_	_	2	det	_	_
2	owls	

In [8]:
from codelin.models.deps_tree import D_Tree
from codelin.encs.enc_deps import *
from codelin.utils.constants import D_ROOT_HEAD

# CoNLL-U sample: two comment lines followed by one tab-separated row per token.
conllu_sample = (
    "# sent_id = 1\n"
    "# text = The owls are not what they seem.\n"
    "1\tThe\tthe\tDET\tDT\tDefinite=Def|PronType=Art\t2\tdet\t_\t_\n"
    "2\towls\towl\tNOUN\tNNS\tNumber=Plur\t3\tnsubj\t_\t_\n"
    "3\tare\tbe\tAUX\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t0\troot\t_\t_\n"
    "4\tnot\tnot\tPART\tRB\t_\t3\tadvmod\t_\t_\n"
    "5\twhat\twhat\tPRON\tWP\tPronType=Int\t6\tnsubj\t_\t_\n"
    "6\tthey\tthey\tPRON\tPRP\tCase=Nom|Number=Plur|Person=3|PronType=Prs\t3\tparataxis\t_\t_\n"
    "7\tseem\tseem\tVERB\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t6\tccomp\t_\t_\n"
    "8\t.\t.\tPUNCT\t.\t_\t3\tpunct\t_\t_"
)

# Feature name -> index, handed to `to_string` below
# (presumably the output column of each feature -- TODO confirm).
# Each key appears exactly once: a repeated key in a dict literal is silently
# overridden by its last value, so "VerbForm" is listed only with the index (6)
# that was actually in effect. Index 4 is left unused on purpose so the printed
# columns do not change.
f_idx_dict = {
    "Number": 0,
    "Mood": 1,
    "PronType": 2,
    "Tense": 3,
    "VerbForm": 6,
    "Person": 5,
    "Definite": 7,
    "Case": 8,
}

# Parse the CoNLL-U text into a dependency tree and show it.
print("\n[*] Original tree:")
d_tree = D_Tree.from_string(conllu_sample)
print(d_tree)

# Build the encoder for this demo and show its description.
print("\n[*] Encoding:")
encoder = D_NaiveRelativeEncoding(separator="_", hang_from_root=True)
print(encoder)

# Encode the tree and print the labels together with the selected features.
print("\n[*] Linearized tree:")
ld_tree = encoder.encode(d_tree)
print(ld_tree.to_string(f_idx_dict))

# Decode the labels; `postprocess_tree` is called for its effect on `dc_tree`.
print("\n[*] Decoded tree:")
dc_tree = encoder.decode(ld_tree)
dc_tree.postprocess_tree(search_root_strat=D_ROOT_HEAD, allow_multi_roots=False)
print(dc_tree)

# matching using heads
print("\n [*] Do trees match?",(dc_tree.get_heads()==d_tree.get_heads()))


[*] Original tree:
0	-ROOT-	_	-ROOT-	_	_	0	-NOREL-	_	_
1	The	the	DET	DT	Definite=Def|PronType=Art	2	det	_	_
2	owls	owl	NOUN	NNS	Number=Plur	3	nsubj	_	_
3	are	be	AUX	VBP	Mood=Ind|Tense=Pres|VerbForm=Fin	0	root	_	_
4	not	not	PART	RB	_	3	advmod	_	_
5	what	what	PRON	WP	PronType=Int	6	nsubj	_	_
6	they	they	PRON	PRP	Case=Nom|Number=Plur|Person=3|PronType=Prs	3	parataxis	_	_
7	seem	seem	VERB	VBP	Mood=Ind|Tense=Pres|VerbForm=Fin	6	ccomp	_	_
8	.	.	PUNCT	.	_	3	punct	_	_



[*] Encoding:
Dependency Naive Relative Encoding

[*] Linearized tree:
-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-
The	DET	_	_	Art	_	_	_	_	Def	_	1_det
owls	NOUN	Plur	_	_	_	_	_	_	_	_	1_nsubj
are	AUX	_	Ind	_	Pres	_	_	Fin	_	_	-NONE-_root
not	PART	_	_	_	_	_	_	_	_	_	-1_advmod
what	PRON	_	_	Int	_	_	_	_	_	_	1_nsubj
they	PRON	Plur	_	Prs	_	_	3	_	_	Nom	-3_parataxis
seem	VERB	_	Ind	_	Pres	_	_	Fin	_	_	-1_ccomp
.	PUNCT	_	_	_	_	_	_	_	_	_	-5_punct
-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	

In [9]:
from codelin.models.deps_tree import D_Tree
from codelin.encs.enc_deps import *
from codelin.utils.constants import D_ROOT_HEAD

# CoNLL-U sample: two comment lines followed by one tab-separated row per token.
conllu_sample = (
    "# sent_id = 1\n"
    "# text = The owls are not what they seem.\n"
    "1\tThe\tthe\tDET\tDT\tDefinite=Def|PronType=Art\t2\tdet\t_\t_\n"
    "2\towls\towl\tNOUN\tNNS\tNumber=Plur\t3\tnsubj\t_\t_\n"
    "3\tare\tbe\tAUX\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t0\troot\t_\t_\n"
    "4\tnot\tnot\tPART\tRB\t_\t3\tadvmod\t_\t_\n"
    "5\twhat\twhat\tPRON\tWP\tPronType=Int\t6\tnsubj\t_\t_\n"
    "6\tthey\tthey\tPRON\tPRP\tCase=Nom|Number=Plur|Person=3|PronType=Prs\t3\tparataxis\t_\t_\n"
    "7\tseem\tseem\tVERB\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t6\tccomp\t_\t_\n"
    "8\t.\t.\tPUNCT\t.\t_\t3\tpunct\t_\t_"
)

# Feature name -> index, handed to `to_string` below
# (presumably the output column of each feature -- TODO confirm).
# Each key appears exactly once: a repeated key in a dict literal is silently
# overridden by its last value, so "VerbForm" is listed only with the index (6)
# that was actually in effect. Index 4 is left unused on purpose so the printed
# columns do not change.
f_idx_dict = {
    "Number": 0,
    "Mood": 1,
    "PronType": 2,
    "Tense": 3,
    "VerbForm": 6,
    "Person": 5,
    "Definite": 7,
    "Case": 8,
}

# Parse the CoNLL-U text into a dependency tree and show it.
print("\n[*] Original tree:")
d_tree = D_Tree.from_string(conllu_sample)
print(d_tree)

# Build the encoder for this demo and show its description.
print("\n[*] Encoding:")
encoder = D_BrkBasedEncoding(separator="_", displacement=True)
print(encoder)

# Encode the tree and print the labels together with the selected features.
print("\n[*] Linearized tree:")
ld_tree = encoder.encode(d_tree)
print(ld_tree.to_string(f_idx_dict))

# Decode the labels; `postprocess_tree` is called for its effect on `dc_tree`.
print("\n[*] Decoded tree:")
dc_tree = encoder.decode(ld_tree)
dc_tree.postprocess_tree(search_root_strat=D_ROOT_HEAD, allow_multi_roots=False)
print(dc_tree)

# matching using heads
print("\n [*] Do trees match?",(dc_tree.get_heads()==d_tree.get_heads()))


[*] Original tree:
0	-ROOT-	_	-ROOT-	_	_	0	-NOREL-	_	_
1	The	the	DET	DT	Definite=Def|PronType=Art	2	det	_	_
2	owls	owl	NOUN	NNS	Number=Plur	3	nsubj	_	_
3	are	be	AUX	VBP	Mood=Ind|Tense=Pres|VerbForm=Fin	0	root	_	_
4	not	not	PART	RB	_	3	advmod	_	_
5	what	what	PRON	WP	PronType=Int	6	nsubj	_	_
6	they	they	PRON	PRP	Case=Nom|Number=Plur|Person=3|PronType=Prs	3	parataxis	_	_
7	seem	seem	VERB	VBP	Mood=Ind|Tense=Pres|VerbForm=Fin	6	ccomp	_	_
8	.	.	PUNCT	.	_	3	punct	_	_



[*] Encoding:
Dependency Bracketing Based Encoding

[*] Linearized tree:
-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-
The	DET	_	_	Art	_	_	_	_	Def	_	__det
owls	NOUN	Plur	_	_	_	_	_	_	_	_	<\_nsubj
are	AUX	_	Ind	_	Pres	_	_	Fin	_	_	<\_root
not	PART	_	_	_	_	_	_	_	_	_	/>//_advmod
what	PRON	_	_	Int	_	_	_	_	_	_	__nsubj
they	PRON	Plur	_	Prs	_	_	3	_	_	Nom	<\>_parataxis
seem	VERB	_	Ind	_	Pres	_	_	Fin	_	_	/>_ccomp
.	PUNCT	_	_	_	_	_	_	_	_	_	>_punct
-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-

In [10]:
from codelin.models.deps_tree import D_Tree
from codelin.encs.enc_deps import *
from codelin.utils.constants import D_ROOT_HEAD

# CoNLL-U sample: two comment lines followed by one tab-separated row per token.
conllu_sample = (
    "# sent_id = 1\n"
    "# text = The owls are not what they seem.\n"
    "1\tThe\tthe\tDET\tDT\tDefinite=Def|PronType=Art\t2\tdet\t_\t_\n"
    "2\towls\towl\tNOUN\tNNS\tNumber=Plur\t3\tnsubj\t_\t_\n"
    "3\tare\tbe\tAUX\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t0\troot\t_\t_\n"
    "4\tnot\tnot\tPART\tRB\t_\t3\tadvmod\t_\t_\n"
    "5\twhat\twhat\tPRON\tWP\tPronType=Int\t6\tnsubj\t_\t_\n"
    "6\tthey\tthey\tPRON\tPRP\tCase=Nom|Number=Plur|Person=3|PronType=Prs\t3\tparataxis\t_\t_\n"
    "7\tseem\tseem\tVERB\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t6\tccomp\t_\t_\n"
    "8\t.\t.\tPUNCT\t.\t_\t3\tpunct\t_\t_"
)

# Feature name -> index, handed to `to_string` below
# (presumably the output column of each feature -- TODO confirm).
# Each key appears exactly once: a repeated key in a dict literal is silently
# overridden by its last value, so "VerbForm" is listed only with the index (6)
# that was actually in effect. Index 4 is left unused on purpose so the printed
# columns do not change.
f_idx_dict = {
    "Number": 0,
    "Mood": 1,
    "PronType": 2,
    "Tense": 3,
    "VerbForm": 6,
    "Person": 5,
    "Definite": 7,
    "Case": 8,
}

# Parse the CoNLL-U text into a dependency tree and show it.
print("\n[*] Original tree:")
d_tree = D_Tree.from_string(conllu_sample)
print(d_tree)

# Build the encoder for this demo and show its description.
print("\n[*] Encoding:")
encoder = D_PosBasedEncoding(separator="_")
print(encoder)

# Encode the tree and print the labels together with the selected features.
print("\n[*] Linearized tree:")
ld_tree = encoder.encode(d_tree)
print(ld_tree.to_string(f_idx_dict))

# Decode the labels; `postprocess_tree` is called for its effect on `dc_tree`.
print("\n[*] Decoded tree:")
dc_tree = encoder.decode(ld_tree)
dc_tree.postprocess_tree(search_root_strat=D_ROOT_HEAD, allow_multi_roots=False)
print(dc_tree)

# matching using heads
print("\n [*] Do trees match?",(dc_tree.get_heads()==d_tree.get_heads()))


[*] Original tree:
0	-ROOT-	_	-ROOT-	_	_	0	-NOREL-	_	_
1	The	the	DET	DT	Definite=Def|PronType=Art	2	det	_	_
2	owls	owl	NOUN	NNS	Number=Plur	3	nsubj	_	_
3	are	be	AUX	VBP	Mood=Ind|Tense=Pres|VerbForm=Fin	0	root	_	_
4	not	not	PART	RB	_	3	advmod	_	_
5	what	what	PRON	WP	PronType=Int	6	nsubj	_	_
6	they	they	PRON	PRP	Case=Nom|Number=Plur|Person=3|PronType=Prs	3	parataxis	_	_
7	seem	seem	VERB	VBP	Mood=Ind|Tense=Pres|VerbForm=Fin	6	ccomp	_	_
8	.	.	PUNCT	.	_	3	punct	_	_



[*] Encoding:
Dependency Part-of-Speech Based Encoding

[*] Linearized tree:
-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-
The	DET	_	_	Art	_	_	_	_	Def	_	1--NOUN_det
owls	NOUN	Plur	_	_	_	_	_	_	_	_	1--AUX_nsubj
are	AUX	_	Ind	_	Pres	_	_	Fin	_	_	-1---ROOT-_root
not	PART	_	_	_	_	_	_	_	_	_	-1--AUX_advmod
what	PRON	_	_	Int	_	_	_	_	_	_	1--PRON_nsubj
they	PRON	Plur	_	Prs	_	_	3	_	_	Nom	-1--AUX_parataxis
seem	VERB	_	Ind	_	Pres	_	_	Fin	_	_	-1--PRON_ccomp
.	PUNCT	_	_	_	_	_	_	_	_	_	-1--AUX_punct
-EOS-	-EOS-	-EOS-	

In [11]:
from codelin.models.deps_tree import D_Tree
from codelin.encs.enc_deps import *
from codelin.utils.constants import D_ROOT_HEAD, D_2P_GREED

# CoNLL-U sample: two comment lines followed by one tab-separated row per token.
conllu_sample = (
    "# sent_id = 1\n"
    "# text = The owls are not what they seem.\n"
    "1\tThe\tthe\tDET\tDT\tDefinite=Def|PronType=Art\t2\tdet\t_\t_\n"
    "2\towls\towl\tNOUN\tNNS\tNumber=Plur\t3\tnsubj\t_\t_\n"
    "3\tare\tbe\tAUX\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t0\troot\t_\t_\n"
    "4\tnot\tnot\tPART\tRB\t_\t3\tadvmod\t_\t_\n"
    "5\twhat\twhat\tPRON\tWP\tPronType=Int\t6\tnsubj\t_\t_\n"
    "6\tthey\tthey\tPRON\tPRP\tCase=Nom|Number=Plur|Person=3|PronType=Prs\t3\tparataxis\t_\t_\n"
    "7\tseem\tseem\tVERB\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t6\tccomp\t_\t_\n"
    "8\t.\t.\tPUNCT\t.\t_\t3\tpunct\t_\t_"
)

# Feature name -> index, handed to `to_string` below
# (presumably the output column of each feature -- TODO confirm).
# Each key appears exactly once: a repeated key in a dict literal is silently
# overridden by its last value, so "VerbForm" is listed only with the index (6)
# that was actually in effect. Index 4 is left unused on purpose so the printed
# columns do not change.
f_idx_dict = {
    "Number": 0,
    "Mood": 1,
    "PronType": 2,
    "Tense": 3,
    "VerbForm": 6,
    "Person": 5,
    "Definite": 7,
    "Case": 8,
}

# Parse the CoNLL-U text into a dependency tree and show it.
print("\n[*] Original tree:")
d_tree = D_Tree.from_string(conllu_sample)
print(d_tree)

# Build the encoder for this demo and show its description.
print("\n[*] Encoding:")
encoder = D_Brk2PBasedEncoding(separator="_", displacement=False, planar_alg=D_2P_GREED)
print(encoder)

# Encode the tree and print the labels together with the selected features.
print("\n[*] Linearized tree:")
ld_tree = encoder.encode(d_tree)
print(ld_tree.to_string(f_idx_dict))

# Decode the labels; `postprocess_tree` is called for its effect on `dc_tree`.
print("\n[*] Decoded tree:")
dc_tree = encoder.decode(ld_tree)
dc_tree.postprocess_tree(search_root_strat=D_ROOT_HEAD, allow_multi_roots=False)
print(dc_tree)

# matching using heads
print("\n [*] Do trees match?",(dc_tree.get_heads()==d_tree.get_heads()))


[*] Original tree:
0	-ROOT-	_	-ROOT-	_	_	0	-NOREL-	_	_
1	The	the	DET	DT	Definite=Def|PronType=Art	2	det	_	_
2	owls	owl	NOUN	NNS	Number=Plur	3	nsubj	_	_
3	are	be	AUX	VBP	Mood=Ind|Tense=Pres|VerbForm=Fin	0	root	_	_
4	not	not	PART	RB	_	3	advmod	_	_
5	what	what	PRON	WP	PronType=Int	6	nsubj	_	_
6	they	they	PRON	PRP	Case=Nom|Number=Plur|Person=3|PronType=Prs	3	parataxis	_	_
7	seem	seem	VERB	VBP	Mood=Ind|Tense=Pres|VerbForm=Fin	6	ccomp	_	_
8	.	.	PUNCT	.	_	3	punct	_	_



[*] Encoding:
Dependency 2-Planar Bracketing Based Encoding

[*] Linearized tree:
-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-
The	DET	_	_	Art	_	_	_	_	Def	_	<_det
owls	NOUN	Plur	_	_	_	_	_	_	_	_	\<_nsubj
are	AUX	_	Ind	_	Pres	_	_	Fin	_	_	\///_root
not	PART	_	_	_	_	_	_	_	_	_	>_advmod
what	PRON	_	_	Int	_	_	_	_	_	_	<_nsubj
they	PRON	Plur	_	Prs	_	_	3	_	_	Nom	\>/_parataxis
seem	VERB	_	Ind	_	Pres	_	_	Fin	_	_	>_ccomp
.	PUNCT	_	_	_	_	_	_	_	_	_	>_punct
-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS

In [13]:
from codelin.models.deps_tree import D_Tree
from codelin.encs.enc_deps import *
from codelin.utils.constants import D_ROOT_HEAD, D_2P_GREED

# CoNLL-U sample: two comment lines followed by one tab-separated row per token.
conllu_sample = (
    "# sent_id = 1\n"
    "# text = The owls are not what they seem.\n"
    "1\tThe\tthe\tDET\tDT\tDefinite=Def|PronType=Art\t2\tdet\t_\t_\n"
    "2\towls\towl\tNOUN\tNNS\tNumber=Plur\t3\tnsubj\t_\t_\n"
    "3\tare\tbe\tAUX\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t0\troot\t_\t_\n"
    "4\tnot\tnot\tPART\tRB\t_\t3\tadvmod\t_\t_\n"
    "5\twhat\twhat\tPRON\tWP\tPronType=Int\t6\tnsubj\t_\t_\n"
    "6\tthey\tthey\tPRON\tPRP\tCase=Nom|Number=Plur|Person=3|PronType=Prs\t3\tparataxis\t_\t_\n"
    "7\tseem\tseem\tVERB\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t6\tccomp\t_\t_\n"
    "8\t.\t.\tPUNCT\t.\t_\t3\tpunct\t_\t_"
)

# Feature name -> index, handed to `to_string` below
# (presumably the output column of each feature -- TODO confirm).
# Each key appears exactly once: a repeated key in a dict literal is silently
# overridden by its last value, so "VerbForm" is listed only with the index (6)
# that was actually in effect. Index 4 is left unused on purpose so the printed
# columns do not change.
f_idx_dict = {
    "Number": 0,
    "Mood": 1,
    "PronType": 2,
    "Tense": 3,
    "VerbForm": 6,
    "Person": 5,
    "Definite": 7,
    "Case": 8,
}

# Parse the CoNLL-U text into a dependency tree and show it.
print("\n[*] Original tree:")
d_tree = D_Tree.from_string(conllu_sample)
print(d_tree)

# Build the encoder for this demo and show its description.
print("\n[*] Encoding:")
encoder = D_Brk4BitsEncoding(separator="_")
print(encoder)

# Encode the tree and print the labels together with the selected features.
print("\n[*] Linearized tree:")
ld_tree = encoder.encode(d_tree)
print(ld_tree.to_string(f_idx_dict))

# Decode the labels; `postprocess_tree` is called for its effect on `dc_tree`.
print("\n[*] Decoded tree:")
dc_tree = encoder.decode(ld_tree)
dc_tree.postprocess_tree(search_root_strat=D_ROOT_HEAD, allow_multi_roots=False)
print(dc_tree)

# matching using heads
# `remove_dummy` mutates `d_tree` in place before the comparison
# (presumably dropping the artificial -ROOT- node -- TODO confirm).
d_tree.remove_dummy()
print("\n [*] Do trees match?",(dc_tree.get_heads()==d_tree.get_heads()))


[*] Original tree:
0	-ROOT-	_	-ROOT-	_	_	0	-NOREL-	_	_
1	The	the	DET	DT	Definite=Def|PronType=Art	2	det	_	_
2	owls	owl	NOUN	NNS	Number=Plur	3	nsubj	_	_
3	are	be	AUX	VBP	Mood=Ind|Tense=Pres|VerbForm=Fin	0	root	_	_
4	not	not	PART	RB	_	3	advmod	_	_
5	what	what	PRON	WP	PronType=Int	6	nsubj	_	_
6	they	they	PRON	PRP	Case=Nom|Number=Plur|Person=3|PronType=Prs	3	parataxis	_	_
7	seem	seem	VERB	VBP	Mood=Ind|Tense=Pres|VerbForm=Fin	6	ccomp	_	_
8	.	.	PUNCT	.	_	3	punct	_	_



[*] Encoding:
Dependency Bracketing 4-Bits Encoding

[*] Linearized tree:
-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-
-ROOT-	-ROOT-	_	_	_	_	_	_	_	_	_	/_-NOREL-
The	DET	_	_	Art	_	_	_	_	Def	_	<*_det
owls	NOUN	Plur	_	_	_	_	_	_	_	_	\<*_nsubj
are	AUX	_	Ind	_	Pres	_	_	Fin	_	_	\>*/_root
not	PART	_	_	_	_	_	_	_	_	_	>_advmod
what	PRON	_	_	Int	_	_	_	_	_	_	<*_nsubj
they	PRON	Plur	_	Prs	_	_	3	_	_	Nom	\>/_parataxis
seem	VERB	_	Ind	_	Pres	_	_	Fin	_	_	>*_ccomp
.	PUNCT	_	_	_	_	_	_	_	_	_	>*_punct
-EOS-	-EOS-	-EOS-	-

In [14]:
from codelin.models.deps_tree import D_Tree
from codelin.encs.enc_deps import *
from codelin.utils.constants import D_ROOT_HEAD, D_2P_GREED

# CoNLL-U sample: two comment lines followed by one tab-separated row per token.
conllu_sample = (
    "# sent_id = 1\n"
    "# text = The owls are not what they seem.\n"
    "1\tThe\tthe\tDET\tDT\tDefinite=Def|PronType=Art\t2\tdet\t_\t_\n"
    "2\towls\towl\tNOUN\tNNS\tNumber=Plur\t3\tnsubj\t_\t_\n"
    "3\tare\tbe\tAUX\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t0\troot\t_\t_\n"
    "4\tnot\tnot\tPART\tRB\t_\t3\tadvmod\t_\t_\n"
    "5\twhat\twhat\tPRON\tWP\tPronType=Int\t6\tnsubj\t_\t_\n"
    "6\tthey\tthey\tPRON\tPRP\tCase=Nom|Number=Plur|Person=3|PronType=Prs\t3\tparataxis\t_\t_\n"
    "7\tseem\tseem\tVERB\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t6\tccomp\t_\t_\n"
    "8\t.\t.\tPUNCT\t.\t_\t3\tpunct\t_\t_"
)

# Feature name -> index, handed to `to_string` below
# (presumably the output column of each feature -- TODO confirm).
# Each key appears exactly once: a repeated key in a dict literal is silently
# overridden by its last value, so "VerbForm" is listed only with the index (6)
# that was actually in effect. Index 4 is left unused on purpose so the printed
# columns do not change.
f_idx_dict = {
    "Number": 0,
    "Mood": 1,
    "PronType": 2,
    "Tense": 3,
    "VerbForm": 6,
    "Person": 5,
    "Definite": 7,
    "Case": 8,
}

# Parse the CoNLL-U text into a dependency tree and show it.
print("\n[*] Original tree:")
d_tree = D_Tree.from_string(conllu_sample)
print(d_tree)

# Build the encoder for this demo and show its description.
print("\n[*] Encoding:")
encoder = D_Brk7BitsEncoding(separator="_")
print(encoder)

# Encode the tree and print the labels together with the selected features.
print("\n[*] Linearized tree:")
ld_tree = encoder.encode(d_tree)
print(ld_tree.to_string(f_idx_dict))

# Decode the labels; `postprocess_tree` is called for its effect on `dc_tree`.
print("\n[*] Decoded tree:")
dc_tree = encoder.decode(ld_tree)
dc_tree.postprocess_tree(search_root_strat=D_ROOT_HEAD, allow_multi_roots=False)
print(dc_tree)

# matching using heads
# `remove_dummy` mutates `d_tree` in place before the comparison
# (presumably dropping the artificial -ROOT- node -- TODO confirm).
d_tree.remove_dummy()
print("\n [*] Do trees match?",(dc_tree.get_heads()==d_tree.get_heads()))


[*] Original tree:
0	-ROOT-	_	-ROOT-	_	_	0	-NOREL-	_	_
1	The	the	DET	DT	Definite=Def|PronType=Art	2	det	_	_
2	owls	owl	NOUN	NNS	Number=Plur	3	nsubj	_	_
3	are	be	AUX	VBP	Mood=Ind|Tense=Pres|VerbForm=Fin	0	root	_	_
4	not	not	PART	RB	_	3	advmod	_	_
5	what	what	PRON	WP	PronType=Int	6	nsubj	_	_
6	they	they	PRON	PRP	Case=Nom|Number=Plur|Person=3|PronType=Prs	3	parataxis	_	_
7	seem	seem	VERB	VBP	Mood=Ind|Tense=Pres|VerbForm=Fin	6	ccomp	_	_
8	.	.	PUNCT	.	_	3	punct	_	_



[*] Encoding:
Dependency Bracketing 7-Bits Encoding

[*] Linearized tree:
-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-
-ROOT-	-ROOT-	_	_	_	_	_	_	_	_	_	/0_-NOREL-
The	DET	_	_	Art	_	_	_	_	Def	_	<0*_det
owls	NOUN	Plur	_	_	_	_	_	_	_	_	\0<0*_nsubj
are	AUX	_	Ind	_	Pres	_	_	Fin	_	_	\0>0*/0_root
not	PART	_	_	_	_	_	_	_	_	_	>0_advmod
what	PRON	_	_	Int	_	_	_	_	_	_	<0*_nsubj
they	PRON	Plur	_	Prs	_	_	3	_	_	Nom	\0>0/0_parataxis
seem	VERB	_	Ind	_	Pres	_	_	Fin	_	_	>0*_ccomp
.	PUNCT	_	_	_	_	_	_	_	_	_	>0*_punct
-EOS-

In [1]:
from codelin.models.deps_tree import D_Tree
from codelin.encs.enc_deps import *
from codelin.utils.constants import D_ROOT_HEAD, D_2P_GREED

# CoNLL-U sample: two comment lines followed by one tab-separated row per token.
conllu_sample = (
    "# sent_id = 1\n"
    "# text = The owls are not what they seem.\n"
    "1\tThe\tthe\tDET\tDT\tDefinite=Def|PronType=Art\t2\tdet\t_\t_\n"
    "2\towls\towl\tNOUN\tNNS\tNumber=Plur\t3\tnsubj\t_\t_\n"
    "3\tare\tbe\tAUX\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t0\troot\t_\t_\n"
    "4\tnot\tnot\tPART\tRB\t_\t3\tadvmod\t_\t_\n"
    "5\twhat\twhat\tPRON\tWP\tPronType=Int\t6\tnsubj\t_\t_\n"
    "6\tthey\tthey\tPRON\tPRP\tCase=Nom|Number=Plur|Person=3|PronType=Prs\t3\tparataxis\t_\t_\n"
    "7\tseem\tseem\tVERB\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t6\tccomp\t_\t_\n"
    "8\t.\t.\tPUNCT\t.\t_\t3\tpunct\t_\t_"
)

# Parse the CoNLL-U text into a dependency tree and show it.
print("\n[*] Original tree:")
d_tree = D_Tree.from_string(conllu_sample)
print(d_tree)

# Build the Hexatag encoder and show its description.
print("\n[*] Encoding:")
encoder = D_HexatagEncoding(separator="_")
print(encoder)

# NOTE(review): the recorded output of this cell stops right after the
# "Linearized tree" header with `TypeError: 'NoneType' object is not iterable`.
# The failing call is not visible in the saved traceback -- TODO investigate
# before publishing the notebook.
print("\n[*] Linearized tree:")
ld_tree = encoder.encode(d_tree)
print(ld_tree)

# Decode the labels; `postprocess_tree` is called for its effect on `dc_tree`.
print("\n[*] Decoded tree:")
dc_tree = encoder.decode(ld_tree)
dc_tree.postprocess_tree(search_root_strat=D_ROOT_HEAD, allow_multi_roots=False)
print(dc_tree)

# Compare head assignments after `remove_dummy` has mutated `d_tree` in place
# (presumably dropping the artificial -ROOT- node -- TODO confirm).
d_tree.remove_dummy()
heads_match = dc_tree.get_heads() == d_tree.get_heads()
print("\n [*] Do trees match?", heads_match)


[*] Original tree:
0	-ROOT-	_	-ROOT-	_	_	0	-NOREL-	_	_
1	The	the	DET	DT	Definite=Def|PronType=Art	2	det	_	_
2	owls	owl	NOUN	NNS	Number=Plur	3	nsubj	_	_
3	are	be	AUX	VBP	Mood=Ind|Tense=Pres|VerbForm=Fin	0	root	_	_
4	not	not	PART	RB	_	3	advmod	_	_
5	what	what	PRON	WP	PronType=Int	6	nsubj	_	_
6	they	they	PRON	PRP	Case=Nom|Number=Plur|Person=3|PronType=Prs	3	parataxis	_	_
7	seem	seem	VERB	VBP	Mood=Ind|Tense=Pres|VerbForm=Fin	6	ccomp	_	_
8	.	.	PUNCT	.	_	3	punct	_	_



[*] Encoding:
Dependency Bracketing Hexa-Tags Encoding

[*] Linearized tree:


TypeError: 'NoneType' object is not iterable