# CoDeLin Demo

How to use CoDeLin, the Constituent and Dependency Linearization System, as a Python library.

## Constituent Parsing Linearization

### In this example we encode a tree in bracketing format into a sequence of labels, using both the Naive Absolute Encoding and the Gaps-Based Encoding. After that, we decode the labels back into the original constituent tree.

In [1]:
from codelin.models.const_tree import C_Tree
from codelin.models.linearized_tree import LinearizedTree
from codelin.utils.constants import C_STRAT_FIRST, C_STRAT_MAX
from codelin.encs.enc_const import *

# Round trip of one bracketed sentence through two constituent encoders:
# parse the string, linearize it into labels, then decode the labels again.
print("\n[*] Original tree:")
# Alternative sample sentences; uncomment one to experiment.
# original_tree = "(S (NP (DT The) (NNS owls)) (VP (VBP are) (RB not) (SBAR (WHNP (WP what)) (S (NP (PRP they)) (VP (VBP seem))))) (PUNCT .))"
# original_tree = "(S (S* (NP (PRP My) (NN daughter)) (VP (VBD broke) (NP (NP (DET the) (NP* (JJ red) (NN toy))) (PP (IN with) (NP (DET a) (NN hammer)))))) (PUNCT .))"
original_tree = "(S (NP (NNP Agent) (NNP Cooper)) (VP (VBZ loves) (NP (JJ black) (NN coffee))) (PUNCT .))"
c_tree = C_Tree.from_string(original_tree)
print(c_tree)

print("\n[*] Encoding:")
# Both encoders are applied to the same tree so their labels can be compared.
encoder = C_NaiveAbsoluteEncoding(
    separator="_",
    unary_joiner="+",
    reverse=False,
    binary=False,
)
encoder_gaps = C_GapsEncoding(
    separator="_",
    unary_joiner="+",
    binary_direction="R",
    binary_marker="*",
)

print("\n[*] Linearized tree:")
# Each encoder is printed right above the label sequence it produced.
lc_tree = encoder.encode(c_tree)
print(encoder, lc_tree, sep="\n")

lc_tree_gaps = encoder_gaps.encode(c_tree)
print(encoder_gaps, lc_tree_gaps, sep="\n")


print("\n[*] Decoded tree:")
print(encoder)
# Decode and post-process in one expression; c_tree is rebound to the result.
c_tree = encoder.decode(lc_tree).postprocess_tree(
    conflict_strat=C_STRAT_FIRST, clean_nulls=True
)
print(c_tree)

print(encoder_gaps)
c_tree_gaps = encoder_gaps.decode(lc_tree_gaps).postprocess_tree(
    conflict_strat=C_STRAT_FIRST, clean_nulls=True
)

print(c_tree_gaps)


[*] Original tree:
(S (NP (NNP Agent) (NNP Cooper)) (VP (VBZ loves) (NP (JJ black) (NN coffee))) (PUNCT .))

[*] Encoding:

[*] Linearized tree:
Constituent Naive Absolute Encoding
-BOS-	-BOS-	-BOS-
Agent	NNP	2_NP
Cooper	NNP	1_S
loves	VBZ	2_VP
black	JJ	3_NP
coffee	NN	1_S
.	PUNCT	1_S
-EOS-	-EOS-	-EOS-

Constituent Gaps Based Encoding
-BOS-	-BOS-	-BOS-
Agent	NNP	0_NP
Cooper	NNP	1_S
loves	VBZ	0_VP
black	JJ	0_NP
coffee	NN	2_S*
.	PUNCT	2_$$
-EOS-	-EOS-	-EOS-


[*] Decoded tree:
Constituent Naive Absolute Encoding
(S (NP (NNP Agent) (NNP Cooper)) (VP (VBZ loves) (NP (JJ black) (NN coffee))) (PUNCT .))
Constituent Gaps Based Encoding
(S (NP (NNP Agent) (NNP Cooper)) (VP (VBZ loves) (NP (JJ black) (NN coffee))) (PUNCT .))


### The following example deals with constituent trees whose part-of-speech tags carry embedded features, linearized with the Naive Dynamic Encoding. The sample sentence is taken from the SPMRL German treebank.

In [2]:
# Constituent tree whose PoS tags embed features between "##" markers.
print("\n[*] Original tree:")
original_tree = "(PP (APPR-AC##lem=in|_## IM) (NN-NK##lem=Blick|case=dat|number=sg|gender=masc## BLICK))"
# original_tree = ("(S (NP (NNP Agent) (NNP Cooper)) (VP (VBZ loves) (NP (JJ black) (NN coffee))) (PUNCT .))")

# Feature name -> index, in the order the names are listed; passed to
# to_string() when the linearized tree is printed.
f_idx_dict = {
    feature: position
    for position, feature in enumerate(("lem", "case", "number", "gender"))
}

c_tree = C_Tree.from_string(original_tree)
print(c_tree)

print("\n[*] Encoding:")
encoder = C_NaiveDynamicEncoding(
    separator="_",
    unary_joiner="+",
    reverse=True,
    binary=True,
    binary_direction="R",
    binary_marker="[b]",
)
print(encoder)

print("\n[*] Linearized tree:")
lc_tree = encoder.encode(c_tree)
print(lc_tree.to_string(f_idx_dict))

print("\n[*] Decoded tree:")
# c_tree is rebound to the decoded, post-processed tree.
c_tree = encoder.decode(lc_tree).postprocess_tree(
    conflict_strat=C_STRAT_FIRST, clean_nulls=True
)
print(c_tree)


[*] Original tree:
(PP (APPR-AC##lem=in|_## IM) (NN-NK##lem=Blick|case=dat|number=sg|gender=masc## BLICK))

[*] Encoding:
Constituent Naive Dynamic Encoding

[*] Linearized tree:
-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-
IM	APPR-AC	in	_	_	_	_	1_PP
BLICK	NN-NK	Blick	dat	sg	masc	_	1_PP
-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-


[*] Decoded tree:
(PP (APPR-AC IM) (NN-NK BLICK))


### Example of reading a tree from a string, binarizing it, and restoring the original tree

In [3]:
# Parse a bracketed tree, right-binarize it, undo the binarization and check
# that the restored tree equals the one we started from.
# original_tree = "(S (NP (DT The) (NNS owls)) (VP (VBP are) (RB not) (SBAR (WHNP (WP what)) (S (NP (PRP they)) (VP (VBP seem))))) (PUNCT .))"
original_tree = "(S (NP (NNP Agent) (NNP Cooper)) (VP (VBZ loves) (NP (JJ black) (NN coffee))) (PUNCT .))"
ct = C_Tree.from_string(original_tree)
print("[*] Original tree:", ct, sep="\n")

# binarize
bt = C_Tree.to_binary_right(ct)
print("[*] Binarized tree:", bt, sep="\n")

# restore
dt = C_Tree.restore_from_binary(bt)
print("[*] Restored tree:", dt, sep="\n")

# ct and dt are distinct objects, so they are compared with shallow_equals.
print("[*] Are they equal?", ct.shallow_equals(dt))

[*] Original tree:
(S (NP (NNP Agent) (NNP Cooper)) (VP (VBZ loves) (NP (JJ black) (NN coffee))) (PUNCT .))
[*] Binarized tree:
(S (NP (NNP Agent) (NNP Cooper)) (S* (VP (VBZ loves) (NP (JJ black) (NN coffee))) (PUNCT .)))
[*] Restored tree:
(S (NP (NNP Agent) (NNP Cooper)) (VP (VBZ loves) (NP (JJ black) (NN coffee))) (PUNCT .))
[*] Are they equal? True


### Naive encodings for constituent parsing examples:

In [4]:
# Round-trip one sentence through the three naive constituent encodings.
print("[*] Original tree:")
# original_tree = "(S (NP (DT The) (NNS owls)) (VP (VBP are) (RB not) (SBAR (WHNP (WP what)) (S (NP (PRP they)) (VP (VBP seem))))) (PUNCT .))"
original_tree = "(S (NP (NNP Agent) (NNP Cooper)) (VP (VBZ loves) (NP (JJ black) (NN coffee))) (PUNCT .))"
c_tree = C_Tree.from_string(original_tree)
print(c_tree)

# Same encode -> decode -> compare recipe for every encoding; only the class
# and the section header differ. e, lt and dt end up bound to the values of
# the last (Dynamic) iteration.
for header, encoding_cls in (
    ("[*] Linearized tree with Absolute Encoding:", C_NaiveAbsoluteEncoding),
    ("[*] Linearized tree with Relative Encoding:", C_NaiveRelativeEncoding),
    ("[*] Linearized tree with Dynamic Encoding:", C_NaiveDynamicEncoding),
):
    print("=====================================")
    print(header)
    e = encoding_cls(separator="_", unary_joiner="+", reverse=False, binary=False)
    # A fresh tree is parsed for every encoder.
    lt = e.encode(C_Tree.from_string(original_tree))
    print(lt)
    dt = e.decode(lt).postprocess_tree(conflict_strat=C_STRAT_FIRST, clean_nulls=True)
    print(dt)
    print("Do trees match?", c_tree.shallow_equals(dt))


[*] Original tree:
(S (NP (NNP Agent) (NNP Cooper)) (VP (VBZ loves) (NP (JJ black) (NN coffee))) (PUNCT .))
[*] Linearized tree with Absolute Encoding:
-BOS-	-BOS-	-BOS-
Agent	NNP	2_NP
Cooper	NNP	1_S
loves	VBZ	2_VP
black	JJ	3_NP
coffee	NN	1_S
.	PUNCT	1_S
-EOS-	-EOS-	-EOS-

(S (NP (NNP Agent) (NNP Cooper)) (VP (VBZ loves) (NP (JJ black) (NN coffee))) (PUNCT .))
Do trees match? True
[*] Linearized tree with Relative Encoding:
-BOS-	-BOS-	-BOS-
Agent	NNP	2*_NP
Cooper	NNP	-1*_S
loves	VBZ	1*_VP
black	JJ	1*_NP
coffee	NN	-2*_S
.	PUNCT	0*_S
-EOS-	-EOS-	-EOS-

(S (NP (NNP Agent) (NNP Cooper)) (VP (VBZ loves) (NP (JJ black) (NN coffee))) (PUNCT .))
Do trees match? True
[*] Linearized tree with Dynamic Encoding:
-BOS-	-BOS-	-BOS-
Agent	NNP	2_NP
Cooper	NNP	1_S
loves	VBZ	2_VP
black	JJ	3_NP
coffee	NN	-2*_S
.	PUNCT	1_S
-EOS-	-EOS-	-EOS-

(S (NP (NNP Agent) (NNP Cooper)) (VP (VBZ loves) (NP (JJ black) (NN coffee))) (PUNCT .))
Do trees match? True


In [5]:
# Same round trip as the previous cell, with reverse=True ("incremental
# parsing" in the headers). Relies on original_tree and c_tree defined by an
# earlier cell.
for header, encoding_cls in (
    ("[*] Linearized tree with Absolute Encoding and incremental parsing:", C_NaiveAbsoluteEncoding),
    ("[*] Linearized tree with Relative Encoding and incremental parsing:", C_NaiveRelativeEncoding),
    ("[*] Linearized tree with Dynamic Encoding and incremental parsing:", C_NaiveDynamicEncoding),
):
    print("=====================================")
    print(header)
    e = encoding_cls(separator="_", unary_joiner="+", reverse=True, binary=False)
    lt = e.encode(C_Tree.from_string(original_tree))
    print(lt)
    dt = e.decode(lt).postprocess_tree(conflict_strat=C_STRAT_FIRST, clean_nulls=True)
    print(dt)
    print("Do trees match?", c_tree.shallow_equals(dt))

[*] Linearized tree with Absolute Encoding and incremental parsing:
-BOS-	-BOS-	-BOS-
Agent	NNP	1_S
Cooper	NNP	2_NP
loves	VBZ	1_S
black	JJ	2_VP
coffee	NN	3_NP
.	PUNCT	1_S
-EOS-	-EOS-	-EOS-

(S (NP (NNP Agent) (NNP Cooper)) (VP (VBZ loves) (NP (JJ black) (NN coffee))) (PUNCT .))
Do trees match? True
[*] Linearized tree with Relative Encoding and incremental parsing:
-BOS-	-BOS-	-BOS-
Agent	NNP	1*_S
Cooper	NNP	1*_NP
loves	VBZ	-1*_S
black	JJ	1*_VP
coffee	NN	1*_NP
.	PUNCT	-2*_S
-EOS-	-EOS-	-EOS-

(S (NP (NNP Agent) (NNP Cooper)) (VP (VBZ loves) (NP (JJ black) (NN coffee))) (PUNCT .))
Do trees match? True
[*] Linearized tree with Dynamic Encoding and incremental parsing:
-BOS-	-BOS-	-BOS-
Agent	NNP	1_S
Cooper	NNP	2_NP
loves	VBZ	1_S
black	JJ	2_VP
coffee	NN	3_NP
.	PUNCT	-2*_S
-EOS-	-EOS-	-EOS-

(S (NP (NNP Agent) (NNP Cooper)) (VP (VBZ loves) (NP (JJ black) (NN coffee))) (PUNCT .))
Do trees match? True


In [6]:
# Naive encodings again, this time with binary=True and right-branch
# binarization ("[b]" marks the binarization nodes in the labels). Relies on
# original_tree and c_tree defined by an earlier cell.
for header, encoding_cls in (
    ("[*] Linearized tree with Absolute Encoding and a priori right branch binarization:", C_NaiveAbsoluteEncoding),
    ("[*] Linearized tree with Relative Encoding and a priori right branch binarization:", C_NaiveRelativeEncoding),
    ("[*] Linearized tree with Dynamic Encoding and a priori right branch binarization:", C_NaiveDynamicEncoding),
):
    print("=====================================")
    print(header)
    e = encoding_cls(
        separator="_",
        unary_joiner="+",
        reverse=False,
        binary=True,
        binary_direction="R",
        binary_marker="[b]",
    )
    lt = e.encode(C_Tree.from_string(original_tree))
    print(lt)
    dt = e.decode(lt).postprocess_tree(conflict_strat=C_STRAT_FIRST, clean_nulls=True)
    print(dt)
    print("Do trees match?", c_tree.shallow_equals(dt))

[*] Linearized tree with Absolute Encoding and a priori right branch binarization:
-BOS-	-BOS-	-BOS-
Agent	NNP	2_NP
Cooper	NNP	1_S
loves	VBZ	3_VP
black	JJ	4_NP
coffee	NN	2_S[b]
.	PUNCT	1_S
-EOS-	-EOS-	-EOS-

(S (NP (NNP Agent) (NNP Cooper)) (VP (VBZ loves) (NP (JJ black) (NN coffee))) (PUNCT .))
Do trees match? True
[*] Linearized tree with Relative Encoding and a priori right branch binarization:
-BOS-	-BOS-	-BOS-
Agent	NNP	2*_NP
Cooper	NNP	-1*_S
loves	VBZ	2*_VP
black	JJ	1*_NP
coffee	NN	-2*_S[b]
.	PUNCT	-1*_S
-EOS-	-EOS-	-EOS-

(S (NP (NNP Agent) (NNP Cooper)) (VP (VBZ loves) (NP (JJ black) (NN coffee))) (PUNCT .))
Do trees match? True
[*] Linearized tree with Dynamic Encoding and a priori right branch binarization:
-BOS-	-BOS-	-BOS-
Agent	NNP	2_NP
Cooper	NNP	1_S
loves	VBZ	3_VP
black	JJ	1*_NP
coffee	NN	-2*_S[b]
.	PUNCT	1_S
-EOS-	-EOS-	-EOS-

(S (NP (NNP Agent) (NNP Cooper)) (VP (VBZ loves) (NP (JJ black) (NN coffee))) (PUNCT .))
Do trees match? True


### Tetratag encoding for constituent parsing examples:

In [7]:
# Tetratag round trip, once per traversal mode. Relies on original_tree and
# c_tree defined by an earlier cell.
for header, traversal in (
    ("[*] Linearized tree with Tetratag encoding and preorder traversal:", "preorder"),
    ("[*] Linearized tree with Tetratag encoding and inorder traversal:", "inorder"),
    ("[*] Linearized tree with Tetratag encoding and postorder traversal:", "postorder"),
):
    print("=====================================")
    print(header)
    e = C_Tetratag(separator="_", unary_joiner="+", mode=traversal, binary_marker="[b]")
    lt = e.encode(C_Tree.from_string(original_tree))
    print(lt)
    dt = e.decode(lt).postprocess_tree(conflict_strat=C_STRAT_FIRST, clean_nulls=True)
    print(dt)
    print("Do trees match?", c_tree.shallow_equals(dt))

[*] Linearized tree with Tetratag encoding and preorder traversal:
-BOS-	-BOS-	-BOS-
Agent	NNP	RRr_S>NP
Cooper	NNP	l_-NONE-
loves	VBZ	LRr_S[b]>VP
black	JJ	Lr_NP
coffee	NN	l_-NONE-
.	PUNCT	l_-NONE-
-EOS-	-EOS-	-EOS-

(S (NP (NNP Agent) (NNP Cooper)) (VP (VBZ loves) (NP (JJ black) (NN coffee))) (PUNCT .))
Do trees match? True
[*] Linearized tree with Tetratag encoding and inorder traversal:
-BOS-	-BOS-	-BOS-
Agent	NNP	rR_NP
Cooper	NNP	lR_S
loves	VBZ	rR_VP
black	JJ	rL_NP
coffee	NN	lL_S[b]
.	PUNCT	l_S[b]
-EOS-	-EOS-	-EOS-

(S (NP (NNP Agent) (NNP Cooper)) (VP (VBZ loves) (NP (JJ black) (NN coffee))) (PUNCT .))
Do trees match? True
[*] Linearized tree with Tetratag encoding and postorder traversal:
-BOS-	-BOS-	-BOS-
Agent	NNP	r_-NONE-
Cooper	NNP	l_-NONE-
loves	VBZ	Rr_NP
black	JJ	r_-NONE-
coffee	NN	l_-NONE-
.	PUNCT	LLRlR_NP>VP>S[b]>S
-EOS-	-EOS-	-EOS-

(S (NP (NNP Agent) (NNP Cooper)) (VP (VBZ loves) (NP (JJ black) (NN coffee))) (PUNCT .))
Do trees match? True


### Attach-Juxtapose encoding for constituent parsing example:

In [8]:
# Attach-Juxtapose round trip on the sample sentence.
print("=====================================")
original_tree = "(S (NP (NNP Agent) (NNP Cooper)) (VP (VBZ loves) (NP (JJ black) (NN coffee))) (PUNCT .))"

print("[*] Original tree:")
c_tree = C_Tree.from_string(original_tree)
print(c_tree)
print("[*] Linearized tree with Attach-Juxtapose encoding:")
e = C_JuxtaposedEncoding(
    separator="_",
    unary_joiner="+",
    binary=True,
    binary_direction="R",
    binary_marker="[b]",
)
# The encoder gets its own freshly parsed tree; c_tree is kept for the
# comparison at the end.
lt = e.encode(C_Tree.from_string(original_tree))
print(lt)
dt = e.decode(lt).postprocess_tree(conflict_strat=C_STRAT_FIRST, clean_nulls=True)
print(dt)
print("Do trees match?", c_tree.shallow_equals(dt))

[*] Original tree:
(S (NP (NNP Agent) (NNP Cooper)) (VP (VBZ loves) (NP (JJ black) (NN coffee))) (PUNCT .))
[*] Linearized tree with Attach-Juxtapose encoding:
-BOS-	-BOS-	-BOS-
Agent	NNP	0_an=attach[;]pl=NP
Cooper	NNP	1_an=attach
loves	VBZ	1_an=juxtapose[;]pl=VP[;]nl=S
black	JJ	2_an=attach[;]pl=NP
coffee	NN	3_an=attach
.	PUNCT	2_an=juxtapose[;]nl=S[b]
-EOS-	-EOS-	-EOS-

(S (NP (NNP Agent) (NNP Cooper)) (VP (VBZ loves) (NP (JJ black) (NN coffee))) (PUNCT .))
Do trees match? True


## Dependency Parsing Linearization

### Test of encodings

In [9]:
from codelin.models.deps_tree import D_Tree
from codelin.encs.enc_deps import *
from codelin.utils.constants import D_ROOT_HEAD

# conllu_sample = "# sent_id = 1\n"+\
# "# text = The owls are not what they seem.\n"+\
# "1\tThe\tthe\tDET\tDT\tDefinite=Def|PronType=Art\t2\tdet\t_\t_\n"+\
# "2\towls\towl\tNOUN\tNNS\tNumber=Plur\t3\tnsubj\t_\t_\n"+\
# "3\tare\tbe\tAUX\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t0\troot\t_\t_\n"+\
# "4\tnot\tnot\tPART\tRB\t_\t3\tadvmod\t_\t_\n"+\
# "5\twhat\twhat\tPRON\tWP\tPronType=Int\t6\tnsubj\t_\t_\n"+\
# "6\tthey\tthey\tPRON\tPRP\tCase=Nom|Number=Plur|Person=3|PronType=Prs\t3\tparataxis\t_\t_\n"+\
# "7\tseem\tseem\tVERB\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t6\tccomp\t_\t_\n"+\
# "8\t.\t.\tPUNCT\t.\t_\t3\tpunct\t_\t_"

# CoNLL-U sample: two comment lines, then one tab-separated row per token.
# The last row has no trailing newline.
conllu_sample = (
    "# sent_id = 1\n"
    "# text = Agent Cooper loves black coffee\n"
    "1\tAgent\tagent\tNOUN\tNN\tNumber=Sing\t2\tcompound\t_\t_\n"
    "2\tCooper\tCooper\tPROPN\tNNP\tNumber=Sing\t3\tnsubj\t_\t_\n"
    "3\tloves\tlove\tVERB\tVBZ\tMood=Ind|Tense=Pres|VerbForm=Fin\t0\troot\t_\t_\n"
    "4\tblack\tblack\tADJ\tJJ\tDegree=Pos\t5\tamod\t_\t_\n"
    "5\tcoffee\tcoffee\tNOUN\tNN\tNumber=Sing\t3\tobj\t_\t_"
)

# Feature name -> index, passed to to_string() when printing the labels.
# The literal used to list "VerbForm" twice (4, then 6). Python keeps the last
# value of a repeated key, so the effective mapping was already VerbForm -> 6
# with index 4 unused; the dead entry is dropped and the dict is unchanged.
# NOTE(review): confirm whether index 4 was meant for another feature (the
# sample also carries Degree, which is not mapped).
f_idx_dict = {
    "Number": 0,
    "Mood": 1,
    "PronType": 2,
    "Tense": 3,
    "VerbForm": 6,
    "Person": 5,
    "Definite": 7,
    "Case": 8,
}

print("\n[*] Original tree:")
d_tree = D_Tree.from_string(conllu_sample)
print(d_tree)

encoder = D_NaiveAbsoluteEncoding(separator="_")
print("\n[*] Linearized tree:")
ld_tree = encoder.encode(d_tree)
print(ld_tree.to_string(f_idx_dict))


print("\n[*] Decoded tree:")
dc_tree = encoder.decode(ld_tree)
# The return value is not used; dc_tree itself is printed afterwards.
dc_tree.postprocess_tree(search_root_strat=D_ROOT_HEAD, allow_multi_roots=False)
print(dc_tree)

# Compare decoded and original trees. This cell uses shallow_equals, whereas
# the following dependency cells compare get_heads() instead.
print("\n [*] Do trees match?", dc_tree.shallow_equals(d_tree))

  from .autonotebook import tqdm as notebook_tqdm



[*] Original tree:
0	-ROOT-	_	-ROOT-	_	_	0	-NOREL-	_	_
1	Agent	agent	NOUN	NN	Number=Sing	2	compound	_	_
2	Cooper	Cooper	PROPN	NNP	Number=Sing	3	nsubj	_	_
3	loves	love	VERB	VBZ	Mood=Ind|Tense=Pres|VerbForm=Fin	0	root	_	_
4	black	black	ADJ	JJ	Degree=Pos	5	amod	_	_
5	coffee	coffee	NOUN	NN	Number=Sing	3	obj	_	_



[*] Linearized tree:
-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-
Agent	NOUN	Sing	_	_	_	_	_	_	_	_	2_compound
Cooper	PROPN	Sing	_	_	_	_	_	_	_	_	3_nsubj
loves	VERB	_	Ind	_	Pres	_	_	Fin	_	_	0_root
black	ADJ	_	_	_	_	_	_	_	_	_	5_amod
coffee	NOUN	Sing	_	_	_	_	_	_	_	_	3_obj
-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-


[*] Decoded tree:
1	Agent	_	NOUN	_	_	2	compound	_	_
2	Cooper	_	PROPN	_	_	3	nsubj	_	_
3	loves	_	VERB	_	_	0	root	_	_
4	black	_	ADJ	_	_	5	amod	_	_
5	coffee	_	NOUN	_	_	3	obj	_	_



 [*] Do trees match? True


In [10]:
from codelin.models.deps_tree import D_Tree
from codelin.encs.enc_deps import *
from codelin.utils.constants import D_ROOT_HEAD

# conllu_sample = "# sent_id = 1\n"+\
# "# text = The owls are not what they seem.\n"+\
# "1\tThe\tthe\tDET\tDT\tDefinite=Def|PronType=Art\t2\tdet\t_\t_\n"+\
# "2\towls\towl\tNOUN\tNNS\tNumber=Plur\t3\tnsubj\t_\t_\n"+\
# "3\tare\tbe\tAUX\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t0\troot\t_\t_\n"+\
# "4\tnot\tnot\tPART\tRB\t_\t3\tadvmod\t_\t_\n"+\
# "5\twhat\twhat\tPRON\tWP\tPronType=Int\t6\tnsubj\t_\t_\n"+\
# "6\tthey\tthey\tPRON\tPRP\tCase=Nom|Number=Plur|Person=3|PronType=Prs\t3\tparataxis\t_\t_\n"+\
# "7\tseem\tseem\tVERB\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t6\tccomp\t_\t_\n"+\
# "8\t.\t.\tPUNCT\t.\t_\t3\tpunct\t_\t_"

# CoNLL-U sample: two comment lines, then one tab-separated row per token.
# The last row has no trailing newline.
conllu_sample = (
    "# sent_id = 1\n"
    "# text = Agent Cooper loves black coffee\n"
    "1\tAgent\tagent\tNOUN\tNN\tNumber=Sing\t2\tcompound\t_\t_\n"
    "2\tCooper\tCooper\tPROPN\tNNP\tNumber=Sing\t3\tnsubj\t_\t_\n"
    "3\tloves\tlove\tVERB\tVBZ\tMood=Ind|Tense=Pres|VerbForm=Fin\t0\troot\t_\t_\n"
    "4\tblack\tblack\tADJ\tJJ\tDegree=Pos\t5\tamod\t_\t_\n"
    "5\tcoffee\tcoffee\tNOUN\tNN\tNumber=Sing\t3\tobj\t_\t_"
)

# Feature name -> index, passed to to_string() when printing the labels.
# The literal used to list "VerbForm" twice (4, then 6). Python keeps the last
# value of a repeated key, so the effective mapping was already VerbForm -> 6
# with index 4 unused; the dead entry is dropped and the dict is unchanged.
# NOTE(review): confirm whether index 4 was meant for another feature (the
# sample also carries Degree, which is not mapped).
f_idx_dict = {
    "Number": 0,
    "Mood": 1,
    "PronType": 2,
    "Tense": 3,
    "VerbForm": 6,
    "Person": 5,
    "Definite": 7,
    "Case": 8,
}

print("\n[*] Original tree:")
d_tree = D_Tree.from_string(conllu_sample)
print(d_tree)

print("\n[*] Encoding:")
encoder = D_NaiveRelativeEncoding(separator="_", hang_from_root=True)
print(encoder)

print("\n[*] Linearized tree:")
ld_tree = encoder.encode(d_tree)
print(ld_tree.to_string(f_idx_dict))

print("\n[*] Decoded tree:")
dc_tree = encoder.decode(ld_tree)
# The return value is not used; dc_tree itself is printed afterwards.
dc_tree.postprocess_tree(search_root_strat=D_ROOT_HEAD, allow_multi_roots=False)
print(dc_tree)

# matching using heads
print("\n [*] Do trees match?", dc_tree.get_heads() == d_tree.get_heads())


[*] Original tree:
0	-ROOT-	_	-ROOT-	_	_	0	-NOREL-	_	_
1	Agent	agent	NOUN	NN	Number=Sing	2	compound	_	_
2	Cooper	Cooper	PROPN	NNP	Number=Sing	3	nsubj	_	_
3	loves	love	VERB	VBZ	Mood=Ind|Tense=Pres|VerbForm=Fin	0	root	_	_
4	black	black	ADJ	JJ	Degree=Pos	5	amod	_	_
5	coffee	coffee	NOUN	NN	Number=Sing	3	obj	_	_



[*] Encoding:
Dependency Naive Relative Encoding

[*] Linearized tree:
-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-
Agent	NOUN	Sing	_	_	_	_	_	_	_	_	1_compound
Cooper	PROPN	Sing	_	_	_	_	_	_	_	_	1_nsubj
loves	VERB	_	Ind	_	Pres	_	_	Fin	_	_	-NONE-_root
black	ADJ	_	_	_	_	_	_	_	_	_	1_amod
coffee	NOUN	Sing	_	_	_	_	_	_	_	_	-2_obj
-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-


[*] Decoded tree:
1	Agent	_	NOUN	_	_	2	compound	_	_
2	Cooper	_	PROPN	_	_	3	nsubj	_	_
3	loves	_	VERB	_	_	0	root	_	_
4	black	_	ADJ	_	_	5	amod	_	_
5	coffee	_	NOUN	_	_	3	obj	_	_



 [*] Do trees match? True


In [11]:
from codelin.models.deps_tree import D_Tree
from codelin.encs.enc_deps import *
from codelin.utils.constants import D_ROOT_HEAD

# conllu_sample = "# sent_id = 1\n"+\
# "# text = The owls are not what they seem.\n"+\
# "1\tThe\tthe\tDET\tDT\tDefinite=Def|PronType=Art\t2\tdet\t_\t_\n"+\
# "2\towls\towl\tNOUN\tNNS\tNumber=Plur\t3\tnsubj\t_\t_\n"+\
# "3\tare\tbe\tAUX\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t0\troot\t_\t_\n"+\
# "4\tnot\tnot\tPART\tRB\t_\t3\tadvmod\t_\t_\n"+\
# "5\twhat\twhat\tPRON\tWP\tPronType=Int\t6\tnsubj\t_\t_\n"+\
# "6\tthey\tthey\tPRON\tPRP\tCase=Nom|Number=Plur|Person=3|PronType=Prs\t3\tparataxis\t_\t_\n"+\
# "7\tseem\tseem\tVERB\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t6\tccomp\t_\t_\n"+\
# "8\t.\t.\tPUNCT\t.\t_\t3\tpunct\t_\t_"

# CoNLL-U sample: two comment lines, then one tab-separated row per token.
# The last row has no trailing newline.
conllu_sample = (
    "# sent_id = 1\n"
    "# text = Agent Cooper loves black coffee\n"
    "1\tAgent\tagent\tNOUN\tNN\tNumber=Sing\t2\tcompound\t_\t_\n"
    "2\tCooper\tCooper\tPROPN\tNNP\tNumber=Sing\t3\tnsubj\t_\t_\n"
    "3\tloves\tlove\tVERB\tVBZ\tMood=Ind|Tense=Pres|VerbForm=Fin\t0\troot\t_\t_\n"
    "4\tblack\tblack\tADJ\tJJ\tDegree=Pos\t5\tamod\t_\t_\n"
    "5\tcoffee\tcoffee\tNOUN\tNN\tNumber=Sing\t3\tobj\t_\t_"
)

# Feature name -> index, passed to to_string() when printing the labels.
# The literal used to list "VerbForm" twice (4, then 6). Python keeps the last
# value of a repeated key, so the effective mapping was already VerbForm -> 6
# with index 4 unused; the dead entry is dropped and the dict is unchanged.
# NOTE(review): confirm whether index 4 was meant for another feature (the
# sample also carries Degree, which is not mapped).
f_idx_dict = {
    "Number": 0,
    "Mood": 1,
    "PronType": 2,
    "Tense": 3,
    "VerbForm": 6,
    "Person": 5,
    "Definite": 7,
    "Case": 8,
}

print("\n[*] Original tree:")
d_tree = D_Tree.from_string(conllu_sample)
print(d_tree)

print("\n[*] Encoding:")
encoder = D_BrkBasedEncoding(separator="_", displacement=True)
print(encoder)

print("\n[*] Linearized tree:")
ld_tree = encoder.encode(d_tree)
print(ld_tree.to_string(f_idx_dict))

print("\n[*] Decoded tree:")
dc_tree = encoder.decode(ld_tree)
# The return value is not used; dc_tree itself is printed afterwards.
dc_tree.postprocess_tree(search_root_strat=D_ROOT_HEAD, allow_multi_roots=False)
print(dc_tree)

# matching using heads
print("\n [*] Do trees match?", dc_tree.get_heads() == d_tree.get_heads())


[*] Original tree:
0	-ROOT-	_	-ROOT-	_	_	0	-NOREL-	_	_
1	Agent	agent	NOUN	NN	Number=Sing	2	compound	_	_
2	Cooper	Cooper	PROPN	NNP	Number=Sing	3	nsubj	_	_
3	loves	love	VERB	VBZ	Mood=Ind|Tense=Pres|VerbForm=Fin	0	root	_	_
4	black	black	ADJ	JJ	Degree=Pos	5	amod	_	_
5	coffee	coffee	NOUN	NN	Number=Sing	3	obj	_	_



[*] Encoding:
Dependency Bracketing Based Encoding

[*] Linearized tree:
-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-
Agent	NOUN	Sing	_	_	_	_	_	_	_	_	__compound
Cooper	PROPN	Sing	_	_	_	_	_	_	_	_	<\_nsubj
loves	VERB	_	Ind	_	Pres	_	_	Fin	_	_	<\_root
black	ADJ	_	_	_	_	_	_	_	_	_	/_amod
coffee	NOUN	Sing	_	_	_	_	_	_	_	_	<\>_obj
-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-


[*] Decoded tree:
1	Agent	_	NOUN	_	_	2	compound	_	_
2	Cooper	_	PROPN	_	_	3	nsubj	_	_
3	loves	_	VERB	_	_	0	root	_	_
4	black	_	ADJ	_	_	5	amod	_	_
5	coffee	_	NOUN	_	_	3	obj	_	_



 [*] Do trees match? True


In [12]:
from codelin.models.deps_tree import D_Tree
from codelin.encs.enc_deps import *
from codelin.utils.constants import D_ROOT_HEAD

# conllu_sample = "# sent_id = 1\n"+\
# "# text = The owls are not what they seem.\n"+\
# "1\tThe\tthe\tDET\tDT\tDefinite=Def|PronType=Art\t2\tdet\t_\t_\n"+\
# "2\towls\towl\tNOUN\tNNS\tNumber=Plur\t3\tnsubj\t_\t_\n"+\
# "3\tare\tbe\tAUX\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t0\troot\t_\t_\n"+\
# "4\tnot\tnot\tPART\tRB\t_\t3\tadvmod\t_\t_\n"+\
# "5\twhat\twhat\tPRON\tWP\tPronType=Int\t6\tnsubj\t_\t_\n"+\
# "6\tthey\tthey\tPRON\tPRP\tCase=Nom|Number=Plur|Person=3|PronType=Prs\t3\tparataxis\t_\t_\n"+\
# "7\tseem\tseem\tVERB\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t6\tccomp\t_\t_\n"+\
# "8\t.\t.\tPUNCT\t.\t_\t3\tpunct\t_\t_"

# CoNLL-U sample: two comment lines, then one tab-separated row per token.
# The last row has no trailing newline.
conllu_sample = (
    "# sent_id = 1\n"
    "# text = Agent Cooper loves black coffee\n"
    "1\tAgent\tagent\tNOUN\tNN\tNumber=Sing\t2\tcompound\t_\t_\n"
    "2\tCooper\tCooper\tPROPN\tNNP\tNumber=Sing\t3\tnsubj\t_\t_\n"
    "3\tloves\tlove\tVERB\tVBZ\tMood=Ind|Tense=Pres|VerbForm=Fin\t0\troot\t_\t_\n"
    "4\tblack\tblack\tADJ\tJJ\tDegree=Pos\t5\tamod\t_\t_\n"
    "5\tcoffee\tcoffee\tNOUN\tNN\tNumber=Sing\t3\tobj\t_\t_"
)

# Feature name -> index, passed to to_string() when printing the labels.
# The literal used to list "VerbForm" twice (4, then 6). Python keeps the last
# value of a repeated key, so the effective mapping was already VerbForm -> 6
# with index 4 unused; the dead entry is dropped and the dict is unchanged.
# NOTE(review): confirm whether index 4 was meant for another feature (the
# sample also carries Degree, which is not mapped).
f_idx_dict = {
    "Number": 0,
    "Mood": 1,
    "PronType": 2,
    "Tense": 3,
    "VerbForm": 6,
    "Person": 5,
    "Definite": 7,
    "Case": 8,
}

print("\n[*] Original tree:")
d_tree = D_Tree.from_string(conllu_sample)
print(d_tree)

print("\n[*] Encoding:")
encoder = D_PosBasedEncoding(separator="_")
print(encoder)

print("\n[*] Linearized tree:")
ld_tree = encoder.encode(d_tree)
print(ld_tree.to_string(f_idx_dict))

print("\n[*] Decoded tree:")
dc_tree = encoder.decode(ld_tree)
# The return value is not used; dc_tree itself is printed afterwards.
dc_tree.postprocess_tree(search_root_strat=D_ROOT_HEAD, allow_multi_roots=False)
print(dc_tree)

# matching using heads
print("\n [*] Do trees match?", dc_tree.get_heads() == d_tree.get_heads())


[*] Original tree:
0	-ROOT-	_	-ROOT-	_	_	0	-NOREL-	_	_
1	Agent	agent	NOUN	NN	Number=Sing	2	compound	_	_
2	Cooper	Cooper	PROPN	NNP	Number=Sing	3	nsubj	_	_
3	loves	love	VERB	VBZ	Mood=Ind|Tense=Pres|VerbForm=Fin	0	root	_	_
4	black	black	ADJ	JJ	Degree=Pos	5	amod	_	_
5	coffee	coffee	NOUN	NN	Number=Sing	3	obj	_	_



[*] Encoding:
Dependency Part-of-Speech Based Encoding

[*] Linearized tree:
-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-
Agent	NOUN	Sing	_	_	_	_	_	_	_	_	1--PROPN_compound
Cooper	PROPN	Sing	_	_	_	_	_	_	_	_	1--VERB_nsubj
loves	VERB	_	Ind	_	Pres	_	_	Fin	_	_	-1---ROOT-_root
black	ADJ	_	_	_	_	_	_	_	_	_	1--NOUN_amod
coffee	NOUN	Sing	_	_	_	_	_	_	_	_	-1--VERB_obj
-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-


[*] Decoded tree:
1	Agent	_	NOUN	_	_	2	compound	_	_
2	Cooper	_	PROPN	_	_	3	nsubj	_	_
3	loves	_	VERB	_	_	0	root	_	_
4	black	_	ADJ	_	_	5	amod	_	_
5	coffee	_	NOUN	_	_	3	obj	_	_



 [*] Do trees match? True


In [13]:
from codelin.models.deps_tree import D_Tree
from codelin.encs.enc_deps import *
from codelin.utils.constants import D_ROOT_HEAD, D_2P_GREED

# conllu_sample = "# sent_id = 1\n"+\
# "# text = The owls are not what they seem.\n"+\
# "1\tThe\tthe\tDET\tDT\tDefinite=Def|PronType=Art\t2\tdet\t_\t_\n"+\
# "2\towls\towl\tNOUN\tNNS\tNumber=Plur\t3\tnsubj\t_\t_\n"+\
# "3\tare\tbe\tAUX\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t0\troot\t_\t_\n"+\
# "4\tnot\tnot\tPART\tRB\t_\t3\tadvmod\t_\t_\n"+\
# "5\twhat\twhat\tPRON\tWP\tPronType=Int\t6\tnsubj\t_\t_\n"+\
# "6\tthey\tthey\tPRON\tPRP\tCase=Nom|Number=Plur|Person=3|PronType=Prs\t3\tparataxis\t_\t_\n"+\
# "7\tseem\tseem\tVERB\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t6\tccomp\t_\t_\n"+\
# "8\t.\t.\tPUNCT\t.\t_\t3\tpunct\t_\t_"

# conllu_sample = "# sent_id = 1\n"+\
# "# text = Apple Inc plans to open a store in Berlin\n"+\
# "1\tApple\tApple\tPROPN\tNNP\t_\t2\tnsubj\t_\t_\n"+\
# "2\tInc\tInc\tPROPN\tNNP\t_\t3\tflat\t_\t_\n"+\
# "3\tplans\tplan\tVERB\tVBZ\t_\t5\tcomp\t_\t_\n"+\
# "4\tto\tto\tPART\tTO\t_\t5\taux\t_\t_\n"+\
# "5\topen\topen\tVERB\tVB\t_\t0\troot\t_\t_\n"+\
# "6\ta\ta\tDET\tDT\t_\t7\tdet\t_\t_\n"+\
# "7\tstore\tstore\tNOUN\tNN\t_\t5\tdobj\t_\t_\n"+\
# "8\tin\tin\tADP\tIN\t_\t5\tprep\t_\t_\n"+\
# "9\tBerlin\tBerlin\tPROPN\tNNP\t_\t8\tpobj\t_\t_\n"

# CoNLL-U sample: two comment lines, then one tab-separated row per token.
# The last row has no trailing newline.
conllu_sample = (
    "# sent_id = 1\n"
    "# text = Agent Cooper loves black coffee\n"
    "1\tAgent\tagent\tNOUN\tNN\tNumber=Sing\t2\tcompound\t_\t_\n"
    "2\tCooper\tCooper\tPROPN\tNNP\tNumber=Sing\t3\tnsubj\t_\t_\n"
    "3\tloves\tlove\tVERB\tVBZ\tMood=Ind|Tense=Pres|VerbForm=Fin\t0\troot\t_\t_\n"
    "4\tblack\tblack\tADJ\tJJ\tDegree=Pos\t5\tamod\t_\t_\n"
    "5\tcoffee\tcoffee\tNOUN\tNN\tNumber=Sing\t3\tobj\t_\t_"
)

# Feature name -> index, passed to to_string() when printing the labels.
# The literal used to list "VerbForm" twice (4, then 6). Python keeps the last
# value of a repeated key, so the effective mapping was already VerbForm -> 6
# with index 4 unused; the dead entry is dropped and the dict is unchanged.
# NOTE(review): confirm whether index 4 was meant for another feature (the
# sample also carries Degree, which is not mapped).
f_idx_dict = {
    "Number": 0,
    "Mood": 1,
    "PronType": 2,
    "Tense": 3,
    "VerbForm": 6,
    "Person": 5,
    "Definite": 7,
    "Case": 8,
}

print("\n[*] Original tree:")
d_tree = D_Tree.from_string(conllu_sample)
print(d_tree)

print("\n[*] Encoding:")
encoder = D_BrkBasedEncoding(separator="_", displacement=False)
print(encoder)

print("\n[*] Linearized tree:")
ld_tree = encoder.encode(d_tree)
print(ld_tree.to_string(f_idx_dict))

print("\n[*] Decoded tree:")
dc_tree = encoder.decode(ld_tree)
# The return value is not used; dc_tree itself is printed afterwards.
dc_tree.postprocess_tree(search_root_strat=D_ROOT_HEAD, allow_multi_roots=False)
print(dc_tree)

# matching using heads
print("\n [*] Do trees match?", dc_tree.get_heads() == d_tree.get_heads())


[*] Original tree:
0	-ROOT-	_	-ROOT-	_	_	0	-NOREL-	_	_
1	Agent	agent	NOUN	NN	Number=Sing	2	compound	_	_
2	Cooper	Cooper	PROPN	NNP	Number=Sing	3	nsubj	_	_
3	loves	love	VERB	VBZ	Mood=Ind|Tense=Pres|VerbForm=Fin	0	root	_	_
4	black	black	ADJ	JJ	Degree=Pos	5	amod	_	_
5	coffee	coffee	NOUN	NN	Number=Sing	3	obj	_	_



[*] Encoding:
Dependency Bracketing Based Encoding

[*] Linearized tree:
-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-
Agent	NOUN	Sing	_	_	_	_	_	_	_	_	<_compound
Cooper	PROPN	Sing	_	_	_	_	_	_	_	_	\<_nsubj
loves	VERB	_	Ind	_	Pres	_	_	Fin	_	_	\/_root
black	ADJ	_	_	_	_	_	_	_	_	_	<_amod
coffee	NOUN	Sing	_	_	_	_	_	_	_	_	\>_obj
-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-


[*] Decoded tree:
1	Agent	_	NOUN	_	_	2	compound	_	_
2	Cooper	_	PROPN	_	_	3	nsubj	_	_
3	loves	_	VERB	_	_	0	root	_	_
4	black	_	ADJ	_	_	5	amod	_	_
5	coffee	_	NOUN	_	_	3	obj	_	_



 [*] Do trees match? True


In [2]:
from codelin.models.deps_tree import D_Tree
from codelin.encs.enc_deps import *
from codelin.utils.constants import D_ROOT_HEAD, D_2P_GREED

# conllu_sample = "# sent_id = 1\n"+\
# "# text = The owls are not what they seem.\n"+\
# "1\tThe\tthe\tDET\tDT\tDefinite=Def|PronType=Art\t2\tdet\t_\t_\n"+\
# "2\towls\towl\tNOUN\tNNS\tNumber=Plur\t3\tnsubj\t_\t_\n"+\
# "3\tare\tbe\tAUX\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t0\troot\t_\t_\n"+\
# "4\tnot\tnot\tPART\tRB\t_\t3\tadvmod\t_\t_\n"+\
# "5\twhat\twhat\tPRON\tWP\tPronType=Int\t6\tnsubj\t_\t_\n"+\
# "6\tthey\tthey\tPRON\tPRP\tCase=Nom|Number=Plur|Person=3|PronType=Prs\t3\tparataxis\t_\t_\n"+\
# "7\tseem\tseem\tVERB\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t6\tccomp\t_\t_\n"+\
# "8\t.\t.\tPUNCT\t.\t_\t3\tpunct\t_\t_"

# CoNLL-U sample (six tokens, including the final period): two comment lines,
# then one tab-separated row per token. The last row has no trailing newline.
conllu_sample = (
    "# sent_id = 1\n"
    "# text = Agent Cooper loves black coffee.\n"
    "1\tAgent\tagent\tNOUN\tNN\tNumber=Sing\t2\tcompound\t_\t_\n"
    "2\tCooper\tCooper\tPROPN\tNNP\tNumber=Sing\t3\tnsubj\t_\t_\n"
    "3\tloves\tlove\tVERB\tVBZ\tMood=Ind|Tense=Pres|VerbForm=Fin\t0\troot\t_\t_\n"
    "4\tblack\tblack\tADJ\tJJ\tDegree=Pos\t5\tamod\t_\t_\n"
    "5\tcoffee\tcoffee\tNOUN\tNN\tNumber=Sing\t3\tobj\t_\t_\n"
    "6\t.\t.\tPUNCT\t.\t_\t3\tpunct\t_\t_"
)

# Feature name -> index, passed to to_string() when printing the labels.
# The literal used to list "VerbForm" twice (4, then 6). Python keeps the last
# value of a repeated key, so the effective mapping was already VerbForm -> 6
# with index 4 unused; the dead entry is dropped and the dict is unchanged.
# NOTE(review): confirm whether index 4 was meant for another feature (the
# sample also carries Degree, which is not mapped).
f_idx_dict = {
    "Number": 0,
    "Mood": 1,
    "PronType": 2,
    "Tense": 3,
    "VerbForm": 6,
    "Person": 5,
    "Definite": 7,
    "Case": 8,
}

print("\n[*] Original tree:")
d_tree = D_Tree.from_string(conllu_sample)
print(d_tree)

print("\n[*] Encoding:")
encoder = D_Brk4BitsEncoding(separator="_")
print(encoder)

print("\n[*] Linearized tree:")
ld_tree = encoder.encode(d_tree)
# Show each label's bit vector as a compact string of digits.
print(["".join(str(bit) for bit in bits) for bits in encoder.labels_to_bits(ld_tree.labels)])
print(ld_tree.to_string(f_idx_dict))


print("\n[*] Decoded tree:")
dc_tree = encoder.decode(ld_tree)
# The return value is not used; dc_tree itself is printed afterwards.
dc_tree.postprocess_tree(search_root_strat=D_ROOT_HEAD, allow_multi_roots=False)
print(dc_tree)

# matching using heads
# remove_dummy() is called on the original tree before the head lists are
# compared. NOTE(review): presumably this drops the -ROOT- row that the
# decoded tree does not print; confirm against D_Tree.
d_tree.remove_dummy()
print("\n [*] Do trees match?", dc_tree.get_heads() == d_tree.get_heads())


[*] Original tree:
0	-ROOT-	_	-ROOT-	_	_	0	-NOREL-	_	_
1	Agent	agent	NOUN	NN	Number=Sing	2	compound	_	_
2	Cooper	Cooper	PROPN	NNP	Number=Sing	3	nsubj	_	_
3	loves	love	VERB	VBZ	Mood=Ind|Tense=Pres|VerbForm=Fin	0	root	_	_
4	black	black	ADJ	JJ	Degree=Pos	5	amod	_	_
5	coffee	coffee	NOUN	NN	Number=Sing	3	obj	_	_
6	.	.	PUNCT	.	_	3	punct	_	_



[*] Encoding:
Dependency Bracketing 4-Bits Encoding

[*] Linearized tree:
['0001', '0100', '0110', '1111', '0100', '1010', '1100']
-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-
-ROOT-	-ROOT-	_	_	_	_	_	_	_	_	_	/_-NOREL-
Agent	NOUN	Sing	_	_	_	_	_	_	_	_	<*_compound
Cooper	PROPN	Sing	_	_	_	_	_	_	_	_	\<*_nsubj
loves	VERB	_	Ind	_	Pres	_	_	Fin	_	_	\>*/_root
black	ADJ	_	_	_	_	_	_	_	_	_	<*_amod
coffee	NOUN	Sing	_	_	_	_	_	_	_	_	\>_obj
.	PUNCT	_	_	_	_	_	_	_	_	_	>*_punct
-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-


[*] Decoded tree:
1	Agent	_	NOUN	_	_	2	compound	_	_
2	Cooper	_	PROPN	_	_	3	nsubj	_	_
3	loves	_	

In [3]:
from codelin.models.deps_tree import D_Tree
from codelin.encs.enc_deps import *
from codelin.utils.constants import D_ROOT_HEAD, D_2P_GREED

# conllu_sample = "# sent_id = 1\n"+\
# "# text = The owls are not what they seem.\n"+\
# "1\tThe\tthe\tDET\tDT\tDefinite=Def|PronType=Art\t2\tdet\t_\t_\n"+\
# "2\towls\towl\tNOUN\tNNS\tNumber=Plur\t3\tnsubj\t_\t_\n"+\
# "3\tare\tbe\tAUX\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t0\troot\t_\t_\n"+\
# "4\tnot\tnot\tPART\tRB\t_\t3\tadvmod\t_\t_\n"+\
# "5\twhat\twhat\tPRON\tWP\tPronType=Int\t6\tnsubj\t_\t_\n"+\
# "6\tthey\tthey\tPRON\tPRP\tCase=Nom|Number=Plur|Person=3|PronType=Prs\t3\tparataxis\t_\t_\n"+\
# "7\tseem\tseem\tVERB\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t6\tccomp\t_\t_\n"+\
# "8\t.\t.\tPUNCT\t.\t_\t3\tpunct\t_\t_"

# conllu_sample = "# sent_id = 1\n"+\
# "# text = Agent Cooper loves black coffee.\n"+\
# "1\tAgent\tagent\tNOUN\tNN\tNumber=Sing\t2\tcompound\t_\t_\n"+\
# "2\tCooper\tCooper\tPROPN\tNNP\tNumber=Sing\t3\tnsubj\t_\t_\n"+\
# "3\tloves\tlove\tVERB\tVBZ\tMood=Ind|Tense=Pres|VerbForm=Fin\t0\troot\t_\t_\n"+\
# "4\tblack\tblack\tADJ\tJJ\tDegree=Pos\t5\tamod\t_\t_\n"+\
# "5\tcoffee\tcoffee\tNOUN\tNN\tNumber=Sing\t3\tobj\t_\t_\n"+\
# "6\t.\t.\tPUNCT\t.\t_\t3\tpunct\t_\t_"


# CoNLL-U sample sentence: two comment lines followed by one tab-separated
# row per token. Rows are joined with "\n" and there is no trailing newline.
conllu_sample = "\n".join((
    "# sent_id = 1",
    "# text = Agent Cooper loves black coffee.",
    "1\tAgent\tagent\tNOUN\tNN\tNumber=Sing\t2\tcompound\t_\t_",
    "2\tCooper\tCooper\tPROPN\tNNP\tNumber=Sing\t3\tnsubj\t_\t_",
    "3\tloves\tlove\tVERB\tVBZ\tMood=Ind|Tense=Pres|VerbForm=Fin\t0\troot\t_\t_",
    "4\tblack\tblack\tADJ\tJJ\tDegree=Pos\t5\tamod\t_\t_",
    "5\tcoffee\tcoffee\tNOUN\tNN\tNumber=Sing\t3\tobj\t_\t_",
    "6\t.\t.\tPUNCT\t.\t_\t3\tpunct\t_\t_",
))

# Feature name -> index, handed to `ld_tree.to_string` below; judging by the
# printed output, the index selects the feature column a value is written to.
# "VerbForm" is listed exactly once, with index 6: a dict literal keeps only
# the last value of a repeated key, so an earlier "VerbForm": 4 entry would be
# dead code. Index 4 is therefore deliberately left unassigned, which keeps
# the printed columns exactly as before.
f_idx_dict = {
    "Number": 0,
    "Mood": 1,
    "PronType": 2,
    "Tense": 3,
    "VerbForm": 6,
    "Person": 5,
    "Definite": 7,
    "Case": 8,
}

# Demo: round-trip the CoNLL-U sample through the 7-bit bracketing encoding.

# 1) Parse the sample into a dependency tree and show it.
print("\n[*] Original tree:")
d_tree = D_Tree.from_string(conllu_sample)
print(d_tree)

# 2) Build the encoder; printing it shows its human-readable name.
print("\n[*] Encoding:")
encoder = D_Brk7BitsEncoding(separator="_")
print(encoder)

# 3) Linearize the tree. Each label is first shown as a compact bit string
#    (the elements returned by `labels_to_bits` are stringified and joined),
#    then the whole linearized tree is printed with its feature columns.
print("\n[*] Linearized tree:")
ld_tree = encoder.encode(d_tree)
bit_strings = ["".join(map(str, bits)) for bits in encoder.labels_to_bits(ld_tree.labels)]
print(bit_strings)

print(ld_tree.to_string(f_idx_dict))

# 4) Decode the labels back into a tree and post-process it.
print("\n[*] Decoded tree:")
dc_tree = encoder.decode(ld_tree)
dc_tree.postprocess_tree(search_root_strat=D_ROOT_HEAD, allow_multi_roots=False)
print(dc_tree)

# 5) Compare both trees by their head lists.
# NOTE(review): `remove_dummy` presumably drops the artificial -ROOT- node so
# that both head lists line up - confirm against D_Tree.
d_tree.remove_dummy()
heads_match = dc_tree.get_heads() == d_tree.get_heads()
print("\n [*] Do trees match?", heads_match)


[*] Original tree:
0	-ROOT-	_	-ROOT-	_	_	0	-NOREL-	_	_
1	Agent	agent	NOUN	NN	Number=Sing	2	compound	_	_
2	Cooper	Cooper	PROPN	NNP	Number=Sing	3	nsubj	_	_
3	loves	love	VERB	VBZ	Mood=Ind|Tense=Pres|VerbForm=Fin	0	root	_	_
4	black	black	ADJ	JJ	Degree=Pos	2	amod	_	_
5	coffee	coffee	NOUN	NN	Number=Sing	3	obj	_	_
6	.	.	PUNCT	.	_	3	punct	_	_



[*] Encoding:
Dependency Bracketing 7-Bits Encoding

[*] Linearized tree:
['0000100', '1010000', '1011001', '0011100', '0110000', '0000000', '0010000']
-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-	-BOS-
-ROOT-	-ROOT-	_	_	_	_	_	_	_	_	_	/0_-NOREL-
Agent	NOUN	Sing	_	_	_	_	_	_	_	_	<0*_compound
Cooper	PROPN	Sing	_	_	_	_	_	_	_	_	\0<0*/1_nsubj
loves	VERB	_	Ind	_	Pres	_	_	Fin	_	_	\0>0*/0_root
black	ADJ	_	_	_	_	_	_	_	_	_	>1*_amod
coffee	NOUN	Sing	_	_	_	_	_	_	_	_	>0_obj
.	PUNCT	_	_	_	_	_	_	_	_	_	>0*_punct
-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-	-EOS-


[*] Decoded tree:
1	Agent	_	NOUN	_	_	2	compound	_	_
2	Cooper	_	

In [1]:
from codelin.models.deps_tree import D_Tree
from codelin.models.const_tree import C_Tree
from codelin.encs.enc_deps import *
from codelin.utils.constants import D_ROOT_HEAD, D_2P_GREED

# conllu_sample = "# sent_id = 1\n"+\
# "# text = The owls are not what they seem.\n"+\
# "1\tThe\tthe\tDET\tDT\tDefinite=Def|PronType=Art\t2\tdet\t_\t_\n"+\
# "2\towls\towl\tNOUN\tNNS\tNumber=Plur\t3\tnsubj\t_\t_\n"+\
# "3\tare\tbe\tAUX\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t0\troot\t_\t_\n"+\
# "4\tnot\tnot\tPART\tRB\t_\t3\tadvmod\t_\t_\n"+\
# "5\twhat\twhat\tPRON\tWP\tPronType=Int\t6\tnsubj\t_\t_\n"+\
# "6\tthey\tthey\tPRON\tPRP\tCase=Nom|Number=Plur|Person=3|PronType=Prs\t3\tparataxis\t_\t_\n"+\
# "7\tseem\tseem\tVERB\tVBP\tMood=Ind|Tense=Pres|VerbForm=Fin\t6\tccomp\t_\t_\n"+\
# "8\t.\t.\tPUNCT\t.\t_\t3\tpunct\t_\t_"

# CoNLL-U sample sentence: two comment lines followed by one tab-separated
# row per token. Rows are joined with "\n" and there is no trailing newline.
conllu_sample = "\n".join((
    "# sent_id = 1",
    "# text = Agent Cooper loves black coffee.",
    "1\tAgent\tagent\tNOUN\tNN\tNumber=Sing\t2\tcompound\t_\t_",
    "2\tCooper\tCooper\tPROPN\tNNP\tNumber=Sing\t3\tnsubj\t_\t_",
    "3\tloves\tlove\tVERB\tVBZ\tMood=Ind|Tense=Pres|VerbForm=Fin\t0\troot\t_\t_",
    "4\tblack\tblack\tADJ\tJJ\tDegree=Pos\t5\tamod\t_\t_",
    "5\tcoffee\tcoffee\tNOUN\tNN\tNumber=Sing\t3\tobj\t_\t_",
    "6\t.\t.\tPUNCT\t.\t_\t3\tpunct\t_\t_",
))

# Demo: round-trip the CoNLL-U sample through the hexatag encoding.

# 1) Parse the sample, then show it both as CoNLL-U rows and as the LaTeX
#    markup produced by `D_Tree.to_latex`.
print("\n[*] Original tree:")
d_tree = D_Tree.from_string(conllu_sample)
print(d_tree)
latex_markup = D_Tree.to_latex(d_tree)
print(latex_markup)

# 2) Show the binary head tree derived from the dependency tree.
print("\n[*] Binary head tree:")
bht = D_Tree.to_bht(d_tree)
print(bht)

# 3) Build the encoder; printing it shows its human-readable name.
print("\n[*] Encoding:")
encoder = D_HexatagEncoding(separator="_")
print(encoder)

# 4) Linearize the tree and print the result.
print("\n[*] Linearized tree:")
ld_tree = encoder.encode(d_tree)
print(ld_tree)

# 5) Decode the labels back into a tree and post-process it.
print("\n[*] Decoded tree:")
dc_tree = encoder.decode(ld_tree)
dc_tree.postprocess_tree(search_root_strat=D_ROOT_HEAD, allow_multi_roots=False)
print(dc_tree)

# 6) Compare both trees by their head lists.
# NOTE(review): `remove_dummy` presumably drops the artificial -ROOT- node so
# that both head lists line up - confirm against D_Tree.
d_tree.remove_dummy()
heads_match = dc_tree.get_heads() == d_tree.get_heads()
print("\n [*] Do trees match?", heads_match)

  from .autonotebook import tqdm as notebook_tqdm



[*] Original tree:
0	-ROOT-	_	-ROOT-	_	_	0	-NOREL-	_	_
1	Agent	agent	NOUN	NN	Number=Sing	2	compound	_	_
2	Cooper	Cooper	PROPN	NNP	Number=Sing	3	nsubj	_	_
3	loves	love	VERB	VBZ	Mood=Ind|Tense=Pres|VerbForm=Fin	0	root	_	_
4	black	black	ADJ	JJ	Degree=Pos	5	amod	_	_
5	coffee	coffee	NOUN	NN	Number=Sing	3	obj	_	_
6	.	.	PUNCT	.	_	3	punct	_	_


\begin{dependency}[theme = simple]
\begin{deptext}[row sep=.25em, column sep=1.5em]
0 \& 1 \& 2 \& 3 \& 4 \& 5 \& 6 \\ 
-ROOT- \& Agent \& Cooper \& loves \& black \& coffee \& . \\ 
\end{deptext}
\depedge{3}{2}{compound}
\depedge{4}{3}{nsubj}
\depedge{1}{4}{root}
\depedge{6}{5}{amod}
\depedge{4}{6}{obj}
\depedge{4}{7}{punct}
\end{dependency}


[*] Binary head tree:
(L (-ROOT- -ROOT-) (L (L (R (R (NOUN Agent) (PROPN Cooper)) (VERB loves)) (R (ADJ black) (NOUN coffee))) (PUNCT .)))

[*] Encoding:
Dependency Hexa-Tags Encoding

[*] Linearized tree:
(L (-NOREL- (-ROOT- -ROOT-)) (L (L (R (R (compound (NOUN Agent)) (nsubj (PROPN Cooper))) (root (VERB loves)