## Dependency Linearization Playground

In [1]:
from codelin.encs.enc_deps import *
from codelin.models.deps_tree import D_Tree
from codelin.models.linearized_tree import LinearizedTree

# Path (relative to the notebook's working directory) of the dependency
# treebank that every cell below operates on.
deps_treebank = "./ptb-dev.conllx"
# Read all trees from the file. NOTE(review): filter_projective=True presumably
# drops non-projective trees -- confirm against D_Tree.read_conllu_file.
trees = D_Tree.read_conllu_file(deps_treebank, filter_projective=True)

# Use the first tree as the running example for the single-tree cells.
sample_tree = trees[0]
# Printing the tree gives one tab-separated row per token (see the cell output).
print(sample_tree)
# Print whatever D_Tree.to_latex returns for this tree
# (presumably LaTeX source for drawing it -- TODO confirm).
print(D_Tree.to_latex(sample_tree))

0	-ROOT-	_	-ROOT-	_	_	0	-NOREL-	_	_
1	Influential	_	JJ	JJ	[None]	2	amod	_	_
2	members	_	NNS	NNS	[None]	10	nsubj	_	_
3	of	_	IN	IN	[None]	2	prep	_	_
4	the	_	DT	DT	[None]	6	det	_	_
5	House	_	NNP	NNP	[None]	6	nn	_	_
6	Ways	_	NNP	NNP	[None]	3	pobj	_	_
7	and	_	CC	CC	[None]	6	cc	_	_
8	Means	_	NNP	NNP	[None]	9	nn	_	_
9	Committee	_	NNP	NNP	[None]	6	conj	_	_
10	introduced	_	VBD	VBD	[None]	0	root	_	_
11	legislation	_	NN	NN	[None]	10	dobj	_	_
12	that	_	WDT	WDT	[None]	14	nsubj	_	_
13	would	_	MD	MD	[None]	14	aux	_	_
14	restrict	_	VB	VB	[None]	11	rcmod	_	_
15	how	_	WRB	WRB	[None]	22	advmod	_	_
16	the	_	DT	DT	[None]	20	det	_	_
17	new	_	JJ	JJ	[None]	20	amod	_	_
18	savings-and-loan	_	JJ	JJ	[None]	20	nn	_	_
19	bailout	_	NN	NN	[None]	20	nn	_	_
20	agency	_	NN	NN	[None]	22	nsubj	_	_
21	can	_	MD	MD	[None]	22	aux	_	_
22	raise	_	VB	VB	[None]	14	ccomp	_	_
23	capital	_	NN	NN	[None]	22	dobj	_	_
24	,	_	,	,	[None]	14	punct	_	_
25	creating	_	VBG	VBG	[None]	14	xcomp	_	_
26	another	_	DT	DT	[None]	28	det	_	_
27	potenti

<div style="text-align:center"><img src="./pics/notebooks/d_tree_1.png" /></div>

### Encode with the 4-bit encoding

In [2]:
# Build the 4-bit bracketing encoder. In the printed labels below, "_" is the
# character that sits between the bracket string and the relation name
# (e.g. "<*_amod").
enc_4b = D_Brk4BitsEncoding(separator="_")
# Linearize the sample tree; printing it shows one word / tag / label row per
# token, framed by -BOS- and -EOS- rows.
lin_tree = enc_4b.encode(sample_tree)
print(lin_tree)
# Decode the labels back into a tree and score it against the original.
# The cell output shows 1.0 for this sentence.
# NOTE(review): las_score is presumably the labeled attachment score -- confirm.
dec_tree = enc_4b.decode(lin_tree)
print("LAS =",dec_tree.las_score(sample_tree))

-BOS-	-BOS-	-BOS-
-ROOT-	-ROOT-	/_-NOREL-
Influential	JJ	<*_amod
members	NNS	\<*/_nsubj
of	IN	>*/_prep
the	DT	<*_det
House	NNP	<_nn
Ways	NNP	\>*/_pobj
and	CC	>_cc
Means	NNP	<*_nn
Committee	NNP	\>*_conj
introduced	VBD	\>*/_root
legislation	NN	>/_dobj
that	WDT	<*_nsubj
would	MD	<_aux
restrict	VB	\>*/_rcmod
how	WRB	<*_advmod
the	DT	<*_det
new	JJ	<_amod
savings-and-loan	JJ	<_nn
bailout	NN	<_nn
agency	NN	\<_nsubj
can	MD	<_aux
raise	VB	\>/_ccomp
capital	NN	>*_dobj
,	,	>_punct
creating	VBG	>*/_xcomp
another	DT	<*_det
potential	JJ	<_amod
obstacle	NN	\>*/_dobj
to	TO	>*/_prep
the	DT	<*_det
government	NN	\<*/_poss
's	POS	>*_possessive
sale	NN	\>*/_pobj
of	IN	>*/_prep
sick	JJ	<*_amod
thrifts	NNS	\>*_pobj
.	.	>*_punct
-EOS-	-EOS-	-EOS-

LAS = 1.0


### Test for all trees

In [3]:
# Round-trip check over the whole treebank: encode each tree, decode it back,
# and report every tree whose decoded form does not score exactly 1 against
# the original. The cell prints nothing when every tree round-trips cleanly.
enc_4b = D_Brk4BitsEncoding(separator="_")
# The loop-local names (tree / encoded / decoded) are deliberately distinct
# from the notebook-level sample_tree / lin_tree / dec_tree, so running this
# cell does not overwrite the single-tree example used by the earlier cells.
for i, tree in enumerate(trees):
    encoded = enc_4b.encode(tree)
    decoded = enc_4b.decode(encoded)
    las = decoded.las_score(tree)

    if las != 1:
        print("Error at tree",i,"length",len(tree))
        # Reuse the score computed above instead of calling las_score again.
        print("LAS =",las)