In [1]:
from astred import *
from astred.enum import *

In [2]:
# English (source) sentence. Each entry is (text, head, deprel), matching the
# Word keyword arguments below; word ids are assigned from 1 by enumerate().
en_words = [
    ("Sometimes", 3, "advmod"),
    ("she", 3, "nsubj"),
    ("asks", 0, "root"),
    ("me", 3, "obj"),
    ("why", 7, "advmod"),
    ("I", 7, "nsubj"),
    ("used", 3, "ccomp"),
    ("to", 9, "mark"),
    ("call", 7, "xcomp"),
    ("her", 11, "nmod"),
    ("father", 9, "obj"),
    ("Harold", 9, "xcomp"),
    (".", 3, "punct"),
]
sent_en = Sentence(
    [
        Word(id=idx, text=text, head=head, deprel=deprel)
        for idx, (text, head, deprel) in enumerate(en_words, 1)
    ],
    side=Side.SRC,
)

# Dutch (target) sentence, same (text, head, deprel) layout.
nl_words = [
    ("Soms", 2, "advmod"),
    ("vraagt", 0, "root"),
    ("ze", 2, "nsubj"),
    ("waarom", 9, "advmod"),
    ("ik", 9, "nsubj"),
    ("haar", 7, "nmod"),
    ("vader", 9, "obj"),
    ("Harold", 9, "xcomp"),
    ("noemde", 2, "xcomp"),
    (".", 2, "punct"),
]
sent_nl = Sentence(
    [
        Word(id=idx, text=text, head=head, deprel=deprel)
        for idx, (text, head, deprel) in enumerate(nl_words, 1)
    ],
    side=Side.TGT,
)

# Word alignments as space-separated "src-tgt" pairs.
# NOTE(review): the indices look 0-based (e.g. "1-2" pairs "she" with "ze");
# confirm against the astred documentation.
aligns = "0-0 1-2 2-1 4-3 5-4 6-8 7-8 8-8 9-5 10-6 11-7 12-9"
aligned = AlignedSentences(sent_en, sent_nl, word_aligns=aligns)

[(0, 0, False), (1, 1, False), (2, 3, False), (3, 2, False), (7, 9, False), (10, 6, False), (13, 10, False)]
[[Null(id=0, is_null=True, side=src)], [Word(id=1, is_null=False, side=src)], [Word(id=2, is_null=False, side=src)], [Word(id=3, is_null=False, side=src)], [Word(id=7, is_null=False, side=src), Word(id=8, is_null=False, side=src), Word(id=9, is_null=False, side=src)], [Word(id=10, is_null=False, side=src), Word(id=11, is_null=False, side=src)], [Word(id=13, is_null=False, side=src)]]
[[Null(id=0, is_null=True, side=tgt)], [Word(id=1, is_null=False, side=tgt)], [Word(id=3, is_null=False, side=tgt)], [Word(id=2, is_null=False, side=tgt)], [Word(id=9, is_null=False, side=tgt)], [Word(id=6, is_null=False, side=tgt), Word(id=7, is_null=False, side=tgt)], [Word(id=10, is_null=False, side=tgt)]]


In [3]:
# Print the three sentence-level cross values exposed by the aligned pair,
# each prefixed with the name of the attribute it was read from.
for metric in ("word_cross", "seq_cross", "sacr_cross"):
    print(metric, getattr(aligned, metric))

word_cross 10
seq_cross 2
sacr_cross 3


In [4]:
# Per-word view of the source sentence: the word's own cross value, the cross
# value of its sequence group and of its SACr group, and the word's position
# inside each of those groups.
for word in sent_en:
    if word.is_null:
        # Skip the NULL placeholder entry of the sentence.
        continue

    print("text", word.text)
    print("word_cross", word.cross)
    print("seq_cross", word.seq_group.cross, "ID in group:", word.id_in_seq_group)
    print("sacr_cross", word.sacr_group.cross, "ID in group:", word.id_in_sacr_group)
    print()

text Sometimes
word_cross 0
seq_cross 0 ID in group: 0
sacr_cross 0 ID in group: 0

text she
word_cross 1
seq_cross 1 ID in group: 0
sacr_cross 1 ID in group: 0

text asks
word_cross 1
seq_cross 1 ID in group: 0
sacr_cross 1 ID in group: 0

text me
word_cross 0
seq_cross 0 ID in group: 0
sacr_cross 0 ID in group: 0

text why
word_cross 0
seq_cross 0 ID in group: 0
sacr_cross 0 ID in group: 0

text I
word_cross 0
seq_cross 0 ID in group: 1
sacr_cross 0 ID in group: 0

text used
word_cross 3
seq_cross 1 ID in group: 0
sacr_cross 2 ID in group: 0

text to
word_cross 3
seq_cross 1 ID in group: 1
sacr_cross 2 ID in group: 1

text call
word_cross 3
seq_cross 1 ID in group: 2
sacr_cross 2 ID in group: 2

text her
word_cross 3
seq_cross 1 ID in group: 0
sacr_cross 1 ID in group: 0

text father
word_cross 3
seq_cross 1 ID in group: 1
sacr_cross 1 ID in group: 1

text Harold
word_cross 3
seq_cross 1 ID in group: 2
sacr_cross 1 ID in group: 0

text .
word_cross 0
seq_cross 0 ID in group: 0
sacr_c

In [5]:
# For every non-null sequence span of the source sentence, show its text and
# its is_valid_subtree flag.
for span in sent_en.seq_spans:
    if span.is_null:
        continue
    print(span.text, span.is_valid_subtree)

# Blank line between the two listings.
print()

# Same report for the SACr spans.
for span in sent_en.sacr_spans:
    if span.is_null:
        continue
    print(span.text, span.is_valid_subtree)

Sometimes True
she True
asks True
me True
why I False
used to call True
her father Harold False
. True

Sometimes True
she True
asks True
me True
why True
I True
used to call True
her father True
Harold True
. True


In [6]:
# For each real source word, print its total number of changes, followed by
# one line per aligned target word. changes() is consumed as a mapping from a
# target index (used to look the word up in sent_nl) to a change flag.
# NOTE(review): judging by the displayed output the flag marks a differing
# dependency label; confirm against the astred documentation.
for word in sent_en:
    if word.is_null:
        continue

    print(f"total changes for {word.text, word.deprel}: {word.num_changes()}")
    for tgt_idx, change in word.changes().items():
        tgt = sent_nl[tgt_idx]
        print(tgt.text, tgt.deprel, change)
    print()

total changes for ('Sometimes', 'advmod'): 0
Soms advmod False

total changes for ('she', 'nsubj'): 0
ze nsubj False

total changes for ('asks', 'root'): 0
vraagt root False

total changes for ('me', 'obj'): 1
[[NULL]] None True

total changes for ('why', 'advmod'): 0
waarom advmod False

total changes for ('I', 'nsubj'): 0
ik nsubj False

total changes for ('used', 'ccomp'): 1
noemde xcomp True

total changes for ('to', 'mark'): 1
noemde xcomp True

total changes for ('call', 'xcomp'): 0
noemde xcomp False

total changes for ('her', 'nmod'): 0
haar nmod False

total changes for ('father', 'obj'): 0
vader obj False

total changes for ('Harold', 'xcomp'): 0
Harold xcomp False

total changes for ('.', 'punct'): 0
. punct False



In [7]:
# Second example, English (source) side. Each entry is (text, head, deprel),
# matching the Word keyword arguments below; ids are assigned from 1.
en_words = [
    ("Does", 3, "aux"),
    ("he", 3, "nsubj"),
    ("believe", 0, "root"),
    ("in", 5, "case"),
    ("love", 3, "obl"),
    ("?", 3, "punct"),
]
sent_en = Sentence(
    [
        Word(id=idx, text=text, head=head, deprel=deprel)
        for idx, (text, head, deprel) in enumerate(en_words, 1)
    ],
    side=Side.SRC,
)

# Dutch (target) side, same (text, head, deprel) layout.
nl_words = [
    ("Gelooft", 0, "root"),
    ("hij", 1, "nsubj"),
    ("in", 5, "case"),
    ("de", 5, "det"),
    ("liefde", 1, "obl"),
    ("?", 1, "punct"),
]
sent_nl = Sentence(
    [
        Word(id=idx, text=text, head=head, deprel=deprel)
        for idx, (text, head, deprel) in enumerate(nl_words, 1)
    ],
    side=Side.TGT,
)

# Word alignments as space-separated "src-tgt" pairs; a source index may occur
# more than once (here "4-3 4-4").
# NOTE(review): indices look 0-based; confirm against the astred documentation.
aligns = "0-0 1-1 2-0 3-2 4-3 4-4 5-5"
aligned = AlignedSentences(sent_en, sent_nl, word_aligns=aligns)

[(0, 0, False), (1, 1, False), (2, 2, False), (4, 3, False), (5, 4, False)]
[[Null(id=0, is_null=True, side=src)], [Word(id=1, is_null=False, side=src)], [Word(id=2, is_null=False, side=src)], [Word(id=4, is_null=False, side=src), Word(id=5, is_null=False, side=src)], [Word(id=5, is_null=False, side=src)]]
[[Null(id=0, is_null=True, side=tgt)], [Word(id=1, is_null=False, side=tgt)], [Word(id=2, is_null=False, side=tgt)], [Word(id=3, is_null=False, side=tgt), Word(id=4, is_null=False, side=tgt), Word(id=5, is_null=False, side=tgt)], [Word(id=4, is_null=False, side=tgt), Word(id=5, is_null=False, side=tgt)]]


In [8]:
# Print the tree edit distance of the aligned pair, then every edit operation
# as "source node --> target node --- operation Cost n".
print("ASTrED", aligned.ted)
for node1, node2 in aligned.ted_ops:
    node1_repr = node1.node.connected_repr if node1 else None
    node2_repr = node2.node.connected_repr if node2 else None

    if node1:
        # The operation and its cost are read from the source-side node.
        edit_op = node1.astred_op
        edit_cost = node1.astred_cost
    else:
        # No source-side node: report an insertion at the configured cost.
        edit_op = EditOperation.INSERTION
        edit_cost = aligned.ted_config.costs[EditOperation.INSERTION]

    print(node1_repr, "-->", node2_repr, "---", edit_op, f"Cost {edit_cost}")

ASTrED 2
1.aux:1.root|3.root:1.root --> 1.aux:1.root|3.root:1.root --- match Cost 0
1.aux:1.root|3.root:1.root --> None --- deletion Cost 1
2.nsubj:2.nsubj --> 2.nsubj:2.nsubj --- match Cost 0
5.obl:4.det,5.obl --> 5.obl:4.det,5.obl --- match Cost 0
None --> 5.obl:4.det,5.obl --- insertion Cost 1
4.case:3.case --> 4.case:3.case --- match Cost 0
6.punct:6.punct --> 6.punct:6.punct --- match Cost 0


In [9]:
# Edit operation attached to the tree of each real word, source side first.
for word in sent_en:
    if word.is_null:
        continue
    print(word.text, word.tree.astred_op)

# Blank line between the source and target listings.
print()

# Same listing for the target sentence.
for word in sent_nl:
    if word.is_null:
        continue
    print(word.text, word.tree.astred_op)

Does deletion
he match
believe match
in match
love match
? match

Gelooft match
hij match
in match
de deletion
liefde match
? match


In [10]:
# For each sequence span (null spans excluded by the property used), print the
# span text and the text of the node of its tree, or None if it has no tree.
for span in sent_en.no_null_seq_spans:
    tree_text = span.tree.node.text if span.tree else None
    print(span.text, tree_text)

# Blank line between the two listings.
print()

# Same for all SACr spans, with the span type appended.
for span in sent_en.sacr_spans:
    tree_text = span.tree.node.text if span.tree else None
    print(span.text, tree_text, span.span_type)

Does Does
he he
believe believe
in love ? None

 None sacr
Does Does sacr
he he sacr
in love love sacr
love love sacr
? ? sacr
