# Evaluation of the Dependency Parser

This notebook evaluates my modified Direct Attentive Dependency Parser (DiaParser), which additionally takes Latin sentence embeddings (SBERT) into account.

In [1]:
from diaparser.parsers import Parser
from spacy import displacy

In [14]:
# Location of the trained parser checkpoint evaluated in this notebook.
model_path = "./exp/sbert-combined-sbatch/model"

## Example Sentences

In [15]:
# Load the parser from model_path; the load log reports the MLP size
# without SBERT (800) and with SBERT (1056).
parser = Parser.load(model_path)

Using bos_token, but it is not set yet.
Using eos_token, but it is not set yet.


mlp_size - no sbert 800
with sbert 1056


Using bos_token, but it is not set yet.
Using eos_token, but it is not set yet.


In [19]:
# Parse one sentence supplied as a list of tokens.
# Tokenisation is a plain split on spaces, so punctuation stays attached to
# the preceding word (the output contains tokens such as `togae,` and `laudi.`).
cedant_tokens = 'cedant arma togae, concedat laurea laudi.'.split(" ")
dataset = parser.predict(cedant_tokens, prob=True)

In [20]:
# Show the parsed sentences; each token is displayed as a 10-column CoNLL-U-style row.
dataset.sentences

[1	cedant	_	_	_	_	0	root	_	_
 2	arma	_	_	_	_	1	nsubj	_	_
 3	togae,	_	_	_	_	1	obl:arg	_	_
 4	concedat	_	_	_	_	1	conj	_	_
 5	laurea	_	_	_	_	4	nsubj	_	_
 6	laudi.	_	_	_	_	4	obl:arg	_	_]

In [21]:
# Draw the dependency tree of the first sentence in the prediction result.
sent = dataset.sentences[0]
render_options = {'compact': True, 'distance': 120}
displacy.render(
    sent.to_displacy(),
    style='dep',
    manual=True,  # the parse is passed as pre-built data, not a spaCy Doc
    options=render_options,
)

In [24]:
# Second example: parse, print the CoNLL-U rows, then draw the tree.
# Whitespace split only, so the final token keeps its `!`.
utinam_tokens = 'Vtinam tibi istam mentem di immortales duint!'.split(" ")
dataset = parser.predict(utinam_tokens, prob=True)
print(dataset.sentences)
sent = dataset.sentences[0]
render_options = {'compact': True, 'distance': 120}
displacy.render(
    sent.to_displacy(),
    style='dep',
    manual=True,
    options=render_options,
)

[1	Vtinam	_	_	_	_	7	advmod	_	_
2	tibi	_	_	_	_	7	obl:arg	_	_
3	istam	_	_	_	_	4	det	_	_
4	mentem	_	_	_	_	7	obj	_	_
5	di	_	_	_	_	7	nsubj	_	_
6	immortales	_	_	_	_	5	amod	_	_
7	duint!	_	_	_	_	0	root	_	_
]


The opening sentence of *Paddington* (in Latin translation):

In [8]:
# Parse the Paddington sentence, print its CoNLL-U rows and draw the tree.
# Whitespace split only, so the last token is `ferroviaria.` including the full stop.
paddington_tokens = 'Dominus Brunnus et Domina Brunna primum paddingtoni occurrerunt in crepidine ferroviaria.'.split(" ")
paddington_data = parser.predict(paddington_tokens, prob=True)
sent = paddington_data.sentences[0]
print(sent)
render_options = {'compact': True, 'distance': 120}
displacy.render(
    sent.to_displacy(),
    style='dep',
    manual=True,
    options=render_options,
)

1	Dominus	_	_	_	_	8	nsubj	_	_
2	Brunnus	_	_	_	_	1	appos	_	_
3	et	_	_	_	_	4	cc	_	_
4	Domina	_	_	_	_	1	conj	_	_
5	Brunna	_	_	_	_	4	appos	_	_
6	primum	_	_	_	_	8	advmod	_	_
7	paddingtoni	_	_	_	_	8	obl	_	_
8	occurrerunt	_	_	_	_	0	root	_	_
9	in	_	_	_	_	10	case	_	_
10	crepidine	_	_	_	_	8	obl	_	_
11	ferroviaria.	_	_	_	_	10	amod	_	_



In [9]:
# Parse the sentence "Gallia est omnis divisa in partes tres", print its
# CoNLL-U rows and draw the tree.
# The result gets its own name so that it is not mislabelled as Paddington
# data and does not overwrite the Paddington parse from the previous cell.
gallia_data = parser.predict('Gallia est omnis divisa in partes tres'.split(" "), prob=True)
sent = gallia_data.sentences[0]
print(sent)
displacy.render(sent.to_displacy(), style='dep', manual=True, options={'compact': True, 'distance': 120})

1	Gallia	_	_	_	_	4	nsubj:pass	_	_
2	est	_	_	_	_	4	aux:pass	_	_
3	omnis	_	_	_	_	1	det	_	_
4	divisa	_	_	_	_	0	root	_	_
5	in	_	_	_	_	6	case	_	_
6	partes	_	_	_	_	4	obl	_	_
7	tres	_	_	_	_	6	nummod	_	_



### Evaluation against datasets

These scores are not particularly informative: it is possible (even likely) that sentences from these dev and test sets also ended up in the combined dataset, so the results may be inflated by train/test leakage.

In [2]:
# CoNLL-U evaluation files, one per treebank directory under ../data.
# Note: Perseus is evaluated on its *test* file; all others use the *dev* file.
ittb_dev = "../data/ITTB/la_ittb-ud-dev.conllu"
dante_dev = "../data/Dante/la_udante-ud-dev.conllu"
late_dev = "../data/Late/la_llct-ud-dev.conllu"
perseus_test = "../data/Perseus/la_perseus-ud-test.conllu"
proiel_dev = "../data/PROIEL/la_proiel-ud-dev.conllu"

In [11]:
# Evaluate the SBERT parser on every treebank file in turn.
# The scores appear in the log lines printed by evaluate(); `loss` and `metric`
# are overwritten on each pass and end up holding the PROIEL result.
sbert_eval_sets = (
    ("ITTB dev:", ittb_dev),
    ("Dante dev:", dante_dev),
    ("Late dev:", late_dev),
    ("Perseus test:", perseus_test),
    ("PROIEL dev:", proiel_dev),
)
for heading, conllu_path in sbert_eval_sets:
    print(heading)
    loss, metric = parser.evaluate(conllu_path)

2024-02-07 14:25:30 INFO Loading the data


ITTB dev:


2024-02-07 14:25:31 INFO 
Dataset(n_sentences=2101, n_batches=10, n_buckets=8)
2024-02-07 14:25:31 INFO Evaluating the dataset
2024-02-07 14:25:33 INFO loss: 0.2426 - UCM: 76.39% LCM: 64.49% UAS: 96.34% LAS: 94.02%
2024-02-07 14:25:33 INFO 0:00:02.057792s elapsed, 1021.00 Sents/s
2024-02-07 14:25:33 INFO Loading the data


Dante dev:


2024-02-07 14:25:34 INFO 
Dataset(n_sentences=376, n_batches=8, n_buckets=8)
2024-02-07 14:25:34 INFO Evaluating the dataset
2024-02-07 14:25:35 INFO loss: 0.5023 - UCM: 42.29% LCM: 17.82% UAS: 92.96% LAS: 86.45%
2024-02-07 14:25:35 INFO 0:00:00.835834s elapsed, 449.85 Sents/s
2024-02-07 14:25:35 INFO Loading the data


Late dev:


2024-02-07 14:25:36 INFO 
Dataset(n_sentences=850, n_batches=8, n_buckets=8)
2024-02-07 14:25:36 INFO Evaluating the dataset
2024-02-07 14:25:38 INFO loss: 0.1065 - UCM: 84.59% LCM: 75.29% UAS: 98.73% LAS: 97.64%
2024-02-07 14:25:38 INFO 0:00:01.661290s elapsed, 511.65 Sents/s
2024-02-07 14:25:38 INFO Loading the data


Perseus test:


2024-02-07 14:25:38 INFO 
Dataset(n_sentences=939, n_batches=8, n_buckets=8)
2024-02-07 14:25:38 INFO Evaluating the dataset
2024-02-07 14:25:39 INFO loss: 0.5619 - UCM: 70.07% LCM: 47.07% UAS: 93.47% LAS: 86.83%
2024-02-07 14:25:39 INFO 0:00:01.084739s elapsed, 865.65 Sents/s
2024-02-07 14:25:39 INFO Loading the data


PROIEL dev:


2024-02-07 14:25:40 INFO 
Dataset(n_sentences=1233, n_batches=8, n_buckets=8)
2024-02-07 14:25:40 INFO Evaluating the dataset
2024-02-07 14:25:41 INFO loss: 0.4200 - UCM: 73.48% LCM: 59.69% UAS: 94.86% LAS: 91.49%
2024-02-07 14:25:41 INFO 0:00:01.294438s elapsed, 952.54 Sents/s


Evaluating the same datasets with the prior state-of-the-art parser (`la_ittb_llct.mbert`):

In [3]:
# Baseline: load the pretrained la_ittb_llct.mbert model and run it over the
# same files as the SBERT parser above. As there, `loss` and `metric` are
# overwritten on each pass; the scores are read from the log output.
prior_sota_parser = Parser.load("la_ittb_llct.mbert")
print("Prior SOTA Parser")
baseline_eval_sets = (
    ("ITTB dev:", ittb_dev),
    ("Dante dev:", dante_dev),
    ("Late dev:", late_dev),
    ("Perseus test:", perseus_test),
    ("PROIEL dev:", proiel_dev),
)
for heading, conllu_path in baseline_eval_sets:
    print(heading)
    loss, metric = prior_sota_parser.evaluate(conllu_path)

mlp_size - no sbert 800


Using bos_token, but it is not set yet.
Using eos_token, but it is not set yet.
2024-02-08 10:11:45 INFO Loading the data


Prior SOTA Parser
ITTB dev:


2024-02-08 10:11:47 INFO 
Dataset(n_sentences=2101, n_batches=9, n_buckets=8)
2024-02-08 10:11:47 INFO Evaluating the dataset
2024-02-08 10:11:48 INFO loss: 6.7166 - UCM: 42.22% LCM: 11.99% UAS: 82.95% LAS: 67.08%
2024-02-08 10:11:48 INFO 0:00:01.565019s elapsed, 1342.48 Sents/s
2024-02-08 10:11:48 INFO Loading the data


Dante dev:


2024-02-08 10:11:49 INFO 
Dataset(n_sentences=376, n_batches=8, n_buckets=8)
2024-02-08 10:11:49 INFO Evaluating the dataset
2024-02-08 10:11:50 INFO loss: 6.0850 - UCM:  6.65% LCM:  0.00% UAS: 69.66% LAS: 50.99%
2024-02-08 10:11:50 INFO 0:00:00.786271s elapsed, 478.21 Sents/s
2024-02-08 10:11:50 INFO Loading the data


Late dev:


2024-02-08 10:11:51 INFO 
Dataset(n_sentences=850, n_batches=8, n_buckets=8)
2024-02-08 10:11:51 INFO Evaluating the dataset
2024-02-08 10:11:52 INFO loss: 3.6616 - UCM: 32.24% LCM: 15.06% UAS: 90.54% LAS: 85.40%
2024-02-08 10:11:52 INFO 0:00:01.081532s elapsed, 785.92 Sents/s
2024-02-08 10:11:52 INFO Loading the data


Perseus test:


2024-02-08 10:11:52 INFO 
Dataset(n_sentences=939, n_batches=8, n_buckets=8)
2024-02-08 10:11:52 INFO Evaluating the dataset
2024-02-08 10:11:53 INFO loss: 4.5610 - UCM: 22.79% LCM:  4.90% UAS: 63.47% LAS: 44.85%
2024-02-08 10:11:53 INFO 0:00:00.664938s elapsed, 1412.16 Sents/s
2024-02-08 10:11:53 INFO Loading the data


PROIEL dev:


2024-02-08 10:11:54 INFO 
Dataset(n_sentences=1233, n_batches=8, n_buckets=8)
2024-02-08 10:11:54 INFO Evaluating the dataset
2024-02-08 10:11:55 INFO loss: 4.8382 - UCM: 27.41% LCM:  6.65% UAS: 70.25% LAS: 51.31%
2024-02-08 10:11:55 INFO 0:00:00.913485s elapsed, 1349.78 Sents/s


Notes:



## Predict Test Data

In [12]:
# EvaLatin 2024 test inputs (prose and poetry) ...
test_prose = "../test_data/EvaLatin_2024_prose-test-data.conllu"
test_poetry = "../test_data/EvaLatin_2024_poetry_test_data.conllu"
# ... and the paths passed as `pred=` when this parser's predictions are generated.
output_prose = "../test_pred/parsing_EvaLatin_2024_prose-test-data_Behr_1.conllu"
output_poetry = "../test_pred/parsing_EvaLatin_2024_poetry_test_data_Behr_1.conllu"

In [13]:
# Parse the prose test file, passing output_prose as the prediction path.
# NOTE(review): text=None presumably means the input is read as an already
# tokenised CoNLL-U file - confirm against the diaparser predict API.
parser.predict(test_prose,pred=output_prose,text=None)

Dataset(n_sentences=299, n_batches=8, n_buckets=8)

In [14]:
# Parse the poetry test file, passing output_poetry as the prediction path.
parser.predict(test_poetry,pred=output_poetry,text=None)

Dataset(n_sentences=555, n_batches=8, n_buckets=8)

In [19]:
# Parse one long, unpunctuated verse passage supplied as whitespace-separated tokens.
tmp_sent = "soror Tonantis hoc enim solum mihi nomen relictum est semper alienum Iouem ac templa summi uidua deserui aetheris locumque caelo pulsa paelicibus dedi tellus colenda est paelices caelum tenent"
# The keyword is `prob`, as in every other predict() call in this notebook;
# the previous spelling `probs` was not the parameter name and therefore did
# not request probabilities.
dataset = parser.predict(tmp_sent.split(" "), prob=True)

In [22]:
# Print the CoNLL-U rows of the parsed passage and draw its dependency tree.
sent = dataset.sentences[0]
print(sent)
render_options = {'compact': True, 'distance': 120}
displacy.render(
    sent.to_displacy(),
    style='dep',
    manual=True,
    options=render_options,
)

1	soror	_	_	_	_	8	vocative	_	_
2	Tonantis	_	_	_	_	1	nmod	_	_
3	hoc	_	_	_	_	7	det	_	_
4	enim	_	_	_	_	8	discourse	_	_
5	solum	_	_	_	_	7	amod	_	_
6	mihi	_	_	_	_	8	obl	_	_
7	nomen	_	_	_	_	8	nsubj:pass	_	_
8	relictum	_	_	_	_	0	root	_	_
9	est	_	_	_	_	8	aux:pass	_	_
10	semper	_	_	_	_	11	advmod	_	_
11	alienum	_	_	_	_	12	amod	_	_
12	Iouem	_	_	_	_	17	obj	_	_
13	ac	_	_	_	_	17	cc	_	_
14	templa	_	_	_	_	12	conj	_	_
15	summi	_	_	_	_	18	amod	_	_
16	uidua	_	_	_	_	14	amod	_	_
17	deserui	_	_	_	_	8	conj	_	_
18	aetheris	_	_	_	_	14	nmod	_	_
19	locumque	_	_	_	_	23	obj	_	_
20	caelo	_	_	_	_	21	obl	_	_
21	pulsa	_	_	_	_	23	advcl	_	_
22	paelicibus	_	_	_	_	21	obl	_	_
23	dedi	_	_	_	_	8	conj	_	_
24	tellus	_	_	_	_	25	nsubj	_	_
25	colenda	_	_	_	_	8	conj	_	_
26	est	_	_	_	_	25	cop	_	_
27	paelices	_	_	_	_	29	nsubj	_	_
28	caelum	_	_	_	_	29	obj	_	_
29	tenent	_	_	_	_	8	conj	_	_



## Evaluation of Dev Data


In [2]:
# Same checkpoint as at the top of the notebook, declared again here
# (presumably so this section can be run on its own - confirm).
model_path = "./exp/sbert-combined-sbatch/model"

In [3]:
# Dev file of the combined dataset.
combined_dev = "../combined/dev.conllu"

In [4]:
# Load the SBERT parser from model_path.
parser = Parser.load(model_path)

Using bos_token, but it is not set yet.
Using eos_token, but it is not set yet.


mlp_size - no sbert 800
with sbert 1056


Using bos_token, but it is not set yet.
Using eos_token, but it is not set yet.


In [5]:
# Evaluate the SBERT parser on the combined dev file; evaluate() returns a (loss, metric) pair.
loss, metric = parser.evaluate(combined_dev)

2024-02-13 15:08:15 INFO Loading the data
2024-02-13 15:08:23 INFO 
Dataset(n_sentences=8984, n_batches=34, n_buckets=8)
2024-02-13 15:08:23 INFO Evaluating the dataset
2024-02-13 15:08:35 INFO loss: 0.4774 - UCM: 60.72% LCM: 46.42% UAS: 92.91% LAS: 89.37%
2024-02-13 15:08:35 INFO 0:00:11.070687s elapsed, 811.51 Sents/s


In [6]:
# Baseline for comparison: the la_ittb_llct.mbert model (the "prior SOTA" parser used earlier).
second_parser = Parser.load('la_ittb_llct.mbert')

mlp_size - no sbert 800


Using bos_token, but it is not set yet.
Using eos_token, but it is not set yet.


In [7]:
# Evaluate the baseline on the same combined dev file; this overwrites `loss` and `metric`.
loss, metric = second_parser.evaluate(combined_dev)

2024-02-13 15:08:37 INFO Loading the data
2024-02-13 15:08:45 INFO 
Dataset(n_sentences=8984, n_batches=35, n_buckets=8)
2024-02-13 15:08:45 INFO Evaluating the dataset
2024-02-13 15:08:53 INFO loss: 5.6423 - UCM: 39.87% LCM:  8.47% UAS: 83.52% LAS: 69.73%
2024-02-13 15:08:53 INFO 0:00:08.119031s elapsed, 1106.54 Sents/s


## Evaluation on the EvaLatin 2024 Test Data

In [13]:
# Same SBERT checkpoint as in the earlier sections, declared again for this section.
model_path = "./exp/sbert-combined-sbatch/model"

In [9]:
# Gold-annotated EvaLatin 2024 files used as evaluation targets below.
gold_prose = "../test_data/EvaLatin_2024_prose_gold.conllu"
gold_poetry = "../test_data/EvaLatin_2024_poetry_gold.conllu"

In [14]:
# Load the SBERT parser from model_path.
parser = Parser.load(model_path)

Using bos_token, but it is not set yet.
Using eos_token, but it is not set yet.


mlp_size - no sbert 800
with sbert 1056


Using bos_token, but it is not set yet.
Using eos_token, but it is not set yet.


In [5]:
# SBERT parser on the gold prose file.
loss, metric = parser.evaluate(gold_prose)

2024-03-09 12:13:50 INFO Loading the data
2024-03-09 12:13:50 INFO 
Dataset(n_sentences=299, n_batches=8, n_buckets=8)
2024-03-09 12:13:50 INFO Evaluating the dataset
2024-03-09 12:13:51 INFO loss: 2.6546 - UCM: 14.38% LCM:  3.68% UAS: 77.49% LAS: 64.15%
2024-03-09 12:13:51 INFO 0:00:00.643499s elapsed, 464.65 Sents/s


In [6]:
# SBERT parser on the gold poetry file.
loss, metric = parser.evaluate(gold_poetry)

2024-03-09 12:13:52 INFO Loading the data
2024-03-09 12:13:53 INFO 
Dataset(n_sentences=555, n_batches=8, n_buckets=8)
2024-03-09 12:13:53 INFO Evaluating the dataset
2024-03-09 12:13:53 INFO loss: 2.7063 - UCM: 35.50% LCM: 15.14% UAS: 75.81% LAS: 59.67%
2024-03-09 12:13:53 INFO 0:00:00.697075s elapsed, 796.18 Sents/s


In [7]:
# Baseline for comparison: the la_ittb_llct.mbert model.
second_parser = Parser.load('la_ittb_llct.mbert')

mlp_size - no sbert 800


Downloading tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

Using bos_token, but it is not set yet.
Using eos_token, but it is not set yet.


In [8]:
# Baseline on the gold prose file.
loss, metric = second_parser.evaluate(gold_prose)

2024-03-09 12:14:03 INFO Loading the data
2024-03-09 12:14:03 INFO 
Dataset(n_sentences=299, n_batches=8, n_buckets=8)
2024-03-09 12:14:03 INFO Evaluating the dataset
2024-03-09 12:14:03 INFO loss: 7.9201 - UCM:  2.34% LCM:  0.00% UAS: 53.91% LAS: 35.57%
2024-03-09 12:14:03 INFO 0:00:00.414106s elapsed, 722.04 Sents/s


In [9]:
# Baseline on the gold poetry file.
# NOTE(review): the recorded output of this cell reports n_sentences=299 and
# the same UCM/LCM/UAS/LAS as the prose run above, whereas the other poetry
# evaluations in this notebook report 555 sentences. The stored output looks
# stale (e.g. produced while gold_poetry pointed at the prose file) - re-run
# this cell before citing these numbers.
loss, metric = second_parser.evaluate(gold_poetry)

2024-03-09 12:14:05 INFO Loading the data
2024-03-09 12:14:06 INFO 
Dataset(n_sentences=299, n_batches=8, n_buckets=8)
2024-03-09 12:14:06 INFO Evaluating the dataset
2024-03-09 12:14:06 INFO loss: 7.8765 - UCM:  2.34% LCM:  0.00% UAS: 53.91% LAS: 35.57%
2024-03-09 12:14:06 INFO 0:00:00.412864s elapsed, 724.21 Sents/s


In [10]:
# Third model for comparison, loaded from the no-sbert-combined experiment directory
# (presumably the same parser trained without SBERT embeddings - confirm).
third_parser = Parser.load('./exp/no-sbert-combined/model')

mlp_size - no sbert 800


Using bos_token, but it is not set yet.
Using eos_token, but it is not set yet.


In [10]:
# Third parser on the gold prose file.
loss, metric = third_parser.evaluate(gold_prose)

2024-03-09 17:32:43 INFO Loading the data
2024-03-09 17:32:43 INFO 
Dataset(n_sentences=299, n_batches=8, n_buckets=8)
2024-03-09 17:32:43 INFO Evaluating the dataset
2024-03-09 17:32:44 INFO loss: 3.3226 - UCM: 14.38% LCM:  3.68% UAS: 77.39% LAS: 63.18%
2024-03-09 17:32:44 INFO 0:00:00.326076s elapsed, 916.96 Sents/s


In [11]:
# Third parser on the gold poetry file.
loss, metric = third_parser.evaluate(gold_poetry)

2024-03-09 17:32:46 INFO Loading the data
2024-03-09 17:32:46 INFO 
Dataset(n_sentences=555, n_batches=8, n_buckets=8)
2024-03-09 17:32:46 INFO Evaluating the dataset
2024-03-09 17:32:47 INFO loss: 2.8925 - UCM: 33.87% LCM: 13.69% UAS: 75.65% LAS: 59.33%
2024-03-09 17:32:47 INFO 0:00:00.431658s elapsed, 1285.74 Sents/s


In [17]:
# Third parser on the combined dev file; the (loss, metric) result is displayed as the cell value.
# NOTE(review): this literal path is the same file as `combined_dev` defined in the dev-data section.
third_parser.evaluate("../combined/dev.conllu")

2024-03-09 17:43:40 INFO Loading the data
2024-03-09 17:43:48 INFO 
Dataset(n_sentences=8984, n_batches=35, n_buckets=8)
2024-03-09 17:43:48 INFO Evaluating the dataset
2024-03-09 17:43:54 INFO loss: 0.4778 - UCM: 60.99% LCM: 46.50% UAS: 92.94% LAS: 89.43%
2024-03-09 17:43:54 INFO 0:00:05.670316s elapsed, 1584.39 Sents/s


(0.4778144657611847, UCM: 60.99% LCM: 46.50% UAS: 92.94% LAS: 89.43%)

In [16]:
# SBERT parser on the combined dev file, for side-by-side comparison with the third parser.
# NOTE(review): this cell's execution count (16) is lower than that of the cell
# above (17), i.e. it was run first although it appears second.
parser.evaluate("../combined/dev.conllu")

2024-03-09 17:43:17 INFO Loading the data
2024-03-09 17:43:25 INFO 
Dataset(n_sentences=8984, n_batches=36, n_buckets=8)
2024-03-09 17:43:25 INFO Evaluating the dataset
2024-03-09 17:43:36 INFO loss: 0.4830 - UCM: 60.83% LCM: 46.66% UAS: 92.92% LAS: 89.38%
2024-03-09 17:43:36 INFO 0:00:11.173303s elapsed, 804.06 Sents/s


(0.4830123285452525, UCM: 60.83% LCM: 46.66% UAS: 92.92% LAS: 89.38%)

## Get total output of the gold prose and poetry 
Generate full predictions for the prose and poetry test files with the other two models (the `la_ittb_llct.mbert` baseline and the no-SBERT parser), so that their output can be compared with that of the first model.

In [2]:
# EvaLatin 2024 test inputs, and the paths passed as `pred=` for the second parser's predictions.
test_prose = "../test_data/EvaLatin_2024_prose-test-data.conllu"
test_poetry = "../test_data/EvaLatin_2024_poetry_test_data.conllu"
output_prose_two = "../test_pred/parser_2_prose.conllu"
output_poetry_two = "../test_pred/parser_2_poetry.conllu"

In [3]:
# Second model: the la_ittb_llct.mbert baseline.
second_parser = Parser.load('la_ittb_llct.mbert')

mlp_size - no sbert 800


Using bos_token, but it is not set yet.
Using eos_token, but it is not set yet.


In [4]:
# Run the baseline over both test files, passing the parser_2 paths as `pred=`.
# Only the second call's return value is displayed, as it is the cell's last expression.
second_parser.predict(test_prose,pred=output_prose_two,text=None)
second_parser.predict(test_poetry,pred=output_poetry_two,text=None)



Dataset(n_sentences=555, n_batches=8, n_buckets=8)

In [5]:
# Third model, loaded from the no-sbert-combined experiment directory.
third_parser = Parser.load('./exp/no-sbert-combined/model')

mlp_size - no sbert 800


Using bos_token, but it is not set yet.
Using eos_token, but it is not set yet.


In [6]:
# Paths passed as `pred=` for the third parser's predictions.
output_prose_three = "../test_pred/parser_3_prose.conllu"
output_poetry_three = "../test_pred/parser_3_poetry.conllu"

In [7]:
# Run the third parser over both test files, passing the parser_3 paths as `pred=`.
# Only the second call's return value is displayed, as it is the cell's last expression.
third_parser.predict(test_prose,pred=output_prose_three,text=None)
third_parser.predict(test_poetry,pred=output_poetry_three,text=None)

Dataset(n_sentences=555, n_batches=8, n_buckets=8)