-
Notifications
You must be signed in to change notification settings - Fork 89
/
opusTCv20210807_transformer-big_2022-09-15.yml
82 lines (82 loc) · 2.67 KB
/
opusTCv20210807_transformer-big_2022-09-15.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# Release identification for this model package.
# release: path of the released zip archive.
release: heb-sla/opusTCv20210807_transformer-big_2022-09-15.zip
# NOTE(review): an unquoted ISO date is resolved to a date object by
# YAML 1.1 loaders; confirm consumers do not expect a plain string.
release-date: 2022-09-15
# Name of the data release and the model architecture label.
dataset-name: opusTCv20210807
modeltype: transformer-big
# Vocabulary file per side; the same file is listed for both sides.
vocabulary:
  source: opusTCv20210807.spm32k-spm32k.vocab.yml
  target: opusTCv20210807.spm32k-spm32k.vocab.yml
# Free-text summary of the pre-processing applied.
pre-processing: normalization + SentencePiece (spm32k,spm32k)
# Subword scheme per side.
subwords:
  source: spm32k
  target: spm32k
# Subword model file per side.
subword-models:
  source: source.spm
  target: target.spm
# Languages on the input side of the model.
source-languages:
  - heb
# Languages on the output side; includes bel_Latn, which has no
# counterpart in raw-target-languages.
target-languages:
  - bel
  - bel_Latn
  - bul
  - ces
  - pol
  - rus
  - slv
  - ukr
# Language lists without the script-specific bel_Latn entry.
raw-source-languages:
  - heb
raw-target-languages:
  - bel
  - bul
  - ces
  - pol
  - rus
  - slv
  - ukr
# NOTE(review): no value is given here, so this key loads as null;
# confirm whether a list of target-language labels was intended.
use-target-labels:
# Training set per language pair.
# NOTE(review): the number in parentheses is presumably the size of
# the set (sentence pairs) - confirm against the data release.
training-data:
  heb-bel: Tatoeba-train-v2021-08-07.bel-heb.strict (375943)
  heb-bel_Latn: Tatoeba-train-v2021-08-07.bel_Latn-heb.strict (835)
  heb-bul: Tatoeba-train-v2021-08-07.bul-heb.strict (26171981)
  heb-ces: Tatoeba-train-v2021-08-07.ces-heb.strict (29695074)
  heb-pol: Tatoeba-train-v2021-08-07.heb-pol.strict (33243574)
  heb-rus: Tatoeba-train-v2021-08-07.heb-rus.strict (24998215)
  heb-slv: Tatoeba-train-v2021-08-07.heb-slv.strict (16712836)
  heb-ukr: Tatoeba-train-v2021-08-07.heb-ukr.strict (3376606)
# Development set per language pair, given as "<set name>, <count>".
validation-data:
  bel-heb: Tatoeba-dev-v2021-08-07, 969
  bul-heb: Tatoeba-dev-v2021-08-07, 1000
  ces-heb: Tatoeba-dev-v2021-08-07, 1003
  heb-pol: Tatoeba-dev-v2021-08-07, 7727
  heb-rus: Tatoeba-dev-v2021-08-07, 3546
  heb-slv: Tatoeba-dev-v2021-08-07, 1000
  heb-ukr: Tatoeba-dev-v2021-08-07, 988
  # NOTE(review): the two entries below describe the dev set, so they
  # are placed under validation-data; confirm against the generator.
  total-size-shuffled: 6948
  devset-selected: top 5000 lines of Tatoeba-dev-v2021-08-07.src.shuffled
# Test set sizes, keyed by test-set name.
# NOTE(review): values look like "<sentences>/<words>" - confirm.
test-data:
  Tatoeba-test-v2021-08-07.heb-bel: 52/302
  Tatoeba-test-v2021-08-07.heb-bul: 1/4
  Tatoeba-test-v2021-08-07.heb-ces: 34/181
  Tatoeba-test-v2021-08-07.heb-pol: 5000/31462
  Tatoeba-test-v2021-08-07.heb-rus: 2500/16481
  Tatoeba-test-v2021-08-07.heb-slv: 2/8
  Tatoeba-test-v2021-08-07.heb-ukr: 966/5175
  Tatoeba-test-v2021-08-07.heb-multi: 8555/53756
# BLEU score per test set, keyed by test-set name.
BLEU-scores:
  Tatoeba-test-v2021-08-07.heb-bel: 34.8
  Tatoeba-test-v2021-08-07.heb-bul: 100.0
  Tatoeba-test-v2021-08-07.heb-ces: 36.5
  Tatoeba-test-v2021-08-07.heb-pol: 42.7
  Tatoeba-test-v2021-08-07.heb-rus: 40.4
  Tatoeba-test-v2021-08-07.heb-slv: 61.8
  Tatoeba-test-v2021-08-07.heb-ukr: 37.6
  Tatoeba-test-v2021-08-07.heb-multi: 41.6
# chr-F score per test set, keyed by test-set name.
# Values are fractions on a 0-1 scale.
chr-F-scores:
  Tatoeba-test-v2021-08-07.heb-bel: 0.52606
  # Perfect score; the BLEU entry for this test set is 100.0.
  Tatoeba-test-v2021-08-07.heb-bul: 1.00000
  Tatoeba-test-v2021-08-07.heb-ces: 0.66213
  Tatoeba-test-v2021-08-07.heb-pol: 0.63458
  Tatoeba-test-v2021-08-07.heb-rus: 0.60154
  Tatoeba-test-v2021-08-07.heb-slv: 0.76894
  Tatoeba-test-v2021-08-07.heb-ukr: 0.58660
  Tatoeba-test-v2021-08-07.heb-multi: 0.62078