/
app.ddlog
73 lines (63 loc) · 2.01 KB
/
app.ddlog
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# Variable relation (the trailing `?` declares it as a relation of random
# variables). Rows are keyed by (sent_id, word_id) and carry a tag value.
chunk?(
    @key sent_id bigint,
    @key word_id bigint,
    tag text
).
# Single-column relation of tag values; the supervision rule joins against
# it to enumerate every tag for each word.
tags(
    tag text
).
# input data ##################################################################
# Raw input relation, one row per word occurrence.
#   sent_id, word_id : identify the word (word_id is compared arithmetically
#                      by later rules, e.g. w1 = w2 + 1)
#   word, pos        : text columns (pos is presumably a part-of-speech tag;
#                      confirm against the data loader)
#   true_tag         : text label; the derivation rule for `words` tests it
#                      against values such as "B-UCP", "I-UCP" and "O"
words_raw(
    sent_id  bigint,
    word_id  bigint,
    word     text,
    pos      text,
    true_tag text
).
# Derived relation: the columns of `words_raw` plus a trailing `tag` column,
# which the derivation rule below fills from true_tag (possibly with NULL).
words(
    sent_id  bigint,
    word_id  bigint,
    word     text,
    pos      text,
    true_tag text,
    tag      text
).
# supervision / scope of tags #################################################
# Supervision rule: for every row of `words` combined with every row of
# `tags`, label chunk(s, w, tag) TRUE when the tag equals the word's
# supervised tag and NULL otherwise. This rule never assigns FALSE.
# `w` is bound to the word_id column (second column of `words`).
chunk(s, w, tag) = (
    if tag = gold_tag then TRUE
    else NULL
    end
) :-
    words(s, w, _, _, _, gold_tag),
    tags(tag).
# TODO: fold the following if-then-else directly into the supervision rule
# once DDlog supports true unification (i.e. `tag_supervised = if ... end`
# in the rule body).
# Derives `words` from `words_raw`, copying the five raw columns and
# computing the sixth (`tag`) from the raw label, checked in this order:
#   1. "B-UCP" or "I-UCP"                  -> NULL
#   2. strpos(label, "-") > 0              -> split_part(label, "-", 2)
#   3. "O"                                 -> "O"
#   4. anything else                       -> NULL
# Assuming PostgreSQL semantics for strpos/split_part, case 2 keeps the part
# after the first "-" (e.g. the "NP" of "B-NP") -- confirm for the target DB.
words(s, w, word, pos, gold,
    if gold = "B-UCP" then NULL
    else if gold = "I-UCP" then NULL
    else if strpos(gold, "-") > 0 then split_part(gold, "-", 2)
    else if gold = "O" then "O"
    else NULL
    end
) :-
    words_raw(s, w, word, pos, gold).
# features ####################################################################
# One row per (word, feature): the word is identified by (sent_id, word_id)
# and the feature is a text value. Rows are appended by the ext_features rule.
word_features(
    sent_id bigint,
    word_id bigint,
    feature text
).
# User-defined function declaration.
#   input  : tuples (sent_id, word_id1, word1, pos1, word2, pos2)
#   output : rows with the same schema as `word_features`
#   impl   : the script udf/ext_features.py, exchanging TSV lines
function ext_features
    over (
        sent_id  bigint,
        word_id1 bigint,
        word1    text,
        pos1     text,
        word2    text,
        pos2     text
    )
    returns rows like word_features
    implementation "udf/ext_features.py" handles tsv lines.
# Feature extraction: pairs each word whose text is not NULL with the word in
# the same sentence whose word_id is exactly one less, passes both to
# ext_features, and appends the returned rows to word_features.
# A word with no such predecessor row produces no input tuple here.
word_features +=
    ext_features(s, w, word, pos, prev_word, prev_pos) :-
    words(s, w, word, pos, _, _),
    words(s, w_prev, prev_word, prev_pos, _, _),
    [w = w_prev + 1],
    word IS NOT NULL.
# Per-word feature rule: each word_features row (s, w, feat) contributes to
# chunk(s, w, tag); the weight is parameterized by the (feature, tag) pair.
@weight(feat, tag)
chunk(s, w, tag) :-
    word_features(s, w, feat).
# linear chains ###############################################################
# Linear chain: conjunction (`^`) of the chunk variables of two words in the
# same sentence whose word ids differ by exactly one; the weight is
# parameterized by the pair of tags.
@weight(tag_prev, tag_next)
chunk(s, w_prev, tag_prev) ^ chunk(s, w_next, tag_next) :-
    w_next = w_prev + 1.
# skip chain ##################################################################
# Skip chain: conjunction (`^`) of the chunk variables of any two words in
# the same sentence where the first word id is smaller than the second; the
# weight is parameterized by the pair of tags.
# NOTE(review): `<` also matches adjacent words (already covered by the
# linear-chain rule) and every other pair in a sentence -- confirm that this
# all-pairs coupling is intended.
@weight(tag_early, tag_late)
chunk(s, w_early, tag_early) ^ chunk(s, w_late, tag_late) :-
    w_early < w_late.