Permalink
Fetching contributors…
Cannot retrieve contributors at this time
89 lines (68 sloc) 1.8 KB
# Variable relation (declared with `?`): one `tag` value is predicted for each
# (sent_id, word_id) pair; both id columns are annotated with @key.
chunk?(
    @key sent_id bigint,
    @key word_id bigint,
    tag text
).
# Single-column relation of tag values; the supervision rule pairs every word
# with every row of this relation.
tags(
    tag text
).
# input data ##################################################################
# Input words. Each row carries the sentence id, the word id, the word text,
# a `pos` label (presumably the part-of-speech tag -- confirm against the data
# loader) and the `true_tag` that the supervision rule compares against.
words(
    sent_id  bigint,
    word_id  bigint,
    word     text,
    pos      text,
    true_tag text
).
# Embedding table: a float array `vec` per `word` key.
# NOTE(review): word_vec looks keys up by lower(word), so the keys are
# presumably stored lower-cased -- confirm against the embedding file.
word_embedding(
    word text,
    vec  float[]
).
# Integer indexes; the word embedding feature rule uses each row as a
# subscript into an embedding vector (vec[k]).
vec_dims(
    idx int
).
# Derived relation (word id, embedding vector): each row of `words` is matched
# with the word_embedding row whose key equals the lower-cased word text.
# Sentence id, pos and true_tag are not needed here, hence the `_` placeholders.
word_vec(wid, vec) :-
    words(_, wid, token, _, _),
    word_embedding(lowered, vec),
    lowered = lower(token).
# supervision / scope of tags #################################################
# Supervision: for every word and every candidate tag from `tags`, the chunk
# variable is labelled TRUE when the candidate equals the word's true_tag and
# FALSE otherwise. The second head argument is the word id (second column of
# `words`), not the word text.
chunk(sent, wid, tag) = (
    if tag = true_tag then TRUE else FALSE end
) :-
    words(sent, wid, _, _, true_tag),
    tags(tag).
# unigram features ###############################################################
# Unary feature on a word's chunk variable; weights are tied by the
# (tag, word text) pair.
@name("current word")
@weight(tag, word)
chunk(sent, wid, tag) :-
    words(sent, wid, word, _, _).
# Unary feature on a word's chunk variable; weights are tied by the
# (tag, pos) pair of that word.
@name("current pos tag")
@weight(tag, pos)
chunk(sent, wid, tag) :-
    words(sent, wid, _, pos, _).
# Unary feature on the current word's chunk variable; weights are tied by
# (tag, pos of the current word, pos of the word whose id is one greater
# within the same sentence).
@name("current and next pos")
@weight(tag, pos1, pos2)
chunk(sent, cur, tag) :-
    words(sent, cur, _, pos1, _),
    words(sent, nxt, _, pos2, _),
    nxt = cur + 1.
# Unary feature on the current word's chunk variable; weights are tied by
# (tag, pos of the word whose id is one smaller within the same sentence,
# pos of the current word).
@name("current and prev pos")
@weight(tag, pos1, pos2)
chunk(sent, cur, tag) :-
    words(sent, before, _, pos1, _),
    words(sent, cur, _, pos2, _),
    cur = before + 1.
# bigram features ###############################################################
# Pairwise factor joining the chunk variables of two words in the same
# sentence whose ids differ by one; weights are tied by the (tag1, tag2) pair.
@name("linear chain")
@weight(tag1, tag2)
chunk(sent, cur, tag1) ^ chunk(sent, nxt, tag2) :-
    nxt = cur + 1.
# Pairwise factor over the chunk variables of two words with consecutive ids
# in the same sentence; weights are tied by both tags and both words' pos
# values.
@name("linear chain by pos")
@weight(tag1, tag2, pos1, pos2)
chunk(sent, cur, tag1) ^ chunk(sent, nxt, tag2) :-
    words(sent, cur, _, pos1, _),
    words(sent, nxt, _, pos2, _),
    nxt = cur + 1.
# word embedding features ###############################################################
# Word-embedding feature: weights are tied by (tag, k) for every index k in
# vec_dims, and the factor's feature value is element k of the word's
# embedding vector.
# The vector is looked up by the word id `w`, because word_vec's first column
# is filled from words.word_id. Looking it up by the sentence id `s` would pair
# each word with the vector of whichever word happens to have word_id = sent_id.
# NOTE(review): word_vec carries no sentence id, so this assumes word_id is
# unique across sentences -- confirm against the input data.
@name("word embedding")
@weight(tag, k)
@feature_value(vec[k])
chunk(s, w, tag) :-
    words(s, w, _, _, _),
    word_vec(w, vec),
    vec_dims(k).