-
Notifications
You must be signed in to change notification settings - Fork 542
/
application.conf
87 lines (70 loc) · 2.59 KB
/
application.conf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
deepdive {
# PostgreSQL connection settings. Every value except the driver class is a
# HOCON substitution (PGHOST, PGPORT, PGUSER, PGPASSWORD, DBNAME) that must
# resolve when the file is loaded.
db.default {
  driver   : "org.postgresql.Driver"
  host     : ${PGHOST}
  port     : ${PGPORT}
  dbname   : ${DBNAME}
  user     : ${PGUSER}
  password : ${PGPASSWORD}
  # JDBC URL concatenated from the same host, port and database name as above.
  url      : "jdbc:postgresql://"${PGHOST}":"${PGPORT}"/"${DBNAME}
}
# Variable declarations go here.
schema.variables {
  # Column `tag` of relation `words`, declared as Categorical(13).
  # NOTE(review): presumably 13 distinct tag values — confirm against the data.
  words {
    tag: Categorical(13)
  }
}
# Put your extractors here
extraction.extractors {
# Training-data extractor: names an input query over `words_raw`, a UDF
# script under APP_HOME, and `words` as the output relation.
ext_training {
  style: "tsv_extractor"
  udf: ${APP_HOME}"/udf/ext_training.py"
  input: "select * from words_raw"
  output_relation: "words"
}
# Feature extractor; declared to depend on ext_training.
ext_features {
  style: "tsv_extractor"
  dependencies: ["ext_training"]
  udf: ${APP_HOME}"/udf/ext_features.py"
  output_relation: "word_features"
  # Self-join of `words`: each row w1 with a non-null word is paired with the
  # row w2 whose word_id is exactly one less. The SQL is kept flush-left so
  # the string value is unchanged.
  input: """
select w1.word_id as "w1.word_id", w1.word as "w1.word", w1.pos as "w1.pos",
w2.word as "w2.word", w2.pos as "w2.pos"
from words w1, words w2
where w1.word_id = w2.word_id + 1 and w1.word is not null"""
}
}
# Put your inference rules here
inference.factors {
# Factor over a single variable (words.tag). The input query joins `words`
# with `word_features` on word_id and skips rows whose word is null.
factor_feature {
  function: "Multinomial(words.tag)"
  # Weight expression is parameterised by the `feature` column of the query.
  weight: "?(feature)"
  input_query: """select words.id as "words.id", words.tag as "words.tag", word_features.feature as "feature"
from words, word_features
where words.word_id = word_features.word_id and words.word is not null"""
}
# Factor over the tags of two rows of `words`: w1 and the row w2 whose
# word_id is exactly one greater.
factor_linear_chain_crf {
  function: "Multinomial(words.w1.tag, words.w2.tag)"
  weight: "?"
  input_query: """select w1.id as "words.w1.id", w2.id as "words.w2.id", w1.tag as "words.w1.tag", w2.tag as "words.w2.tag"
from words w1, words w2
where w2.word_id = w1.word_id + 1"""
}
# Skip-chain factor: links a row w1 (with a non-null tag) to a row w2 in the
# same sentence that has the same word and a larger id. Only one partner per
# w1 is kept (rn = 1).
#
# The window is ordered by w2.id so the partner kept is always the matching
# row with the smallest id. Without an ORDER BY, row_number() numbers the rows
# of each partition in an unspecified order, so the selected pair could differ
# from one run to the next.
factor_skip_chain_crf {
  input_query: """select *
from
(select w1.id as "words.w1.id", w2.id as "words.w2.id", w1.tag as "words.w1.tag", w2.tag as "words.w2.tag",
row_number() over (partition by w1.id order by w2.id) as rn
from words w1, words w2
where w1.tag is not null and w1.sent_id = w2.sent_id and w1.word = w2.word and w1.id < w2.id) scrf
where scrf.rn = 1"""
  function: "Multinomial(words.w1.tag, words.w2.tag)"
  weight: "?"
}
}
# No pipeline is selected by this file; the selector below is commented out.
# pipeline.run: "inference"
# Named pipelines: `ext` lists the two extractors, `inference` the three factors.
pipeline.pipelines {
  ext: ["ext_training", "ext_features"]
  inference: ["factor_feature", "factor_linear_chain_crf", "factor_skip_chain_crf"]
}
# Holdout set is given as a query, not a fraction: the id of every row of
# `words` with word_id > 220663 is inserted into dd_graph_variables_holdout.
calibration.holdout_query: "INSERT INTO dd_graph_variables_holdout(variable_id) SELECT id FROM words WHERE word_id > 220663"
}