-
Notifications
You must be signed in to change notification settings - Fork 542
/
deepdive-compile
executable file
·202 lines (183 loc) · 6.8 KB
/
deepdive-compile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
#!/usr/bin/env bash
# deepdive-compile -- Compiles DeepDive source code into executables under run/
# $ deepdive compile [-v]...
# Adding -v flags will display more verbose logs.
#
# $ deepdive compile -q
# $ export DEEPDIVE_LOG_LEVEL=0
# $ deepdive compile
# Using the -q flag or setting the environment variable will quiet the
# compilation process.
##
# Author: Jaeho Shin <netj@cs.stanford.edu>
# Created: 2015-11-03
# Fail fast: abort on command errors, on unset variables, and on failures
# anywhere in a pipeline.
set -o errexit -o nounset -o pipefail

# Defaults for the environment variables that tune compilation.
# Logging verbosity (adjusted further by the -v/-q flags).
: "${DEEPDIVE_LOG_LEVEL:=1}"
# deepdive.conf can be augmented with extra content
: "${DEEPDIVE_CONFIG_EXTRA:=}"
# compiling app.ddlog and merging deepdive.conf and schema.json can be skipped
# by specifying a precompiled deepdive.conf.json
: "${DEEPDIVE_COMPILE_INPUT_JSON:=}"
# some codegen steps can be disabled
: "${DEEPDIVE_COMPILE_SKIP_DATAFLOW:=}"
: "${DEEPDIVE_COMPILE_SKIP_CODEGEN:=}"
# Parse command-line flags:
#   -v  raise DEEPDIVE_LOG_LEVEL by one (may be repeated)
#   -q  set DEEPDIVE_LOG_LEVEL to 0
while getopts "vq" o; do
  case $o in
    v)
      # Use arithmetic expansion rather than `let ++VAR`: `let` returns a
      # non-zero status whenever the result is 0 (e.g. level -1 plus one -v),
      # which would silently terminate the script under `set -e`.
      DEEPDIVE_LOG_LEVEL=$((DEEPDIVE_LOG_LEVEL + 1))
      ;;
    q)
      DEEPDIVE_LOG_LEVEL=0
      ;;
  esac
done
# Locate the DeepDive application via find-deepdive-app, export its path for
# child processes, and make it the working directory.
DEEPDIVE_APP=$(find-deepdive-app)
export DEEPDIVE_APP
cd "$DEEPDIVE_APP"

# Every compilation works in its own fresh, timestamp-named directory under run/.
compileDir=$(date '+%Y%m%d/%H%M%S.%N')
mkdir -p run "run/$compileDir"
# Duplicate the current stderr onto fd 3 so it stays reachable after fd 2 is
# redirected below (cleanup and abort direct output to fd 3).
exec 3>&2 # keep original stderr dup as fd 3
# log all the compilation steps and stderr with timestamps
# From here on, everything written to fd 2 is fed into logging-with-ts, which
# is given run/$compileDir/compile.log as its argument.
# NOTE(review): presumably logging-with-ts timestamps each line, stores it in
# that file, and echoes it on its stdout -- confirm against the helper.
if [[ $DEEPDIVE_LOG_LEVEL -gt 1 ]]; then
# verbose (log level above 1): the logger's stdout is forwarded to stderr
exec 2> >(logging-with-ts run/"$compileDir"/compile.log >&2)
else
# otherwise: the logger's stdout is discarded
exec 2> >(logging-with-ts run/"$compileDir"/compile.log >/dev/null)
fi
# Announces a compilation step by echoing the given arguments to stderr.
STEP() {
  echo "$@" >&2
}
# bookkeep some symlinks upon exit and abort
# Exit-time housekeeping: drops the run/RUNNING.COMPILE symlink, but only when
# it still refers to this compilation's directory.
# Globals: DEEPDIVE_APP, compileDir (read)
# Outputs: the function's stdout is sent to fd 3
cleanup() {
  cd "$DEEPDIVE_APP"
  if [[ run/RUNNING.COMPILE -ef run/"$compileDir" ]]; then
    rm -f run/RUNNING.COMPILE
  fi
} >&3
# Marks the current compilation as aborted, surfaces the error log, and
# terminates the script.
# Arguments: $1 - exit status to use (optional; defaults to the status of the
#                 last command executed before abort was invoked)
# Globals:   DEEPDIVE_APP, compileDir, DEEPDIVE_LOG_LEVEL (read)
# Outputs:   ln's verbose output on stderr; error-from-file's stderr on fd 3
# Returns:   never returns; always exits with a non-zero status
abort() {
  # Must be the first statement so that $? still holds the caller's status.
  local exitstatus=${1:-$?}
  # When invoked as a HUP/QUIT/INT/TERM trap, the last command may well have
  # succeeded ($? = 0); an aborted compilation must never be reported as success.
  [[ $exitstatus != 0 ]] || exitstatus=1
  cd "$DEEPDIVE_APP"
  # Leave a pointer to the failed compilation directory, if it was created.
  [[ ! -e run/"$compileDir" ]] || ln -sfnv "$compileDir" run/ABORTED.COMPILE >&2
  # Unless the log level is above 1, hand the log file to error-from-file with
  # its stderr on fd 3; a failure of that helper is deliberately ignored.
  [[ $DEEPDIVE_LOG_LEVEL -gt 1 ]] || error-from-file run/"$compileDir"/compile.log 2>&3 || true
  exit "$exitstatus"
}
# Run cleanup whenever the script exits.
trap cleanup EXIT
# Route command failures (ERR) and the HUP/QUIT/INT/TERM signals through abort,
# which exits the script.
trap abort ERR HUP QUIT INT TERM
# Point run/LATEST.COMPILE and run/RUNNING.COMPILE at this compilation's
# directory (ln's verbose output goes to stderr). cleanup removes
# RUNNING.COMPILE again on exit if it still refers to this directory.
ln -sfnv "$compileDir" run/LATEST.COMPILE >&2
ln -sfnv "$compileDir" run/RUNNING.COMPILE >&2
# Waits for each given child process in turn and aborts the script with the
# exit status of the first one that failed.
# Arguments: $1 - placeholder that is discarded (callers seed their pid arrays
#                 with a dummy first element); $2... - pids to wait for
waitall() {
  shift
  local child=
  for child in "$@"; do
    wait "$child" || abort $?
  done
}
# Put the compiler components in front of PATH; util/compile-code takes
# precedence over util/compile-config.
PATH="$DEEPDIVE_HOME/util/compile-code:$DEEPDIVE_HOME/util/compile-config:$PATH"

# Exported so that child processes see the error prefix as well.
DEEPDIVE_ERROR_PREFIX="[ERROR] "
export DEEPDIVE_ERROR_PREFIX
###############################################################################
## keep a record of the DeepDive version that performed this compilation
deepdive version >"run/$compileDir/deepdive.version"

## keep a snapshot of the environment variables in effect during compilation
env >"run/$compileDir/environ"
###############################################################################
## obtain deepdive.conf.json, either from a precompiled file or from source code
# Both paths end with the compilation directory as the working directory.
if [[ -e $DEEPDIVE_COMPILE_INPUT_JSON ]]; then
  # a precompiled JSON was supplied: just copy it into place
  STEP "Loading precompiled DeepDive application from $DEEPDIVE_COMPILE_INPUT_JSON to generate:"
  STEP " run/compiled/deepdive.conf.json"
  cp -pf "$DEEPDIVE_COMPILE_INPUT_JSON" run/"$compileDir"/deepdive.conf.json
  cd run/"$compileDir"
else
  # compile a full deepdive.conf from the application's source files
  STEP "Parsing DeepDive application ($DEEPDIVE_APP) to generate:"
  STEP " run/compiled/deepdive.conf"
  touch run/"$compileDir"/deepdive.conf
  {
    # the user's schema.json, when readable and non-empty, becomes deepdive.schema
    if [[ -r schema.json && -s schema.json ]]; then
      STEP " from schema.json"
      printf '%s' 'deepdive.schema '
      cat schema.json
    fi

    # output of the DDlog compiler for app.ddlog
    if [[ -r app.ddlog ]]; then
      STEP " from app.ddlog"
      ddlog compile app.ddlog
    fi

    # the user's own deepdive.conf comes after the generated parts
    if [[ -r deepdive.conf ]]; then
      STEP " from deepdive.conf"
      cat deepdive.conf
    fi

    # extra config from the DEEPDIVE_CONFIG_EXTRA environment is appended last
    # so that it gets more priority
    if [[ -n "$DEEPDIVE_CONFIG_EXTRA" ]]; then
      STEP " from \$DEEPDIVE_CONFIG_EXTRA"
      echo "$DEEPDIVE_CONFIG_EXTRA"
    fi
  } >run/"$compileDir"/deepdive.conf

  cd run/"$compileDir"

  # convert the compiled HOCON (plus the default config) into JSON
  STEP " run/compiled/deepdive.conf.json"
  # XXX APP_HOME, PIPELINE, PARALLELISM and INPUT_BATCH_SIZE are bogus variables
  # referenced in deepdive.conf by the DDlog compiler or in deepdive-default.conf;
  # they are set only in the environment of the converter
  APP_HOME='"$DEEPDIVE_APP"' PIPELINE= PARALLELISM=1 INPUT_BATCH_SIZE=100000 \
    hocon2json \
    deepdive.conf \
    "$DEEPDIVE_HOME"/etc/deepdive-default.conf \
    >deepdive.conf.json
fi
###############################################################################
# check the input configuration; the checker's output is indented and logged
STEP "Performing sanity checks on run/compiled/deepdive.conf.json:"
deepdive-check -a -c "$PWD"/deepdive.conf.json 'input_*' 2>&1 | sed 's/^/ /' >&2

# run every executable config compiler in glob order, each one consuming the
# output of the one before it
STEP "Normalizing and adding built-in processes to the data flow to compile:"
config=deepdive.conf.json
for cc in "$DEEPDIVE_HOME"/util/compile-config/compile-config-*; do
  if [[ ! -x "$cc" ]]; then
    continue
  fi
  cc=${cc##*/}                # command name, resolved through PATH
  name=${cc#compile-config-}
  output=config-$name.json
  STEP " run/compiled/$output"
  "$cc" "$config" >"$output"
  config=$output              # input for the next config compiler
done

# config.json refers to the output of the last compiler in the chain
STEP " run/compiled/config.json"
ln -sfn "$config" config.json

STEP "Validating run/compiled/config.json:"
deepdive-check -a -c "$PWD"/config.json 'compiled_*' 2>&1 | sed 's/^/ /' >&2
###############################################################################
STEP "Compiling executable code into:"
# start every executable code compiler in the background, each producing its
# own code-NAME.json from config.json
pids=(--) # leading placeholder, discarded by waitall
for cc in "$DEEPDIVE_HOME"/util/compile-code/compile-code-*; do
  if [[ -x "$cc" ]]; then
    cc=${cc##*/}
    name=${cc#compile-code-}
    STEP " run/compiled/code-$name.json"
    "$cc" config.json >"code-$name.json" &
    pids+=("$!")
  fi
done
# block until all of them are done, aborting if any failed
waitall "${pids[@]}"

STEP "Validating run/compiled/code-*.json:"
deepdive-check -a -c <(cat code-*.json) 'codegen_*' 2>&1 | sed 's/^/ /' >&2

# everything compiled so far becomes read-only
chmod a-w *
###############################################################################
# Unless DEEPDIVE_COMPILE_SKIP_CODEGEN is set, turn the compiled code-*.json
# into actual files under run/.
if [[ -z $DEEPDIVE_COMPILE_SKIP_CODEGEN ]]; then
  cd "$DEEPDIVE_APP"/run
  STEP "Generating files:"
  cat "$compileDir"/code-*.json |
    compile-codegen

  # postprocessing: render dataflow.dot in each format that a trial run of
  # `dot` on an empty graph shows to be available
  pids=(--)
  for format in pdf svg; do
    if dot -T"$format" -o /dev/null <<<"digraph {}"; then
      dot -T"$format" -o dataflow."$format" <dataflow.dot &
      pids+=("$!")
      STEP " run/dataflow.$format"
      # XXX a handy file: URL for opening in a browser
      STEP " (file://$(pwd -P)/dataflow.$format)"
    fi
  done
  waitall "${pids[@]}"
fi
###############################################################################
# publish this compilation as run/compiled, keeping whatever was there before
# as run/compiled~
cd "$DEEPDIVE_APP"
if [[ -e run/compiled ]]; then
  mv -f --no-target-directory run/compiled run/compiled~
fi
ln -sfnv "$compileDir" run/compiled