-
Notifications
You must be signed in to change notification settings - Fork 1
/
dvc.jsonnet
52 lines (45 loc) · 1.24 KB
/
dvc.jsonnet
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
local bd = import './lib.jsonnet';
local subpipes = {
'loc-mds': import 'loc-mds/dvc.jsonnet',
openlibrary: import 'openlibrary/dvc.jsonnet',
viaf: import 'viaf/dvc.jsonnet',
az2014: import 'az2014/dvc.jsonnet',
az2018: import 'az2018/dvc.jsonnet',
bx: import 'bx/dvc.jsonnet',
goodreads: import 'goodreads/dvc.jsonnet',
'book-links': import 'book-links/dvc.jsonnet',
};
local parquets = [
std.strReplace(out, '.parquet', '')
for dir in std.objectFields(subpipes)
for stage in std.objectValues(subpipes[dir].stages)
for out in bd.stageOuts(dir, stage)
if std.endsWith(out, '.parquet')
];
local notebook = function(name, deps=[]) {
cmd: std.format('quarto render %s.qmd --to html', name),
deps: [
name + '.qmd',
] + deps,
outs: [
{ [name + '.ipynb']: { cache: false } },
name + '.html',
name + '_files',
],
};
bd.pipeline({
ClusterStats: notebook('ClusterStats', ['book-links/cluster-stats.parquet']),
LinkageStats: notebook('LinkageStats', [
'book-links/gender-stats.csv',
]),
schema: {
foreach: parquets,
do: {
cmd: bd.cmd('pq-info -o ${item}.json ${item}.parquet'),
deps: ['${item}.parquet'],
outs: [
{ '${item}.json': { cache: false } },
],
},
},
})