// Jenkinsfile: build, publish and deploy pipeline for the kg-covid-19 knowledge graph.
pipeline {
agent any
environment {
    // Build date stamp (YYYYMMDD) taken from `date` on the agent. Later stages
    // use it to name the dated release directory and the stats file copy.
    // `date` is called directly; wrapping it in echo + backticks added nothing.
    BUILDSTARTDATE = sh(script: 'date +%Y%m%d', returnStdout: true).trim()
    // Distribution ID for the AWS CloudFront for this branch,
    // used solely for invalidations.
    AWS_CLOUDFRONT_DISTRIBUTION_ID = 'EUVSWXZQBXCFP'
}
// Pipeline-wide options: only timestamps() is enabled.
options { timestamps() }
stages {
// First stage: wait one minute so a freshly started build can still be
// cancelled, then wipe the workspace before anything else uses it.
stage('Ready and clean') {
    steps {
        // Grace period for a manual abort.
        sleep(time: 1, unit: 'MINUTES')
        // Start from an empty workspace.
        cleanWs()
    }
}
// Record the environment, branch name and build date in the console log and
// in small text files (env.txt, branch.txt, dow.txt) in the workspace.
stage('Initialize') {
    steps {
        // Start preparing environment.
        parallel(
            'Report': {
                // Environment dump and branch name: written to files first ...
                sh('env > env.txt')
                sh('echo $BRANCH_NAME > branch.txt')
                sh('echo "$BRANCH_NAME"')
                // ... then echoed back into the log.
                sh('cat env.txt')
                sh('cat branch.txt')
                // Double-quoted on purpose: Groovy substitutes BUILDSTARTDATE
                // before the command reaches the shell.
                sh("echo $BUILDSTARTDATE > dow.txt")
                sh("echo $BUILDSTARTDATE")
            }
        )
    }
}
// Check out kg-covid-19 at the branch being built and install it, together
// with its requirements, into a fresh Python 3.7 virtualenv at ./gitrepo/venv.
stage('Build kg_covid_19') {
    steps {
        dir('./gitrepo') {
            git(
                url: 'https://github.com/Knowledge-Graph-Hub/kg-covid-19',
                branch: env.BRANCH_NAME
            )
            sh '/usr/bin/python3.7 -m venv venv'
            // No standalone `. venv/bin/activate` step: every `sh` step runs in
            // its own shell, so an activation done here would not carry over to
            // the steps below. The venv's pip is called by path instead; stages
            // that need the venv activate it inside the same `sh` command.
            sh './venv/bin/pip install -r requirements.txt'
            sh './venv/bin/pip install .'
        }
    }
}
// Run `run.py download`. If it succeeds, the raw data is uploaded to S3 (on
// master only). If it fails, the copy of raw/ already on S3 is fetched into
// data/raw/ instead.
stage('Download') {
    steps {
        dir('./gitrepo') {
            script {
                // returnStatus: the exit code is inspected below rather than
                // letting a non-zero status fail the step.
                def downloadStatus = sh(
                    returnStatus: true,
                    script: '. venv/bin/activate && python3.7 run.py download'
                )

                if (downloadStatus != 0) {
                    // 'run.py download' failed - let's try to download last good
                    // copy of raw/ from s3 to data/
                    withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_CFG')]) {
                        sh 'rm -fr data/raw || true;'
                        sh 'mkdir -p data/raw || true'
                        sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=plain/text get -r s3://kg-hub-public-data/raw/ data/raw/'
                    }
                } else if (env.BRANCH_NAME == 'master') {
                    // Download succeeded on master: upload raw data to S3.
                    // NOTE(review): 'plain/text' looks like a transposed
                    // 'text/plain' - confirm the intended MIME type.
                    withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_CFG')]) {
                        sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=plain/text --cf-invalidate put -r data/raw s3://kg-hub-public-data/'
                    }
                } else {
                    // Download succeeded, but only master pushes to S3.
                    echo "Will not push if not on correct branch."
                }
            }
        }
    }
}
// Run the `transform` step of run.py inside the project virtualenv.
stage('Transform') {
    steps {
        dir('./gitrepo') {
            // The environment is printed twice: once as-is, and once more
            // after the venv has been activated.
            sh('env')
            sh('. venv/bin/activate && env && python3.7 run.py transform')
        }
    }
}
// Run the `merge` step of run.py, then add a date-stamped copy of the graph
// stats file to the merged-KG tarball.
stage('Merge') {
    steps {
        dir('./gitrepo') {
            sh('. venv/bin/activate && python3.7 run.py merge')
            sh('env')
            // presumably merged_graph_stats.yaml is written by the merge step
            // above - verify against run.py
            sh('cp merged_graph_stats.yaml merged_graph_stats_$BUILDSTARTDATE.yaml')
            // tar -r appends the stamped stats file to the existing archive.
            sh('tar -rvf data/merged/merged-kg.tar merged_graph_stats_$BUILDSTARTDATE.yaml')
        }
    }
}
// Build blazegraph-runner from source and use it to load the merged
// N-Triples file into a Blazegraph journal (merged-kg.jnl), then compress
// both the journal and the N-Triples file.
stage('Make blazegraph journal') {
    steps {
        dir('./gitrepo/blazegraph') {
            git(branch: 'master', url: 'https://github.com/balhoff/blazegraph-runner.git')
            sh('sbt stage')
            // The loader is given the uncompressed .nt file, so decompress first.
            sh('pigz -d ../data/merged/merged-kg.nt.gz')
            // JAVA_OPTS sets a 128G maximum heap for the load.
            sh('export JAVA_OPTS=-Xmx128G && ./target/universal/stage/bin/blazegraph-runner load --informat=ntriples --journal=../merged-kg.jnl --use-ontology-graph=true ../data/merged/merged-kg.nt')
            // Compress the new journal and re-compress the triples.
            sh('pigz ../merged-kg.jnl')
            sh('pigz ../data/merged/merged-kg.nt')
        }
    }
}
// Publish the build artifacts to the kg-hub-public-data S3 bucket.
//
// On every branch: clone the index-building scripts and fail the build if
// s3://kg-hub-public-data/$BUILDSTARTDATE/ already has content.
// On master only: assemble the $BUILDSTARTDATE/ directory, upload it as both
// the dated directory and current/, rebuild the top-level index.html and
// invalidate the CloudFront distribution.
stage('Publish') {
    steps {
        dir('./gitrepo') {
            script {
                // code for building s3 index files
                sh 'git clone https://github.com/justaddcoffee/go-site.git'

                // make sure we aren't going to clobber existing data on S3
                withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_CFG')]) {
                    // .trim() is applied to the stdout that sh() returns, not
                    // to the `true` literal passed as returnStdout.
                    def remoteBuildDirContents = sh(
                        script: 's3cmd -c $S3CMD_CFG ls s3://kg-hub-public-data/$BUILDSTARTDATE/',
                        returnStdout: true
                    ).trim()
                    echo "REMOTE_BUILD_DIR_CONTENTS (THIS SHOULD BE EMPTY): '${remoteBuildDirContents}'"
                    if (remoteBuildDirContents != '') {
                        // Abort the build with an explicit message.
                        error("Will not overwrite existing (---REMOTE S3---) directory: $BUILDSTARTDATE")
                    } else {
                        echo "remote directory $BUILDSTARTDATE is empty, proceeding"
                    }
                }

                if (env.BRANCH_NAME != 'master') {
                    echo "Will not push if not on correct branch."
                } else {
                    withCredentials([
                        file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_CFG'),
                        file(credentialsId: 'aws_kg_hub_push_json', variable: 'AWS_JSON'),
                        string(credentialsId: 'aws_kg_hub_access_key', variable: 'AWS_ACCESS_KEY_ID'),
                        string(credentialsId: 'aws_kg_hub_secret_key', variable: 'AWS_SECRET_ACCESS_KEY')]) {
                        //
                        // make $BUILDSTARTDATE/ directory and sync to s3 bucket
                        //
                        sh 'mkdir $BUILDSTARTDATE/'
                        sh 'cp -p data/merged/merged-kg.nt.gz $BUILDSTARTDATE/kg-covid-19.nt.gz'
                        // NOTE(review): the Merge stage appends to merged-kg.tar and no
                        // step in this file gzips it - confirm merged-kg.tar.gz exists here.
                        sh 'cp -p data/merged/merged-kg.tar.gz $BUILDSTARTDATE/kg-covid-19.tar.gz'
                        sh 'cp -p merged-kg.jnl.gz $BUILDSTARTDATE/kg-covid-19.jnl.gz'
                        // transformed data
                        sh 'rm -fr data/transformed/.gitkeep'
                        sh 'cp -pr data/transformed $BUILDSTARTDATE/'
                        sh 'cp -pr data/raw $BUILDSTARTDATE/'
                        sh 'cp Jenkinsfile $BUILDSTARTDATE/'
                        // stats dir
                        sh 'mkdir $BUILDSTARTDATE/stats/'
                        sh 'cp -p *_stats.yaml $BUILDSTARTDATE/stats/'

                        //
                        // put $BUILDSTARTDATE/ in s3 bucket
                        //
                        sh '. venv/bin/activate && python3.7 ./go-site/scripts/directory_indexer.py -v --inject ./go-site/scripts/directory-index-template.html --directory $BUILDSTARTDATE --prefix https://kg-hub.berkeleybop.io/$BUILDSTARTDATE -x -u'
                        sh 's3cmd -c $S3CMD_CFG put -pr --acl-public --mime-type=text/html --cf-invalidate $BUILDSTARTDATE s3://kg-hub-public-data/'

                        // make current/ directory: re-index the same local directory with
                        // the current/ URL prefix, then upload its contents to current/
                        sh '. venv/bin/activate && python3.7 ./go-site/scripts/directory_indexer.py -v --inject ./go-site/scripts/directory-index-template.html --directory $BUILDSTARTDATE --prefix https://kg-hub.berkeleybop.io/current -x -u'
                        sh 's3cmd -c $S3CMD_CFG put -pr --acl-public --mime-type=text/html --cf-invalidate $BUILDSTARTDATE/ s3://kg-hub-public-data/current/'

                        // Build the top level index.html
                        // "External" packages required to run these
                        // scripts.
                        sh './venv/bin/pip install pystache boto3'
                        sh '. venv/bin/activate && python3.7 ./go-site/scripts/bucket-indexer.py --credentials $AWS_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html'
                        sh 's3cmd -c $S3CMD_CFG put --acl-public --mime-type=text/html --cf-invalidate top-level-index.html s3://kg-hub-public-data/index.html'

                        // Invalidate the CDN now that the new
                        // files are up.
                        sh './venv/bin/pip install awscli'
                        sh 'echo "[preview]" > ./awscli_config.txt && echo "cloudfront=true" >> ./awscli_config.txt'
                        sh '. venv/bin/activate && AWS_CONFIG_FILE=./awscli_config.txt python3.7 ./venv/bin/aws cloudfront create-invalidation --distribution-id $AWS_CLOUDFRONT_DISTRIBUTION_ID --paths "/*"'

                        // Should now appear at:
                        // https://kg-hub.berkeleybop.io/[artifact name]
                    }
                }
            }
        }
    }
}
// On master only: check out the geneontology/operations repository and run
// its ansible playbook to update the kg-hub endpoint.
stage('Deploy blazegraph') {
    // A single branch condition; no anyOf wrapper is needed around one entry.
    when { branch 'master' }
    steps {
        git(
            branch: 'master',
            credentialsId: 'justaddcoffee_github_api_token_username_pw',
            url: 'https://github.com/geneontology/operations.git'
        )
        dir('./ansible') {
            withCredentials([file(credentialsId: 'ansible-bbop-local-slave', variable: 'DEPLOY_LOCAL_IDENTITY')]) {
                echo 'Push master out to public Blazegraph'
                // The playbook run is attempted up to three times.
                retry(3) {
                    sh('ansible-playbook update-kg-hub-endpoint.yaml --inventory=hosts.local-rdf-endpoint --private-key="$DEPLOY_LOCAL_IDENTITY" -e target_user=bbop --extra-vars="endpoint=internal"')
                }
            }
        }
    }
}
}
}