-
Notifications
You must be signed in to change notification settings - Fork 1
/
restore.sh
executable file
·331 lines (297 loc) · 11.4 KB
/
restore.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
#!/bin/bash
# Set defaults
#######################################
# Initialise the global ARGS associative array with the default value of
# every option this script understands.
# Globals:   ARGS (created / overwritten)
# Arguments: none
#######################################
default_args() {
  declare -g -A ARGS
  ARGS[SHARED_DIR]=/mnt/shared/backups
  ARGS[ALPHA_DIR]=/mnt/alpha-browse
  # Restore targets start out empty; parse_args fills in the requested ones.
  ARGS[ALPHA]=
  ARGS[AUTHORITY]=
  ARGS[BIBLIO]=
  ARGS[DB]=
  # Deliberately empty rather than 1: catch_invalid_args uses a non-empty
  # value to detect that --node was passed explicitly.
  ARGS[NODE]=
  ARGS[VERBOSE]=0
}
default_args
# Script help text
#######################################
# Print the usage text for this script to stdout.
# Globals:   ARGS (read) - ALPHA_DIR and SHARED_DIR supply the defaults shown
# Arguments: none
#######################################
runhelp() {
  # Unquoted delimiter so the ${ARGS[...]} defaults are expanded.
  cat <<EOF

Usage: Runs a restore of the Solr, the database, and/or the alphabrowse
 files from backups.

Examples:
 ./restore.sh --biblio /path/to/biblio/snapshot.1.tar.gz
 Restore the biblio Solr index using data from snapshot.1.tar.gz
 ./restore.sh --alpha /path/to/alpha/20220101.tar
 Restore the alphabrowse databases from 20220101.tar
 ./restore.sh --db /path/to/20220101.tar
 Restore the database using 20220101.tar backup
 ./restore.sh --authority /path/to/authority/snapshot.1.tar.gz
 Restore the authority Solr index using data from snapshot.1.tar.gz

Flags:
 -a/--authority
 Full path to the authority Solr index backup to restore to
 -b/--biblio
 Full path to the biblio Solr index backup to restore to
 -l|--alpha
 Full path to the alphabrowse database backup to restore to
 -d/--db
 Full path to the database backup to restore to
 -n/--node
 Node number to restore the database backup from
 Default: 1
 -p|--alpha-dir ALPHA_DIR
 Full path to the alphabrowse database storage location.
 Default: ${ARGS[ALPHA_DIR]}
 -s|--shared-dir SHARED_DIR
 Full path to the shared storage location for backups to be stored.
 Default: ${ARGS[SHARED_DIR]}
 -v/--verbose
 Show verbose output
EOF
}
# Show the usage text and stop when the script is started without arguments
# or when the first argument is an explicit request for help.
case "$1" in
  "" | -h | --help | help)
    runhelp
    exit 0
    ;;
esac
# Parse command arguments
#######################################
# Resolve a path-valued option and store it in ARGS; exit 1 when the value
# does not name an existing file / directory.
# Globals:   ARGS (written)
# Arguments: $1 - ARGS key to set (e.g. DB)
#            $2 - flag label used in the error message (e.g. "-d|--db")
#            $3 - "file" (must be a regular file) or "path" (must be a directory)
#            $4 - value given on the command line (may be empty when missing)
# Outputs:   error message on stdout when validation fails
#######################################
_parse_args_set_path() {
  local key=$1 flag=$2 kind=$3 value=$4
  local resolved valid=1

  # '--' keeps a value that starts with '-' from being read as an option.
  resolved=$(readlink -f -- "${value}") || valid=0
  if [[ "${kind}" == "file" ]]; then
    [[ -f "${resolved}" ]] || valid=0
  else
    [[ -d "${resolved}" ]] || valid=0
  fi
  if (( ! valid )); then
    echo "ERROR: ${flag} ${kind} does not exist: ${value}"
    exit 1
  fi
  ARGS[${key}]=${resolved}
}

#######################################
# Parse the command-line flags into the global ARGS array.
# Exits with status 1 on an unknown flag or an invalid value.
# Globals:   ARGS (written)
# Arguments: the script's command-line arguments ("$@")
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case $1 in
      -l|--alpha)
        _parse_args_set_path ALPHA "-l|--alpha" file "${2-}"
        shift 2 ;;
      -a|--authority)
        _parse_args_set_path AUTHORITY "-a|--authority" file "${2-}"
        shift 2 ;;
      -b|--biblio)
        _parse_args_set_path BIBLIO "-b|--biblio" file "${2-}"
        shift 2 ;;
      -d|--db)
        _parse_args_set_path DB "-d|--db" file "${2-}"
        shift 2 ;;
      -s|--shared-dir)
        _parse_args_set_path SHARED_DIR "-s|--shared-dir" path "${2-}"
        shift 2 ;;
      -p|--alpha-dir)
        _parse_args_set_path ALPHA_DIR "-p|--alpha-dir" path "${2-}"
        shift 2 ;;
      -n|--node)
        # Match plain digits instead of using an arithmetic test: -gt would
        # evaluate the argument as an expression ("1+1", variable names, ...).
        if [[ ! "${2-}" =~ ^[0-9]+$ ]] || (( 10#${2} <= 0 )); then
          echo "ERROR: -n|--node only accept positive integers"
          exit 1
        fi
        # 10# forces base 10 so a leading zero is not read as octal.
        ARGS[NODE]=$(( 10#${2} ))
        shift 2 ;;
      -v|--verbose)
        ARGS[VERBOSE]=1
        shift ;;
      *)
        echo "ERROR: Unknown flag: $1"
        exit 1 ;;
    esac
  done
}
#######################################
# Reject flag combinations that cannot be acted on: no restore target at
# all, or --node without --db. Prints the reason and exits with status 1.
# Globals:   ARGS (read)
#######################################
catch_invalid_args() {
  # The joined string is empty only when every restore target is empty.
  local targets="${ARGS[AUTHORITY]}${ARGS[DB]}${ARGS[BIBLIO]}${ARGS[ALPHA]}"

  if [[ -z "${targets}" ]]; then
    echo "ERROR: Neither --authority, --biblio, --alpha, or --db flag is set. Please select one or more to use this tool."
    exit 1
  fi

  if [[ -n "${ARGS[NODE]}" && -z "${ARGS[DB]}" ]]; then
    echo "ERROR: --node cannot be used without --db. Please see the --help message for more information."
    exit 1
  fi
}
# Print message if verbose is enabled
#######################################
# Emit a timestamped message. The message is appended to LOG_FILE whenever
# that variable is set, and is also printed to stdout when verbose mode is
# on or the caller forces it.
# Globals:   ARGS[VERBOSE] (read), LOG_FILE (read)
# Arguments: $1 - message text
#            $2 - optional; 1 prints the message regardless of ARGS[VERBOSE]
# Outputs:   the message on stdout (conditionally) and in LOG_FILE
#######################################
verbose() {
  local force=${2:-0}
  local log_ts msg

  log_ts=$(date '+%Y-%m-%d %H:%M:%S')
  msg="[${log_ts}] $1"
  if [[ "${ARGS[VERBOSE]}" -eq 1 || "${force}" -eq 1 ]]; then
    echo "${msg}"
  fi
  # Redirecting to an empty path is an error, so only log once a log file
  # has actually been configured.
  if [[ -n "${LOG_FILE:-}" ]]; then
    echo "${msg}" >> "${LOG_FILE}"
  fi
}
#######################################
# Restore one Solr collection from a snapshot tarball.
# Extracts the tarball under SHARED_DIR/solr_dropbox/<collection>, asks a
# randomly chosen Solr node to restore from it, polls the node until the
# restore of that snapshot reports success, then removes the extracted copy.
# Exits the script with status 1 on any failure before the final clean-up.
# Globals:   ARGS[SHARED_DIR] (read)
# Arguments: $1 - collection name (e.g. "biblio")
#            $2 - path to the backup, expected to be snapshot.<name>.tar.gz
#######################################
restore_collection() {
  # Select one Solr node to perform the restore on
  local -a solr_nodes=(solr1 solr2 solr3)
  local solr_idx=$(( RANDOM % ${#solr_nodes[@]} ))
  local solr_node="${solr_nodes[solr_idx]}"
  local coll="$1"
  local backup_path="$2"
  local dropbox="${ARGS[SHARED_DIR]}/solr_dropbox"
  local backup_file
  backup_file="$(basename "${backup_path}")"
  # Strip only the trailing extension, not every ".tar.gz" inside the name.
  backup_file="${backup_file%.tar.gz}"

  if ! mkdir -p "${dropbox}/${coll}/${backup_file}"; then
    verbose "ERROR: could not create ${dropbox}/${coll}/${backup_file}" 1
    exit 1
  fi
  verbose "Extracting the backup ${backup_path} to ${ARGS[SHARED_DIR]}/solr_dropbox/${coll}"
  if ! tar -xzf "${backup_path}" -C "${dropbox}/${coll}"; then
    verbose "ERROR: could not extract ${backup_path} to ${ARGS[SHARED_DIR]}/solr_dropbox/${coll}" 1
    exit 1
  fi
  # NOTE(review): world-writable and owned by 1001:1001, presumably so the
  # Solr container user can read the extracted snapshot - confirm.
  chmod -R 777 "${dropbox}/"
  chown -R 1001:1001 "${dropbox}"

  # Trigger the restore in Solr. Solr itself prefixes "snapshot." to the
  # name parameter, so only the leading prefix is stripped here.
  # NOTE(review): location is the path as seen by the Solr node; presumably
  # /mnt/solr_backups is where it mounts the dropbox - confirm.
  verbose "Starting restore of '${coll}' index"
  # -f: treat an HTTP error response as a failure instead of success.
  if ! curl -f "http://${solr_node}:8983/solr/${coll}/replication?command=restore&location=/mnt/solr_backups/${coll}&name=${backup_file#snapshot.}" > /dev/null 2>&1; then
    verbose "ERROR: Failed to trigger a restore of the '${coll}' collection in Solr!" 1
    exit 1
  fi

  # Wait until restore is complete
  sleep 3
  local -r max_waits=500
  local cur_wait=1
  local url="http://${solr_node}:8983/solr/${coll}/replication?command=restorestatus&wt=json"
  local response stat actual
  verbose "Waiting until restore is complete of ${backup_file}"
  while true; do
    if (( cur_wait > max_waits )); then
      verbose "ERROR: Restore never completed for '${coll}' index!" 1
      exit 1
    fi
    # One request per poll so status and snapshot name describe the same
    # moment; -r yields the bare string so the name comparison can match.
    response="$(curl -s "${url}")"
    stat="$(jq -r '.restorestatus.status' <<< "${response}" 2>/dev/null)"
    actual="$(jq -r '.restorestatus.snapshotName' <<< "${response}" 2>/dev/null)"
    if [[ "${actual}" == "${backup_file}" ]]; then
      # Done only when the status belongs to THIS snapshot and is a success;
      # a leftover "success" from an earlier restore must not end the wait.
      if [[ "${stat}" == *"success"* ]]; then
        break
      fi
      if [[ "${stat}" == *"failed"* ]]; then
        verbose "ERROR: Solr reported a failed restore of the '${coll}' index!" 1
        exit 1
      fi
    fi
    if (( cur_wait != 1 )); then
      verbose "Restore not yet complete. Status: ${stat}"
    fi
    sleep 2
    cur_wait=$(( cur_wait + 1 ))
  done

  verbose "Removing temporary uncompressed backup"
  if ! rm -rf -- "${dropbox}/${coll}/${backup_file}"; then
    verbose "ERROR: could not remove temporary restore location ${ARGS[SHARED_DIR]}/solr_dropbox/${coll}/${backup_file}" 1 # won't exit
  fi
}
#######################################
# Delete the temporary restore directory /tmp/restore.
# A failure is reported through verbose but does not stop the script.
#######################################
cleanup() {
  # Nothing more to do when the directory was removed (or never existed).
  rm -rf /tmp/restore && return
  verbose "ERROR: could not remove temporary restore location /tmp/restore" 1 # won't exit
}
#######################################
# Restore the "vufind" database from a backup tarball.
# Extracts the tarball to /tmp/restore, picks the dump of the requested
# Galera node, desyncs a randomly chosen Galera node, pipes the dump into
# it, then re-syncs the node and removes the temporary files.
# Exits the script with status 1 on failure.
# Globals:   ARGS[DB], ARGS[NODE] (read), MARIADB_ROOT_PASSWORD (read),
#            NODE (read, fallback only), DB_NODE (written)
# Arguments: none
#######################################
restore_db() {
  local -a dbs=(galera1 galera2 galera3)
  local db_idx=$(( RANDOM % ${#dbs[@]} ))
  declare -g DB_NODE="${dbs[db_idx]}"
  # Honour --node; fall back to a NODE variable from the environment (what
  # the dump lookup used to read) and finally to the documented default 1.
  local node="${ARGS[NODE]:-${NODE:-1}}"
  local backup output

  mkdir -p /tmp/restore
  # If interrupted, we'll try to clean up temp files
  trap cleanup SIGTERM SIGINT EXIT

  verbose "Extracting the backup"
  if ! tar -xf "${ARGS[DB]}" -C /tmp/restore; then
    verbose "ERROR: could not extract ${ARGS[DB]} to /tmp/restore" 1
    exit 1
  fi
  # -print -quit: take the first match only so $backup is a single path.
  backup="$(find /tmp/restore -type f -name "galera${node}-*.sql.gz" -print -quit)"
  if [[ -z "${backup}" ]]; then
    verbose "ERROR: no galera${node}-*.sql.gz dump found in ${ARGS[DB]}" 1
    exit 1
  fi

  verbose "Temporarily setting Galera node to desychronized state"
  if ! output=$(mysql -h "$DB_NODE" -u root -p"$MARIADB_ROOT_PASSWORD" -e "SET GLOBAL wsrep_desync = ON" 2>&1); then
    # Check if it was a false negative and the state was actually set
    if ! mysql -h "$DB_NODE" -u root -p"$MARIADB_ROOT_PASSWORD" -e "SHOW GLOBAL STATUS LIKE 'wsrep_desync_count'" 2>/dev/null \
      | grep -qw 1; then
      verbose "ERROR: Failed to set node to desychronized state. Unsafe to continue restore. ${output}" 1
      exit 1
    fi
  fi

  verbose "Starting restore of database using ${backup}"
  # pipefail (scoped to the substitution's subshell) makes a gunzip failure
  # fail the pipeline; otherwise only mysql's exit status would be seen.
  if ! output=$(set -o pipefail; gunzip < "${backup}" | mysql -h "$DB_NODE" -u root -p"$MARIADB_ROOT_PASSWORD" vufind 2>&1); then
    verbose "ERROR: Failed to successfully restore the database. ${output}" 1
    # Best effort: do not leave the node desynced after a failed import.
    if ! mysql -h "$DB_NODE" -u root -p"$MARIADB_ROOT_PASSWORD" -e "SET GLOBAL wsrep_desync = OFF" > /dev/null 2>&1; then
      verbose "ERROR: could not re-set node ${DB_NODE} to synchronized state after the failed restore" 1
    fi
    exit 1
  fi

  verbose "Re-enabling Galera node to sychronized state"
  if ! output=$(mysql -h "$DB_NODE" -u root -p"$MARIADB_ROOT_PASSWORD" -e "SET GLOBAL wsrep_desync = OFF" 2>&1); then
    # Check if it was a false negative and the state was actually set
    if ! mysql -h "$DB_NODE" -u root -p"$MARIADB_ROOT_PASSWORD" -e "SHOW GLOBAL STATUS LIKE 'wsrep_desync_count'" 2>/dev/null \
      | grep -qw 0; then
      verbose "ERROR: Failed to re-set node to synchronized state after restore was complete. ${output}" 1
      exit 1
    fi
  fi

  verbose "Removing temporary uncompressed backup"
  # Remove the temporary files and unset our trap
  cleanup
  trap - SIGTERM SIGINT EXIT
  verbose "Completed restore of database"
}
#######################################
# Restore the alphabrowse database files from a tar backup.
# Extracts the tarball to /tmp/restore, copies every regular file found
# there into ARGS[ALPHA_DIR], refreshes the timestamps of the files in that
# directory, removes the temporary directory and prints the manual
# follow-up steps. Exits the script with status 1 on failure.
# Globals:   ARGS[ALPHA], ARGS[ALPHA_DIR] (read), STACK_NAME (read)
# Arguments: none
#######################################
restore_alpha() {
  local tmp_dir=/tmp/restore
  local output
  local -a restored

  verbose "Extracting the backup"
  # Start from an empty directory so leftovers of an earlier failed run are
  # never copied into the alphabrowse directory.
  if ! output=$(rm -rf -- "${tmp_dir}" 2>&1 && mkdir -p -- "${tmp_dir}" 2>&1); then
    verbose "ERROR: could not make temp restore directory. ${output}" 1
    exit 1
  fi
  # 2>&1 throughout: the tools report problems on stderr, which is what
  # the error messages below need to include.
  if ! output=$(tar -xf "${ARGS[ALPHA]}" -C "${tmp_dir}" 2>&1); then
    verbose "ERROR: could not extract ${ARGS[ALPHA]} to ${tmp_dir}. ${output}" 1
    exit 1
  fi

  verbose "Starting restore."
  # '-exec ... {} +' makes find exit non-zero when a cp fails ('\;' would
  # hide it). The destination is passed as $0, the found files as "$@".
  if ! output=$(find "${tmp_dir}/" -type f \
    -exec sh -c 'cp -- "$@" "$0"' "${ARGS[ALPHA_DIR]}/" {} + 2>&1); then
    verbose "ERROR: failed to restore ${ARGS[ALPHA]} to ${ARGS[ALPHA_DIR]}. ${output}" 1
    exit 1
  fi

  verbose "Touching files to update timestamp"
  verbose "(so that the alpha-browse script recognizes them as the most up-to-date files to use)"
  restored=( "${ARGS[ALPHA_DIR]}"/* )
  # An unmatched glob stays literal; only touch when something really exists
  # so an empty directory does not gain a file named "*".
  if [[ -e "${restored[0]}" ]]; then
    if ! output=$(touch -- "${restored[@]}" 2>&1); then
      verbose "ERROR: failed to update the timestamp of the files in ${ARGS[ALPHA_DIR]}. ${output}" 1
      exit 1
    fi
  fi

  verbose "Cleaning up temp files"
  if ! output=$(rm -rf -- "${tmp_dir}" 2>&1); then
    verbose "ERROR: failed to cleanup ${tmp_dir} directory. ${output}" 1
    exit 1
  fi

  verbose "Restore complete." 1
  verbose "IMPORTANT: remaining steps: " 1
  verbose "--" 1
  verbose "On each node, connect to the Solr cron container and run the alpha-browse.sh script" 1
  verbose "to copy back the files into the running Solr instance." 1
  verbose "Command:" 1
  # docker ps filters are key=value pairs, hence "name=".
  verbose "docker exec \$(docker ps -q -f name=${STACK_NAME}-solr_cron) /alpha-browse.sh -v" 1
  verbose "--" 1
}
#######################################
# Create the log file and run every restore that was requested, in the
# order: biblio index, authority index, database, alphabrowse files.
# Globals:   ARGS (read), LOG_FILE (written)
# Arguments: none
#######################################
main() {
  declare -g LOG_FILE
  LOG_FILE=$(mktemp)
  verbose "Logging to ${LOG_FILE}"
  verbose "Starting processing"

  # The Solr collections share one restore routine; the ARGS key is the
  # upper-cased collection name.
  local collection key
  for collection in biblio authority; do
    key=${collection^^}
    if [[ -n "${ARGS[$key]}" ]]; then
      restore_collection "${collection}" "${ARGS[$key]}"
    fi
  done

  [[ -z "${ARGS[DB]}" ]] || restore_db
  [[ -z "${ARGS[ALPHA]}" ]] || restore_alpha

  verbose "All processing complete"
}
# Entry point: parse the command line into ARGS, reject flag combinations
# that cannot be acted on, then run the requested restores. The order
# matters: main reads the values that parse_args stored.
parse_args "$@"
catch_invalid_args
main