Merge pull request #3135 from SEED-platform/backup-tweaks
Changes to the pgdata backup process
nllong committed Feb 18, 2022
2 parents 9524cb3 + 13be9cc commit adf8e4d
Showing 2 changed files with 45 additions and 40 deletions.
20 changes: 10 additions & 10 deletions docker/backup_k8s/backup_database.sh
@@ -2,7 +2,7 @@

# This backup script creates nightly database and media file backups of SEED when SEED is running
# in a docker container. This is to be used in conjunction with k8s and
- # a CronJob task.
+ # a CronJob task.

DB_HOST=$1
DB_NAME=$2
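
The connection details arrive as positional arguments, so the CronJob container would invoke the script roughly like this (a sketch only: the script path and the later arguments, which fall outside the visible hunk, are assumptions):

    # Hypothetical invocation; only the first two arguments are visible in this hunk.
    ./backup_database.sh "$DB_HOST" "$DB_NAME" ...
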
@@ -17,7 +17,7 @@ send_slack_notification(){
fi
}

- # Verify that the following required enviroment variables are set
+ # Verify that the following required environment variables are set
if [ -z ${AWS_ACCESS_KEY_ID} ]; then
echo "AWS_ACCESS_KEY_ID is not set"
send_slack_notification "[ERROR-$ENVIRONMENT]-AWS_ACCESS_KEY_ID-not-configured"
@@ -101,7 +101,7 @@ done
for file in $BACKUP_DIR/*.tgz
do
echo "Backing up $file $S3_BUCKET/$RUN_DATE/"

if [ ! -s $file ]; then
# the file is empty, send an error
send_slack_notification "[ERROR-$ENVIRONMENT]-Mediadata-backup-file-was-empty-or-missing"
@@ -122,13 +122,13 @@ send_slack_notification "[$ENVIRONMENT]-database-backup-run-completed"

# Daily - add dates in format "2021-10-22" to the keep array.
for i in {0..60}
- do
- ((keep[$(date +%Y%m%d -d "-$i day")]++))
+ do
+ ((keep[$(date +%Y%m%d -d "-$i day")]++))
done

# Last 52 weeks of Monday morning backups. "monday-i week" is method to get previous monday back i times.
for i in {0..52}
- do
+ do
vali=$((i+1))
((keep[$(date "+%Y%m%d" -d "monday-$vali week")]++))
done
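
Together these two loops mark every date to retain: the previous 60 calendar days plus the previous 52 Mondays. The snippet below (illustrative only, not part of the script) prints exactly which keys end up in the array; note that +%Y%m%d produces undashed keys such as 20211022, which the comparison logic below the visible cut presumably reconciles with the dashed directory names:

    # Print the retention dates the loops above would keep.
    for i in {0..60}; do date +%Y%m%d -d "-$i day"; done
    for i in {0..52}; do date +%Y%m%d -d "monday-$((i+1)) week"; done
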
@@ -146,7 +146,7 @@ done
# Query S3 to find all the dates that exist. Mapfile converts output or CRLF stdout to array in bash.
mapfile s3dirs < <(aws s3 ls $S3_BUCKET | awk '{print $2}')

- # Iterate to find which backups need to be removed
+ # Iterate to find which backups need to be removed
for s3dir in "${s3dirs[@]}"
do
date_found=false
@@ -160,9 +160,9 @@ do
done

# This method can be quite destructive and delete any
- # files that are in the date format. Be sure to
- # test this script before deploying in any production
- # environment. It will only remove directories that
+ # files that are in the date format. Be sure to
+ # test this script before deploying in any production
+ # environment. It will only remove directories that
# have a YYYY-MM-DD format
if [ "$date_found" = false ] && [[ "${s3dir:0:10}" =~ ^[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]$ ]]; then
echo "Deleting out of date backup of ${s3dir:0:10}"
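The removal command itself sits below the visible cut; given the dated-prefix layout used above, it would plausibly take a form like the following (a hypothetical sketch, not the repository's line):

    # Hypothetical: recursively delete the objects under an out-of-date dated prefix.
    aws s3 rm "$S3_BUCKET/${s3dir:0:10}/" --recursive
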
65 changes: 35 additions & 30 deletions docker/backup_k8s/tar_backup_database.sh
@@ -1,10 +1,8 @@
#!/bin/bash

- # This back up script grabs the lastest pg_dump, restores it, tars it, and
+ # This back up script grabs the latest pg_dump, restores it, tars it, and
# uploads it when SEED is running in a docker container. This is to be used
- # in conjunction with k8s and a CronJob task.
-
- DB_USERNAME=$1
+ # in conjunction with k8s and a CronJob task, and runs as the `postgres` user.

send_slack_notification(){
if [ ! -z ${APP_SLACK_WEBHOOK} ]; then
@@ -15,7 +13,7 @@ send_slack_notification(){
fi
}
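
The helper's body is collapsed in this view; a function like this typically just POSTs its argument to the Slack incoming webhook, along these lines (an assumed shape, not the repository's exact code):

    # Assumed body: send the first argument as the message text to the configured webhook.
    curl -s -X POST -H 'Content-type: application/json' \
        --data "{\"text\": \"$1\"}" "${APP_SLACK_WEBHOOK}"
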

- # Verify that the following required enviroment variables are set
+ # Verify that the following required environment variables are set
if [ -z ${AWS_ACCESS_KEY_ID} ]; then
echo "AWS_ACCESS_KEY_ID is not set"
send_slack_notification "[ERROR-$ENVIRONMENT]-AWS_ACCESS_KEY_ID-not-configured"
@@ -40,47 +38,54 @@ if [ -z ${S3_BUCKET} ]; then
exit 1
fi

- if [ -z ${PGPASSWORD} ]; then
- echo "PGPASSWORD is not set"
- send_slack_notification "[ERROR-$ENVIRONMENT]-PGPASSWORD-not-configured"
+ if [ -z ${POSTGRES_DB} ]; then
+ echo "POSTGRES_DB is not set"
+ send_slack_notification "[ERROR-$ENVIRONMENT]-POSTGRES_DB-not-configured"
exit 1
fi

+ if [ -z ${POSTGRES_USER} ]; then
+ echo "POSTGRES_USER is not set"
+ send_slack_notification "[ERROR-$ENVIRONMENT]-POSTGRES_USER-not-configured"
+ exit 1
+ fi

- # Instal aws cli
- apk add --no-cache \
- python3 \
- py3-pip \
- && pip3 install --upgrade pip \
- && pip3 install awscli
+ if [ -z ${POSTGRES_PASSWORD} ]; then
+ echo "POSTGRES_PASSWORD is not set"
+ send_slack_notification "[ERROR-$ENVIRONMENT]-POSTGRES_PASSWORD-not-configured"
+ exit 1
+ fi

LATEST_DIR="$(aws s3 ls seed-dev1-backups | sort | tail -n 1 | awk -F' ' '{print $2}')"
+ ARCHIVE=backup.tar.xz

- # if backup.tar already exists, for go rest of script
- if aws s3 ls $S3_BUCKET/$LATEST_DIR | grep "backup.tar"; then
- echo "There's already a backup for $LATEST_DIR";
+ # if backup already exists, forgo rest of script
+ if aws s3 ls $S3_BUCKET/$LATEST_DIR | grep $ARCHIVE; then
+ echo "There's already a backup for $LATEST_DIR";
exit 0

fi
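
For context, aws s3 ls on the bucket prints one line per top-level prefix, so sorting and taking the last entry picks the most recent dated backup directory. Roughly (the date shown is illustrative):

    # Illustrative output; backup runs create prefixes named YYYY-MM-DD/.
    $ aws s3 ls seed-dev1-backups | sort | tail -n 1 | awk -F' ' '{print $2}'
    2022-02-17/
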

+ # work in the scratch volume for storage
+ cd /scratch

# Download latest S3 backup
- aws s3 cp $S3_BUCKET/$LATEST_DIR . --recursive --exclude "*" --include "*.dump"
+ aws s3 cp $S3_BUCKET/$LATEST_DIR . --recursive --exclude "*" --include "seed*.dump"

# Start postgres
su postgres -c "initdb"
su postgres -c "pg_ctl start"
+ # Restart for timescale-tune to take effect
+ pg_ctl restart

- # Restore db
- su postgres -c "createuser ${DB_USERNAME}"
- su postgres -c "pg_restore -v -C -d postgres ./seed*.dump"
+ # Restore db
+ psql -U $POSTGRES_USER $POSTGRES_DB -c 'SELECT timescaledb_pre_restore();'
+ pg_restore -U $POSTGRES_USER -d $POSTGRES_DB ./seed*.dump
+ psql -U $POSTGRES_USER $POSTGRES_DB -c 'SELECT timescaledb_post_restore();'
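
Wrapping pg_restore in timescaledb_pre_restore()/timescaledb_post_restore() follows TimescaleDB's documented restore procedure: the first call pauses the extension's background workers so its catalog tables can be rewritten, and the second returns the database to normal operation. A simple follow-up check (illustrative, not part of the script) is to confirm the extension is present after the restore:

    # Illustrative sanity check once pg_restore finishes.
    psql -U $POSTGRES_USER $POSTGRES_DB -c \
        "SELECT extname, extversion FROM pg_extension WHERE extname = 'timescaledb';"
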

# Stop postgres
- su postgres -c "pg_ctl stop"
+ pg_ctl stop

+ # compress pgdata
+ tar -cJf $ARCHIVE /var/lib/postgresql/data

- # tar db
- tar -czf backup.tar /var/lib/postgresql/data
+ # push archived db to s3
+ aws s3 cp $ARCHIVE $S3_BUCKET/$LATEST_DIR

- # push tared db to s3
- aws s3 cp backup.tar $S3_BUCKET/$LATEST_DIR
exit 0
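
Since the archive is a compressed copy of the whole pgdata directory, recovery is the reverse of the last two steps. Roughly (the dated prefix is illustrative, and the target directory must not already contain a live cluster):

    # Illustrative recovery: fetch an archive and unpack it back into place.
    aws s3 cp "$S3_BUCKET/2022-02-17/backup.tar.xz" .
    tar -xJf backup.tar.xz -C /    # members were stored relative to /, e.g. var/lib/postgresql/data/...
    pg_ctl start -D /var/lib/postgresql/data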
