From f6083ca1cbf691381d76ced836a8ff06fd71c0ed Mon Sep 17 00:00:00 2001 From: pbailie Date: Tue, 28 Aug 2018 18:47:17 -0400 Subject: [PATCH 1/6] db_backup.py Monthly dumpfile retention WIP --- nightly_db_backup/db_backup.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/nightly_db_backup/db_backup.py b/nightly_db_backup/db_backup.py index 5cc8538..49ba5c4 100644 --- a/nightly_db_backup/db_backup.py +++ b/nightly_db_backup/db_backup.py @@ -5,7 +5,7 @@ :file: db_backup.py :language: python3 :author: Peter Bailie (Systems Programmer, Dept. of Computer Science, RPI) -:date: August 22 2018 +:date: August 28 2018 This script will take backup dumps of each individual Submitty course database. This should be set up by a sysadmin to be run on the Submitty @@ -30,6 +30,7 @@ """ import argparse +import calendar import datetime import json import os @@ -43,13 +44,15 @@ # WHERE DUMP FILES ARE WRITTEN DUMP_PATH = '/var/local/submitty/submitty-dumps' -def delete_obsolete_dumps(working_path, expiration_stamp): +def delete_obsolete_dumps(working_path, monthly_retention_stamp, expiration_stamp): """ Recurse through folders/files and delete any obsolete dump files :param working_path: path to recurse through + :param monthly_retention day of month that dump is always preserved (val < "01" when disabled) :param expiration_stamp: date to begin purging old dump files :type working_path: string + :type monthly_retention string :type expiration_stamp: string """ @@ -62,7 +65,7 @@ def delete_obsolete_dumps(working_path, expiration_stamp): for file in files_list: if os.path.isdir(file): # If the file is a folder, recurse - delete_obsolete_dumps(file, expiration_stamp) + delete_obsolete_dumps(file, monthly_retention_stamp, expiration_stamp) else: # File date was concat'ed into the file's name. Use regex to isolate date from full path. # e.g. "/var/local/submitty-dumps/s18/cs1000/180424_s18_cs1000.dbdump" @@ -77,7 +80,7 @@ def delete_obsolete_dumps(working_path, expiration_stamp): match = re.search('(\d{6}_\w+_\w+\.dbdump)$', file) if match is not None: file_date_stamp = match.group(0)[0:6] - if file_date_stamp <= expiration_stamp: + if file_date_stamp <= expiration_stamp and monthly_retention_stamp != file_date_stamp[4:6]: os.remove(file) def main(): @@ -89,11 +92,13 @@ def main(): # READ COMMAND LINE ARGUMENTS # Note that -t and -g are different args and mutually exclusive - parser = argparse.ArgumentParser(description='Dump all Submitty databases for a particular academic term.') - parser.add_argument('-e', action='store', nargs='?', type=int, default=0, help='Set number of days expiration of older dumps (default: no expiration).', metavar='days') + parser = argparse.ArgumentParser(description='Dump all Submitty databases for a particular academic term.', prefix_chars='-', add_help=True) + parser.add_argument('-e', action='store', type=int, default=0, help='Set number of days expiration of older dumps (default: no expiration).', metavar='days') + parser.add_argument('-m', action='store', type=int, default=0, choices=range(0,29), help='Day of month to ALWAYS retain a dumpfile (default: no monthly retention).', metavar='day of month') group = parser.add_mutually_exclusive_group(required=True) - group.add_argument('-t', action='store', nargs='?', type=str, help='Set the term code.', metavar='term code') + group.add_argument('-t', action='store', type=str, help='Set the term code.', metavar='term code') group.add_argument('-g', action='store_true', help='Guess term code based on calender month and year.') + args = parser.parse_args() # Get current date -- needed throughout the script, but also used when guessing default term code. @@ -112,6 +117,9 @@ def main(): else: semester = args.t + # MONTHLY RETENTION DATE + monthly_retention_stamp = "{:0>2}".format(args.m) + # GET DATABASE CONFIG FROM SUBMITTY fh = open(DB_CONFIG_PATH, "r") db_config = json.load(fh) @@ -175,7 +183,7 @@ def main(): working_path = "{}/{}".format(DUMP_PATH, semester) # RECURSIVELY CULL OBSOLETE DUMPS - delete_obsolete_dumps(working_path, expiration_stamp) + delete_obsolete_dumps(working_path, monthly_retention_stamp, expiration_stamp) if __name__ == "__main__": main() From 4963abb7fbc20ecbcc99ba13522bfd437720b00e Mon Sep 17 00:00:00 2001 From: pbailie Date: Wed, 29 Aug 2018 19:48:11 -0400 Subject: [PATCH 2/6] db_backup.py Monthly retention WIP Should now work with day > 28 and still preserve at every end of month. --- nightly_db_backup/db_backup.py | 62 +++++++++++++++++----------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/nightly_db_backup/db_backup.py b/nightly_db_backup/db_backup.py index 49ba5c4..19d9e8f 100644 --- a/nightly_db_backup/db_backup.py +++ b/nightly_db_backup/db_backup.py @@ -44,16 +44,16 @@ # WHERE DUMP FILES ARE WRITTEN DUMP_PATH = '/var/local/submitty/submitty-dumps' -def delete_obsolete_dumps(working_path, monthly_retention_stamp, expiration_stamp): +def delete_obsolete_dumps(working_path, monthly_retention, expiration_date): """ Recurse through folders/files and delete any obsolete dump files - :param working_path: path to recurse through - :param monthly_retention day of month that dump is always preserved (val < "01" when disabled) - :param expiration_stamp: date to begin purging old dump files - :type working_path: string - :type monthly_retention string - :type expiration_stamp: string + :param working_path: path to recurse through + :param monthly_retention: day of month that dump is always preserved (val < 1 when disabled) + :param expiration_date: date to begin purging old dump files + :type working_path: string + :type monthly_retention: integer + :type expiration_date: datetime.date object """ # Filter out '.', '..', and any "hidden" files/directories. @@ -65,23 +65,25 @@ def delete_obsolete_dumps(working_path, monthly_retention_stamp, expiration_stam for file in files_list: if os.path.isdir(file): # If the file is a folder, recurse - delete_obsolete_dumps(file, monthly_retention_stamp, expiration_stamp) + delete_obsolete_dumps(file, monthly_retention, expiration_date) else: - # File date was concat'ed into the file's name. Use regex to isolate date from full path. - # e.g. "/var/local/submitty-dumps/s18/cs1000/180424_s18_cs1000.dbdump" - # The date substring can be located with high confidence by looking for: - # - final token of the full path (the actual file name) - # - file name consists of three tokens delimited by '_' chars - # - first token is exactly 6 digits, the date stamp. - # - second token is the semester code, at least one 'word' char - # - third token is the course code, at least one 'word' char - # - filename always ends in ".dbdump" - # - then take substring [0:6] to get "180424". - match = re.search('(\d{6}_\w+_\w+\.dbdump)$', file) - if match is not None: - file_date_stamp = match.group(0)[0:6] - if file_date_stamp <= expiration_stamp and monthly_retention_stamp != file_date_stamp[4:6]: - os.remove(file) + # Determine file's date from its filename + filename = file.split('/')[-1] + datestamp = filename.split('_')[0] + year, month, day = map(int, datestamp.split('-')) + file_date = datetime.date(year, month, day) + + # Conditions to NOT delete old file: + if file_date > expiration_date: + pass + elif file_date.day == monthly_retention: + pass + # A month can be as few as 28 days, but we NEVER skip months even when "-m" is 29, 30, or 31. + elif monthly_retention > 28 and file_date.day == calendar.monthrange(file_date.year, file_date.month)[1]: + pass + else: + os.remove(file) + def main(): """ Main """ @@ -94,7 +96,7 @@ def main(): # Note that -t and -g are different args and mutually exclusive parser = argparse.ArgumentParser(description='Dump all Submitty databases for a particular academic term.', prefix_chars='-', add_help=True) parser.add_argument('-e', action='store', type=int, default=0, help='Set number of days expiration of older dumps (default: no expiration).', metavar='days') - parser.add_argument('-m', action='store', type=int, default=0, choices=range(0,29), help='Day of month to ALWAYS retain a dumpfile (default: no monthly retention).', metavar='day of month') + parser.add_argument('-m', action='store', type=int, default=0, choices=range(0,32), help='Day of month to ALWAYS retain a dumpfile (default: no monthly retention).', metavar='day of month') group = parser.add_mutually_exclusive_group(required=True) group.add_argument('-t', action='store', type=str, help='Set the term code.', metavar='term code') group.add_argument('-g', action='store_true', help='Guess term code based on calender month and year.') @@ -102,10 +104,9 @@ def main(): args = parser.parse_args() # Get current date -- needed throughout the script, but also used when guessing default term code. - # (today.year % 100) determines the two digit year. e.g. '2017' -> '17' today = datetime.date.today() - year = str(today.year % 100) - today_stamp = '{:0>2}{:0>2}{:0>2}'.format(year, today.month, today.day) + year = str(today.year) + today_stamp = today.isoformat() # PARSE COMMAND LINE ARGUMENTS expiration = args.e @@ -118,7 +119,7 @@ def main(): semester = args.t # MONTHLY RETENTION DATE - monthly_retention_stamp = "{:0>2}".format(args.m) + monthly_retention = args.m # GET DATABASE CONFIG FROM SUBMITTY fh = open(DB_CONFIG_PATH, "r") @@ -178,12 +179,11 @@ def main(): # DETERMINE EXPIRATION DATE (to delete obsolete dump files) # (do this BEFORE recursion so it is not calculated recursively n times) if expiration > 0: - expiration_date = datetime.date.fromordinal(today.toordinal() - expiration) - expiration_stamp = '{:0>2}{:0>2}{:0>2}'.format(expiration_date.year % 100, expiration_date.month, expiration_date.day) + expiration_date = datetime.date.fromordinal(today.toordinal() - expiration) working_path = "{}/{}".format(DUMP_PATH, semester) # RECURSIVELY CULL OBSOLETE DUMPS - delete_obsolete_dumps(working_path, monthly_retention_stamp, expiration_stamp) + delete_obsolete_dumps(working_path, monthly_retention, expiration_date) if __name__ == "__main__": main() From dba652cddebc9c6e0987549bcac4d761b66de470 Mon Sep 17 00:00:00 2001 From: Peter Bailie Date: Thu, 30 Aug 2018 16:40:18 -0400 Subject: [PATCH 3/6] db_backup.py bugfix --- nightly_db_backup/db_backup.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nightly_db_backup/db_backup.py b/nightly_db_backup/db_backup.py index 19d9e8f..9e7bcc3 100644 --- a/nightly_db_backup/db_backup.py +++ b/nightly_db_backup/db_backup.py @@ -84,7 +84,6 @@ def delete_obsolete_dumps(working_path, monthly_retention, expiration_date): else: os.remove(file) - def main(): """ Main """ @@ -105,7 +104,7 @@ def main(): # Get current date -- needed throughout the script, but also used when guessing default term code. today = datetime.date.today() - year = str(today.year) + year = today.strftime("%y") today_stamp = today.isoformat() # PARSE COMMAND LINE ARGUMENTS From 03266c6c4b7ac60640bec0433b9f2c625194f4a2 Mon Sep 17 00:00:00 2001 From: pbailie Date: Thu, 30 Aug 2018 21:40:09 -0400 Subject: [PATCH 4/6] Changes to be committed: modified: nightly_db_backup/db_backup.py WIP --- nightly_db_backup/db_backup.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/nightly_db_backup/db_backup.py b/nightly_db_backup/db_backup.py index 9e7bcc3..dffdb81 100644 --- a/nightly_db_backup/db_backup.py +++ b/nightly_db_backup/db_backup.py @@ -68,6 +68,7 @@ def delete_obsolete_dumps(working_path, monthly_retention, expiration_date): delete_obsolete_dumps(file, monthly_retention, expiration_date) else: # Determine file's date from its filename + # Note: datetime.date.fromisoformat() doesn't exist in Python 3.6 or earlier. filename = file.split('/')[-1] datestamp = filename.split('_')[0] year, month, day = map(int, datestamp.split('-')) @@ -78,12 +79,12 @@ def delete_obsolete_dumps(working_path, monthly_retention, expiration_date): pass elif file_date.day == monthly_retention: pass - # A month can be as few as 28 days, but we NEVER skip months even when "-m" is 29, 30, or 31. - elif monthly_retention > 28 and file_date.day == calendar.monthrange(file_date.year, file_date.month)[1]: + # A month can be as few as 28 days, but we NEVER skip months even when "-m" is 28, 29, 30, or 31. + elif monthly_retention > 28 and (file_date.day == calendar.monthrange(file_date.year, file_date.month)[1] and file_date.day <= monthly_retention): pass else: - os.remove(file) - +# os.remove(file) + print("remove " + file) def main(): """ Main """ From 0dc0e726b8080953c0955c31d212967c6617367c Mon Sep 17 00:00:00 2001 From: pbailie Date: Fri, 31 Aug 2018 15:41:18 -0400 Subject: [PATCH 5/6] readme.md Doc update --- nightly_db_backup/readme.md | 62 ++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 28 deletions(-) diff --git a/nightly_db_backup/readme.md b/nightly_db_backup/readme.md index e6c6a0c..d004495 100644 --- a/nightly_db_backup/readme.md +++ b/nightly_db_backup/readme.md @@ -1,7 +1,7 @@ # Nightly Database Backup Python Script -Readme June 26, 2018 +Readme August 31, 2018 -### db_backup.py +## db_backup.py This script will read a course list, corresponding to a specific term, from the 'master' Submitty database. With a course list, the script will use @@ -9,9 +9,9 @@ Postgresql's "pg_dump" tool to retrieve a SQL dump of the submitty 'master' database and each registered course's Submitty database of a specific semester. The script also has cleanup functionality to automatically remove older dumps. -*db_backup.py is written in Python 3, and tested with Python 3.4.* +*db_backup.py is written in Python 3, and tested with Python 3.6.* ---- +### Term code The term code can be specified as a command line argument as option `-t`. @@ -34,15 +34,17 @@ The term code will follow the pattern of TYY, where - YY is the two digit year - e.g. April 15, 2018 will correspond to "s18" (Spring 2018). -`-t` and `-g` are mutually exclusive. +`-t` and `-g` are mutually exclusive, but one is required. ---- +### Date stamp -Each dump has a date stamp in its name following the format of "YYMMD", +Each dump has a date stamp in its name following the format of "YYYY-MM-DD", followed by the semester code, then the course code. -e.g. '180423_s18_cs100.dbdump' is a dump taken on April 23, 2018 of the Spring -2018 semester for course CS-100. +e.g. `2018-04-23_s18_cs100.dbdump` is a dump taken on April 23, 2018 of the +Spring 2018 semester for course CS-100. + +### Cleanup schedule Older dumps can be automatically purged with the command line option "-e". @@ -56,42 +58,46 @@ term being processed will be purged, in this example, 'f17'. The default expiration value is 0 (no expiration -- no files are purged) should this argument be ommitted. ---- +### Monthly retention + +Command line option `-m` will set a monthly retention date. Dumps taken on that +date will not be purged. In the case the retention date is past the 28th, end +of month dumps will still be retained. + +e.g. `-m 30` will retain any dump on the 30th of the month. In the case of +February, dumps on the 28th, or 29th on a leap year, are also retained. Dumps +on the 31st of another month are not retained (as they were retained on the +30th). + +No monthly retention occurs if `-m` is omitted or set `-m 0`. + +### Restore a dump Submitty databases can be restored from a dump using the pg_restore tool. -q.v. [https://www.postgresql.org/docs/9.5/static/app-pgrestore.html](https://www.postgresql.org/docs/9.5/static/app-pgrestore.html) +q.v. [https://www.postgresql.org/docs/10/static/app-pgrestore.html](https://www.postgresql.org/docs/10/static/app-pgrestore.html) This is script intended to be run as a cronjob by 'root' on the same server machine as the Submitty system. *Running this script on another server other than Submitty has not been tested.* ---- - -Please configure options near the top of the code. - -DB_HOST: Hostname of the Submitty databases. You may use 'localhost' if -Postgresql is on the same machine as the Submitty system. - -DB_USER: The username that interacts with Submitty databases. Typically -'hsdbu'. +### Options at the top of the code -DB_PASS: The password for Submitty's database account (e.g. account 'hsdbu'). -**Do NOT use the placeholder value of 'DB.p4ssw0rd'** +`DB_CONFIG_PATH` looks for Submitty's `database.json` file that contains +database authentication information. Leaving this at the default is usually OK. -DUMP_PATH: The folder path to store the database dumps. Course folders will -be created from this path, and the dumps stored in their respective course -folders, grouped by semester. +`DUMP_PATH` indicates where dump files are stored. Only change this if the +default location is undesirable for your server. ---- +### FERPA Warning WARNING: Database dumps can contain student information that is protected by [FERPA (20 U.S.C. § 1232g)](https://www2.ed.gov/policy/gen/guid/fpco/ferpa/index.html). Please consult with your school's IT dept. for advice on data security policies and practices. ---- +### Etc. -db_backup.py is tested to run on Python 3.4 or higher. +db_backup.py is tested to run on Python 3.6 or higher. NOTE: Some modification of code may be necessary to work with your school's information systems. From 406921a31f28eb5bc71eb25fbc98b2501497bf3f Mon Sep 17 00:00:00 2001 From: pbailie Date: Fri, 31 Aug 2018 16:37:12 -0400 Subject: [PATCH 6/6] readme.md More doc updates --- nightly_db_backup/readme.md | 56 +++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/nightly_db_backup/readme.md b/nightly_db_backup/readme.md index d004495..fa21849 100644 --- a/nightly_db_backup/readme.md +++ b/nightly_db_backup/readme.md @@ -11,7 +11,17 @@ The script also has cleanup functionality to automatically remove older dumps. *db_backup.py is written in Python 3, and tested with Python 3.6.* -### Term code +NOTE: Some modification of code may be necessary to work with your school's +information systems. + +### FERPA Warning + +WARNING: Database dumps can contain student information that is protected by +[FERPA (20 U.S.C. § 1232g)](https://www2.ed.gov/policy/gen/guid/fpco/ferpa/index.html). +Please consult with your school's IT dept. for advice on data security policies +and practices. + +### Term Code The term code can be specified as a command line argument as option `-t`. @@ -28,37 +38,37 @@ current month and year of the server's date. The term code will follow the pattern of TYY, where - T is the term - - **s** is for Spring (Jan - May) - - **u** is for Summer (Jun - Jul) - - **f** is for Fall (Aug-Dec) -- YY is the two digit year + - `s` is for Spring (Jan - May) + - `u` is for Summer (Jun - Jul) + - `f` is for Fall (Aug-Dec) +- `YY` is the two digit year - e.g. April 15, 2018 will correspond to "s18" (Spring 2018). `-t` and `-g` are mutually exclusive, but one is required. -### Date stamp +### Date Stamp -Each dump has a date stamp in its name following the format of "YYYY-MM-DD", +Each dump has a date stamp in its name following the format of `YYYY-MM-DD`, followed by the semester code, then the course code. e.g. `2018-04-23_s18_cs100.dbdump` is a dump taken on April 23, 2018 of the Spring 2018 semester for course CS-100. -### Cleanup schedule +### Cleanup Schedule -Older dumps can be automatically purged with the command line option "-e". +Older dumps can be automatically purged with the command line option `-e`. For example: `python3 ./db_backup.py -t f17 -e 7` will purge any dumps with a stamp seven days or older. Only dumps of the -term being processed will be purged, in this example, 'f17'. +term being processed will be purged, in this example, `f17`. The default expiration value is 0 (no expiration -- no files are purged) should this argument be ommitted. -### Monthly retention +### Monthly Retention Command line option `-m` will set a monthly retention date. Dumps taken on that date will not be purged. In the case the retention date is past the 28th, end @@ -69,35 +79,27 @@ February, dumps on the 28th, or 29th on a leap year, are also retained. Dumps on the 31st of another month are not retained (as they were retained on the 30th). +For clarification: `-m 31` will retain dumps taken on February 28/29; +April, June, September, November 30; and January, March, May, July, August, +October, December 31. + No monthly retention occurs if `-m` is omitted or set `-m 0`. -### Restore a dump +### Restore a Dump Submitty databases can be restored from a dump using the pg_restore tool. q.v. [https://www.postgresql.org/docs/10/static/app-pgrestore.html](https://www.postgresql.org/docs/10/static/app-pgrestore.html) +### Cron + This is script intended to be run as a cronjob by 'root' on the same server machine as the Submitty system. *Running this script on another server other than Submitty has not been tested.* -### Options at the top of the code +### Options At The Top Of The Code `DB_CONFIG_PATH` looks for Submitty's `database.json` file that contains database authentication information. Leaving this at the default is usually OK. `DUMP_PATH` indicates where dump files are stored. Only change this if the default location is undesirable for your server. - -### FERPA Warning - -WARNING: Database dumps can contain student information that is protected by -[FERPA (20 U.S.C. § 1232g)](https://www2.ed.gov/policy/gen/guid/fpco/ferpa/index.html). -Please consult with your school's IT dept. for advice on data security policies -and practices. - -### Etc. - -db_backup.py is tested to run on Python 3.6 or higher. - -NOTE: Some modification of code may be necessary to work with your school's -information systems.