diff --git a/bin/concatenate_all.py b/bin/concatenate_all.py
index 89df4b4..e506404 100644
--- a/bin/concatenate_all.py
+++ b/bin/concatenate_all.py
@@ -7,7 +7,6 @@
 import argparse
 import os
 import json
-import sys
 import time
 import fnmatch
 from pathlib import Path
@@ -49,59 +48,78 @@ def parse_args():
     return parser.parse_args()
 
 
-def main():
-    start_time = time.time()
-    args = parse_args()
-
-    sys.stdout.write("CONCATENATE ALL...")  # don't want a newline here so can't use print
-    sys.stdout.flush()
-
-    config_path = os.path.join(args.basepath, "config.json")
-    if not os.path.isfile(config_path):
-        print(f"Error: invalid config path provided ({config_path})")
-        exit(1)
-
-    with open(config_path) as config_file:
-        config = json.load(config_file)
-
-    semester = config["semester"]
-    course = config["course"]
-    gradeable = config["gradeable"]
-    version_mode = config["version"]
-    users_to_ignore = config["ignore_submissions"]
+def validate(config, args):
+    # load parameters from the config to be checked
     regex_patterns = config["regex"].split(',')
     regex_dirs = config["regex_dirs"]
+    language = config["language"]
+    threshold = int(config["threshold"])
+    sequence_length = int(config["sequence_length"])
     prior_term_gradeables = config["prior_term_gradeables"]
 
-    # ==========================================================================
-    # Error checking
+    # Check we have a tokenizer to support the configured language
+    langs_data_json_path = "./data.json"  # data.json is in the Lichen/bin directory after install
+    with open(langs_data_json_path, 'r') as langs_data_file:
+        langs_data = json.load(langs_data_file)
+    if language not in langs_data:
+        raise SystemExit(f"ERROR! tokenizing not supported for language {language}")
+
+    # Check values of common code threshold and sequence length
+    if threshold < 2:
+        raise SystemExit("ERROR! threshold must be >= 2")
+
+    if sequence_length < 1:
+        raise SystemExit("ERROR! sequence_length must be >= 1")
 
     # Check for backwards crawling
     for e in regex_patterns:
         if ".." in e:
-            print('ERROR! Invalid path component ".." in regex')
-            exit(1)
+            raise SystemExit('ERROR! Invalid path component ".." in regex')
 
     for ptg in prior_term_gradeables:
         for field in ptg:
             if ".." in field:
-                print('ERROR! Invalid path component ".." in prior_term_gradeable field')
-                exit(1)
+                raise SystemExit('ERROR! Invalid component ".." in prior_term_gradeable path')
 
     # check permissions to make sure we have access to the prior term gradeables
     my_course_group_perms = Path(args.basepath).group()
     for ptg in prior_term_gradeables:
         if Path(args.datapath, ptg["prior_semester"], ptg["prior_course"]).group()\
                 != my_course_group_perms:
-            print(f"Error: Invalid permissions to access course {ptg['prior_semester']}"
+            raise SystemExit(f"ERROR! Invalid permissions to access course {ptg['prior_semester']}"
                   f"/{ptg['prior_course']}")
-            exit(1)
 
     # make sure the regex directory is one of the acceptable directories
     for dir in regex_dirs:
         if dir not in ["submissions", "results", "checkout"]:
-            print("ERROR! ", dir, " is not a valid input directory for Lichen")
-            exit(1)
+            raise SystemExit(f"ERROR! {dir} is not a valid input directory for Lichen")
+
+
+def main():
+    start_time = time.time()
+    args = parse_args()
+
+    print("CONCATENATE ALL...", end="", flush=True)
+
+    config_path = os.path.join(args.basepath, "config.json")
+    if not os.path.isfile(config_path):
+        raise SystemExit(f"ERROR! invalid config path provided ({config_path})")
+
+    with open(config_path) as config_file:
+        config = json.load(config_file)
+
+    # perform error checking on config parameters
+    validate(config, args)
+
+    # parameters to be used in this file
+    semester = config["semester"]
+    course = config["course"]
+    gradeable = config["gradeable"]
+    version_mode = config["version"]
+    regex_patterns = config["regex"].split(',')
+    regex_dirs = config["regex_dirs"]
+    prior_term_gradeables = config["prior_term_gradeables"]
+    users_to_ignore = config["ignore_submissions"]
 
     # ==========================================================================
     # loop through and concatenate the selected files for each user in this gradeable
@@ -196,7 +214,7 @@ def main():
         my_concatenated_file = os.path.join(version_path, "submission.concatenated")
         with open(my_concatenated_file, "r+") as my_cf:
             if my_cf.read() == "":
-                my_cf.write("Error: No files matched provided regex in selected directories")
+                my_cf.write("ERROR! No files matched provided regex in selected directories")
 
     # do the same for the other gradeables
     for other_gradeable in prior_term_gradeables:
@@ -210,7 +228,7 @@ def main():
             my_concatenated_file = os.path.join(other_version_path, "submission.concatenated")
             with open(my_concatenated_file, "r+") as my_cf:
                 if my_cf.read() == "":
-                    my_cf.write("Error: No files matched provided regex in"
+                    my_cf.write("ERROR! No files matched provided regex in "
                                 "selected directories")
 
     # ==========================================================================
diff --git a/bin/hash_all.py b/bin/hash_all.py
index 99f8554..a9c1669 100644
--- a/bin/hash_all.py
+++ b/bin/hash_all.py
@@ -9,7 +9,6 @@
 import os
 import json
 import time
-import sys
 import hashlib
 
 
@@ -26,13 +25,6 @@ def hasher(lichen_config_data, my_tokenized_file, my_hashes_file):
     data_json_path = "./data.json"  # data.json is in the Lichen/bin directory after install
     with open(data_json_path) as token_data_file:
         token_data = json.load(token_data_file)
-        if language not in token_data:
-            print("\n\nERROR: UNKNOWN HASHER\n\n")
-            exit(1)
-
-        if (sequence_length < 1):
-            print("ERROR! sequence_length must be >= 1")
-            exit(1)
 
     with open(my_tokenized_file, 'r', encoding='ISO-8859-1') as my_tf:
         with open(my_hashes_file, 'w') as my_hf:
@@ -57,15 +49,13 @@ def main():
     with open(os.path.join(args.basepath, "config.json")) as lichen_config:
         lichen_config_data = json.load(lichen_config)
 
-    sys.stdout.write("HASH ALL...")
-    sys.stdout.flush()
+    print("HASH ALL...", end="", flush=True)
 
     # ==========================================================================
     # walk the subdirectories of this gradeable
     users_dir = os.path.join(args.basepath, "users")
     if not os.path.isdir(users_dir):
-        print("Error: Unable to find users directory")
-        exit(1)
+        raise SystemExit("ERROR! Unable to find users directory")
 
     for user in sorted(os.listdir(users_dir)):
         user_dir = os.path.join(users_dir, user)
@@ -86,8 +76,7 @@ def main():
 
     other_gradeables_dir = os.path.join(args.basepath, "other_gradeables")
     if not os.path.isdir(other_gradeables_dir):
-        print("Error: Unable to find other gradeables directory")
-        exit(1)
+        raise SystemExit("ERROR! Unable to find other gradeables directory")
 
     for other_gradeable in sorted(os.listdir(other_gradeables_dir)):
         other_gradeable_dir = os.path.join(other_gradeables_dir, other_gradeable)
diff --git a/bin/tokenize_all.py b/bin/tokenize_all.py
index b4b8d8e..766200d 100644
--- a/bin/tokenize_all.py
+++ b/bin/tokenize_all.py
@@ -7,7 +7,6 @@
 import os
 import json
 import time
-import sys
 
 
 def parse_args():
@@ -24,11 +23,7 @@ def tokenize(lichen_config_data, my_concatenated_file, my_tokenized_file):
     data_json_path = "./data.json"  # data.json is in the Lichen/bin directory after install
     with open(data_json_path, 'r') as token_data_file:
         token_data = json.load(token_data_file)
-        if language not in token_data:
-            print("\n\nERROR: UNKNOWN TOKENIZER\n\n")
-            exit(1)
-        else:
-            language_token_data = token_data[language]
+        language_token_data = token_data[language]
 
     tokenizer = f"./{language_token_data['tokenizer']}"
 
@@ -46,8 +41,7 @@ def main():
     start_time = time.time()
     args = parse_args()
 
-    sys.stdout.write("TOKENIZE ALL...")
-    sys.stdout.flush()
+    print("TOKENIZE ALL...", end="", flush=True)
 
     with open(os.path.join(args.basepath, "config.json")) as lichen_config:
         lichen_config_data = json.load(lichen_config)
@@ -56,8 +50,7 @@ def main():
     # walk the subdirectories to tokenize this gradeable's submissions
     users_dir = os.path.join(args.basepath, "users")
     if not os.path.isdir(users_dir):
-        print("Error: Unable to find users directory")
-        exit(1)
+        raise SystemExit("ERROR! Unable to find users directory")
 
     for user in sorted(os.listdir(users_dir)):
         user_dir = os.path.join(users_dir, user)
@@ -77,8 +70,7 @@ def main():
     # tokenize the other prior term gradeables' submissions
     other_gradeables_dir = os.path.join(args.basepath, "other_gradeables")
     if not os.path.isdir(other_gradeables_dir):
-        print("Error: Unable to find other gradeables directory")
-        exit(1)
+        raise SystemExit("ERROR! Unable to find other gradeables directory")
 
     for other_gradeable in sorted(os.listdir(other_gradeables_dir)):
         other_gradeable_dir = os.path.join(other_gradeables_dir, other_gradeable)
diff --git a/compare_hashes/compare_hashes.cpp b/compare_hashes/compare_hashes.cpp
index 329453e..a1b596f 100644
--- a/compare_hashes/compare_hashes.cpp
+++ b/compare_hashes/compare_hashes.cpp
@@ -192,9 +192,6 @@ int main(int argc, char* argv[]) {
   int sequence_length = config_file_json.value("sequence_length",1);
   int threshold = config_file_json.value("threshold",5);
 
-  assert (sequence_length >= 1);
-  assert (threshold >= 2);
-
   // error checking, confirm there are hashes to work with
   boost::filesystem::path users_root_directory = lichen_gradeable_path / "users";
   if (!boost::filesystem::exists(users_root_directory) ||