84 changes: 51 additions & 33 deletions bin/concatenate_all.py
@@ -7,7 +7,6 @@
import argparse
import os
import json
import sys
import time
import fnmatch
from pathlib import Path
@@ -49,59 +48,78 @@ def parse_args():
return parser.parse_args()


def main():
start_time = time.time()
args = parse_args()

sys.stdout.write("CONCATENATE ALL...") # don't want a newline here so can't use print
sys.stdout.flush()

config_path = os.path.join(args.basepath, "config.json")
if not os.path.isfile(config_path):
print(f"Error: invalid config path provided ({config_path})")
exit(1)

with open(config_path) as config_file:
config = json.load(config_file)

semester = config["semester"]
course = config["course"]
gradeable = config["gradeable"]
version_mode = config["version"]
users_to_ignore = config["ignore_submissions"]
def validate(config, args):
# load parameters from the config to be checked
regex_patterns = config["regex"].split(',')
regex_dirs = config["regex_dirs"]
language = config["language"]
threshold = int(config["threshold"])
sequence_length = int(config["sequence_length"])
prior_term_gradeables = config["prior_term_gradeables"]

# ==========================================================================
# Error checking
# Check we have a tokenizer to support the configured language
langs_data_json_path = "./data.json" # data.json is in the Lichen/bin directory after install
with open(langs_data_json_path, 'r') as langs_data_file:
langs_data = json.load(langs_data_file)
if language not in langs_data:
raise SystemExit(f"ERROR! tokenizing not supported for language {language}")

# Check values of common code threshold and sequence length
if (threshold < 2):
raise SystemExit("ERROR! threshold must be >= 2")

if (sequence_length < 1):
raise SystemExit("ERROR! sequence_length must be >= 1")

# Check for backwards crawling
for e in regex_patterns:
if ".." in e:
print('ERROR! Invalid path component ".." in regex')
exit(1)
raise SystemExit('ERROR! Invalid path component ".." in regex')

for ptg in prior_term_gradeables:
for field in ptg:
if ".." in field:
print('ERROR! Invalid path component ".." in prior_term_gradeable field')
exit(1)
raise SystemExit('ERROR! Invalid component ".." in prior_term_gradeable path')

# check permissions to make sure we have access to the prior term gradeables
my_course_group_perms = Path(args.basepath).group()
for ptg in prior_term_gradeables:
if Path(args.datapath, ptg["prior_semester"], ptg["prior_course"]).group()\
!= my_course_group_perms:
print(f"Error: Invalid permissions to access course {ptg['prior_semester']}"
raise SystemExit(f"ERROR! Invalid permissions to access course {ptg['prior_semester']}"
f"/{ptg['prior_course']}")
exit(1)

# make sure the regex directory is one of the acceptable directories
for dir in regex_dirs:
if dir not in ["submissions", "results", "checkout"]:
print("ERROR! ", dir, " is not a valid input directory for Lichen")
exit(1)
raise SystemExit("ERROR! ", dir, " is not a valid input directory for Lichen")


def main():
start_time = time.time()
args = parse_args()

print("CONCATENATE ALL...", end="")

config_path = os.path.join(args.basepath, "config.json")
if not os.path.isfile(config_path):
raise SystemExit(f"ERROR! invalid config path provided ({config_path})")

with open(config_path) as config_file:
config = json.load(config_file)

# perform error checking on config parameters
validate(config, args)

# parameters to be used in this file
semester = config["semester"]
course = config["course"]
gradeable = config["gradeable"]
version_mode = config["version"]
regex_patterns = config["regex"].split(',')
regex_dirs = config["regex_dirs"]
prior_term_gradeables = config["prior_term_gradeables"]
users_to_ignore = config["ignore_submissions"]

# ==========================================================================
# loop through and concatenate the selected files for each user in this gradeable
@@ -196,7 +214,7 @@ def main():
my_concatenated_file = os.path.join(version_path, "submission.concatenated")
with open(my_concatenated_file, "r+") as my_cf:
if my_cf.read() == "":
my_cf.write("Error: No files matched provided regex in selected directories")
my_cf.write("ERROR! No files matched provided regex in selected directories")

# do the same for the other gradeables
for other_gradeable in prior_term_gradeables:
@@ -210,7 +228,7 @@ def main():
my_concatenated_file = os.path.join(other_version_path, "submission.concatenated")
with open(my_concatenated_file, "r+") as my_cf:
if my_cf.read() == "":
my_cf.write("Error: No files matched provided regex in"
my_cf.write("ERROR! No files matched provided regex in"
"selected directories")

# ==========================================================================
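The change repeated throughout this file is collapsing print(...) + exit(1) pairs into a single raise SystemExit(msg). The two are nearly equivalent: the interpreter prints the SystemExit message and exits with status 1, though the message now goes to stderr rather than stdout. A minimal sketch of that behavior, run in a subprocess so the exit status is observable (the threshold message is borrowed from validate() above; the rest is illustrative):

import subprocess
import sys

# Fail the way validate() does, then inspect how the interpreter reports it.
snippet = "raise SystemExit('ERROR! threshold must be >= 2')"
proc = subprocess.run([sys.executable, "-c", snippet],
                      capture_output=True, text=True)

print(proc.returncode)       # 1 -- same exit status as the old exit(1)
print(proc.stderr.strip())   # ERROR! threshold must be >= 2 (written to stderr)
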
17 changes: 3 additions & 14 deletions bin/hash_all.py
@@ -9,7 +9,6 @@
import os
import json
import time
import sys
import hashlib


@@ -26,13 +25,6 @@ def hasher(lichen_config_data, my_tokenized_file, my_hashes_file):
data_json_path = "./data.json" # data.json is in the Lichen/bin directory after install
with open(data_json_path) as token_data_file:
token_data = json.load(token_data_file)
if language not in token_data:
print("\n\nERROR: UNKNOWN HASHER\n\n")
exit(1)

if (sequence_length < 1):
print("ERROR! sequence_length must be >= 1")
exit(1)

with open(my_tokenized_file, 'r', encoding='ISO-8859-1') as my_tf:
with open(my_hashes_file, 'w') as my_hf:
@@ -57,15 +49,13 @@ def main():
with open(os.path.join(args.basepath, "config.json")) as lichen_config:
lichen_config_data = json.load(lichen_config)

sys.stdout.write("HASH ALL...")
sys.stdout.flush()
print("HASH ALL...", end="")

# ==========================================================================
# walk the subdirectories of this gradeable
users_dir = os.path.join(args.basepath, "users")
if not os.path.isdir(users_dir):
print("Error: Unable to find users directory")
exit(1)
raise SystemExit("ERROR! Unable to find users directory")

for user in sorted(os.listdir(users_dir)):
user_dir = os.path.join(users_dir, user)
@@ -86,8 +76,7 @@

other_gradeables_dir = os.path.join(args.basepath, "other_gradeables")
if not os.path.isdir(other_gradeables_dir):
print("Error: Unable to find other gradeables directory")
exit(1)
raise SystemExit("ERROR! Unable to find other gradeables directory")

for other_gradeable in sorted(os.listdir(other_gradeables_dir)):
other_gradeable_dir = os.path.join(other_gradeables_dir, other_gradeable)
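With the language and sequence-length checks hoisted into validate(), hasher() keeps only its core job: sliding a window of sequence_length tokens across each tokenized submission and writing one short hash per window. That loop is outside this diff, so the following is only a sketch of the technique, with an assumed token list and an assumed truncated-MD5 hash:

import hashlib

def hash_windows(token_values, sequence_length, hash_size=8):
    # One truncated hash per consecutive run of sequence_length tokens.
    hashes = []
    for i in range(len(token_values) - sequence_length + 1):
        window = " ".join(token_values[i:i + sequence_length])
        hashes.append(hashlib.md5(window.encode()).hexdigest()[:hash_size])
    return hashes

# Toy usage: three overlapping windows of two tokens each.
print(hash_windows(["int", "main", "(", ")"], sequence_length=2))

The sequence_length >= 1 guard deleted above matters here: a window shorter than one token would make the loop bound meaningless, which is exactly why validate() now rejects it before any hashing starts.
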
16 changes: 4 additions & 12 deletions bin/tokenize_all.py
@@ -7,7 +7,6 @@
import os
import json
import time
import sys


def parse_args():
@@ -24,11 +23,7 @@ def tokenize(lichen_config_data, my_concatenated_file, my_tokenized_file):
data_json_path = "./data.json" # data.json is in the Lichen/bin directory after install
with open(data_json_path, 'r') as token_data_file:
token_data = json.load(token_data_file)
if language not in token_data:
print("\n\nERROR: UNKNOWN TOKENIZER\n\n")
exit(1)
else:
language_token_data = token_data[language]
language_token_data = token_data[language]

tokenizer = f"./{language_token_data['tokenizer']}"

@@ -46,8 +41,7 @@ def main():
start_time = time.time()
args = parse_args()

sys.stdout.write("TOKENIZE ALL...")
sys.stdout.flush()
print("TOKENIZE ALL...", end="")

with open(os.path.join(args.basepath, "config.json")) as lichen_config:
lichen_config_data = json.load(lichen_config)
@@ -56,8 +50,7 @@
# walk the subdirectories to tokenize this gradeable's submissions
users_dir = os.path.join(args.basepath, "users")
if not os.path.isdir(users_dir):
print("Error: Unable to find users directory")
exit(1)
raise SystemExit("ERROR! Unable to find users directory")

for user in sorted(os.listdir(users_dir)):
user_dir = os.path.join(users_dir, user)
@@ -77,8 +70,7 @@
# tokenize the other prior term gradeables' submissions
other_gradeables_dir = os.path.join(args.basepath, "other_gradeables")
if not os.path.isdir(other_gradeables_dir):
print("Error: Unable to find other gradeables directory")
exit(1)
raise SystemExit("ERROR! Unable to find other gradeables directory")

for other_gradeable in sorted(os.listdir(other_gradeables_dir)):
other_gradeable_dir = os.path.join(other_gradeables_dir, other_gradeable)
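Dropping the else-branch means tokenize() now indexes token_data[language] directly, trusting that validate() rejected unsupported languages back in concatenate_all.py. The dispatch looks roughly like the sketch below; the data.json entry and the tokenizer's command-line arguments are assumptions for illustration, not Lichen's verified schema:

import subprocess

# Stand-in for the data.json mapping; only the 'tokenizer' field is visible in this diff.
token_data = {"plaintext": {"tokenizer": "plaintext_tokenizer.out"}}

language = "plaintext"  # hypothetical; really read from config.json

# Safe to index directly -- validate() already confirmed the key exists.
language_token_data = token_data[language]
tokenizer = f"./{language_token_data['tokenizer']}"

# Assumed invocation: input and output paths as positional arguments.
subprocess.run([tokenizer, "submission.concatenated", "submission.tokenized"], check=True)
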
3 changes: 0 additions & 3 deletions compare_hashes/compare_hashes.cpp
@@ -192,9 +192,6 @@ int main(int argc, char* argv[]) {
int sequence_length = config_file_json.value("sequence_length",1);
int threshold = config_file_json.value("threshold",5);

assert (sequence_length >= 1);
assert (threshold >= 2);

// error checking, confirm there are hashes to work with
boost::filesystem::path users_root_directory = lichen_gradeable_path / "users";
if (!boost::filesystem::exists(users_root_directory) ||
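The two asserts deleted here duplicated checks that validate() now performs during the first pipeline stage, so compare_hashes can assume threshold >= 2 and sequence_length >= 1 on entry. The move also improves failure behavior: a C++ assert compiled with NDEBUG disappears entirely, and even when active it aborts with a terse diagnostic rather than an actionable error, while the upstream Python checks always exit with a readable message. For reference, the invariants now enforced once in validate() (values illustrative):

# The checks that replace the removed asserts; see the concatenate_all.py diff above.
threshold = 5
sequence_length = 1

if threshold < 2:
    raise SystemExit("ERROR! threshold must be >= 2")

if sequence_length < 1:
    raise SystemExit("ERROR! sequence_length must be >= 1")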