From 56c9c205f0e391eb29a8decfac433fedc55aa5f6 Mon Sep 17 00:00:00 2001
From: williamjallen
Date: Fri, 25 Jun 2021 08:19:25 -0400
Subject: [PATCH 01/52] Initial rewrite of process_all.sh

---
 bin/process_all.sh | 62 +++++++++++++++-------------------------------
 1 file changed, 20 insertions(+), 42 deletions(-)

diff --git a/bin/process_all.sh b/bin/process_all.sh
index 832ada9..c01a3fc 100644
--- a/bin/process_all.sh
+++ b/bin/process_all.sh
@@ -1,47 +1,25 @@
-#!/bin/bash
+# This script is the startup script for Lichen. It accepts a single path to a
+# directory containing a config file and creates the necessary output directories
+# as appropriate, relative to the provided path. It is possible to run this script
+# from the command line but it is meant to be run via the Plagiarism Detection UI.
 
-semester=$1
-course=$2
-gradeable=$3
+# TODO: Assert permissions, as necessary
 
-prev_argument=""
-prior_term_gradeables=()
-ignore_submissions=()
-for argument in "$@"
-do
-    if [[ $argument == --* ]]
-    then
-        prev_argument=$argument
-    else
-        case $prev_argument in
-            "--language")
-                language=$argument
-                ;;
-            "--window")
-                window=$argument
-                ;;
-            "--threshold")
-                threshold=$argument
-                ;;
-            "--regrex")
-                regrex=$argument
-                ;;
-            "--provided_code_path")
-                provided_code_path=$argument
-                ;;
-            "--prior_term_gradeables")
-                prior_term_gradeables+=("$argument")
-                ;;
-            "--ignore_submissions")
-                ignore_submissions+=("$argument")
-                ;;
-        esac
-    fi
-done
+basepath=$1 # holds the path to a directory containing a config for this gradeable
 
-/usr/local/submitty/Lichen/bin/concatenate_all.py $semester $course $gradeable
-/usr/local/submitty/Lichen/bin/tokenize_all.py $semester $course $gradeable --${language}
-/usr/local/submitty/Lichen/bin/hash_all.py $semester $course $gradeable --window $window --${language}
+# kill the script if there is no config file
+if [! -f "${basepath}/config.json" ]; then
+  echo "Unable to find config.json in provided directory"
+  exit 1
+fi
 
-/usr/local/submitty/Lichen/bin/compare_hashes.out $semester $course $gradeable --window $window
+# provided_code should already exist if the user wishes to run with provided code
+mkdir -p "${basepath}/logs"
+mkdir -p "${basepath}/other_gradeables"
+mkdir -p "${basepath}/users"
+
+/usr/local/submitty/Lichen/bin/concatenate_all.py $basepath
+#/usr/local/submitty/Lichen/bin/tokenize_all.py $basepath
+#/usr/local/submitty/Lichen/bin/hash_all.py $basepath
+#/usr/local/submitty/Lichen/bin/compare_hashes.out $basepath
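Patch 01 establishes the on-disk contract the rest of the series builds on: one gradeable directory holding the config and all intermediate artifacts. A sketch of that layout, using only the names the script creates or checks (the tree itself is illustrative):

    <basepath>/
        config.json          # input; the script exits if it is missing
        logs/
        other_gradeables/
        users/
        provided_code/       # expected to already exist when running with provided code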
-f "${basepath}/config.json" ]; then echo "Unable to find config.json in provided directory" exit 1 fi -# provided_code should already exist if the user wishes to run with provided code +# create these directories if they don't already exist mkdir -p "${basepath}/logs" +mkdir -p "${basepath}/provided_code" +mkdir -p "${basepath}/provided_code/files" mkdir -p "${basepath}/other_gradeables" mkdir -p "${basepath}/users" - -/usr/local/submitty/Lichen/bin/concatenate_all.py $basepath -#/usr/local/submitty/Lichen/bin/tokenize_all.py $basepath -#/usr/local/submitty/Lichen/bin/hash_all.py $basepath -#/usr/local/submitty/Lichen/bin/compare_hashes.out $basepath +# run all of the modules and exit if an error occurs +/usr/local/submitty/Lichen/bin/concatenate_all.py $basepath $datapath || exit 1 +#/usr/local/submitty/Lichen/bin/tokenize_all.py $basepath || exit 1 +#/usr/local/submitty/Lichen/bin/hash_all.py $basepath || exit 1 +#/usr/local/submitty/Lichen/bin/compare_hashes.out $basepath || exit 1 From 8737f557ba968d32b384f5080efa9667760a4320 Mon Sep 17 00:00:00 2001 From: sbelsk Date: Fri, 25 Jun 2021 10:54:45 -0400 Subject: [PATCH 03/52] Make modifications to file paths and add timers --- compare_hashes/compare_hashes.cpp | 98 ++++++++++++++++++------------- 1 file changed, 56 insertions(+), 42 deletions(-) diff --git a/compare_hashes/compare_hashes.cpp b/compare_hashes/compare_hashes.cpp index e15ef0c..b283e88 100644 --- a/compare_hashes/compare_hashes.cpp +++ b/compare_hashes/compare_hashes.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include "boost/filesystem/operations.hpp" #include "boost/filesystem/path.hpp" @@ -157,17 +158,20 @@ bool ranking_sorter(const StudentRanking &a, const StudentRanking &b) { // =================================================================================== // =================================================================================== int main(int argc, char* argv[]) { - std::cout << "COMPARE HASHES..."; fflush(stdout); + time_t overall_start, overall_end; + time(&overall_start); // --------------------------------------------------------------------------- // deal with command line arguments assert (argc == 2); - std::string config_file = argv[1]; + std::string lichen_gradeable_path_str = argv[1]; + boost::filesystem::path lichen_gradeable_path = boost::filesystem::system_complete(lichen_gradeable_path_str); + boost::filesystem::path config_file_json_path = lichen_gradeable_path / "config.json"; - std::ifstream istr(config_file.c_str()); + std::ifstream istr(config_file_json_path.string()); assert (istr.good()); nlohmann::json config_file_json = nlohmann::json::parse(istr); @@ -181,17 +185,15 @@ int main(int argc, char* argv[]) { assert (threshold >= 2); // error checking, confirm there are hashes to work with - std::string tmp = "/var/local/submitty/courses/"+semester+"/"+course+"/lichen/hashes/"+gradeable; - boost::filesystem::path hashes_root_directory = boost::filesystem::system_complete(tmp); - if (!boost::filesystem::exists(hashes_root_directory) || - !boost::filesystem::is_directory(hashes_root_directory)) { - std::cerr << "ERROR with directory " << hashes_root_directory << std::endl; + boost::filesystem::path users_root_directory = lichen_gradeable_path / "users"; + if (!boost::filesystem::exists(users_root_directory) || + !boost::filesystem::is_directory(users_root_directory)) { + std::cerr << "ERROR with directory " << users_root_directory << std::endl; exit(0); } // the file path where we expect to find the hashed instructor 
provided code file - std::string tmp2 = "/var/local/submitty/courses/"+semester+"/"+course+"/lichen/hashes/"+gradeable+"/provided_code/provided_code/hashes.txt"; - boost::filesystem::path provided_code_file = boost::filesystem::system_complete(tmp2); + boost::filesystem::path provided_code_file = lichen_gradeable_path / "provided_code" / "hashes.txt"; // if file exists in that location, the provided code mode is enabled. bool provided_code_enabled = boost::filesystem::exists(provided_code_file); @@ -205,25 +207,26 @@ int main(int argc, char* argv[]) { // Stores all hashes from the instructor provided code std::unordered_set provided_code; + time_t start, end; + time(&start); + + if (provided_code_enabled) { + // load the instructor provided code's hashes + std::ifstream istr(provided_code_file.string()); + assert(istr.good()); + hash instructor_hash; + while (istr >> instructor_hash) { + provided_code.insert(instructor_hash); + } + } + // loop over all users boost::filesystem::directory_iterator end_iter; - for (boost::filesystem::directory_iterator dir_itr( hashes_root_directory ); dir_itr != end_iter; ++dir_itr) { + for (boost::filesystem::directory_iterator dir_itr( users_root_directory ); dir_itr != end_iter; ++dir_itr) { boost::filesystem::path username_path = dir_itr->path(); assert (is_directory(username_path)); std::string username = dir_itr->path().filename().string(); - if (username == "provided_code") { - assert(provided_code_enabled); - - // load the instructor provided code's hashes - std::ifstream istr(provided_code_file.string()); - hash instructor_hash; - while (istr >> instructor_hash) { - provided_code.insert(instructor_hash); - } - continue; - } - // loop over all versions for (boost::filesystem::directory_iterator username_itr( username_path ); username_itr != end_iter; ++username_itr) { boost::filesystem::path version_path = username_itr->path(); @@ -239,6 +242,7 @@ int main(int argc, char* argv[]) { boost::filesystem::path hash_file = version_path; hash_file /= "hashes.txt"; std::ifstream istr(hash_file.string()); + assert(istr.good()); hash input_hash; int location = 0; while (istr >> input_hash) { @@ -251,8 +255,9 @@ int main(int argc, char* argv[]) { } } - - std::cout << "finished loading" << std::endl; + time(&end); + double diff = difftime(end, start); + std::cout << "finished loading in " << diff << "s" << std::endl; // --------------------------------------------------------------------------- // THIS IS THE MAIN PLAGIARISM DETECTION ALGORITHM @@ -260,6 +265,7 @@ int main(int argc, char* argv[]) { // Used to calculate current progress (printed to the log) int my_counter = 0; int my_percent = 0; + time(&start); // walk over every Submission for (std::vector::iterator submission_itr = all_submissions.begin(); @@ -317,7 +323,9 @@ int main(int argc, char* argv[]) { } } - std::cout << "finished walking" << std::endl; + time(&end); + diff = difftime(end, start); + std::cout << "finished walking in " << diff << "s" << std::endl; // --------------------------------------------------------------------------- // Writing the output files and merging the results @@ -325,6 +333,7 @@ int main(int argc, char* argv[]) { my_counter = 0; my_percent = 0; std::cout << "writing matches files and merging regions..." 
<< std::endl; + time(&start); // Loop over all of the submissions, writing a JSON file for each one if it has suspicious matches for (std::vector::iterator submission_itr = all_submissions.begin(); @@ -487,11 +496,10 @@ int main(int argc, char* argv[]) { // save the file with matches per user nlohmann::json match_data = result; - std::string matches_dir = "/var/local/submitty/courses/"+semester+"/"+course - +"/lichen/matches/"+gradeable+"/"+submission_itr->student()+"/"+std::to_string(submission_itr->version()); - boost::filesystem::create_directories(matches_dir); - std::string matches_file = matches_dir+"/matches.json"; - std::ofstream ostr(matches_file); + boost::filesystem::path submission_dir = users_root_directory / submission_itr->student() / std::to_string(submission_itr->version()); + boost::filesystem::create_directories(submission_dir); + boost::filesystem::path matches_file = submission_dir / "matches.json"; + std::ofstream ostr(matches_file.string()); assert(ostr.good()); ostr << match_data.dump(4) << std::endl; @@ -503,16 +511,19 @@ int main(int argc, char* argv[]) { } } - std::cout << "done merging and writing matches files" << std::endl; + + time(&end); + diff = difftime(end, start); + std::cout << "done merging and writing matches files in " << diff << "s" << std::endl; // --------------------------------------------------------------------------- // Create a general summary of rankings of users by percentage match + std::cout << "writing rakings files..." << std::endl; + time(&start); // create a single file of students ranked by highest percentage of code plagiarised - std::string ranking_dir = "/var/local/submitty/courses/"+semester+"/"+course+"/lichen/ranking/"+gradeable+"/"; - std::string ranking_file = ranking_dir+"overall_ranking.txt"; - boost::filesystem::create_directories(ranking_dir); - std::ofstream ranking_ostr(ranking_file); + boost::filesystem::path ranking_file = lichen_gradeable_path / "overall_ranking.txt"; + std::ofstream ranking_ostr(ranking_file.string()); // a map of students to a pair of the version and highest percent match for each student std::unordered_map > highest_matches; @@ -596,11 +607,10 @@ int main(int argc, char* argv[]) { std::sort(student_ranking.begin(), student_ranking.end(), ranking_sorter); // create the directory and a file to write into - std::string ranking_student_dir = "/var/local/submitty/courses/"+semester+"/"+course+"/lichen/ranking/" - +gradeable+"/"+submission_itr->student()+"/"+std::to_string(submission_itr->version())+"/"; - std::string ranking_student_file = ranking_student_dir+submission_itr->student()+"_"+std::to_string(submission_itr->version())+".txt"; + boost::filesystem::path ranking_student_dir = users_root_directory / submission_itr->student() / std::to_string(submission_itr->version()); + boost::filesystem::path ranking_student_file = ranking_student_dir / "ranking.txt"; boost::filesystem::create_directories(ranking_student_dir); - std::ofstream ranking_student_ostr(ranking_student_file); + std::ofstream ranking_student_ostr(ranking_student_file.string()); // finally, write the file of ranking for this submission for (unsigned int i = 0; i < student_ranking.size(); i++) { @@ -610,10 +620,14 @@ int main(int argc, char* argv[]) { << std::setw(3) << std::right << student_ranking[i].version << std::endl; } } - + time(&end); + diff = difftime(end, start); + std::cout << "finished writing rankings in " << diff << "s" << std::endl; // --------------------------------------------------------------------------- - 
std::cout << "done" << std::endl; + time(&overall_end); + double overall_diff = difftime(overall_end, overall_start); + std::cout << "DONE in " << overall_diff << "s" << std::endl; } From acf48422f0d02276f454eef33541a7533b2761ab Mon Sep 17 00:00:00 2001 From: williamjallen Date: Fri, 25 Jun 2021 10:57:26 -0400 Subject: [PATCH 04/52] Overhaul concatenate_all.py --- bin/concatenate_all.py | 183 ++++++++++++++++++----------------------- 1 file changed, 81 insertions(+), 102 deletions(-) diff --git a/bin/concatenate_all.py b/bin/concatenate_all.py index 3a1e556..eab7aa5 100644 --- a/bin/concatenate_all.py +++ b/bin/concatenate_all.py @@ -8,52 +8,73 @@ import os import json import sys -import shutil +import time import fnmatch -CONFIG_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', '..', 'config') -with open(os.path.join(CONFIG_PATH, 'submitty.json')) as open_file: - OPEN_JSON = json.load(open_file) -SUBMITTY_DATA_DIR = OPEN_JSON['submitty_data_dir'] -SUBMITTY_INSTALL_DIR = OPEN_JSON['submitty_install_dir'] - IGNORED_FILES = [ ".submit.timestamp" ] +# returns a string containing the contents of the files which match the regex in the specified dir +def getConcatFilesInDir(input_dir, regex_patterns): + result = "" + for my_dir, _dirs, my_files in os.walk(input_dir): + # Determine if regex should be used (blank regex is equivalent to selecting all files) + files = sorted(my_files) + if regex_expressions[0] != "": + files_filtered = [] + for e in regex_patterns: + files_filtered.extend(fnmatch.filter(files, e.strip())) + files = files_filtered + + for my_file in files: + # exclude any files we have ignored for all submissions + if my_file in IGNORED_FILES: + continue + absolute_path = os.path.join(my_dir, my_file) + # print a separator & filename + with open(absolute_path, encoding='ISO-8859-1') as tmp: + result += f"=============== {my_file} ===============\n" + # append the contents of the file + result += tmp.read() + "\n" + return result + + def parse_args(): parser = argparse.ArgumentParser(description="") - parser.add_argument("config_path") + parser.add_argument("basepath") + parser.add_argument("datapath") return parser.parse_args() def main(): + start_time = time.time() args = parse_args() - sys.stdout.write("CONCATENATE ALL...") + sys.stdout.write("CONCATENATE ALL...") # don't want a newline here so can't use print sys.stdout.flush() - with open(args.config_path) as lichen_config: - lichen_config_data = json.load(lichen_config) - semester = lichen_config_data["semester"] - course = lichen_config_data["course"] - gradeable = lichen_config_data["gradeable"] - users_to_ignore = lichen_config_data["ignore_submissions"] + config_path = args.basepath + '/config.json' + if not os.path.isfile(config_path): + print(f"Error: invalid config path provided ({config_path})") + exit(1) - # this assumes regex is seperated by a ',' - regex_expressions = lichen_config_data["regex"].split(',') - regex_dirs = lichen_config_data["regex_dirs"] + with open(config_path) as config_file: + config = json.load(config_file) + + semester = config["semester"] + course = config["course"] + gradeable = config["gradeable"] + users_to_ignore = config["ignore_submissions"] + regex_patterns = config["regex"].split(',') + regex_dirs = config["regex_dirs"] # ========================================================================== - # error checking - course_dir = os.path.join(SUBMITTY_DATA_DIR, "courses", semester, course) - if not os.path.isdir(course_dir): - print("ERROR! 
", course_dir, " is not a valid course directory") - exit(1) + # Error checking - for e in regex_expressions: - # Check for backwards crawling + # Check for backwards crawling + for e in regex_patterns: if ".." in e: print('ERROR! Invalid path component ".." in regex') exit(1) @@ -64,99 +85,57 @@ def main(): exit(1) # ========================================================================== - # create the directory - concatenated_dir = os.path.join(course_dir, "lichen", "concatenated", gradeable) - if not os.path.isdir(concatenated_dir): - os.makedirs(concatenated_dir) - - # ========================================================================== - count_total_files = 0 + # loop through and concatenate the selected files for each user in this gradeable for dir in regex_dirs: - submission_dir = os.path.join(course_dir, dir, gradeable) - - # more error checking - if not os.path.isdir(submission_dir): - print("ERROR! ", submission_dir, " is not a valid gradeable ", dir, " directory") - exit(1) - - # ========================================================================= - # walk the subdirectories - for user in sorted(os.listdir(submission_dir)): - if not os.path.isdir(os.path.join(submission_dir, user)): + gradeable_path = os.path.join(datapath, semester, course, dir, gradeable) + # loop over each user + for user in sorted(os.listdir(gradeable_path)): + user_path = os.path.join(gradeable_path, user) + if not os.path.isdir(user_path): continue elif user in users_to_ignore: continue - for version in sorted(os.listdir(os.path.join(submission_dir, user))): - if not os.path.isdir(os.path.join(submission_dir, user, version)): + + # loop over each version + for version in sorted(os.listdir(user_path)): + version_path = os.path.join(user_path, version) + if not os.path.isdir(version_path): continue - # ----------------------------------------------------------------- - # concatenate all files for this submissison into a single file - my_concatenated_dir = os.path.join(concatenated_dir, user, version) - if not os.path.isdir(my_concatenated_dir): - os.makedirs(my_concatenated_dir) - my_concatenated_file = os.path.join(my_concatenated_dir, "submission.concatenated") - - with open(my_concatenated_file, 'a') as my_cf: - # loop over all files in all subdirectories - base_path = os.path.join(submission_dir, user, version) - for my_dir, _dirs, my_files in os.walk(base_path): - # Determine if regex should be used (no regex provided - # is equivalent to selecting all files) - files = sorted(my_files) - if regex_expressions[0] != "": - files_filtered = [] - for e in regex_expressions: - files_filtered.extend(fnmatch.filter(files, e.strip())) - files = files_filtered - - for my_file in files: - # exclude any files we have ignored for all submissions - if my_file in IGNORED_FILES: - continue - absolute_path = os.path.join(my_dir, my_file) - # print a separator & filename - my_cf.write(f"=============== {my_file} ===============\n") - with open(absolute_path, encoding='ISO-8859-1') as tmp: - # append the contents of the file - my_cf.write(tmp.read()) - my_cf.write("\n") - count_total_files += 1 + output_file_path = os.path.join(args.basepath, user, + version, "submission.concatenated") + + if not os.path.exists(os.path.dirname(output_file_path)): + os.makedirs(os.path.dirname(output_file_path)) + + # append to concatenated file + with open(output_file_path, "a") as output_file: + concatenated_contents = getConcatFilesInDir(version_path, regex_patterns) + output_file.write(concatenated_contents) + + # 
========================================================================== - # iterate over all of the created submissions, checking to see if they are + # iterate over all of the created submissions, checking to see if they are empty # and adding a message to the top if so (to differentiate empty files from errors in the UI) - for user in os.listdir(concatenated_dir): - for version in os.listdir(os.path.join(concatenated_dir, user)): - my_concatenated_file = os.path.join(concatenated_dir, - user, version, "submission.concatenated") + for user in os.listdir(os.path.join(args.basepath, "users")): + user_path = os.path.join(args.basepath, "users", user) + for version in os.listdir(user_path): + version_path = user_path = os.path.join(user_path, version) + my_concatenated_file = os.path.join(version_path, "submission.concatenated") with open(my_concatenated_file, "r+") as my_cf: if my_cf.read() == "": my_cf.write("Error: No files matched provided regex in selected directories") # ========================================================================== - # concatenate any files in the provided_code directory - provided_code_path = os.path.join(course_dir, "lichen", "provided_code", gradeable) - output_dir = os.path.join(course_dir, "lichen", "concatenated", - gradeable, "provided_code", "provided_code") - output_file = os.path.join(output_dir, "submission.concatenated") - - if os.path.isdir(provided_code_path) and len(os.listdir(provided_code_path)) != 0: - # If the directory already exists, delete it and make a new one - if os.path.isdir(output_dir): - shutil.rmtree(output_dir) - os.makedirs(output_dir) - - with open(output_file, 'w') as of: - # Loop over all of the provided files and concatenate them - for file in sorted(os.listdir(provided_code_path)): - with open(os.path.join(provided_code_path, file), encoding='ISO-8859-1') as tmp: - # append the contents of the file - of.write(tmp.read()) + # concatenate provided code + with open(os.path.join(args.basepath, "provided_code", + "submission.concatenated"), "w") as file: + file.write(getConcatFilesInDir(os.path.join(args.basepath, "provided_code", "files")), []) # ========================================================================== - print("done") - print(f"{count_total_files} files concatenated") + end_time = time.time() + print("done in " + str(end_time - start_time) + " seconds") if __name__ == "__main__": From 388ff4ebe7a77bda88c9959352fe76327f5f97eb Mon Sep 17 00:00:00 2001 From: sbelsk Date: Mon, 28 Jun 2021 13:02:33 -0400 Subject: [PATCH 05/52] Fix python errors --- bin/concatenate_all.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/bin/concatenate_all.py b/bin/concatenate_all.py index eab7aa5..783e1ef 100644 --- a/bin/concatenate_all.py +++ b/bin/concatenate_all.py @@ -22,7 +22,7 @@ def getConcatFilesInDir(input_dir, regex_patterns): for my_dir, _dirs, my_files in os.walk(input_dir): # Determine if regex should be used (blank regex is equivalent to selecting all files) files = sorted(my_files) - if regex_expressions[0] != "": + if regex_patterns[0] != "": files_filtered = [] for e in regex_patterns: files_filtered.extend(fnmatch.filter(files, e.strip())) @@ -88,7 +88,7 @@ def main(): # loop through and concatenate the selected files for each user in this gradeable for dir in regex_dirs: - gradeable_path = os.path.join(datapath, semester, course, dir, gradeable) + gradeable_path = os.path.join(args.datapath, semester, course, dir, gradeable) # loop over each user for user in 
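The keys read from config.json are now all visible in the series: semester, course, gradeable, ignore_submissions, regex, and regex_dirs above, plus language, sequence_length, and threshold in the tokenizer, hasher, and comparator. A plausible config for a run — every value below is illustrative, not taken from a real course:

    {
        "semester": "s21",
        "course": "sample_course",
        "gradeable": "hw01",
        "language": "plaintext",
        "sequence_length": 4,
        "threshold": 5,
        "regex": "*.py, *.txt",
        "regex_dirs": ["submissions"],
        "ignore_submissions": []
    }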
From 053f0f6a503161a73867cc36835246c795bd8b0c Mon Sep 17 00:00:00 2001
From: williamjallen
Date: Tue, 29 Jun 2021 10:29:26 -0400
Subject: [PATCH 06/52] Progress: everything through tokenization finished

---
 bin/concatenate_all.py            |  4 +-
 bin/process_all.sh                | 18 +++++----
 bin/tokenize_all.py               | 67 ++++++++++---------------------
 compare_hashes/compare_hashes.cpp |  2 +-
 4 files changed, 35 insertions(+), 56 deletions(-)

diff --git a/bin/concatenate_all.py b/bin/concatenate_all.py
index 783e1ef..b289d21 100644
--- a/bin/concatenate_all.py
+++ b/bin/concatenate_all.py
@@ -103,7 +103,7 @@ def main():
                 if not os.path.isdir(version_path):
                     continue
 
-                output_file_path = os.path.join(args.basepath, user,
+                output_file_path = os.path.join(args.basepath, "users", user,
                                                 version, "submission.concatenated")
 
                 if not os.path.exists(os.path.dirname(output_file_path)):
@@ -130,7 +130,7 @@ def main():
     # concatenate provided code
     with open(os.path.join(args.basepath, "provided_code",
                            "submission.concatenated"), "w") as file:
-        file.write(getConcatFilesInDir(os.path.join(args.basepath, "provided_code", "files")), [])
+        file.write(getConcatFilesInDir(os.path.join(args.basepath, "provided_code", "files"), regex_patterns))

diff --git a/bin/process_all.sh b/bin/process_all.sh
index 24629ae..d523d99 100644
--- a/bin/process_all.sh
+++ b/bin/process_all.sh
@@ -6,14 +6,16 @@
 # TODO: Assert permissions, as necessary
 
 basepath=$1 # holds the path to a directory containing a config for this gradeable
+            # (probably .../lichen/gradeable// on Submitty)
+
 datapath=$2 # holds the path to a directory containing courses and their data
             # (probably /var/local/submitty/courses on Submitty)
 
 # kill the script if there is no config file
-if [ ! -f "${basepath}/config.json" ]; then
-  echo "Unable to find config.json in provided directory"
-  exit 1
-fi
+# if [ ! -f "${basepath}/config.json" ]; then
+#   echo "Unable to find config.json in provided directory"
+#   exit 1
+# fi
 
 # create these directories if they don't already exist
 mkdir -p "${basepath}/logs"
 mkdir -p "${basepath}/provided_code"
 mkdir -p "${basepath}/provided_code/files"
 mkdir -p "${basepath}/other_gradeables"
 mkdir -p "${basepath}/users"
 
 # run all of the modules and exit if an error occurs
-/usr/local/submitty/Lichen/bin/concatenate_all.py $basepath $datapath || exit 1
-#/usr/local/submitty/Lichen/bin/tokenize_all.py $basepath || exit 1
-#/usr/local/submitty/Lichen/bin/hash_all.py $basepath || exit 1
-#/usr/local/submitty/Lichen/bin/compare_hashes.out $basepath || exit 1
+./concatenate_all.py "${basepath}" "${datapath}" || exit 1
+./tokenize_all.py $basepath || exit 1
+#hash_all.py $basepath || exit 1
+#compare_hashes.out $basepath || exit 1

diff --git a/bin/tokenize_all.py b/bin/tokenize_all.py
index 632d4ef..8a74591 100644
--- a/bin/tokenize_all.py
+++ b/bin/tokenize_all.py
@@ -9,28 +9,18 @@
 import sys
 
 
-CONFIG_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', '..', 'config')
-with open(os.path.join(CONFIG_PATH, 'submitty.json')) as open_file:
-    OPEN_JSON = json.load(open_file)
-SUBMITTY_DATA_DIR = OPEN_JSON['submitty_data_dir']
-SUBMITTY_INSTALL_DIR = OPEN_JSON['submitty_install_dir']
-
-
 def parse_args():
     parser = argparse.ArgumentParser(description="")
-    parser.add_argument("config_path")
+    parser.add_argument("basepath")
     return parser.parse_args()
 
 
-def tokenize(args, my_concatenated_file, my_tokenized_file):
-
-    with open(args.config_path) as lichen_config:
-        lichen_config_data = json.load(lichen_config)
-        language = lichen_config_data["language"]
+def tokenize(lichen_config_data, my_concatenated_file, my_tokenized_file):
+    language = lichen_config_data["language"]
 
     language_token_data = dict()
 
-    data_json_path = os.path.join(SUBMITTY_INSTALL_DIR, "Lichen", "bin", "data.json")
+    data_json_path = "./data.json"  # data.json is in the Lichen/bin directory after install
     with open(data_json_path, 'r') as token_data_file:
         token_data = json.load(token_data_file)
         if language not in token_data:
@@ -39,8 +29,7 @@
         else:
             language_token_data = token_data[language]
 
-    tokenizer = os.path.join(SUBMITTY_INSTALL_DIR, "Lichen", "bin",
-                             language_token_data["tokenizer"])
+    tokenizer = f"./{language_token_data['tokenizer']}"
     if not language_token_data.get("input_as_argument"):
         my_concatenated_file = f'< {my_concatenated_file}'
@@ -58,39 +47,27 @@
     sys.stdout.write("TOKENIZE ALL...")
     sys.stdout.flush()
 
-    with open(args.config_path) as lichen_config:
+    with open(os.path.join(args.basepath, "config.json")) as lichen_config:
         lichen_config_data = json.load(lichen_config)
-        semester = lichen_config_data["semester"]
-        course = lichen_config_data["course"]
-        gradeable = lichen_config_data["gradeable"]
-
-    # ===========================================================================
-    # error checking
-    course_dir = os.path.join(SUBMITTY_DATA_DIR, "courses", semester, course)
-    if not os.path.isdir(course_dir):
-        print("ERROR! ", course_dir, " is not a valid course directory")
-        exit(1)
-    concatenated_dir = os.path.join(course_dir, "lichen", "concatenated", gradeable)
-    if not os.path.isdir(concatenated_dir):
-        print("ERROR! ", concatenated_dir, " is not a valid gradeable concatenated directory")
-        exit(1)
-
-    tokenized_dir = os.path.join(course_dir, "lichen", "tokenized", gradeable)
 
     # ===========================================================================
     # walk the subdirectories
-    for user in sorted(os.listdir(concatenated_dir)):
-        for version in sorted(os.listdir(os.path.join(concatenated_dir, user))):
-            my_concatenated_file = os.path.join(concatenated_dir, user, version,
-                                                "submission.concatenated")
-
-            # ==================================================================
-            # create the directory
-            my_tokenized_dir = os.path.join(tokenized_dir, user, version)
-            if not os.path.isdir(my_tokenized_dir):
-                os.makedirs(my_tokenized_dir)
-            my_tokenized_file = os.path.join(my_tokenized_dir, "tokens.json")
-            tokenize(args, my_concatenated_file, my_tokenized_file)
+    users_dir = os.path.join(args.basepath, "users")
+    for user in sorted(os.listdir(users_dir)):
+        user_dir = os.path.join(users_dir, user)
+        if not os.path.isdir(user_dir):
+            continue
+
+        for version in sorted(os.listdir(user_dir)):
+            my_dir = os.path.join(user_dir, version)
+            if not os.path.isdir(my_dir):
+                continue
+
+            print(my_dir)
+
+            my_concatenated_file = os.path.join(my_dir, "submission.concatenated")
+            my_tokenized_file = os.path.join(my_dir, "tokens.json")
+            tokenize(lichen_config_data, my_concatenated_file, my_tokenized_file)
 
     print("done")

diff --git a/compare_hashes/compare_hashes.cpp b/compare_hashes/compare_hashes.cpp
index b283e88..bf7b813 100644
--- a/compare_hashes/compare_hashes.cpp
+++ b/compare_hashes/compare_hashes.cpp
@@ -628,6 +628,6 @@ int main(int argc, char* argv[]) {
   // ---------------------------------------------------------------------------
   time(&overall_end);
   double overall_diff = difftime(overall_end, overall_start);
-  std::cout << "DONE in " << overall_diff << "s" << std::endl;
+  std::cout << "done in " << overall_diff << "s" << std::endl;
 }

From 9480b4759b5c5e68f71ed8241920b10f1b316313 Mon Sep 17 00:00:00 2001
From: williamjallen
Date: Tue, 29 Jun 2021 10:52:08 -0400
Subject: [PATCH 07/52] Everything works

---
 bin/hash_all.py     | 65 ++++++++++++++++-----------------------------
 bin/process_all.sh  |  4 +--
 bin/tokenize_all.py |  6 +++--
 3 files changed, 29 insertions(+), 46 deletions(-)

diff --git a/bin/hash_all.py b/bin/hash_all.py
index 3128cd8..1c5dac5 100644
--- a/bin/hash_all.py
+++ b/bin/hash_all.py
@@ -11,27 +11,18 @@
 import sys
 import hashlib
 
-CONFIG_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', '..', 'config')
-with open(os.path.join(CONFIG_PATH, 'submitty.json')) as open_file:
-    OPEN_JSON = json.load(open_file)
-SUBMITTY_DATA_DIR = OPEN_JSON['submitty_data_dir']
-SUBMITTY_INSTALL_DIR = OPEN_JSON['submitty_install_dir']
-
 
 def parse_args():
     parser = argparse.ArgumentParser(description="")
-    parser.add_argument("config_path")
-    args = parser.parse_args()
-    return args
+    parser.add_argument("basepath")
+    return parser.parse_args()
 
 
-def hasher(args, my_tokenized_file, my_hashes_file):
-    with open(args.config_path) as lichen_config:
-        lichen_config_data = json.load(lichen_config)
-        language = lichen_config_data["language"]
-        sequence_length = int(lichen_config_data["sequence_length"])
+def hasher(lichen_config_data, my_tokenized_file, my_hashes_file):
+    language = lichen_config_data["language"]
+    sequence_length = int(lichen_config_data["sequence_length"])
 
-    data_json_path = os.path.join(SUBMITTY_INSTALL_DIR, "Lichen", "bin", "data.json")
+    data_json_path = "./data.json"  # data.json is in the Lichen/bin directory after install
     with open(data_json_path) as token_data_file:
         token_data = json.load(token_data_file)
         if language not in token_data:
@@ -59,42 +50,32 @@
 def main():
     args = parse_args()
 
-    with open(args.config_path) as lichen_config:
+    with open(os.path.join(args.basepath, "config.json")) as lichen_config:
         lichen_config_data = json.load(lichen_config)
-        semester = lichen_config_data["semester"]
-        course = lichen_config_data["course"]
-        gradeable = lichen_config_data["gradeable"]
 
     sys.stdout.write("HASH ALL...")
     sys.stdout.flush()
 
     # =========================================================================
-    # error checking
-    course_dir = os.path.join(SUBMITTY_DATA_DIR, "courses", semester, course)
-    if not os.path.isdir(course_dir):
-        print("ERROR! ", course_dir, " is not a valid course directory")
-        exit(1)
-    tokenized_dir = os.path.join(course_dir, "lichen", "tokenized", gradeable)
-    if not os.path.isdir(tokenized_dir):
-        print("ERROR! ", tokenized_dir, " is not a valid gradeable tokenized directory")
+    # walk the subdirectories
+    users_dir = os.path.join(args.basepath, "users")
+    if not os.path.isdir(users_dir):
+        print("Error: Unable to find users directory")
         exit(1)
 
-    hashes_dir = os.path.join(course_dir, "lichen", "hashes", gradeable)
+    for user in sorted(os.listdir(users_dir)):
+        user_dir = os.path.join(users_dir, user)
+        if not os.path.isdir(user_dir):
+            continue
 
-    # =========================================================================
-    # walk the subdirectories
-    for user in sorted(os.listdir(tokenized_dir)):
-        for version in sorted(os.listdir(os.path.join(tokenized_dir, user))):
-            my_tokenized_file = os.path.join(tokenized_dir, user, version, "tokens.json")
-
-            # =================================================================
-            # create the directory
-            my_hashes_dir = os.path.join(hashes_dir, user, version)
-            if not os.path.isdir(my_hashes_dir):
-                os.makedirs(my_hashes_dir)
-
-            my_hashes_file = os.path.join(my_hashes_dir, "hashes.txt")
-            hasher(args, my_tokenized_file, my_hashes_file)
+        for version in sorted(os.listdir(user_dir)):
+            my_dir = os.path.join(user_dir, version)
+            if not os.path.isdir(my_dir):
+                continue
+
+            my_tokenized_file = os.path.join(my_dir, "tokens.json")
+            my_hashes_file = os.path.join(my_dir, "hashes.txt")
+            hasher(lichen_config_data, my_tokenized_file, my_hashes_file)
 
     print("done")

diff --git a/bin/process_all.sh b/bin/process_all.sh
index d523d99..d7cbb7e 100644
--- a/bin/process_all.sh
+++ b/bin/process_all.sh
@@ -27,5 +27,5 @@
 # run all of the modules and exit if an error occurs
 ./concatenate_all.py "${basepath}" "${datapath}" || exit 1
 ./tokenize_all.py $basepath || exit 1
-#hash_all.py $basepath || exit 1
-#compare_hashes.out $basepath || exit 1
+./hash_all.py $basepath || exit 1
+./compare_hashes.out $basepath || exit 1

diff --git a/bin/tokenize_all.py b/bin/tokenize_all.py
index 8a74591..00eb100 100644
--- a/bin/tokenize_all.py
+++ b/bin/tokenize_all.py
@@ -53,6 +53,10 @@ def main():
     # ===========================================================================
     # walk the subdirectories
     users_dir = os.path.join(args.basepath, "users")
+    if not os.path.isdir(users_dir):
+        print("Error: Unable to find users directory")
+        exit(1)
+
     for user in sorted(os.listdir(users_dir)):
         user_dir = os.path.join(users_dir, user)
         if not os.path.isdir(user_dir):
@@ -63,8 +67,6 @@ def main():
             if not os.path.isdir(my_dir):
                 continue
 
-            print(my_dir)
-
             my_concatenated_file = os.path.join(my_dir, "submission.concatenated")
             my_tokenized_file = os.path.join(my_dir, "tokens.json")
             tokenize(lichen_config_data, my_concatenated_file, my_tokenized_file)
 
     print("done")
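The hasher rewritten above is the core fingerprinting step: a submission's token values are reduced to one truncated MD5 digest per sliding window of sequence_length tokens. A minimal self-contained restatement of that windowing logic (the function name is illustrative; the truncation to 8 hex digits mirrors the FIXME-marked line in hash_all.py):

    import hashlib

    def hash_token_windows(token_values, sequence_length):
        # one hash per window of sequence_length consecutive token values
        return [
            hashlib.md5(''.join(token_values[i:i + sequence_length]).encode()).hexdigest()[0:8]
            for i in range(0, len(token_values) - sequence_length + 1)
        ]

    # e.g. four tokens with a window of two yield three hashes
    print(hash_token_windows(['if', '(', 'x', ')'], 2))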
= os.path.join(my_dir, "submission.concatenated") my_tokenized_file = os.path.join(my_dir, "tokens.json") tokenize(lichen_config_data, my_concatenated_file, my_tokenized_file) From 0938e76b26f53f516e6f7a2a6c4a871ffbeabf09 Mon Sep 17 00:00:00 2001 From: williamjallen Date: Tue, 29 Jun 2021 11:19:27 -0400 Subject: [PATCH 08/52] Add timers --- bin/concatenate_all.py | 2 +- bin/hash_all.py | 6 +++++- bin/process_all.sh | 14 ++++++++++---- bin/tokenize_all.py | 6 +++++- compare_hashes/compare_hashes.cpp | 10 +++++----- 5 files changed, 26 insertions(+), 12 deletions(-) diff --git a/bin/concatenate_all.py b/bin/concatenate_all.py index b289d21..d140d58 100644 --- a/bin/concatenate_all.py +++ b/bin/concatenate_all.py @@ -134,7 +134,7 @@ def main(): # ========================================================================== end_time = time.time() - print("done in " + str(end_time - start_time) + " seconds") + print("done in " + "%.0f" % (end_time - start_time) + " seconds") if __name__ == "__main__": diff --git a/bin/hash_all.py b/bin/hash_all.py index 1c5dac5..24012d4 100644 --- a/bin/hash_all.py +++ b/bin/hash_all.py @@ -8,6 +8,7 @@ import argparse import os import json +import time import sys import hashlib @@ -48,6 +49,7 @@ def hasher(lichen_config_data, my_tokenized_file, my_hashes_file): def main(): + start_time = time.time() args = parse_args() with open(os.path.join(args.basepath, "config.json")) as lichen_config: @@ -77,7 +79,9 @@ def main(): my_hashes_file = os.path.join(my_dir, "hashes.txt") hasher(lichen_config_data, my_tokenized_file, my_hashes_file) - print("done") + # ========================================================================== + end_time = time.time() + print("done in " + "%.0f" % (end_time - start_time) + " seconds") if __name__ == "__main__": diff --git a/bin/process_all.sh b/bin/process_all.sh index d7cbb7e..ce74cf9 100644 --- a/bin/process_all.sh +++ b/bin/process_all.sh @@ -12,10 +12,16 @@ datapath=$2 # holds the path to a directory conatining courses and their data # (probably /var/local/submitty/courses on Submitty) # kill the script if there is no config file -# if [ ! -f "${basepath}/config.json" ]; then -# echo "Unable to find config.json in provided directory" -# exit 1 -# fi +if [ ! 
-f "${basepath}/config.json" ]; then + echo "Unable to find config.json in provided directory" + exit 1 +fi + +# delete any previous run results +# TODO: determine if any caching should occur +rm -rf "${basepath}/logs" +rm -rf "${basepath}/other_gradeables" +rm -rf "${basepath}/users" # create these directories if they don't already exist mkdir -p "${basepath}/logs" diff --git a/bin/tokenize_all.py b/bin/tokenize_all.py index 00eb100..d00b3cf 100644 --- a/bin/tokenize_all.py +++ b/bin/tokenize_all.py @@ -6,6 +6,7 @@ import argparse import os import json +import time import sys @@ -42,6 +43,7 @@ def tokenize(lichen_config_data, my_concatenated_file, my_tokenized_file): def main(): + start_time = time.time() args = parse_args() sys.stdout.write("TOKENIZE ALL...") @@ -71,7 +73,9 @@ def main(): my_tokenized_file = os.path.join(my_dir, "tokens.json") tokenize(lichen_config_data, my_concatenated_file, my_tokenized_file) - print("done") + # ========================================================================== + end_time = time.time() + print("done in " + "%.0f" % (end_time - start_time) + " seconds") if __name__ == "__main__": diff --git a/compare_hashes/compare_hashes.cpp b/compare_hashes/compare_hashes.cpp index bf7b813..b362069 100644 --- a/compare_hashes/compare_hashes.cpp +++ b/compare_hashes/compare_hashes.cpp @@ -257,7 +257,7 @@ int main(int argc, char* argv[]) { time(&end); double diff = difftime(end, start); - std::cout << "finished loading in " << diff << "s" << std::endl; + std::cout << "finished loading in " << diff << " seconds" << std::endl; // --------------------------------------------------------------------------- // THIS IS THE MAIN PLAGIARISM DETECTION ALGORITHM @@ -325,7 +325,7 @@ int main(int argc, char* argv[]) { time(&end); diff = difftime(end, start); - std::cout << "finished walking in " << diff << "s" << std::endl; + std::cout << "finished walking in " << diff << " seconds" << std::endl; // --------------------------------------------------------------------------- // Writing the output files and merging the results @@ -514,7 +514,7 @@ int main(int argc, char* argv[]) { time(&end); diff = difftime(end, start); - std::cout << "done merging and writing matches files in " << diff << "s" << std::endl; + std::cout << "done merging and writing matches files in " << diff << " seconds" << std::endl; // --------------------------------------------------------------------------- // Create a general summary of rankings of users by percentage match @@ -623,11 +623,11 @@ int main(int argc, char* argv[]) { time(&end); diff = difftime(end, start); - std::cout << "finished writing rankings in " << diff << "s" << std::endl; + std::cout << "finished writing rankings in " << diff << " seconds" << std::endl; // --------------------------------------------------------------------------- time(&overall_end); double overall_diff = difftime(overall_end, overall_start); - std::cout << "done in " << overall_diff << "s" << std::endl; + std::cout << "done in " << overall_diff << " seconds" << std::endl; } From 563642a857a49ec200c59cccc15e955ffea7dbb5 Mon Sep 17 00:00:00 2001 From: williamjallen Date: Tue, 29 Jun 2021 12:22:37 -0400 Subject: [PATCH 09/52] remove unnecessary code --- bin/process_all.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/process_all.sh b/bin/process_all.sh index ce74cf9..a6479a7 100644 --- a/bin/process_all.sh +++ b/bin/process_all.sh @@ -31,7 +31,7 @@ mkdir -p "${basepath}/other_gradeables" mkdir -p "${basepath}/users" # run all of the modules 
and exit if an error occurs -./concatenate_all.py "${basepath}" "${datapath}" || exit 1 +./concatenate_all.py $basepath $datapath || exit 1 ./tokenize_all.py $basepath || exit 1 ./hash_all.py $basepath || exit 1 ./compare_hashes.out $basepath || exit 1 From 210a778b03086916c8a37df30b6d7aa163f46797 Mon Sep 17 00:00:00 2001 From: sbelsk Date: Wed, 30 Jun 2021 08:23:54 -0400 Subject: [PATCH 10/52] little python changes --- bin/concatenate_all.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bin/concatenate_all.py b/bin/concatenate_all.py index d140d58..52403da 100644 --- a/bin/concatenate_all.py +++ b/bin/concatenate_all.py @@ -55,7 +55,7 @@ def main(): sys.stdout.write("CONCATENATE ALL...") # don't want a newline here so can't use print sys.stdout.flush() - config_path = args.basepath + '/config.json' + config_path = os.path.join(args.basepath, "config.json") if not os.path.isfile(config_path): print(f"Error: invalid config path provided ({config_path})") exit(1) @@ -130,7 +130,8 @@ def main(): # concatenate provided code with open(os.path.join(args.basepath, "provided_code", "submission.concatenated"), "w") as file: - file.write(getConcatFilesInDir(os.path.join(args.basepath, "provided_code", "files"), regex_patterns)) + provided_code_files = os.path.join(args.basepath, "provided_code", "files") + file.write(getConcatFilesInDir(provided_code_files, regex_patterns)) # ========================================================================== end_time = time.time() From 473ff7bd215f91c11ffeb1ac1e30bea3c8b71605 Mon Sep 17 00:00:00 2001 From: williamjallen Date: Wed, 30 Jun 2021 08:31:43 -0400 Subject: [PATCH 11/52] William made an oopsie (forgot to deal with provided code) --- bin/hash_all.py | 6 ++++++ bin/process_all.sh | 4 ++++ bin/tokenize_all.py | 6 ++++++ 3 files changed, 16 insertions(+) diff --git a/bin/hash_all.py b/bin/hash_all.py index 24012d4..2e6e544 100644 --- a/bin/hash_all.py +++ b/bin/hash_all.py @@ -79,6 +79,12 @@ def main(): my_hashes_file = os.path.join(my_dir, "hashes.txt") hasher(lichen_config_data, my_tokenized_file, my_hashes_file) + # =========================================================================== + # hash the provided code + provided_code_tokenized = os.path.join(args.basepath, "provided_code", "tokens.json") + provided_code_hashed = os.path.join(args.basepath, "provided_code", "hashes.txt") + hasher(lichen_config_data, provided_code_tokenized, provided_code_hashed) + # ========================================================================== end_time = time.time() print("done in " + "%.0f" % (end_time - start_time) + " seconds") diff --git a/bin/process_all.sh b/bin/process_all.sh index a6479a7..2e01f5b 100644 --- a/bin/process_all.sh +++ b/bin/process_all.sh @@ -22,6 +22,10 @@ fi rm -rf "${basepath}/logs" rm -rf "${basepath}/other_gradeables" rm -rf "${basepath}/users" +rm "${basepath}/overall_ranking.txt" +rm "${basepath}/provided_code/submission.concatenated" +rm "${basepath}/provided_code/tokens.json" +rm "${basepath}/provided_code/hashes.txt" # create these directories if they don't already exist mkdir -p "${basepath}/logs" diff --git a/bin/tokenize_all.py b/bin/tokenize_all.py index d00b3cf..6dac19f 100644 --- a/bin/tokenize_all.py +++ b/bin/tokenize_all.py @@ -73,6 +73,12 @@ def main(): my_tokenized_file = os.path.join(my_dir, "tokens.json") tokenize(lichen_config_data, my_concatenated_file, my_tokenized_file) + # =========================================================================== + # tokenize the provided code + 
provided_code_concat = os.path.join(args.basepath, "provided_code", "submission.concatenated") + provided_code_tokenized = os.path.join(args.basepath, "provided_code", "tokens.json") + tokenize(lichen_config_data, provided_code_concat, provided_code_tokenized) + # ========================================================================== end_time = time.time() print("done in " + "%.0f" % (end_time - start_time) + " seconds") From 00675a3917226263a9a27629a86a8e4e1181fffb Mon Sep 17 00:00:00 2001 From: williamjallen Date: Thu, 1 Jul 2021 16:17:55 -0400 Subject: [PATCH 12/52] Fix minor bugs Fix process_all.sh script plus fix spelling issue and prevent hash_all.py from breaking when empty tokenized files are written --- bin/hash_all.py | 19 +++++++++++-------- bin/process_all.sh | 23 +++++++++++++++-------- compare_hashes/compare_hashes.cpp | 2 +- 3 files changed, 27 insertions(+), 17 deletions(-) diff --git a/bin/hash_all.py b/bin/hash_all.py index 2e6e544..571cdf4 100644 --- a/bin/hash_all.py +++ b/bin/hash_all.py @@ -37,15 +37,17 @@ def hasher(lichen_config_data, my_tokenized_file, my_hashes_file): with open(my_tokenized_file, 'r', encoding='ISO-8859-1') as my_tf: with open(my_hashes_file, 'w') as my_hf: tokens = json.load(my_tf) - token_values = [str(x.get(token_data[language]["token_value"])) - for x in tokens] - num = len(tokens) - # FIXME: this truncation should be adjusted after testing - token_hashed_values = [(hashlib.md5(''.join( - token_values[x:x+sequence_length]).encode()) - .hexdigest())[0:8] for x in range(0, num-sequence_length+1)] + # write empty hashes file if the tokens file was empty (such as + # when there is no provided code) + if tokens is not None: + token_values = [str(x[token_data[language]["token_value"]]) for x in tokens] + num = len(tokens) + # FIXME: this truncation should be adjusted after testing + token_hashed_values = [(hashlib.md5(''.join( + token_values[x:x+sequence_length]).encode()) + .hexdigest())[0:8] for x in range(0, num-sequence_length+1)] - my_hf.write('\n'.join(token_hashed_values)) + my_hf.write('\n'.join(token_hashed_values)) def main(): @@ -75,6 +77,7 @@ def main(): if not os.path.isdir(my_dir): continue + print(my_dir) my_tokenized_file = os.path.join(my_dir, "tokens.json") my_hashes_file = os.path.join(my_dir, "hashes.txt") hasher(lichen_config_data, my_tokenized_file, my_hashes_file) diff --git a/bin/process_all.sh b/bin/process_all.sh index 2e01f5b..1a08c4d 100644 --- a/bin/process_all.sh +++ b/bin/process_all.sh @@ -1,3 +1,5 @@ +#!/bin/sh + # This script is the startup script for Lichen. It accepts a single path to a # directory containing a config file and creates the necessary output directories # as appropriate, relative to the provided path. 
It is possible to run this script @@ -22,10 +24,10 @@ fi rm -rf "${basepath}/logs" rm -rf "${basepath}/other_gradeables" rm -rf "${basepath}/users" -rm "${basepath}/overall_ranking.txt" -rm "${basepath}/provided_code/submission.concatenated" -rm "${basepath}/provided_code/tokens.json" -rm "${basepath}/provided_code/hashes.txt" +rm -f "${basepath}/overall_ranking.txt" +rm -f "${basepath}/provided_code/submission.concatenated" +rm -f "${basepath}/provided_code/tokens.json" +rm -f "${basepath}/provided_code/hashes.txt" # create these directories if they don't already exist mkdir -p "${basepath}/logs" @@ -34,8 +36,13 @@ mkdir -p "${basepath}/provided_code/files" mkdir -p "${basepath}/other_gradeables" mkdir -p "${basepath}/users" +log_file="${basepath}/logs/lichen_job_output.txt" + +cd $(dirname "${0}") + # run all of the modules and exit if an error occurs -./concatenate_all.py $basepath $datapath || exit 1 -./tokenize_all.py $basepath || exit 1 -./hash_all.py $basepath || exit 1 -./compare_hashes.out $basepath || exit 1 +echo "Beginning Lichen run: $(date +"%Y-%m-%d %H:%M:%S")" >> $log_file 2>&1 +./concatenate_all.py $basepath $datapath >> $log_file 2>&1 || exit 1 +./tokenize_all.py $basepath >> $log_file 2>&1 || exit 1 +./hash_all.py $basepath >> $log_file 2>&1 || exit 1 +./compare_hashes.out $basepath >> $log_file 2>&1 || exit 1 diff --git a/compare_hashes/compare_hashes.cpp b/compare_hashes/compare_hashes.cpp index b362069..ab9fa2c 100644 --- a/compare_hashes/compare_hashes.cpp +++ b/compare_hashes/compare_hashes.cpp @@ -518,7 +518,7 @@ int main(int argc, char* argv[]) { // --------------------------------------------------------------------------- // Create a general summary of rankings of users by percentage match - std::cout << "writing rakings files..." << std::endl; + std::cout << "writing rankings files..." 
<< std::endl; time(&start); // create a single file of students ranked by highest percentage of code plagiarised From 8a5db9d21c72d7c9578dcd14131491c2740a5e3a Mon Sep 17 00:00:00 2001 From: williamjallen Date: Fri, 2 Jul 2021 16:13:40 -0400 Subject: [PATCH 13/52] Fix permissions issue with provided code editing --- bin/process_all.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bin/process_all.sh b/bin/process_all.sh index 1a08c4d..6b958bf 100644 --- a/bin/process_all.sh +++ b/bin/process_all.sh @@ -36,6 +36,9 @@ mkdir -p "${basepath}/provided_code/files" mkdir -p "${basepath}/other_gradeables" mkdir -p "${basepath}/users" +# the default is r-x and we need PHP to be able to write if edits are made to the provided code +chmod g=rwxs "${basepath}/provided_code/files" + log_file="${basepath}/logs/lichen_job_output.txt" cd $(dirname "${0}") From f7abb099c6ad81c62c145ebdb86995c5bd09beff Mon Sep 17 00:00:00 2001 From: williamjallen Date: Sat, 3 Jul 2021 09:44:29 -0400 Subject: [PATCH 14/52] Add initial script --- .github/workflows/lichen_run.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 .github/workflows/lichen_run.yml diff --git a/.github/workflows/lichen_run.yml b/.github/workflows/lichen_run.yml new file mode 100644 index 0000000..6084780 --- /dev/null +++ b/.github/workflows/lichen_run.yml @@ -0,0 +1,17 @@ +name: Test Lichen + +on: [push] + +jobs: + python-lint: + runs-on: ubuntu-18.04 + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + with: + python-version: '3.6' + - name: Create Directory Structure + run: | + mkdir -p /usr/local/submitty/GIT_CHECKOUT/Lichen/ + mkdir -p /usr/local/submitty/Lichen/ + ls From 3ba16d23a0b7389e54c97a10ef76b260fbf86828 Mon Sep 17 00:00:00 2001 From: williamjallen Date: Sat, 3 Jul 2021 09:46:39 -0400 Subject: [PATCH 15/52] Update lichen_run.yml --- .github/workflows/lichen_run.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/lichen_run.yml b/.github/workflows/lichen_run.yml index 6084780..2459f2b 100644 --- a/.github/workflows/lichen_run.yml +++ b/.github/workflows/lichen_run.yml @@ -3,7 +3,7 @@ name: Test Lichen on: [push] jobs: - python-lint: + Test Lichen: runs-on: ubuntu-18.04 steps: - uses: actions/checkout@v2 @@ -12,6 +12,6 @@ jobs: python-version: '3.6' - name: Create Directory Structure run: | - mkdir -p /usr/local/submitty/GIT_CHECKOUT/Lichen/ - mkdir -p /usr/local/submitty/Lichen/ + sudo mkdir -p /usr/local/submitty/GIT_CHECKOUT/Lichen/ + sudo mkdir -p /usr/local/submitty/Lichen/ ls From e0ac0dac9bcbe03c6acd80c54b227091034043e9 Mon Sep 17 00:00:00 2001 From: williamjallen Date: Sat, 3 Jul 2021 09:47:30 -0400 Subject: [PATCH 16/52] Update lichen_run.yml --- .github/workflows/lichen_run.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lichen_run.yml b/.github/workflows/lichen_run.yml index 2459f2b..8086f63 100644 --- a/.github/workflows/lichen_run.yml +++ b/.github/workflows/lichen_run.yml @@ -3,7 +3,7 @@ name: Test Lichen on: [push] jobs: - Test Lichen: + test-lichen: runs-on: ubuntu-18.04 steps: - uses: actions/checkout@v2 From ddbd29f7b66397ee11389c23b4a2dd573dcf27a6 Mon Sep 17 00:00:00 2001 From: williamjallen Date: Sat, 3 Jul 2021 09:51:12 -0400 Subject: [PATCH 17/52] Update lichen_run.yml --- .github/workflows/lichen_run.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/lichen_run.yml b/.github/workflows/lichen_run.yml index 8086f63..22ffe60 100644 --- 
a/.github/workflows/lichen_run.yml +++ b/.github/workflows/lichen_run.yml @@ -14,4 +14,6 @@ jobs: run: | sudo mkdir -p /usr/local/submitty/GIT_CHECKOUT/Lichen/ sudo mkdir -p /usr/local/submitty/Lichen/ - ls + cp * /usr/local/submitty/GIT_CHECKOUT/Lichen/ + bash /usr/local/submitty/GIT_CHECKOUT/Lichen/install_lichen.sh + ls /usr/local/submitty/Lichen/ From 6c3aadd1f2f49c2d49c029643aaff76f9513bb9f Mon Sep 17 00:00:00 2001 From: williamjallen Date: Sat, 3 Jul 2021 09:52:22 -0400 Subject: [PATCH 18/52] Update lichen_run.yml --- .github/workflows/lichen_run.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/lichen_run.yml b/.github/workflows/lichen_run.yml index 22ffe60..2fd50f7 100644 --- a/.github/workflows/lichen_run.yml +++ b/.github/workflows/lichen_run.yml @@ -14,6 +14,6 @@ jobs: run: | sudo mkdir -p /usr/local/submitty/GIT_CHECKOUT/Lichen/ sudo mkdir -p /usr/local/submitty/Lichen/ - cp * /usr/local/submitty/GIT_CHECKOUT/Lichen/ - bash /usr/local/submitty/GIT_CHECKOUT/Lichen/install_lichen.sh - ls /usr/local/submitty/Lichen/ + sudo cp * /usr/local/submitty/GIT_CHECKOUT/Lichen/ + sudo bash /usr/local/submitty/GIT_CHECKOUT/Lichen/install_lichen.sh + sudo ls /usr/local/submitty/Lichen/ From d644fde6aa7bbe742c81d89375d2809a3cd108ab Mon Sep 17 00:00:00 2001 From: williamjallen Date: Sat, 3 Jul 2021 09:53:21 -0400 Subject: [PATCH 19/52] Update lichen_run.yml --- .github/workflows/lichen_run.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lichen_run.yml b/.github/workflows/lichen_run.yml index 2fd50f7..c79a432 100644 --- a/.github/workflows/lichen_run.yml +++ b/.github/workflows/lichen_run.yml @@ -14,6 +14,6 @@ jobs: run: | sudo mkdir -p /usr/local/submitty/GIT_CHECKOUT/Lichen/ sudo mkdir -p /usr/local/submitty/Lichen/ - sudo cp * /usr/local/submitty/GIT_CHECKOUT/Lichen/ + sudo cp -r * /usr/local/submitty/GIT_CHECKOUT/Lichen/ sudo bash /usr/local/submitty/GIT_CHECKOUT/Lichen/install_lichen.sh sudo ls /usr/local/submitty/Lichen/ From 67566b0020135da1604ba2f7c85daf33fdba5dfe Mon Sep 17 00:00:00 2001 From: williamjallen Date: Sat, 3 Jul 2021 09:56:02 -0400 Subject: [PATCH 20/52] add boost --- .github/workflows/lichen_run.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/lichen_run.yml b/.github/workflows/lichen_run.yml index c79a432..7e2ea2b 100644 --- a/.github/workflows/lichen_run.yml +++ b/.github/workflows/lichen_run.yml @@ -10,6 +10,9 @@ jobs: - uses: actions/setup-python@v2 with: python-version: '3.6' + - name: Install Dependencies + run: | + sudo apt install libboost-all-dev - name: Create Directory Structure run: | sudo mkdir -p /usr/local/submitty/GIT_CHECKOUT/Lichen/ From 32ff9869af4217bdf0da69aec840dfa366586e90 Mon Sep 17 00:00:00 2001 From: williamjallen Date: Sat, 3 Jul 2021 10:03:49 -0400 Subject: [PATCH 21/52] add testing file --- .github/workflows/lichen_run.yml | 4 +++- tests/tests.py | 11 +++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 tests/tests.py diff --git a/.github/workflows/lichen_run.yml b/.github/workflows/lichen_run.yml index 7e2ea2b..f2e48d6 100644 --- a/.github/workflows/lichen_run.yml +++ b/.github/workflows/lichen_run.yml @@ -19,4 +19,6 @@ jobs: sudo mkdir -p /usr/local/submitty/Lichen/ sudo cp -r * /usr/local/submitty/GIT_CHECKOUT/Lichen/ sudo bash /usr/local/submitty/GIT_CHECKOUT/Lichen/install_lichen.sh - sudo ls /usr/local/submitty/Lichen/ + - name: Run Tests + run: | + python3 ./tests.py diff --git a/tests/tests.py 
From be1a57fa28b7252e127854227b380cee06e0c625 Mon Sep 17 00:00:00 2001
From: williamjallen
Date: Sat, 3 Jul 2021 10:05:03 -0400
Subject: [PATCH 22/52] forgot that paths are important

---
 .github/workflows/lichen_run.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/lichen_run.yml b/.github/workflows/lichen_run.yml
index f2e48d6..9182126 100644
--- a/.github/workflows/lichen_run.yml
+++ b/.github/workflows/lichen_run.yml
@@ -21,4 +21,4 @@ jobs:
         sudo bash /usr/local/submitty/GIT_CHECKOUT/Lichen/install_lichen.sh
     - name: Run Tests
       run: |
-        python3 ./tests.py
+        python3 ./tests/tests.py

From 4b01ead4c13d6790afedeadcc8b39e0d607d593a Mon Sep 17 00:00:00 2001
From: williamjallen
Date: Sat, 3 Jul 2021 10:16:39 -0400
Subject: [PATCH 23/52] Make separate setup.sh script

---
 .github/workflows/lichen_run.yml | 5 +----
 tests/setup.sh                   | 8 ++++++++
 tests/tests.py                   | 2 +-
 3 files changed, 10 insertions(+), 5 deletions(-)
 create mode 100644 tests/setup.sh

diff --git a/.github/workflows/lichen_run.yml b/.github/workflows/lichen_run.yml
index 9182126..ca8bdf1 100644
--- a/.github/workflows/lichen_run.yml
+++ b/.github/workflows/lichen_run.yml
@@ -15,10 +15,7 @@ jobs:
         sudo apt install libboost-all-dev
     - name: Create Directory Structure
       run: |
-        sudo mkdir -p /usr/local/submitty/GIT_CHECKOUT/Lichen/
-        sudo mkdir -p /usr/local/submitty/Lichen/
-        sudo cp -r * /usr/local/submitty/GIT_CHECKOUT/Lichen/
-        sudo bash /usr/local/submitty/GIT_CHECKOUT/Lichen/install_lichen.sh
+        sudo ./tests/setup.sh
     - name: Run Tests
       run: |
         python3 ./tests/tests.py
diff --git a/tests/setup.sh b/tests/setup.sh
new file mode 100644
index 0000000..b226963
--- /dev/null
+++ b/tests/setup.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+
+mkdir -p /usr/local/submitty/GIT_CHECKOUT/Lichen/
+cp -r * /usr/local/submitty/GIT_CHECKOUT/Lichen/
+
+mkdir -p /usr/local/submitty/Lichen/
+
+bash /usr/local/submitty/GIT_CHECKOUT/Lichen/install_lichen.sh
diff --git a/tests/tests.py b/tests/tests.py
index e615ac7..fa41488 100644
--- a/tests/tests.py
+++ b/tests/tests.py
@@ -3,7 +3,7 @@
 
 class TestTokenizers(unittest.TestCase):
 
-    def test(self):
+    def testPlaintextTokenizer(self):
         print('test!')

From 2b15816ec88633055e8b204d5c4829b899bfc601 Mon Sep 17 00:00:00 2001
From: williamjallen
Date: Sat, 3 Jul 2021 10:18:17 -0400
Subject: [PATCH 24/52] Update lichen_run.yml

---
 .github/workflows/lichen_run.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/lichen_run.yml b/.github/workflows/lichen_run.yml
index ca8bdf1..e8093ca 100644
--- a/.github/workflows/lichen_run.yml
+++ b/.github/workflows/lichen_run.yml
@@ -15,7 +15,7 @@ jobs:
         sudo apt install libboost-all-dev
     - name: Create Directory Structure
       run: |
-        sudo ./tests/setup.sh
+        sudo bash ./tests/setup.sh
     - name: Run Tests
       run: |
         python3 ./tests/tests.py
From 0595eb396d3dd415227abd9f2ff0063069aaba93 Mon Sep 17 00:00:00 2001
From: williamjallen
Date: Sat, 3 Jul 2021 11:31:07 -0400
Subject: [PATCH 25/52] Adjust file structure, add setup script

---
 .flake8                                            |  1 +
 .../plaintext/expected_output/output.json          |  0
 .../output_ignore_everything.json                  |  0
 .../output_ignore_newlines.json                    |  0
 .../output_ignore_punctuation.json                 |  0
 .../expected_output/output_to_lower.json           |  0
 .../data/tokenizer}/plaintext/input.txt            |  0
 tests/setup.sh                                     | 23 +++++++++++++++----
 tests/tests.py                                     | 18 +++++++++++++--
 9 files changed, 36 insertions(+), 6 deletions(-)
 rename {tokenizer => tests/data/tokenizer}/plaintext/expected_output/output.json (100%)
 rename {tokenizer => tests/data/tokenizer}/plaintext/expected_output/output_ignore_everything.json (100%)
 rename {tokenizer => tests/data/tokenizer}/plaintext/expected_output/output_ignore_newlines.json (100%)
 rename {tokenizer => tests/data/tokenizer}/plaintext/expected_output/output_ignore_punctuation.json (100%)
 rename {tokenizer => tests/data/tokenizer}/plaintext/expected_output/output_to_lower.json (100%)
 rename {tokenizer => tests/data/tokenizer}/plaintext/input.txt (100%)

diff --git a/.flake8 b/.flake8
index 5d69a55..33a5336 100644
--- a/.flake8
+++ b/.flake8
@@ -5,3 +5,4 @@ exclude=
 per-file-ignores =
     tokenizer/mips/mips_tokenizer.py:W605
+    tests/tests.py:E501
diff --git a/tokenizer/plaintext/expected_output/output.json b/tests/data/tokenizer/plaintext/expected_output/output.json
similarity index 100%
rename from tokenizer/plaintext/expected_output/output.json
rename to tests/data/tokenizer/plaintext/expected_output/output.json
diff --git a/tokenizer/plaintext/expected_output/output_ignore_everything.json b/tests/data/tokenizer/plaintext/expected_output/output_ignore_everything.json
similarity index 100%
rename from tokenizer/plaintext/expected_output/output_ignore_everything.json
rename to tests/data/tokenizer/plaintext/expected_output/output_ignore_everything.json
diff --git a/tokenizer/plaintext/expected_output/output_ignore_newlines.json b/tests/data/tokenizer/plaintext/expected_output/output_ignore_newlines.json
similarity index 100%
rename from tokenizer/plaintext/expected_output/output_ignore_newlines.json
rename to tests/data/tokenizer/plaintext/expected_output/output_ignore_newlines.json
diff --git a/tokenizer/plaintext/expected_output/output_ignore_punctuation.json b/tests/data/tokenizer/plaintext/expected_output/output_ignore_punctuation.json
similarity index 100%
rename from tokenizer/plaintext/expected_output/output_ignore_punctuation.json
rename to tests/data/tokenizer/plaintext/expected_output/output_ignore_punctuation.json
diff --git a/tokenizer/plaintext/expected_output/output_to_lower.json b/tests/data/tokenizer/plaintext/expected_output/output_to_lower.json
similarity index 100%
rename from tokenizer/plaintext/expected_output/output_to_lower.json
rename to tests/data/tokenizer/plaintext/expected_output/output_to_lower.json
diff --git a/tokenizer/plaintext/input.txt b/tests/data/tokenizer/plaintext/input.txt
similarity index 100%
rename from tokenizer/plaintext/input.txt
rename to tests/data/tokenizer/plaintext/input.txt
diff --git a/tests/setup.sh b/tests/setup.sh
index b226963..f18c0e3 100644
--- a/tests/setup.sh
+++ b/tests/setup.sh
@@ -1,8 +1,23 @@
 #!/usr/bin/env bash
 
-mkdir -p /usr/local/submitty/GIT_CHECKOUT/Lichen/
-cp -r * /usr/local/submitty/GIT_CHECKOUT/Lichen/
+lichen_repository_dir=/usr/local/submitty/GIT_CHECKOUT/Lichen/
+lichen_installation_dir=/usr/local/submitty/Lichen/
+lichen_data_dir=/var/local/submitty/courses/
 
-mkdir -p /usr/local/submitty/Lichen/
+# make a simulated GIT_CHECKOUT directory
+mkdir -p $lichen_repository_dir
+cp -r * $lichen_repository_dir
+cd $lichen_repository_dir
 
-bash /usr/local/submitty/GIT_CHECKOUT/Lichen/install_lichen.sh
+# install Lichen
+mkdir -p $lichen_installation_dir
+bash $lichen_repository_dir/install_lichen.sh
+
+# SETUP TOKENIZER TESTS ########################################################
+tokenizer_tests_course=$lichen_data_dir/f21/test_tokenizers/lichen/
+
+# make a simulated lichen path for the test_tokenizers course
+mkdir -p $tokenizer_tests_course
+
+# set up file structure for plaintext tokenizer tests
+# (doesn't need a full file structure, just a place to put files)
+mkdir -p $tokenizer_tests_course/plaintext_tokenizer_tests/
diff --git a/tests/tests.py b/tests/tests.py
index fa41488..be0077d 100644
--- a/tests/tests.py
+++ b/tests/tests.py
@@ -1,10 +1,24 @@
 import unittest
+import os
 
+lichen_repository_dir = "/usr/local/submitty/GIT_CHECKOUT/Lichen/"
+lichen_installation_dir = "/usr/local/submitty/Lichen/"
+lichen_data_dir = "/var/local/submitty/courses/"
 
-class TestTokenizers(unittest.TestCase):
+class TestPlaintextTokenizer(unittest.TestCase):
 
     def testPlaintextTokenizer(self):
-        print('test!')
+        input_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/input.txt"
+        output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/plaintext_tokenizer_tests/output.json"
+        # expected_output_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/output.json"
+
+        command = f"{lichen_installation_dir}/plaintext_tokenizer.out {input_file} > {output_file}"
+        os.system(command)
+
+        with open(output_file) as file:
+            print(file.read())
+
+        os.remove(output_file)
 
 
 if __name__ == '__main__':
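One caveat with the approach above: `os.system` silently swallows non-zero exit codes, so a crashed tokenizer would surface only as a confusing output diff. A sketch of the same call via `subprocess.run`, which raises on failure (not what the series does; `input_file` and `output_file` are the variables defined in the test above):

    import subprocess

    # equivalent of `plaintext_tokenizer.out <input> > <output>` with error checking;
    # check=True raises CalledProcessError instead of silently returning a status
    with open(output_file, "w") as stdout:
        subprocess.run([f"{lichen_installation_dir}/plaintext_tokenizer.out", input_file],
                       stdout=stdout, check=True)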
From 10dbed2e8d93c53c5492f7f2cad25b15bbc9af2 Mon Sep 17 00:00:00 2001
From: williamjallen
Date: Sat, 3 Jul 2021 11:32:31 -0400
Subject: [PATCH 26/52] need sudo for test

---
 .github/workflows/lichen_run.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/lichen_run.yml b/.github/workflows/lichen_run.yml
index e8093ca..2b322e6 100644
--- a/.github/workflows/lichen_run.yml
+++ b/.github/workflows/lichen_run.yml
@@ -18,4 +18,4 @@ jobs:
         sudo bash ./tests/setup.sh
     - name: Run Tests
       run: |
-        python3 ./tests/tests.py
+        sudo python3 ./tests/tests.py

From b9b085754f6a9c85c633ab6e9f0cc5dbdb1ab217 Mon Sep 17 00:00:00 2001
From: williamjallen
Date: Sat, 3 Jul 2021 11:39:27 -0400
Subject: [PATCH 27/52] Update tests.py

---
 tests/tests.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/tests.py b/tests/tests.py
index be0077d..286c8e8 100644
--- a/tests/tests.py
+++ b/tests/tests.py
@@ -8,6 +8,7 @@ class TestPlaintextTokenizer(unittest.TestCase):
 
     def testPlaintextTokenizer(self):
+        print("test starting")
         input_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/input.txt"
         output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/plaintext_tokenizer_tests/output.json"
         # expected_output_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/output.json"
@@ -19,6 +20,7 @@ def testPlaintextTokenizer(self):
             print(file.read())
 
         os.remove(output_file)
+        print("test complete")
 
 
 if __name__ == '__main__':

From c50e01711723c0a2dd12c9c3198fd3f6c8515dcc Mon Sep 17 00:00:00 2001
From: williamjallen
Date: Sat, 3 Jul 2021 11:43:45 -0400
Subject: [PATCH 28/52] fix path

---
 tests/tests.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/tests.py b/tests/tests.py
index 286c8e8..08ea2ed 100644
--- a/tests/tests.py
+++ b/tests/tests.py
@@ -13,7 +13,7 @@ def testPlaintextTokenizer(self):
         output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/plaintext_tokenizer_tests/output.json"
         # expected_output_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/output.json"
 
-        command = f"{lichen_installation_dir}/plaintext_tokenizer.out {input_file} > {output_file}"
+        command = f"{lichen_installation_dir}plaintext_tokenizer.out {input_file} > {output_file}"
         os.system(command)
 
         with open(output_file) as file:
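This fix and the next few are all missing- or doubled-slash bugs from gluing paths together with f-strings. `os.path.join` sidesteps that whole class of errors; for example:

    import os

    # builds ".../Lichen/bin/plaintext_tokenizer.out" correctly whether or not
    # lichen_installation_dir carries a trailing slash
    tokenizer = os.path.join(lichen_installation_dir, "bin", "plaintext_tokenizer.out")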
{output_file}" + command = f"{lichen_installation_dir}plaintext_tokenizer.out {input_file} > {output_file}" os.system(command) with open(output_file) as file: From 30df2b80146b35bedd474a56818edf686b96a93c Mon Sep 17 00:00:00 2001 From: williamjallen Date: Sat, 3 Jul 2021 11:47:17 -0400 Subject: [PATCH 29/52] fix path --- tests/tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tests.py b/tests/tests.py index 08ea2ed..8200e1c 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -13,7 +13,7 @@ def testPlaintextTokenizer(self): output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/plaintext_tokenizer_tests/output.json" # expected_output_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/output.json" - command = f"{lichen_installation_dir}plaintext_tokenizer.out {input_file} > {output_file}" + command = f"{lichen_installation_dir}bin/plaintext_tokenizer.out {input_file} > {output_file}" os.system(command) with open(output_file) as file: From 8554c60fbd971bb0d9a561ca84c1b4bb939c855e Mon Sep 17 00:00:00 2001 From: williamjallen Date: Sat, 3 Jul 2021 11:50:36 -0400 Subject: [PATCH 30/52] Update tests.py --- tests/tests.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/tests.py b/tests/tests.py index 8200e1c..296383a 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -8,19 +8,17 @@ class TestPlaintextTokenizer(unittest.TestCase): def testPlaintextTokenizer(self): - print("test starting") input_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/input.txt" output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/plaintext_tokenizer_tests/output.json" # expected_output_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/output.json" - command = f"{lichen_installation_dir}bin/plaintext_tokenizer.out {input_file} > {output_file}" + command = f"{lichen_installation_dir}bin/plaintext_tokenizer.out < {input_file} > {output_file}" os.system(command) with open(output_file) as file: print(file.read()) os.remove(output_file) - print("test complete") if __name__ == '__main__': From 32c7aa16cdad984e7d0461db998edafe8c67eb45 Mon Sep 17 00:00:00 2001 From: williamjallen Date: Sat, 3 Jul 2021 11:55:04 -0400 Subject: [PATCH 31/52] add assertion to implement test --- tests/tests.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/tests.py b/tests/tests.py index 296383a..c573507 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -10,14 +10,20 @@ class TestPlaintextTokenizer(unittest.TestCase): def testPlaintextTokenizer(self): input_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/input.txt" output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/plaintext_tokenizer_tests/output.json" - # expected_output_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/output.json" + expected_output_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/output.json" command = f"{lichen_installation_dir}bin/plaintext_tokenizer.out < {input_file} > {output_file}" os.system(command) with open(output_file) as file: - print(file.read()) + actual_output = file.read() + with open(expected_output_file) as file: + expected_output = file.read() + + self.assertEqual(actual_output, expected_output) + + # clean up the files os.remove(output_file) From 5956ec4fedec06c7517f547e3d420fae43103669 Mon Sep 17 00:00:00 2001 From: williamjallen Date: Sat, 3 Jul 2021 11:57:04 -0400 Subject: [PATCH 32/52] fix more paths --- tests/tests.py | 8 ++++---- 1 file changed, 4 
From 5956ec4fedec06c7517f547e3d420fae43103669 Mon Sep 17 00:00:00 2001
From: williamjallen
Date: Sat, 3 Jul 2021 11:57:04 -0400
Subject: [PATCH 32/52] fix more paths

---
 tests/tests.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/tests.py b/tests/tests.py
index c573507..3dddb37 100644
--- a/tests/tests.py
+++ b/tests/tests.py
@@ -1,9 +1,9 @@
 import unittest
 import os
 
-lichen_repository_dir = "/usr/local/submitty/GIT_CHECKOUT/Lichen/"
-lichen_installation_dir = "/usr/local/submitty/Lichen/"
-lichen_data_dir = "/var/local/submitty/courses/"
+lichen_repository_dir = "/usr/local/submitty/GIT_CHECKOUT/Lichen"
+lichen_installation_dir = "/usr/local/submitty/Lichen"
+lichen_data_dir = "/var/local/submitty/courses"
 
 
 class TestPlaintextTokenizer(unittest.TestCase):
@@ -12,7 +12,7 @@ def testPlaintextTokenizer(self):
         output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/plaintext_tokenizer_tests/output.json"
         expected_output_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/output.json"
 
-        command = f"{lichen_installation_dir}bin/plaintext_tokenizer.out < {input_file} > {output_file}"
+        command = f"{lichen_installation_dir}/bin/plaintext_tokenizer.out < {input_file} > {output_file}"
         os.system(command)

From 63df1904436b2e08e3f63491c9dc58abd9624035 Mon Sep 17 00:00:00 2001
From: williamjallen
Date: Sat, 3 Jul 2021 11:58:57 -0400
Subject: [PATCH 33/52] fix another path issue

---
 tests/tests.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/tests.py b/tests/tests.py
index 3dddb37..f9e5faa 100644
--- a/tests/tests.py
+++ b/tests/tests.py
@@ -10,7 +10,7 @@ class TestPlaintextTokenizer(unittest.TestCase):
     def testPlaintextTokenizer(self):
         input_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/input.txt"
         output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/plaintext_tokenizer_tests/output.json"
-        expected_output_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/output.json"
+        expected_output_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/expected_output/output.json"
 
         command = f"{lichen_installation_dir}/bin/plaintext_tokenizer.out < {input_file} > {output_file}"
         os.system(command)

From 57db882fb9711c1fa4e36d9ab346cbdf393baa89 Mon Sep 17 00:00:00 2001
From: williamjallen
Date: Sat, 3 Jul 2021 12:04:38 -0400
Subject: [PATCH 34/52] Add second test

---
 tests/tests.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/tests/tests.py b/tests/tests.py
index f9e5faa..3f25942 100644
--- a/tests/tests.py
+++ b/tests/tests.py
@@ -26,6 +26,25 @@ def testPlaintextTokenizer(self):
         # clean up the files
         os.remove(output_file)
 
+    def testPlaintextTokenizerIgnoreNewlines(self):
+        input_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/input.txt"
+        output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/plaintext_tokenizer_tests/output.json"
+        expected_output_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/expected_output/output_ignore_newlines.json"
+
+        command = f"{lichen_installation_dir}/bin/plaintext_tokenizer.out < {input_file} > {output_file}"
+        os.system(command)
+
+        with open(output_file) as file:
+            actual_output = file.read()
+
+        with open(expected_output_file) as file:
+            expected_output = file.read()
+
+        self.assertEqual(actual_output, expected_output)
+
+        # clean up the files
+        os.remove(output_file)
+
 
 if __name__ == '__main__':
     unittest.main()
From 250d998aeaae647cc8387d928c6c885845ef9899 Mon Sep 17 00:00:00 2001
From: williamjallen
Date: Sat, 3 Jul 2021 13:00:49 -0400
Subject: [PATCH 35/52] Update tests.py

---
 tests/tests.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/tests.py b/tests/tests.py
index 3f25942..de5a531 100644
--- a/tests/tests.py
+++ b/tests/tests.py
@@ -8,6 +8,8 @@ class TestPlaintextTokenizer(unittest.TestCase):
 
     def testPlaintextTokenizer(self):
+        self.maxDiff = None
+
         input_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/input.txt"
         output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/plaintext_tokenizer_tests/output.json"
         expected_output_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/expected_output/output.json"
@@ -27,6 +29,8 @@ def testPlaintextTokenizer(self):
         os.remove(output_file)
 
     def testPlaintextTokenizerIgnoreNewlines(self):
+        self.maxDiff = None
+
         input_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/input.txt"
         output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/plaintext_tokenizer_tests/output.json"
         expected_output_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/expected_output/output_ignore_newlines.json"

From f452fa4ddbb5f28511e3f546a63ce852848bcd70 Mon Sep 17 00:00:00 2001
From: williamjallen
Date: Sat, 3 Jul 2021 13:05:22 -0400
Subject: [PATCH 36/52] it's important to run the right command to get the
 right results...

---
 tests/tests.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/tests.py b/tests/tests.py
index de5a531..c7c2f5d 100644
--- a/tests/tests.py
+++ b/tests/tests.py
@@ -35,7 +35,7 @@ def testPlaintextTokenizerIgnoreNewlines(self):
         output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/plaintext_tokenizer_tests/output.json"
         expected_output_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/expected_output/output_ignore_newlines.json"
 
-        command = f"{lichen_installation_dir}/bin/plaintext_tokenizer.out < {input_file} > {output_file}"
+        command = f"{lichen_installation_dir}/bin/plaintext_tokenizer.out --ignore_newlines < {input_file} > {output_file}"
         os.system(command)
 
         with open(output_file) as file:

From b8103feb450608f90745af13daede85fd18f68ba Mon Sep 17 00:00:00 2001
From: williamjallen
Date: Sat, 3 Jul 2021 13:12:47 -0400
Subject: [PATCH 37/52] Add third test

---
 tests/tests.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/tests/tests.py b/tests/tests.py
index c7c2f5d..5973224 100644
--- a/tests/tests.py
+++ b/tests/tests.py
@@ -49,6 +49,27 @@ def testPlaintextTokenizerIgnoreNewlines(self):
         # clean up the files
         os.remove(output_file)
 
+    def testPlaintextTokenizerIgnoreEverything(self):
+        self.maxDiff = None
+
+        input_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/input.txt"
+        output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/plaintext_tokenizer_tests/output.json"
+        expected_output_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/expected_output/output_ignore_everything.json"
+
+        command = f"{lichen_installation_dir}/bin/plaintext_tokenizer.out --ignore_punctuation --to_lower --ignore_numbers --ignore_newlines < {input_file} > {output_file}"
+        os.system(command)
+
+        with open(output_file) as file:
+            actual_output = file.read()
+
+        with open(expected_output_file) as file:
+            expected_output = file.read()
+
+        self.assertEqual(actual_output, expected_output)
+
+        # clean up the files
+        os.remove(output_file)
+
 
 if __name__ == '__main__':
     unittest.main()
From 0efa16e56de35270306c960dd266c71235f69c36 Mon Sep 17 00:00:00 2001
From: williamjallen
Date: Sat, 3 Jul 2021 13:22:13 -0400
Subject: [PATCH 38/52] Add remaining plaintext tokenizer tests

---
 tests/tests.py | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/tests/tests.py b/tests/tests.py
index 5973224..4c155a1 100644
--- a/tests/tests.py
+++ b/tests/tests.py
@@ -28,6 +28,48 @@ def testPlaintextTokenizer(self):
         # clean up the files
         os.remove(output_file)
 
+    def testPlaintextTokenizerIgnorePunctuation(self):
+        self.maxDiff = None
+
+        input_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/input.txt"
+        output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/plaintext_tokenizer_tests/output.json"
+        expected_output_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/expected_output/output_ignore_punctuation.json"
+
+        command = f"{lichen_installation_dir}/bin/plaintext_tokenizer.out --ignore_punctuation < {input_file} > {output_file}"
+        os.system(command)
+
+        with open(output_file) as file:
+            actual_output = file.read()
+
+        with open(expected_output_file) as file:
+            expected_output = file.read()
+
+        self.assertEqual(actual_output, expected_output)
+
+        # clean up the files
+        os.remove(output_file)
+
+    def testPlaintextTokenizerToLower(self):
+        self.maxDiff = None
+
+        input_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/input.txt"
+        output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/plaintext_tokenizer_tests/output.json"
+        expected_output_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/expected_output/output_to_lower.json"
+
+        command = f"{lichen_installation_dir}/bin/plaintext_tokenizer.out --ignore_punctuation < {input_file} > {output_file}"
+        os.system(command)
+
+        with open(output_file) as file:
+            actual_output = file.read()
+
+        with open(expected_output_file) as file:
+            expected_output = file.read()
+
+        self.assertEqual(actual_output, expected_output)
+
+        # clean up the files
+        os.remove(output_file)
+
     def testPlaintextTokenizerIgnoreNewlines(self):
         self.maxDiff = None
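With all five plaintext variants in place, the five near-identical methods differ only in the flags passed and the expected-output filename. A hypothetical table-driven refactor using unittest's subTest (the filenames and flags below are exactly the ones exercised in this series; note the ToLower test above still passes --ignore_punctuation, a copy-paste slip a later patch corrects):

    import os
    import unittest


    class TestPlaintextTokenizerVariants(unittest.TestCase):
        # flags per expected-output file, mirroring the five tests above
        CASES = {
            "output.json": "",
            "output_ignore_punctuation.json": "--ignore_punctuation",
            "output_to_lower.json": "--to_lower",
            "output_ignore_newlines.json": "--ignore_newlines",
            "output_ignore_everything.json":
                "--ignore_punctuation --to_lower --ignore_numbers --ignore_newlines",
        }

        def testVariants(self):
            self.maxDiff = None
            input_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/input.txt"
            output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/plaintext_tokenizer_tests/output.json"
            for expected_name, flags in self.CASES.items():
                with self.subTest(flags=flags):
                    expected = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/expected_output/{expected_name}"
                    os.system(f"{lichen_installation_dir}/bin/plaintext_tokenizer.out {flags} < {input_file} > {output_file}")
                    with open(output_file) as f_actual, open(expected) as f_expected:
                        self.assertEqual(f_actual.read(), f_expected.read())

(`lichen_repository_dir`, `lichen_installation_dir`, and `lichen_data_dir` are the module-level constants tests.py already defines.)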
From 24d97bd46968f97f401d9499210435e0ec654e48 Mon Sep 17 00:00:00 2001
From: williamjallen
Date: Mon, 5 Jul 2021 09:47:47 -0400
Subject: [PATCH 39/52] Add C tokenizer tests

---
 .github/workflows/lichen_run.yml            |  1 +
 .../tokenizer}/c/expected_output/output.json |  0
 .../data/tokenizer}/c/input.cpp              |  0
 tests/setup.sh                               |  7 ++--
 tests/tests.py                               | 33 ++++++++++++++++---
 5 files changed, 31 insertions(+), 10 deletions(-)
 rename {tokenizer => tests/data/tokenizer}/c/expected_output/output.json (100%)
 rename {tokenizer => tests/data/tokenizer}/c/input.cpp (100%)

diff --git a/.github/workflows/lichen_run.yml b/.github/workflows/lichen_run.yml
index 2b322e6..d0fc560 100644
--- a/.github/workflows/lichen_run.yml
+++ b/.github/workflows/lichen_run.yml
@@ -13,6 +13,7 @@ jobs:
     - name: Install Dependencies
       run: |
         sudo apt install libboost-all-dev
+        sudo apt-get install python-clang-3.8
     - name: Create Directory Structure
       run: |
         sudo bash ./tests/setup.sh
diff --git a/tokenizer/c/expected_output/output.json b/tests/data/tokenizer/c/expected_output/output.json
similarity index 100%
rename from tokenizer/c/expected_output/output.json
rename to tests/data/tokenizer/c/expected_output/output.json
diff --git a/tokenizer/c/input.cpp b/tests/data/tokenizer/c/input.cpp
similarity index 100%
rename from tokenizer/c/input.cpp
rename to tests/data/tokenizer/c/input.cpp
diff --git a/tests/setup.sh b/tests/setup.sh
index f18c0e3..fe0be66 100644
--- a/tests/setup.sh
+++ b/tests/setup.sh
@@ -15,9 +15,6 @@ bash $lichen_repository_dir/install_lichen.sh
 # SETUP TOKENIZER TESTS ########################################################
 tokenizer_tests_course=$lichen_data_dir/f21/test_tokenizers/lichen/
 
-# make a simulated lichen path for the test_tokenizers course
-mkdir -p $tokenizer_tests_course
-
-# set up file structure for plaintext tokenizer tests
+# set up file structure for tokenizer tests
 # (doesn't need a full file structure, just a place to put files)
-mkdir -p $tokenizer_tests_course/plaintext_tokenizer_tests/
+mkdir -p $tokenizer_tests_course
diff --git a/tests/tests.py b/tests/tests.py
index 4c155a1..842d448 100644
--- a/tests/tests.py
+++ b/tests/tests.py
@@ -11,7 +11,7 @@ def testPlaintextTokenizer(self):
         self.maxDiff = None
 
         input_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/input.txt"
-        output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/plaintext_tokenizer_tests/output.json"
+        output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/output.json"
         expected_output_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/expected_output/output.json"
 
@@ -32,7 +32,7 @@ def testPlaintextTokenizerIgnorePunctuation(self):
         self.maxDiff = None
 
         input_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/input.txt"
-        output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/plaintext_tokenizer_tests/output.json"
+        output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/output.json"
         expected_output_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/expected_output/output_ignore_punctuation.json"
 
@@ -53,7 +53,7 @@ def testPlaintextTokenizerToLower(self):
         self.maxDiff = None
 
         input_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/input.txt"
-        output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/plaintext_tokenizer_tests/output.json"
+        output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/output.json"
         expected_output_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/expected_output/output_to_lower.json"
 
@@ -74,7 +74,7 @@ def testPlaintextTokenizerIgnoreNewlines(self):
         self.maxDiff = None
 
         input_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/input.txt"
-        output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/plaintext_tokenizer_tests/output.json"
+        output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/output.json"
         expected_output_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/expected_output/output_ignore_newlines.json"
 
@@ -95,7 +95,7 @@ def testPlaintextTokenizerIgnoreEverything(self):
         self.maxDiff = None
 
         input_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/input.txt"
-        output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/plaintext_tokenizer_tests/output.json"
+        output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/output.json"
         expected_output_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/expected_output/output_ignore_everything.json"
 
@@ -113,5 +113,28 @@ def testPlaintextTokenizerIgnoreEverything(self):
         os.remove(output_file)
 
 
+class TestCTokenizer(unittest.TestCase):
+    def testCTokenizer(self):
+        self.maxDiff = None
+
+        input_file = f"{lichen_repository_dir}/tests/data/tokenizer/c/input.txt"
+        output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/output.json"
+        expected_output_file = f"{lichen_repository_dir}/tests/data/tokenizer/c/expected_output/output.json"
+
+        command = f"python3 {lichen_installation_dir}/bin/c_tokenizer.py {input_file} > {output_file}"
+        os.system(command)
+
+        with open(output_file) as file:
+            actual_output = file.read()
+
+        with open(expected_output_file) as file:
+            expected_output = file.read()
+
+        self.assertEqual(actual_output, expected_output)
+
+        # clean up the files
+        os.remove(output_file)
+
+
 if __name__ == '__main__':
     unittest.main()
From a50ef676e2e15861fbb14e216545688d8a86c444 Mon Sep 17 00:00:00 2001
From: williamjallen
Date: Mon, 5 Jul 2021 09:54:15 -0400
Subject: [PATCH 40/52] Update lichen_run.yml

---
 .github/workflows/lichen_run.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/lichen_run.yml b/.github/workflows/lichen_run.yml
index d0fc560..aba242d 100644
--- a/.github/workflows/lichen_run.yml
+++ b/.github/workflows/lichen_run.yml
@@ -13,7 +13,7 @@ jobs:
     - name: Install Dependencies
       run: |
         sudo apt install libboost-all-dev
-        sudo apt-get install python-clang-3.8
+        sudo apt install python-clang-3.8
     - name: Create Directory Structure
       run: |
         sudo bash ./tests/setup.sh

From ed6415394df3bfe9c2453858b3ba4fbba1de85fc Mon Sep 17 00:00:00 2001
From: williamjallen
Date: Mon, 5 Jul 2021 09:55:10 -0400
Subject: [PATCH 41/52] Update lichen_run.yml

---
 .github/workflows/lichen_run.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/lichen_run.yml b/.github/workflows/lichen_run.yml
index aba242d..2b322e6 100644
--- a/.github/workflows/lichen_run.yml
+++ b/.github/workflows/lichen_run.yml
@@ -13,7 +13,6 @@ jobs:
     - name: Install Dependencies
       run: |
         sudo apt install libboost-all-dev
-        sudo apt install python-clang-3.8
     - name: Create Directory Structure
       run: |
         sudo bash ./tests/setup.sh

From 7300b4ec949cd93f5cd8010c439831cf6e752479 Mon Sep 17 00:00:00 2001
From: williamjallen
Date: Mon, 5 Jul 2021 09:58:04 -0400
Subject: [PATCH 42/52] Update lichen_run.yml

---
 .github/workflows/lichen_run.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/lichen_run.yml b/.github/workflows/lichen_run.yml
index 2b322e6..9c5296a 100644
--- a/.github/workflows/lichen_run.yml
+++ b/.github/workflows/lichen_run.yml
@@ -13,6 +13,7 @@ jobs:
     - name: Install Dependencies
       run: |
         sudo apt install libboost-all-dev
+        pip install clang
     - name: Create Directory Structure
       run: |
         sudo bash ./tests/setup.sh
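The back-and-forth above is about getting libclang's Python bindings onto the runner; the C tokenizer presumably imports them. A minimal smoke test of the pip-installed `clang` package (this assumes a system libclang the bindings can locate, and is not taken from the series):

    import clang.cindex

    index = clang.cindex.Index.create()
    tu = index.parse("input.cpp")  # any small C/C++ file
    # walk the raw token stream, roughly what a tokenizer would consume
    for token in tu.get_tokens(extent=tu.cursor.extent):
        print(token.kind, token.spelling)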
From 2066fa3f05b9633dc185f88077f82eaf4ea1cba0 Mon Sep 17 00:00:00 2001
From: williamjallen
Date: Mon, 5 Jul 2021 10:05:44 -0400
Subject: [PATCH 43/52] Add MIPS tokenizer

---
 .github/workflows/lichen_run.yml             |  3 +--
 .../mips/example_output/output.json          |  0
 .../data/tokenizer}/mips/input.s             |  0
 tests/tests.py                               | 25 ++++++++++++++++++-
 4 files changed, 25 insertions(+), 3 deletions(-)
 rename {tokenizer => tests/data/tokenizer}/mips/example_output/output.json (100%)
 rename {tokenizer => tests/data/tokenizer}/mips/input.s (100%)

diff --git a/.github/workflows/lichen_run.yml b/.github/workflows/lichen_run.yml
index 9c5296a..20efcb0 100644
--- a/.github/workflows/lichen_run.yml
+++ b/.github/workflows/lichen_run.yml
@@ -4,7 +4,7 @@ on: [push]
 
 jobs:
   test-lichen:
-    runs-on: ubuntu-18.04
+    runs-on: ubuntu-20.04
     steps:
     - uses: actions/checkout@v2
     - uses: actions/setup-python@v2
@@ -13,7 +13,6 @@ jobs:
     - name: Install Dependencies
       run: |
         sudo apt install libboost-all-dev
-        pip install clang
     - name: Create Directory Structure
       run: |
         sudo bash ./tests/setup.sh
diff --git a/tokenizer/mips/example_output/output.json b/tests/data/tokenizer/mips/example_output/output.json
similarity index 100%
rename from tokenizer/mips/example_output/output.json
rename to tests/data/tokenizer/mips/example_output/output.json
diff --git a/tokenizer/mips/input.s b/tests/data/tokenizer/mips/input.s
similarity index 100%
rename from tokenizer/mips/input.s
rename to tests/data/tokenizer/mips/input.s
diff --git a/tests/tests.py b/tests/tests.py
index 842d448..3e8cd74 100644
--- a/tests/tests.py
+++ b/tests/tests.py
@@ -117,7 +117,7 @@ class TestCTokenizer(unittest.TestCase):
     def testCTokenizer(self):
         self.maxDiff = None
 
-        input_file = f"{lichen_repository_dir}/tests/data/tokenizer/c/input.txt"
+        input_file = f"{lichen_repository_dir}/tests/data/tokenizer/c/input.cpp"
         output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/output.json"
         expected_output_file = f"{lichen_repository_dir}/tests/data/tokenizer/c/expected_output/output.json"
 
@@ -136,5 +136,28 @@ def testCTokenizer(self):
         os.remove(output_file)
 
 
+class TestMIPSTokenizer(unittest.TestCase):
+    def testMIPSTokenizer(self):
+        self.maxDiff = None
+
+        input_file = f"{lichen_repository_dir}/tests/data/tokenizer/mips/input.s"
+        output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/output.json"
+        expected_output_file = f"{lichen_repository_dir}/tests/data/tokenizer/mips/expected_output/output.json"
+
+        command = f"python3 {lichen_installation_dir}/bin/mips_tokenizer.py {input_file} > {output_file}"
+        os.system(command)
+
+        with open(output_file) as file:
+            actual_output = file.read()
+
+        with open(expected_output_file) as file:
+            expected_output = file.read()
+
+        self.assertEqual(actual_output, expected_output)
+
+        # clean up the files
+        os.remove(output_file)
+
+
 if __name__ == '__main__':
     unittest.main()

From d26b144f4ce5596e0fe05ca5489c61ea001fd060 Mon Sep 17 00:00:00 2001
From: williamjallen
Date: Mon, 5 Jul 2021 10:09:20 -0400
Subject: [PATCH 44/52] Update tests.py

---
 tests/tests.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/tests.py b/tests/tests.py
index 3e8cd74..c0c781a 100644
--- a/tests/tests.py
+++ b/tests/tests.py
@@ -140,6 +140,7 @@ class TestMIPSTokenizer(unittest.TestCase):
     def testMIPSTokenizer(self):
         self.maxDiff = None
 
+
         input_file = f"{lichen_repository_dir}/tests/data/tokenizer/mips/input.s"
         output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/output.json"
         expected_output_file = f"{lichen_repository_dir}/tests/data/tokenizer/mips/expected_output/output.json"

From bb545d8d414e8b54a98cd45878881133a0b5a083 Mon Sep 17 00:00:00 2001
From: williamjallen
Date: Mon, 5 Jul 2021 10:23:09 -0400
Subject: [PATCH 45/52] Update tests.py

---
 tests/tests.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/tests.py b/tests/tests.py
index c0c781a..3e8cd74 100644
--- a/tests/tests.py
+++ b/tests/tests.py
@@ -140,7 +140,6 @@ class TestMIPSTokenizer(unittest.TestCase):
     def testMIPSTokenizer(self):
         self.maxDiff = None
 
-
         input_file = f"{lichen_repository_dir}/tests/data/tokenizer/mips/input.s"
         output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/output.json"
         expected_output_file = f"{lichen_repository_dir}/tests/data/tokenizer/mips/expected_output/output.json"
From 4abfaf18714023f7aac222e54704a2cb9453cf1b Mon Sep 17 00:00:00 2001
From: williamjallen
Date: Mon, 19 Jul 2021 10:32:02 -0400
Subject: [PATCH 46/52] Fix paths in tests.py such that it can be run in
 vagrant

---
 .gitignore                                         |  1 +
 .../{example_output => expected_output}/output.json |  0
 .../plaintext/expected_output/output.json          |  6 +-
 .../output_ignore_newlines.json                    |  6 +-
 .../output_ignore_punctuation.json                 |  6 +-
 .../expected_output/output_to_lower.json           |  6 +-
 tests/tests.py                                     | 77 ++++++++-----------
 7 files changed, 47 insertions(+), 55 deletions(-)
 rename tests/data/tokenizer/mips/{example_output => expected_output}/output.json (100%)

diff --git a/.gitignore b/.gitignore
index 7080991..bc4bed5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 *~
 tools/assignments/*
+tests/__pycache__
diff --git a/tests/data/tokenizer/mips/example_output/output.json b/tests/data/tokenizer/mips/expected_output/output.json
similarity index 100%
rename from tests/data/tokenizer/mips/example_output/output.json
rename to tests/data/tokenizer/mips/expected_output/output.json
diff --git a/tests/data/tokenizer/plaintext/expected_output/output.json b/tests/data/tokenizer/plaintext/expected_output/output.json
index 0a04cad..277632f 100644
--- a/tests/data/tokenizer/plaintext/expected_output/output.json
+++ b/tests/data/tokenizer/plaintext/expected_output/output.json
@@ -51,7 +51,7 @@
     "char": 20,
     "line": 3,
     "type": "number",
-    "value": "1"
+    "value": 1
   },
   {
     "char": 22,
@@ -159,7 +159,7 @@
     "char": 26,
     "line": 4,
     "type": "number",
-    "value": "1"
+    "value": 1
   },
   {
     "char": 27,
@@ -171,7 +171,7 @@
     "char": 28,
     "line": 4,
     "type": "number",
-    "value": "2"
+    "value": 2
   },
   {
     "char": 29,
diff --git a/tests/data/tokenizer/plaintext/expected_output/output_ignore_newlines.json b/tests/data/tokenizer/plaintext/expected_output/output_ignore_newlines.json
index 35f4422..68e4a4a 100644
--- a/tests/data/tokenizer/plaintext/expected_output/output_ignore_newlines.json
+++ b/tests/data/tokenizer/plaintext/expected_output/output_ignore_newlines.json
@@ -39,7 +39,7 @@
     "char": 20,
     "line": 3,
     "type": "number",
-    "value": "1"
+    "value": 1
   },
   {
     "char": 22,
@@ -141,7 +141,7 @@
     "char": 26,
     "line": 4,
     "type": "number",
-    "value": "1"
+    "value": 1
   },
   {
     "char": 27,
@@ -153,7 +153,7 @@
     "char": 28,
     "line": 4,
     "type": "number",
-    "value": "2"
+    "value": 2
   },
   {
     "char": 29,
diff --git a/tests/data/tokenizer/plaintext/expected_output/output_ignore_punctuation.json b/tests/data/tokenizer/plaintext/expected_output/output_ignore_punctuation.json
index 341d794..0d218cb 100644
--- a/tests/data/tokenizer/plaintext/expected_output/output_ignore_punctuation.json
+++ b/tests/data/tokenizer/plaintext/expected_output/output_ignore_punctuation.json
@@ -51,7 +51,7 @@
     "char": 20,
     "line": 3,
     "type": "number",
-    "value": "1"
+    "value": 1
   },
   {
     "char": 23,
@@ -123,7 +123,7 @@
     "char": 26,
     "line": 4,
     "type": "number",
-    "value": "1"
+    "value": 1
   },
   {
     "char": 27,
@@ -135,7 +135,7 @@
     "char": 28,
     "line": 4,
     "type": "number",
-    "value": "2"
+    "value": 2
   },
   {
     "char": 29,
diff --git a/tests/data/tokenizer/plaintext/expected_output/output_to_lower.json b/tests/data/tokenizer/plaintext/expected_output/output_to_lower.json
index 14b0da1..2c7721e 100644
--- a/tests/data/tokenizer/plaintext/expected_output/output_to_lower.json
+++ b/tests/data/tokenizer/plaintext/expected_output/output_to_lower.json
@@ -51,7 +51,7 @@
     "char": 20,
     "line": 3,
     "type": "number",
-    "value": "1"
+    "value": 1
   },
   {
     "char": 22,
@@ -159,7 +159,7 @@
     "char": 26,
     "line": 4,
     "type": "number",
-    "value": "1"
+    "value": 1
   },
   {
     "char": 27,
@@ -171,7 +171,7 @@
     "char": 28,
     "line": 4,
     "type": "number",
-    "value": "2"
+    "value": 2
   },
   {
     "char": 29,
diff --git a/tests/tests.py b/tests/tests.py
index 3e8cd74..8034ef8 100644
--- a/tests/tests.py
+++ b/tests/tests.py
@@ -1,18 +1,25 @@
 import unittest
 import os
+import shutil
 
-lichen_repository_dir = "/usr/local/submitty/GIT_CHECKOUT/Lichen"
 lichen_installation_dir = "/usr/local/submitty/Lichen"
-lichen_data_dir = "/var/local/submitty/courses"
+lichen_test_playground = "/usr/local/submitty/Lichen/test_output"
 
 
 class TestPlaintextTokenizer(unittest.TestCase):
+    def setUp(self):
+        if not os.path.isdir(os.path.join(lichen_test_playground, 'plaintext_tokenizer')):
+            os.makedirs(os.path.join(lichen_test_playground, 'plaintext_tokenizer'))
+
+    def tearDown(self):
+        shutil.rmtree(os.path.join(lichen_test_playground, 'plaintext_tokenizer'))
+
     def testPlaintextTokenizer(self):
         self.maxDiff = None
 
-        input_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/input.txt"
-        output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/output.json"
-        expected_output_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/expected_output/output.json"
+        input_file = "./data/tokenizer/plaintext/input.txt"
+        output_file = f"{lichen_test_playground}/plaintext_tokenizer/output.json"
+        expected_output_file = "./data/tokenizer/plaintext/expected_output/output.json"
 
         command = f"{lichen_installation_dir}/bin/plaintext_tokenizer.out < {input_file} > {output_file}"
         os.system(command)
@@ -31,9 +38,9 @@ def testPlaintextTokenizer(self):
     def testPlaintextTokenizerIgnorePunctuation(self):
         self.maxDiff = None
 
-        input_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/input.txt"
-        output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/output.json"
-        expected_output_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/expected_output/output_ignore_punctuation.json"
+        input_file = "./data/tokenizer/plaintext/input.txt"
+        output_file = f"{lichen_test_playground}/plaintext_tokenizer/output.json"
+        expected_output_file = "./data/tokenizer/plaintext/expected_output/output_ignore_punctuation.json"
 
         command = f"{lichen_installation_dir}/bin/plaintext_tokenizer.out --ignore_punctuation < {input_file} > {output_file}"
         os.system(command)
@@ -52,11 +59,11 @@ def testPlaintextTokenizerIgnorePunctuation(self):
     def testPlaintextTokenizerToLower(self):
         self.maxDiff = None
 
-        input_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/input.txt"
-        output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/output.json"
-        expected_output_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/expected_output/output_to_lower.json"
+        input_file = "./data/tokenizer/plaintext/input.txt"
+        output_file = f"{lichen_test_playground}/plaintext_tokenizer/output.json"
+        expected_output_file = "./data/tokenizer/plaintext/expected_output/output_to_lower.json"
 
-        command = f"{lichen_installation_dir}/bin/plaintext_tokenizer.out --ignore_punctuation < {input_file} > {output_file}"
+        command = f"{lichen_installation_dir}/bin/plaintext_tokenizer.out --to_lower < {input_file} > {output_file}"
         os.system(command)
 
         with open(output_file) as file:
@@ -73,9 +80,9 @@ def testPlaintextTokenizerToLower(self):
     def testPlaintextTokenizerIgnoreNewlines(self):
         self.maxDiff = None
 
-        input_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/input.txt"
-        output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/output.json"
-        expected_output_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/expected_output/output_ignore_newlines.json"
+        input_file = "./data/tokenizer/plaintext/input.txt"
+        output_file = f"{lichen_test_playground}/plaintext_tokenizer/output.json"
+        expected_output_file = "./data/tokenizer/plaintext/expected_output/output_ignore_newlines.json"
 
         command = f"{lichen_installation_dir}/bin/plaintext_tokenizer.out --ignore_newlines < {input_file} > {output_file}"
         os.system(command)
@@ -94,9 +101,9 @@ def testPlaintextTokenizerIgnoreNewlines(self):
     def testPlaintextTokenizerIgnoreEverything(self):
         self.maxDiff = None
 
-        input_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/input.txt"
-        output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/output.json"
-        expected_output_file = f"{lichen_repository_dir}/tests/data/tokenizer/plaintext/expected_output/output_ignore_everything.json"
+        input_file = "./data/tokenizer/plaintext/input.txt"
+        output_file = f"{lichen_test_playground}/plaintext_tokenizer/output.json"
+        expected_output_file = "./data/tokenizer/plaintext/expected_output/output_ignore_everything.json"
 
         command = f"{lichen_installation_dir}/bin/plaintext_tokenizer.out --ignore_punctuation --to_lower --ignore_numbers --ignore_newlines < {input_file} > {output_file}"
         os.system(command)
@@ -113,36 +120,20 @@ def testPlaintextTokenizerIgnoreEverything(self):
         os.remove(output_file)
 
 
-class TestCTokenizer(unittest.TestCase):
-    def testCTokenizer(self):
-        self.maxDiff = None
-
-        input_file = f"{lichen_repository_dir}/tests/data/tokenizer/c/input.cpp"
-        output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/output.json"
-        expected_output_file = f"{lichen_repository_dir}/tests/data/tokenizer/c/expected_output/output.json"
-
-        command = f"python3 {lichen_installation_dir}/bin/c_tokenizer.py {input_file} > {output_file}"
-        os.system(command)
-
-        with open(output_file) as file:
-            actual_output = file.read()
-
-        with open(expected_output_file) as file:
-            expected_output = file.read()
-
-        self.assertEqual(actual_output, expected_output)
-
-        # clean up the files
-        os.remove(output_file)
+class TestMIPSTokenizer(unittest.TestCase):
+    def setUp(self):
+        if not os.path.isdir(os.path.join(lichen_test_playground, 'mips_tokenizer')):
+            os.makedirs(os.path.join(lichen_test_playground, 'mips_tokenizer'))
 
+    def tearDown(self):
+        shutil.rmtree(os.path.join(lichen_test_playground, 'mips_tokenizer'))
 
-class TestMIPSTokenizer(unittest.TestCase):
     def testMIPSTokenizer(self):
         self.maxDiff = None
 
-        input_file = f"{lichen_repository_dir}/tests/data/tokenizer/mips/input.s"
-        output_file = f"{lichen_data_dir}/f21/test_tokenizers/lichen/output.json"
-        expected_output_file = f"{lichen_repository_dir}/tests/data/tokenizer/mips/expected_output/output.json"
+        input_file = "./data/tokenizer/mips/input.s"
+        output_file = f"{lichen_test_playground}/mips_tokenizer/output.json"
+        expected_output_file = "./data/tokenizer/mips/expected_output/output.json"
 
         command = f"python3 {lichen_installation_dir}/bin/mips_tokenizer.py {input_file} > {output_file}"
         os.system(command)

From be7dd8b805518a7925c7143f4d84e3b4475abf48 Mon Sep 17 00:00:00 2001
From: williamjallen
Date: Mon, 19 Jul 2021 10:38:44 -0400
Subject: [PATCH 47/52] Fix github actions

---
 .github/workflows/lichen_run.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/lichen_run.yml b/.github/workflows/lichen_run.yml
index 20efcb0..c6b9da1 100644
--- a/.github/workflows/lichen_run.yml
+++ b/.github/workflows/lichen_run.yml
@@ -18,4 +18,5 @@ jobs:
         sudo bash ./tests/setup.sh
     - name: Run Tests
       run: |
-        sudo python3 ./tests/tests.py
+        cd /usr/local/submitty/GIT_CHECKOUT/Lichen/tests
+        sudo python3 -m unittest discover
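`python3 -m unittest discover` only finds the tests because it is run from the tests directory and tests.py matches the default test*.py pattern; the relative ./data/... paths introduced in the previous patch depend on that working directory too. A sketch of the equivalent programmatic invocation, for reference:

    import unittest

    # discover() defaults to pattern="test*.py", which matches tests.py;
    # start_dir must be the tests directory for the relative ./data paths to resolve
    suite = unittest.defaultTestLoader.discover(start_dir=".", pattern="test*.py")
    unittest.TextTestRunner(verbosity=2).run(suite)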
From 0ef49f80a33c75353a20942fd7fe2261c383f365 Mon Sep 17 00:00:00 2001
From: sbelsk
Date: Mon, 19 Jul 2021 11:01:29 -0400
Subject: [PATCH 48/52] Add hash all test

---
 tests/data/hash_all/config.json    |   4 +
 tests/data/hash_all/submission.txt |   4 +
 tests/data/hash_all/tokens.json    | 158 +++++++++++++++++++++++++++++
 tests/tests.py                     |  58 +++++++++++
 4 files changed, 224 insertions(+)
 create mode 100644 tests/data/hash_all/config.json
 create mode 100644 tests/data/hash_all/submission.txt
 create mode 100644 tests/data/hash_all/tokens.json

diff --git a/tests/data/hash_all/config.json b/tests/data/hash_all/config.json
new file mode 100644
index 0000000..1ecc52c
--- /dev/null
+++ b/tests/data/hash_all/config.json
@@ -0,0 +1,4 @@
+{
+    "language": "plaintext",
+    "sequence_length": 2
+}
diff --git a/tests/data/hash_all/submission.txt b/tests/data/hash_all/submission.txt
new file mode 100644
index 0000000..2100e54
--- /dev/null
+++ b/tests/data/hash_all/submission.txt
@@ -0,0 +1,4 @@
+int x = 8;
+int y = 3;
+int z = x + y;
+int t = 2 * x + y;
diff --git a/tests/data/hash_all/tokens.json b/tests/data/hash_all/tokens.json
new file mode 100644
index 0000000..98b2040
--- /dev/null
+++ b/tests/data/hash_all/tokens.json
@@ -0,0 +1,158 @@
+[
+  {
+    "char": 1,
+    "line": 1,
+    "type": "string",
+    "value": "int"
+  },
+  {
+    "char": 5,
+    "line": 1,
+    "type": "string",
+    "value": "x"
+  },
+  {
+    "char": 7,
+    "line": 1,
+    "type": "punctuation",
+    "value": "="
+  },
+  {
+    "char": 9,
+    "line": 1,
+    "type": "number",
+    "value": 8
+  },
+  {
+    "char": 10,
+    "line": 1,
+    "type": "punctuation",
+    "value": ";"
+  },
+  {
+    "char": 1,
+    "line": 2,
+    "type": "string",
+    "value": "int"
+  },
+  {
+    "char": 5,
+    "line": 2,
+    "type": "string",
+    "value": "y"
+  },
+  {
+    "char": 7,
+    "line": 2,
+    "type": "punctuation",
+    "value": "="
+  },
+  {
+    "char": 9,
+    "line": 2,
+    "type": "number",
+    "value": 3
+  },
+  {
+    "char": 10,
+    "line": 2,
+    "type": "punctuation",
+    "value": ";"
+  },
+  {
+    "char": 1,
+    "line": 3,
+    "type": "string",
+    "value": "int"
+  },
+  {
+    "char": 5,
+    "line": 3,
+    "type": "string",
+    "value": "z"
+  },
+  {
+    "char": 7,
+    "line": 3,
+    "type": "punctuation",
+    "value": "="
+  },
+  {
+    "char": 9,
+    "line": 3,
+    "type": "string",
+    "value": "x"
+  },
+  {
+    "char": 11,
+    "line": 3,
+    "type": "punctuation",
+    "value": "+"
+  },
+  {
+    "char": 13,
+    "line": 3,
+    "type": "string",
+    "value": "y"
+  },
+  {
+    "char": 14,
+    "line": 3,
+    "type": "punctuation",
+    "value": ";"
+  },
+  {
+    "char": 1,
+    "line": 4,
+    "type": "string",
+    "value": "int"
+  },
+  {
+    "char": 5,
+    "line": 4,
+    "type": "string",
+    "value": "t"
+  },
+  {
+    "char": 7,
+    "line": 4,
+    "type": "punctuation",
+    "value": "="
+  },
+  {
+    "char": 9,
+    "line": 4,
+    "type": "number",
+    "value": 2
+  },
+  {
+    "char": 11,
+    "line": 4,
+    "type": "punctuation",
+    "value": "*"
+  },
+  {
+    "char": 13,
+    "line": 4,
+    "type": "string",
+    "value": "x"
+  },
+  {
+    "char": 15,
+    "line": 4,
+    "type": "punctuation",
+    "value": "+"
+  },
+  {
+    "char": 17,
+    "line": 4,
+    "type": "string",
+    "value": "y"
+  },
+  {
+    "char": 18,
+    "line": 4,
+    "type": "punctuation",
+    "value": ";"
+  }
+]
diff --git a/tests/tests.py b/tests/tests.py
index 3e8cd74..6678ed7 100644
--- a/tests/tests.py
+++ b/tests/tests.py
@@ -1,5 +1,8 @@
 import unittest
 import os
+import shutil
+import subprocess
+import json
 
 lichen_repository_dir = "/usr/local/submitty/GIT_CHECKOUT/Lichen"
 lichen_installation_dir = "/usr/local/submitty/Lichen"
@@ -159,5 +162,60 @@ def testMIPSTokenizer(self):
         os.remove(output_file)
 
 
+class TestHashAll(unittest.TestCase):
+    def setUp(self):
+        os.makedirs("/usr/local/submitty/Lichen/test_output")
+
+    def tearDown(self):
+        shutil.rmtree("/usr/local/submitty/Lichen/test_output")
+
+    def testHashAll(self):
+        # make the fake directory structure hash_all.p expects
+        os.makedirs("/usr/local/submitty/Lichen/test_output/test_hash_all/provided_code")
+        os.makedirs("/usr/local/submitty/Lichen/test_output/test_hash_all/other_gradeables")
+        os.makedirs("/usr/local/submitty/Lichen/test_output/test_hash_all/users/student/1")
+        open("/usr/local/submitty/Lichen/test_output/test_hash_all/config.json", 'a').close()
+        open("/usr/local/submitty/Lichen/test_output/test_hash_all/users/student/1/tokens.json", 'a').close()
+        with open("/usr/local/submitty/Lichen/test_output/test_hash_all/provided_code/tokens.json", 'w') as file:
+            file.write("null")
+
+        # copy the input files from /data to the the new path
+        shutil.copyfile("data/hash_all/a/config.json", "/usr/local/submitty/Lichen/test_output/test_hash_all/config.json")
+        shutil.copyfile("data/hash_all/a/tokens.json", "/usr/local/submitty/Lichen/test_output/test_hash_all/users/student/1/tokens.json")
+
+        # save current working directory
+        cwd = os.getcwd()
+
+        # run hash_all
+        os.chdir("/usr/local/submitty/Lichen/bin")
+        os.system("python3 /usr/local/submitty/Lichen/bin/hash_all.py /usr/local/submitty/Lichen/test_output/test_hash_all")
+        os.chdir(cwd)
+
+        # test output
+        hashes_file = "/usr/local/submitty/Lichen/test_output/test_hash_all/users/student/1/hashes.txt"
+        with open(hashes_file, 'r') as file:
+            lines = file.readlines()
+
+        lines = [x.strip() for x in lines]
+
+        tokens_file = "/usr/local/submitty/Lichen/test_output/test_hash_all/users/student/1/tokens.json"
+        with open(tokens_file, 'r') as file:
+            tokens = json.load(file)
+        self.assertEqual(len(lines), len(tokens) - 2 + 1)
+        # make sure the same sequences hash to the same string, and
+        # that different sequences hash to different strings
+        for i in range(0, len(lines)):
+            for j in range(i + 1, len(lines)):
+                if i == 4 and j == 9\
+                        or i == 4 and j == 16\
+                        or i == 9 and j == 16\
+                        or i == 13 and j == 22\
+                        or i == 14 and j == 23\
+                        or i == 15 and j == 24:
+                    self.assertEqual(lines[i], lines[j])
+                else:
+                    self.assertNotEqual(lines[i], lines[j])
+
+
 if __name__ == '__main__':
     unittest.main()
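The count assertion above encodes the sliding-window relationship: with sequence_length w (2 in config.json), n tokens yield n - w + 1 overlapping sequences, hence len(tokens) - 2 + 1 hashes. hash_all.py's actual hashing scheme is not shown in these patches; the sketch below uses md5 purely as a stand-in to illustrate that relationship, and why equal token windows, like the repeated `= <number> ;` and `x + y ;` runs in submission.txt, must hash identically (the index pairs the test whitelists):

    import hashlib
    import json

    def hash_sequences(tokens_path, window=2):
        with open(tokens_path) as file:
            tokens = json.load(file)
        values = [str(t["value"]) for t in tokens]
        # one hash per overlapping window of `window` consecutive token values
        return [hashlib.md5("\n".join(values[i:i + window]).encode()).hexdigest()
                for i in range(len(values) - window + 1)]

    hashes = hash_sequences("data/hash_all/tokens.json")
    assert len(hashes) == 26 - 2 + 1  # the sample tokens.json holds 26 tokens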
- with open("/usr/local/submitty/Lichen/test_output/test_hash_all/provided_code/tokens.json", 'w') as file: + os.makedirs(f"{lichen_test_playground}/test_hash_all/provided_code") + os.makedirs(f"{lichen_test_playground}/test_hash_all/other_gradeables") + os.makedirs(f"{lichen_test_playground}/test_hash_all/users/student/1") + open(f"{lichen_test_playground}/test_hash_all/config.json", 'a').close() + open(f"{lichen_test_playground}/test_hash_all/users/student/1/tokens.json", 'a').close() + with open(f"{lichen_test_playground}/test_hash_all/provided_code/tokens.json", 'w') as file: file.write("null") # copy the input files from /data to the the new path - shutil.copyfile("data/hash_all/a/config.json", "/usr/local/submitty/Lichen/test_output/test_hash_all/config.json") - shutil.copyfile("data/hash_all/a/tokens.json", "/usr/local/submitty/Lichen/test_output/test_hash_all/users/student/1/tokens.json") + shutil.copyfile("data/hash_all/config.json", f"{lichen_test_playground}/test_hash_all/config.json") + shutil.copyfile("data/hash_all/tokens.json", f"{lichen_test_playground}/test_hash_all/users/student/1/tokens.json") # save current working directory cwd = os.getcwd() # run hash_all - os.chdir("/usr/local/submitty/Lichen/bin") - os.system("python3 /usr/local/submitty/Lichen/bin/hash_all.py /usr/local/submitty/Lichen/test_output/test_hash_all") + os.chdir(f"{lichen_installation_dir}/bin") + # TODO: make this not print to stdout + os.system(f"python3 {lichen_installation_dir}/bin/hash_all.py {lichen_test_playground}/test_hash_all") os.chdir(cwd) # test output - hashes_file = "/usr/local/submitty/Lichen/test_output/test_hash_all/users/student/1/hashes.txt" + hashes_file = f"{lichen_test_playground}/test_hash_all/users/student/1/hashes.txt" with open(hashes_file, 'r') as file: lines = file.readlines() - lines = [x.strip() for x in lines] - - tokens_file = "/usr/local/submitty/Lichen/test_output/test_hash_all/users/student/1/tokens.json" + tokens_file = f"{lichen_test_playground}/test_hash_all/users/student/1/tokens.json" with open(tokens_file, 'r') as file: tokens = json.load(file) + + # make sure the number of sequences and the number of hashes are the same self.assertEqual(len(lines), len(tokens) - 2 + 1) + # make sure the same sequences hash to the same string, and # that different sequences hash to different strings for i in range(0, len(lines)): From 154e2a26e9e0d5151214f7c588da57ba26dab37b Mon Sep 17 00:00:00 2001 From: sbelsk Date: Mon, 19 Jul 2021 13:00:06 -0400 Subject: [PATCH 50/52] Get rid of unwanted stdout --- tests/tests.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/tests.py b/tests/tests.py index f4d11c4..1c71032 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -1,7 +1,6 @@ import unittest import os import shutil -import subprocess import json lichen_installation_dir = "/usr/local/submitty/Lichen" @@ -179,8 +178,7 @@ def testHashAll(self): # run hash_all os.chdir(f"{lichen_installation_dir}/bin") - # TODO: make this not print to stdout - os.system(f"python3 {lichen_installation_dir}/bin/hash_all.py {lichen_test_playground}/test_hash_all") + os.system(f"python3 {lichen_installation_dir}/bin/hash_all.py {lichen_test_playground}/test_hash_all > /dev/null") os.chdir(cwd) # test output From 6065d736b3ea5d1e7c8c5f349b1d02d9b8a9d771 Mon Sep 17 00:00:00 2001 From: williamjallen Date: Mon, 19 Jul 2021 15:56:20 -0400 Subject: [PATCH 51/52] Remove old code --- tests/tests.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/tests/tests.py 
From 6065d736b3ea5d1e7c8c5f349b1d02d9b8a9d771 Mon Sep 17 00:00:00 2001
From: williamjallen
Date: Mon, 19 Jul 2021 15:56:20 -0400
Subject: [PATCH 51/52] Remove old code

---
 tests/tests.py | 18 ------------------
 1 file changed, 18 deletions(-)

diff --git a/tests/tests.py b/tests/tests.py
index 8034ef8..13cff79 100644
--- a/tests/tests.py
+++ b/tests/tests.py
@@ -32,9 +32,6 @@ def testPlaintextTokenizer(self):
 
         self.assertEqual(actual_output, expected_output)
 
-        # clean up the files
-        os.remove(output_file)
-
     def testPlaintextTokenizerIgnorePunctuation(self):
         self.maxDiff = None
 
@@ -53,9 +50,6 @@ def testPlaintextTokenizerIgnorePunctuation(self):
 
         self.assertEqual(actual_output, expected_output)
 
-        # clean up the files
-        os.remove(output_file)
-
     def testPlaintextTokenizerToLower(self):
         self.maxDiff = None
 
@@ -74,9 +68,6 @@ def testPlaintextTokenizerToLower(self):
 
         self.assertEqual(actual_output, expected_output)
 
-        # clean up the files
-        os.remove(output_file)
-
     def testPlaintextTokenizerIgnoreNewlines(self):
         self.maxDiff = None
 
@@ -95,9 +86,6 @@ def testPlaintextTokenizerIgnoreNewlines(self):
 
         self.assertEqual(actual_output, expected_output)
 
-        # clean up the files
-        os.remove(output_file)
-
     def testPlaintextTokenizerIgnoreEverything(self):
         self.maxDiff = None
 
@@ -116,9 +104,6 @@ def testPlaintextTokenizerIgnoreEverything(self):
 
         self.assertEqual(actual_output, expected_output)
 
-        # clean up the files
-        os.remove(output_file)
-
 
 class TestMIPSTokenizer(unittest.TestCase):
     def setUp(self):
@@ -146,9 +131,6 @@ def testMIPSTokenizer(self):
 
         self.assertEqual(actual_output, expected_output)
 
-        # clean up the files
-        os.remove(output_file)
-
 
 if __name__ == '__main__':
     unittest.main()

From a6719b538a794f5d29b5ab7f70f8ecbddbe944ae Mon Sep 17 00:00:00 2001
From: William Allen
Date: Mon, 19 Jul 2021 16:29:14 -0400
Subject: [PATCH 52/52] comment with missing letter was bugging me

---
 tests/tests.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/tests.py b/tests/tests.py
index 0bd59c1..9257d05 100644
--- a/tests/tests.py
+++ b/tests/tests.py
@@ -142,7 +142,7 @@ def tearDown(self):
         shutil.rmtree(lichen_test_playground)
 
     def testHashAll(self):
-        # make the fake directory structure hash_all.p expects
+        # make the fake directory structure hash_all.py expects
         os.makedirs(f"{lichen_test_playground}/test_hash_all/provided_code")
         os.makedirs(f"{lichen_test_playground}/test_hash_all/other_gradeables")
         os.makedirs(f"{lichen_test_playground}/test_hash_all/users/student/1")