84 changes: 51 additions & 33 deletions bin/concatenate_all.py
@@ -7,7 +7,6 @@
import argparse
import os
import json
import sys
import time
import fnmatch
from pathlib import Path
@@ -49,59 +48,78 @@ def parse_args():
return parser.parse_args()


def main():
start_time = time.time()
args = parse_args()

sys.stdout.write("CONCATENATE ALL...") # don't want a newline here so can't use print
sys.stdout.flush()

config_path = os.path.join(args.basepath, "config.json")
if not os.path.isfile(config_path):
print(f"Error: invalid config path provided ({config_path})")
exit(1)

with open(config_path) as config_file:
config = json.load(config_file)

semester = config["semester"]
course = config["course"]
gradeable = config["gradeable"]
version_mode = config["version"]
users_to_ignore = config["ignore_submissions"]
def validate(config, args):
# load parameters from the config to be checked
regex_patterns = config["regex"].split(',')
regex_dirs = config["regex_dirs"]
language = config["language"]
threshold = int(config["threshold"])
sequence_length = int(config["sequence_length"])
prior_term_gradeables = config["prior_term_gradeables"]

# ==========================================================================
# Error checking
# Check we have a tokenizer to support the configured language
langs_data_json_path = "./data.json" # data.json is in the Lichen/bin directory after install
with open(langs_data_json_path, 'r') as langs_data_file:
langs_data = json.load(langs_data_file)
if language not in langs_data:
raise SystemExit(f"ERROR! tokenizing not supported for language {language}")

# Check values of common code threshold and sequence length
if (threshold < 2):
raise SystemExit("ERROR! threshold must be >= 2")

if (sequence_length < 1):
raise SystemExit("ERROR! sequence_length must be >= 1")

# Check for backwards crawling
for e in regex_patterns:
if ".." in e:
print('ERROR! Invalid path component ".." in regex')
exit(1)
raise SystemExit('ERROR! Invalid path component ".." in regex')

for ptg in prior_term_gradeables:
for field in ptg:
if ".." in field:
print('ERROR! Invalid path component ".." in prior_term_gradeable field')
exit(1)
raise SystemExit('ERROR! Invalid component ".." in prior_term_gradeable path')

# check permissions to make sure we have access to the prior term gradeables
my_course_group_perms = Path(args.basepath).group()
for ptg in prior_term_gradeables:
if Path(args.datapath, ptg["prior_semester"], ptg["prior_course"]).group()\
!= my_course_group_perms:
print(f"Error: Invalid permissions to access course {ptg['prior_semester']}"
raise SystemExit(f"ERROR! Invalid permissions to access course {ptg['prior_semester']}"
f"/{ptg['prior_course']}")
exit(1)

# make sure the regex directory is one of the acceptable directories
for dir in regex_dirs:
if dir not in ["submissions", "results", "checkout"]:
print("ERROR! ", dir, " is not a valid input directory for Lichen")
exit(1)
raise SystemExit("ERROR! ", dir, " is not a valid input directory for Lichen")


def main():
start_time = time.time()
args = parse_args()

print("CONCATENATE ALL...", end="")

config_path = os.path.join(args.basepath, "config.json")
if not os.path.isfile(config_path):
raise SystemExit(f"ERROR! invalid config path provided ({config_path})")

with open(config_path) as config_file:
config = json.load(config_file)

# perform error checking on config parameters
validate(config, args)

# parameters to be used in this file
semester = config["semester"]
course = config["course"]
gradeable = config["gradeable"]
version_mode = config["version"]
regex_patterns = config["regex"].split(',')
regex_dirs = config["regex_dirs"]
prior_term_gradeables = config["prior_term_gradeables"]
users_to_ignore = config["ignore_submissions"]

# ==========================================================================
# loop through and concatenate the selected files for each user in this gradeable
@@ -196,7 +214,7 @@ def main():
my_concatenated_file = os.path.join(version_path, "submission.concatenated")
with open(my_concatenated_file, "r+") as my_cf:
if my_cf.read() == "":
my_cf.write("Error: No files matched provided regex in selected directories")
my_cf.write("ERROR! No files matched provided regex in selected directories")

# do the same for the other gradeables
for other_gradeable in prior_term_gradeables:
@@ -210,7 +228,7 @@ def main():
my_concatenated_file = os.path.join(other_version_path, "submission.concatenated")
with open(my_concatenated_file, "r+") as my_cf:
if my_cf.read() == "":
my_cf.write("Error: No files matched provided regex in"
my_cf.write("ERROR! No files matched provided regex in"
"selected directories")

# ==========================================================================
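The change repeated throughout this file is collapsing print(...) + exit(1) pairs into a single raise SystemExit(msg). The two are nearly equivalent: the interpreter prints the SystemExit message and exits with status 1, though the message now goes to stderr rather than stdout. A minimal sketch of that behavior, run in a subprocess so the exit status is observable (the threshold message is borrowed from validate() above; the rest is illustrative):

import subprocess
import sys

# Fail the way validate() does, then inspect how the interpreter reports it.
snippet = "raise SystemExit('ERROR! threshold must be >= 2')"
proc = subprocess.run([sys.executable, "-c", snippet],
                      capture_output=True, text=True)

print(proc.returncode)       # 1 -- same exit status as the old exit(1)
print(proc.stderr.strip())   # ERROR! threshold must be >= 2 (written to stderr)
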
17 changes: 3 additions & 14 deletions bin/hash_all.py
@@ -9,7 +9,6 @@
import os
import json
import time
import sys
import hashlib


@@ -26,13 +25,6 @@ def hasher(lichen_config_data, my_tokenized_file, my_hashes_file):
data_json_path = "./data.json" # data.json is in the Lichen/bin directory after install
with open(data_json_path) as token_data_file:
token_data = json.load(token_data_file)
if language not in token_data:
print("\n\nERROR: UNKNOWN HASHER\n\n")
exit(1)

if (sequence_length < 1):
print("ERROR! sequence_length must be >= 1")
exit(1)

with open(my_tokenized_file, 'r', encoding='ISO-8859-1') as my_tf:
with open(my_hashes_file, 'w') as my_hf:
@@ -57,15 +49,13 @@ def main():
with open(os.path.join(args.basepath, "config.json")) as lichen_config:
lichen_config_data = json.load(lichen_config)

sys.stdout.write("HASH ALL...")
sys.stdout.flush()
print("HASH ALL...", end="")

# ==========================================================================
# walk the subdirectories of this gradeable
users_dir = os.path.join(args.basepath, "users")
if not os.path.isdir(users_dir):
print("Error: Unable to find users directory")
exit(1)
raise SystemExit("ERROR! Unable to find users directory")

for user in sorted(os.listdir(users_dir)):
user_dir = os.path.join(users_dir, user)
@@ -86,8 +76,7 @@

other_gradeables_dir = os.path.join(args.basepath, "other_gradeables")
if not os.path.isdir(other_gradeables_dir):
print("Error: Unable to find other gradeables directory")
exit(1)
raise SystemExit("ERROR! Unable to find other gradeables directory")

for other_gradeable in sorted(os.listdir(other_gradeables_dir)):
other_gradeable_dir = os.path.join(other_gradeables_dir, other_gradeable)
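With the language and sequence-length checks hoisted into validate(), hasher() keeps only its core job: sliding a window of sequence_length tokens across each tokenized submission and writing one short hash per window. That loop is outside this diff, so the following is only a sketch of the technique, with an assumed token list and an assumed truncated-MD5 hash:

import hashlib

def hash_windows(token_values, sequence_length, hash_size=8):
    # One truncated hash per consecutive run of sequence_length tokens.
    hashes = []
    for i in range(len(token_values) - sequence_length + 1):
        window = " ".join(token_values[i:i + sequence_length])
        hashes.append(hashlib.md5(window.encode()).hexdigest()[:hash_size])
    return hashes

# Toy usage: three overlapping windows of two tokens each.
print(hash_windows(["int", "main", "(", ")"], sequence_length=2))

The sequence_length >= 1 guard deleted above matters here: a window shorter than one token would make the loop bound meaningless, which is exactly why validate() now rejects it before any hashing starts.
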
16 changes: 4 additions & 12 deletions bin/tokenize_all.py
@@ -7,7 +7,6 @@
import os
import json
import time
import sys


def parse_args():
@@ -24,11 +23,7 @@ def tokenize(lichen_config_data, my_concatenated_file, my_tokenized_file):
data_json_path = "./data.json" # data.json is in the Lichen/bin directory after install
with open(data_json_path, 'r') as token_data_file:
token_data = json.load(token_data_file)
if language not in token_data:
print("\n\nERROR: UNKNOWN TOKENIZER\n\n")
exit(1)
else:
language_token_data = token_data[language]
language_token_data = token_data[language]

tokenizer = f"./{language_token_data['tokenizer']}"

@@ -46,8 +41,7 @@ def main():
start_time = time.time()
args = parse_args()

sys.stdout.write("TOKENIZE ALL...")
sys.stdout.flush()
print("TOKENIZE ALL...", end="")

with open(os.path.join(args.basepath, "config.json")) as lichen_config:
lichen_config_data = json.load(lichen_config)
@@ -56,8 +50,7 @@
# walk the subdirectories to tokenize this gradeable's submissions
users_dir = os.path.join(args.basepath, "users")
if not os.path.isdir(users_dir):
print("Error: Unable to find users directory")
exit(1)
raise SystemExit("ERROR! Unable to find users directory")

for user in sorted(os.listdir(users_dir)):
user_dir = os.path.join(users_dir, user)
@@ -77,8 +70,7 @@
# tokenize the other prior term gradeables' submissions
other_gradeables_dir = os.path.join(args.basepath, "other_gradeables")
if not os.path.isdir(other_gradeables_dir):
print("Error: Unable to find other gradeables directory")
exit(1)
raise SystemExit("ERROR! Unable to find other gradeables directory")

for other_gradeable in sorted(os.listdir(other_gradeables_dir)):
other_gradeable_dir = os.path.join(other_gradeables_dir, other_gradeable)
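Dropping the else-branch means tokenize() now indexes token_data[language] directly, trusting that validate() rejected unsupported languages back in concatenate_all.py. The dispatch looks roughly like the sketch below; the data.json entry and the tokenizer's command-line arguments are assumptions for illustration, not Lichen's verified schema:

import subprocess

# Stand-in for the data.json mapping; only the 'tokenizer' field is visible in this diff.
token_data = {"plaintext": {"tokenizer": "plaintext_tokenizer.out"}}

language = "plaintext"  # hypothetical; really read from config.json

# Safe to index directly -- validate() already confirmed the key exists.
language_token_data = token_data[language]
tokenizer = f"./{language_token_data['tokenizer']}"

# Assumed invocation: input and output paths as positional arguments.
subprocess.run([tokenizer, "submission.concatenated", "submission.tokenized"], check=True)
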
3 changes: 0 additions & 3 deletions compare_hashes/compare_hashes.cpp
@@ -192,9 +192,6 @@ int main(int argc, char* argv[]) {
int sequence_length = config_file_json.value("sequence_length",1);
int threshold = config_file_json.value("threshold",5);

assert (sequence_length >= 1);
assert (threshold >= 2);

// error checking, confirm there are hashes to work with
boost::filesystem::path users_root_directory = lichen_gradeable_path / "users";
if (!boost::filesystem::exists(users_root_directory) ||
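The two asserts deleted here duplicated checks that validate() now performs during the first pipeline stage, so compare_hashes can assume threshold >= 2 and sequence_length >= 1 on entry. The move also improves failure behavior: a C++ assert compiled with NDEBUG disappears entirely, and even when active it aborts with a terse diagnostic rather than an actionable error, while the upstream Python checks always exit with a readable message. For reference, the invariants now enforced once in validate() (values illustrative):

# The checks that replace the removed asserts; see the concatenate_all.py diff above.
threshold = 5
sequence_length = 1

if threshold < 2:
    raise SystemExit("ERROR! threshold must be >= 2")

if sequence_length < 1:
    raise SystemExit("ERROR! sequence_length must be >= 1")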