# A simple utility for analyzing and editing a scheduler_data folder

In [None]:
# imports
import os
import json

In [None]:
# globals and constants
# Directory holding the worker JSON files. It is scanned recursively when the
# worker data is loaded and is the default target directory when revitalized
# workers are written back. The path is relative, so it is resolved against the
# current working directory.
# To pick the directory interactively instead, use the line below:
# filepath_workerdata = input("Enter the path to the select worker_data directory: ")
filepath_workerdata = "data/scraped/pangoly/scheduler_data"

In [None]:
def get_all_files_in_dir(dir_path: str) -> list:
	"""
	Collects the paths of every file found below a directory.

	The directory tree is traversed with os.walk, so files in nested
	sub-directories are included. Each returned path is the walked folder
	joined with the file name.

	Args:
		dir_path (str): The directory at which the traversal starts.

	Returns:
		A list with one path per file, in the order os.walk reports them.
	"""
	return [
		os.path.join(folder, filename)
		for folder, _subfolders, filenames in os.walk(dir_path)
		for filename in filenames
	]


def get_worker_data(root_dir) -> dict:
	"""
	Loads every worker JSON file below a directory, keyed by worker handle.

	All files found recursively under `root_dir` whose name ends in ".json" are
	parsed. Each parsed document is stored under the value of its
	["metadata"]["handle"] entry. If several files carry the same handle, the
	one read last replaces the earlier ones.

	Args:
		root_dir (str): The path to the directory to search for worker files.

	Returns:
		A dictionary mapping each handle to the parsed content of its file.

	Raises:
		KeyError: If a JSON file has no ["metadata"]["handle"] entry.
		json.JSONDecodeError: If a ".json" file does not contain valid JSON.
	"""
	handles_dict = {}
	for file in get_all_files_in_dir(root_dir):
		if not file.endswith(".json"):
			continue
		# Decode explicitly as UTF-8 instead of relying on the platform's
		# default encoding, which differs between systems and can fail on
		# non-ASCII content.
		with open(file, "r", encoding="utf-8") as f:
			data = json.load(f)
		handles_dict[data["metadata"]["handle"]] = data
	return handles_dict


def sort_exit_code_handles(exit_code_handles: dict) -> dict:
	"""
	Orders a dictionary by the length of its values, longest first.

	Entries whose values have the same length keep their original relative
	order, because the sort is stable.

	Args:
		exit_code_handles (dict): Maps exit codes to the handles that produced them.

	Returns:
		A new dictionary with the same entries, ordered by descending value length.
	"""
	def group_size(entry):
		_key, members = entry
		return len(members)

	ranked = list(exit_code_handles.items())
	ranked.sort(key=group_size, reverse=True)
	return dict(ranked)


# Load every worker file below `filepath_workerdata`, keyed by handle.
worker_data = get_worker_data(filepath_workerdata)
# NOTE(review): sort_exit_code_handles orders entries by len(value). Applied to
# worker_data, each value is a worker's data dict, so workers end up ordered by
# their number of top-level keys (descending). Confirm this ordering is intended.
worker_data = sort_exit_code_handles(worker_data)


In [None]:
def get_exit_code_handles(worker_data: dict) -> dict:
	"""
	Groups worker handles by the exit code stored in their worker data.

	The exit code is read from ["workerdata"]["exit_code"] of every entry.

	Args:
		worker_data (dict): Maps each handle to its worker data.

	Returns:
		A dictionary mapping each exit code to the list of handles that have it,
		in the order the handles appear in `worker_data`.
	"""
	grouped = {}
	for handle, record in worker_data.items():
		grouped.setdefault(record["workerdata"]["exit_code"], []).append(handle)
	return grouped


def print_exit_code_counts(exit_code_handles: dict):
	"""
	Prints the total number of workers and the number of workers per exit code.

	Args:
		exit_code_handles (dict): Maps each exit code to the list of handles that have it.
	"""
	counts = {code: len(members) for code, members in exit_code_handles.items()}
	print(f"Total number of workers: {sum(counts.values())}")
	for code, count in counts.items():
		print(f"Exit code: {code}, count: {count}")

		
def print_nonzero_handles(exit_code_handles: dict, workers: dict = None):
	"""
	Prints the handles for all non-zero exit codes.

	For every exit code other than 0, a header line with the number of handles
	is printed, followed by one line per handle showing its remaining retries.
	Workers whose data has a "revitalized" key under "data_analyzer" are marked
	with " (revitalized)".

	Args:
		exit_code_handles (dict): Maps each exit code to the list of handles that have it.
		workers (dict): Maps each handle to its worker data. When omitted, the
			module-level `worker_data` dictionary is used.
	"""
	if workers is None:
		# Fall back to the notebook-global dictionary so that existing
		# one-argument calls keep working unchanged.
		workers = worker_data
	print("Non-zero exit codes:")
	for exit_code, handles in exit_code_handles.items():
		if exit_code == 0:
			continue
		print(f"Exit code: {exit_code}, count: {len(handles)}:")
		for handle in handles:
			worker_obj = workers[handle]
			add_r = ""
			if "data_analyzer" in worker_obj and "revitalized" in worker_obj["data_analyzer"]:
				add_r = " (revitalized)"
			print(f" - '{handle}' ({worker_obj['metadata']['retries']} retries left){add_r}")


# Group the handles by exit code and order the groups by size (largest first),
# then print the per-code counts followed by the details of every worker that
# did not exit with code 0.
exit_code_handles = get_exit_code_handles(worker_data)
exit_code_handles = sort_exit_code_handles(exit_code_handles)
print_exit_code_counts(exit_code_handles)
print_nonzero_handles(exit_code_handles)
# NOTE(review): `a` is not referenced anywhere in the visible code; possibly a
# leftover (e.g. a breakpoint anchor). Confirm before removing.
a = 0


In [None]:
def print_successful_handles(exit_code_handles: dict, workers: dict = None):
	"""
	Prints handles for all successful workers (exit code 0).

	For exit code 0, a header line with the number of handles is printed,
	followed by one line per handle showing its rounded duration in seconds
	and its remaining retries.

	Args:
		exit_code_handles (dict): Maps each exit code to the list of handles that have it.
		workers (dict): Maps each handle to its worker data. When omitted, the
			module-level `worker_data` dictionary is used.
	"""
	if workers is None:
		# Fall back to the notebook-global dictionary so that existing
		# one-argument calls keep working unchanged.
		workers = worker_data
	print("Successful workers:")
	for exit_code, handles in exit_code_handles.items():
		if exit_code != 0:
			continue
		print(f"Exit code: {exit_code}, count: {len(handles)}:")
		for handle in handles:
			worker_obj = workers[handle]
			print(f" - '{handle}' in {round(worker_obj['workerdata']['duration'])} seconds ({worker_obj['metadata']['retries']} retries left)")

# List every worker that exited with code 0, with its duration and remaining retries.
print_successful_handles(exit_code_handles)

In [None]:
def revitalize_nonzero(worker_data: dict, retries: int = 1) -> dict:
	"""
	Marks every worker with a non-zero exit code as revitalized.

	For each such worker, ["metadata"]["retries"] is set to `retries` and
	["data_analyzer"]["revitalized"] is set to True (the "data_analyzer"
	dictionary is created when missing). The entries are modified in place.

	Args:
		worker_data (dict): Maps each handle to its worker data.
		retries (int): The number of retries (left) to assign to the workers.

	Returns:
		The same `worker_data` dictionary that was passed in, after the update.
	"""
	failed = (
		record
		for record in worker_data.values()
		if record["workerdata"]["exit_code"] != 0
	)
	for record in failed:
		record["metadata"]["retries"] = retries
		record.setdefault("data_analyzer", {})["revitalized"] = True
	return worker_data

def revitalize_nonzero_and_write(worker_data: dict, retries: int = 1, filepath: str = filepath_workerdata) -> bool:
	"""
	Revitalizes all workers with a non-zero exit code and saves them to disk.

	The user is asked for confirmation on standard input first; anything other
	than "y" (case-insensitive) aborts without changing anything. After
	confirmation, the workers are updated via revitalize_nonzero and each
	worker with a non-zero exit code is written as indented JSON to
	"<filepath>/<handle>.json".

	NOTE(review): files are written directly into `filepath`, while the data is
	loaded recursively from sub-directories as well. Confirm that worker files
	always live at the top level and are named after their handle.

	Args:
		worker_data (dict): Maps each handle to its worker data.
		retries (int): The number of retries (left) to assign to the workers.
		filepath (str): The directory the updated worker files are written to.

	Returns:
		True when the workers were updated and written, False when the user
		did not confirm.
	"""
	# Destructive operation: require an explicit "y" before touching anything.
	answer = input("Are you sure you want to revitalize all non-zero workers? (y/N) ")
	confirmed = answer.lower() == "y"
	if not confirmed:
		return False

	updated = revitalize_nonzero(worker_data, retries)
	for handle, record in updated.items():
		# Successful workers were not modified, so their files are left alone.
		if record["workerdata"]["exit_code"] == 0:
			continue
		target = os.path.join(filepath, f"{handle}.json")
		with open(target, "w") as out:
			json.dump(record, out, indent=2)
	return True

# Run this to revitalize all non-zero workers.
# The call asks for confirmation on standard input and only modifies and writes
# worker files when the answer is "y".
# Comment out the line below to prevent accidental execution.
revitalize_nonzero_and_write(worker_data, retries=1, filepath=filepath_workerdata)


In [None]:
# THIS IS COMMENTED OUT BY DEFAULT FOR SAFETY REASONS - NOT READY YET

# def delete_nonzero(worker_data: dict) -> bool:
# 	"""
# 	Deletes all worker files with a non-zero exit code.

# 	Args:
# 		worker_data (dict): The dictionary of worker data.

# 	Returns:
# 		A boolean indicating whether the function ran successfully.
# 	"""
# 	can_run = input("Are you sure you want to delete all non-zero workers? (y/N) ")
# 	if can_run.lower() != "y":
# 		return False
# 	for handle, data in worker_data.items():
# 		exit_code = data["workerdata"]["exit_code"]
# 		if exit_code != 0:
# 			os.remove(f"{filepath_workerdata}/{handle}.json")
# 	return True

# # Run this to delete all non-zero workers.
# delete_nonzero(worker_data)
	