From 6476007cdc4f44ab47f4eb67441e1f99db71f9c7 Mon Sep 17 00:00:00 2001 From: Graham Hukill Date: Mon, 26 Aug 2024 10:29:04 -0400 Subject: [PATCH 1/2] Add init-job functionality Why these changes are being introduced: The first step in an AB comparison is initializing a 'Job' that will be used for subsequent 'Runs' of the job. This requires setting up a working directory, starting a 'job.json' file, and adding some supporting utilities. How this addresses that need: * Adds first function 'init_job' in abdiff.core * Adds first utility functions for job names, slug, and working directory Side effects of this change: * None Relevant ticket(s): * https://mitlibraries.atlassian.net/browse/TIMX-338 --- Pipfile | 1 + Pipfile.lock | 74 +++++++++++++++++++++++++---------------- abdiff/config.py | 18 ++++++++++ abdiff/core/__init__.py | 6 ++++ abdiff/core/init_job.py | 35 +++++++++++++++++++ abdiff/core/utils.py | 38 +++++++++++++++++++++ pyproject.toml | 1 + tests/conftest.py | 27 ++++++++++++++- tests/test_init_job.py | 18 ++++++++++ tests/test_utils.py | 52 +++++++++++++++++++++++++++++ 10 files changed, 240 insertions(+), 30 deletions(-) create mode 100644 abdiff/core/utils.py create mode 100644 tests/test_init_job.py create mode 100644 tests/test_utils.py diff --git a/Pipfile b/Pipfile index 0e06e52..3fdf40b 100644 --- a/Pipfile +++ b/Pipfile @@ -5,6 +5,7 @@ name = "pypi" [packages] click = "*" +python-slugify = "*" [dev-packages] black = "*" diff --git a/Pipfile.lock b/Pipfile.lock index 7048bad..328fdd3 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "98785d3c356046dd52e77d6ec08a3b3318feee53cbe8e3f0482c96665a7bb487" + "sha256": "f71c31d9e0df6a10b01c83d5e92e237410922c60db5720f757a55e091b616a3a" }, "pipfile-spec": 6, "requires": { @@ -24,6 +24,22 @@ "index": "pypi", "markers": "python_version >= '3.7'", "version": "==8.1.7" + }, + "python-slugify": { + "hashes": [ + 
"sha256:276540b79961052b66b7d116620b36518847f52d5fd9e3a70164fc8c50faa6b8", + "sha256:59202371d1d05b54a9e7720c5e038f928f45daaffe41dd10822f3907b937c856" + ], + "index": "pypi", + "markers": "python_version >= '3.7'", + "version": "==8.0.4" + }, + "text-unidecode": { + "hashes": [ + "sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8", + "sha256:bad6603bb14d279193107714b288be206cac565dfa49aa5b105294dd5c4aab93" + ], + "version": "==1.3" } }, "develop": { @@ -362,37 +378,37 @@ }, "mypy": { "hashes": [ - "sha256:0624bdb940255d2dd24e829d99a13cfeb72e4e9031f9492148f410ed30bcab54", - "sha256:0bc71d1fb27a428139dd78621953effe0d208aed9857cb08d002280b0422003a", - "sha256:0bd53faf56de9643336aeea1c925012837432b5faf1701ccca7fde70166ccf72", - "sha256:11965c2f571ded6239977b14deebd3f4c3abd9a92398712d6da3a772974fad69", - "sha256:1a81cf05975fd61aec5ae16501a091cfb9f605dc3e3c878c0da32f250b74760b", - "sha256:2684d3f693073ab89d76da8e3921883019ea8a3ec20fa5d8ecca6a2db4c54bbe", - "sha256:2c63350af88f43a66d3dfeeeb8d77af34a4f07d760b9eb3a8697f0386c7590b4", - "sha256:45df906e8b6804ef4b666af29a87ad9f5921aad091c79cc38e12198e220beabd", - "sha256:4c956b49c5d865394d62941b109728c5c596a415e9c5b2be663dd26a1ff07bc0", - "sha256:64f4a90e3ea07f590c5bcf9029035cf0efeae5ba8be511a8caada1a4893f5525", - "sha256:749fd3213916f1751fff995fccf20c6195cae941dc968f3aaadf9bb4e430e5a2", - "sha256:79c07eb282cb457473add5052b63925e5cc97dfab9812ee65a7c7ab5e3cb551c", - "sha256:7b6343d338390bb946d449677726edf60102a1c96079b4f002dedff375953fc5", - "sha256:886c9dbecc87b9516eff294541bf7f3655722bf22bb898ee06985cd7269898de", - "sha256:a2b43895a0f8154df6519706d9bca8280cda52d3d9d1514b2d9c3e26792a0b74", - "sha256:a32fc80b63de4b5b3e65f4be82b4cfa362a46702672aa6a0f443b4689af7008c", - "sha256:a707ec1527ffcdd1c784d0924bf5cb15cd7f22683b919668a04d2b9c34549d2e", - "sha256:a831671bad47186603872a3abc19634f3011d7f83b083762c942442d51c58d58", - "sha256:b639dce63a0b19085213ec5fdd8cffd1d81988f47a2dec7100e93564f3e8fb3b", - 
"sha256:b868d3bcff720dd7217c383474008ddabaf048fad8d78ed948bb4b624870a417", - "sha256:c1952f5ea8a5a959b05ed5f16452fddadbaae48b5d39235ab4c3fc444d5fd411", - "sha256:d44be7551689d9d47b7abc27c71257adfdb53f03880841a5db15ddb22dc63edb", - "sha256:e1e30dc3bfa4e157e53c1d17a0dad20f89dc433393e7702b813c10e200843b03", - "sha256:e4fe9f4e5e521b458d8feb52547f4bade7ef8c93238dfb5bbc790d9ff2d770ca", - "sha256:f39918a50f74dc5969807dcfaecafa804fa7f90c9d60506835036cc1bc891dc8", - "sha256:f404a0b069709f18bbdb702eb3dcfe51910602995de00bd39cea3050b5772d08", - "sha256:fca4a60e1dd9fd0193ae0067eaeeb962f2d79e0d9f0f66223a0682f26ffcc809" + "sha256:06d26c277962f3fb50e13044674aa10553981ae514288cb7d0a738f495550b36", + "sha256:2ff93107f01968ed834f4256bc1fc4475e2fecf6c661260066a985b52741ddce", + "sha256:36383a4fcbad95f2657642a07ba22ff797de26277158f1cc7bd234821468b1b6", + "sha256:37c7fa6121c1cdfcaac97ce3d3b5588e847aa79b580c1e922bb5d5d2902df19b", + "sha256:3a66169b92452f72117e2da3a576087025449018afc2d8e9bfe5ffab865709ca", + "sha256:3f14cd3d386ac4d05c5a39a51b84387403dadbd936e17cb35882134d4f8f0d24", + "sha256:41ea707d036a5307ac674ea172875f40c9d55c5394f888b168033177fce47383", + "sha256:478db5f5036817fe45adb7332d927daa62417159d49783041338921dcf646fc7", + "sha256:4a8a53bc3ffbd161b5b2a4fff2f0f1e23a33b0168f1c0778ec70e1a3d66deb86", + "sha256:539c570477a96a4e6fb718b8d5c3e0c0eba1f485df13f86d2970c91f0673148d", + "sha256:57555a7715c0a34421013144a33d280e73c08df70f3a18a552938587ce9274f4", + "sha256:6e658bd2d20565ea86da7d91331b0eed6d2eee22dc031579e6297f3e12c758c8", + "sha256:6e7184632d89d677973a14d00ae4d03214c8bc301ceefcdaf5c474866814c987", + "sha256:75746e06d5fa1e91bfd5432448d00d34593b52e7e91a187d981d08d1f33d4385", + "sha256:7f9993ad3e0ffdc95c2a14b66dee63729f021968bff8ad911867579c65d13a79", + "sha256:801780c56d1cdb896eacd5619a83e427ce436d86a3bdf9112527f24a66618fef", + "sha256:801ca29f43d5acce85f8e999b1e431fb479cb02d0e11deb7d2abb56bdaf24fd6", + "sha256:969ea3ef09617aff826885a22ece0ddef69d95852cdad2f60c8bb06bf1f71f70", 
+ "sha256:a976775ab2256aadc6add633d44f100a2517d2388906ec4f13231fafbb0eccca", + "sha256:af8d155170fcf87a2afb55b35dc1a0ac21df4431e7d96717621962e4b9192e70", + "sha256:b499bc07dbdcd3de92b0a8b29fdf592c111276f6a12fe29c30f6c417dd546d12", + "sha256:cd953f221ac1379050a8a646585a29574488974f79d8082cedef62744f0a0104", + "sha256:d42a6dd818ffce7be66cce644f1dff482f1d97c53ca70908dff0b9ddc120b77a", + "sha256:e8960dbbbf36906c5c0b7f4fbf2f0c7ffb20f4898e6a879fcf56a41a08b0d318", + "sha256:edb91dded4df17eae4537668b23f0ff6baf3707683734b6a818d5b9d0c0c31a1", + "sha256:ee23de8530d99b6db0573c4ef4bd8f39a2a6f9b60655bf7a1357e585a3486f2b", + "sha256:f7821776e5c4286b6a13138cc935e2e9b6fde05e081bdebf5cdb2bb97c9df81d" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==1.11.1" + "version": "==1.11.2" }, "mypy-extensions": { "hashes": [ diff --git a/abdiff/config.py b/abdiff/config.py index 6c1aca9..d76cf65 100644 --- a/abdiff/config.py +++ b/abdiff/config.py @@ -1,4 +1,22 @@ import logging +import os +from typing import Any + + +class Config: + REQUIRED_ENV_VARS = ("WORKSPACE",) + OPTIONAL_ENV_VARS = ("DATA_DIRECTORY",) + + def __getattr__(self, name: str) -> Any: # noqa: ANN401 + """Method to raise exception if required env vars not set.""" + if name in self.REQUIRED_ENV_VARS or name in self.OPTIONAL_ENV_VARS: + return os.getenv(name) + message = f"'{name}' not a valid configuration variable" + raise AttributeError(message) + + @property + def data_directory(self) -> str: + return self.DATA_DIRECTORY or "output" def configure_logger(logger: logging.Logger, *, verbose: bool) -> str: diff --git a/abdiff/core/__init__.py b/abdiff/core/__init__.py index f256688..83fd720 100644 --- a/abdiff/core/__init__.py +++ b/abdiff/core/__init__.py @@ -2,3 +2,9 @@ All primary functions used by CLI are importable from here. 
""" + +from abdiff.core.init_job import init_job + +__all__ = [ + "init_job", +] diff --git a/abdiff/core/init_job.py b/abdiff/core/init_job.py index df16d21..6aa31e0 100644 --- a/abdiff/core/init_job.py +++ b/abdiff/core/init_job.py @@ -1 +1,36 @@ """abdiff.core.init_job""" + +import logging +import os + +from abdiff.config import Config +from abdiff.core.utils import ( + get_job_slug_and_working_directory, + update_or_create_job_json, +) + +CONFIG = Config() + +logger = logging.getLogger(__name__) + + +def init_job(job_name: str) -> dict: + """Function to initialize a new Job. + + 1. create a working directory for job + 2. initialize a job.json file + """ + job_slug, job_working_directory = get_job_slug_and_working_directory(job_name) + os.makedirs(job_working_directory) + logger.info( + f"Job '{job_slug}' initialized. Working directory: {job_working_directory}" + ) + + job_data = { + "job_name": job_name, + "job_slug": job_slug, + "working_directory": str(job_working_directory), + } + update_or_create_job_json(job_name, job_data) + + return job_data diff --git a/abdiff/core/utils.py b/abdiff/core/utils.py new file mode 100644 index 0000000..c800f7c --- /dev/null +++ b/abdiff/core/utils.py @@ -0,0 +1,38 @@ +"""abdiff.core.utils""" + +import json +import os +from pathlib import Path + +from slugify import slugify + +from abdiff.config import Config + +CONFIG = Config() + + +def get_job_slug_and_working_directory(job_name: str) -> tuple[str, Path]: + """Create working directory for new job by slugifying job name.""" + job_slug = slugify(job_name) + return job_slug, Path(CONFIG.data_directory) / job_slug + + +def update_or_create_job_json(job_name: str, new_job_data: dict) -> dict: + """Create or update a job's JSON file. + + This is helpful as a utility method, as multiple steps in the process may update the + Job JSON file, with this as a standard interface. 
+ """ + job_slug, working_directory = get_job_slug_and_working_directory(job_name) + job_json_filepath = working_directory / "job.json" + + job_data = {} + if os.path.exists(job_json_filepath): + with open(job_json_filepath) as f: + job_data = json.load(f) + job_data.update(new_job_data) + + with open(job_json_filepath, "w") as f: + json.dump(job_data, f, indent=2) + + return job_data diff --git a/pyproject.toml b/pyproject.toml index 2f29f70..82b7398 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,6 +41,7 @@ ignore = [ "D103", "D104", "D415", + "G004", "PLR0912", "PLR0913", "PLR0915", diff --git a/tests/conftest.py b/tests/conftest.py index b3159f0..1a4952e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,12 +1,37 @@ +import os + import pytest from click.testing import CliRunner +from slugify import slugify + +from abdiff.core.utils import ( + get_job_slug_and_working_directory, +) @pytest.fixture(autouse=True) -def _test_env(monkeypatch): +def _test_env(monkeypatch, tmp_path): monkeypatch.setenv("WORKSPACE", "test") + monkeypatch.setenv("DATA_DIRECTORY", str(tmp_path / "output")) @pytest.fixture def runner(): return CliRunner() + + +@pytest.fixture +def job_name(): + return "Large Refactor Project" + + +@pytest.fixture +def job_slug(job_name): + return slugify(job_name) + + +@pytest.fixture +def job_working_directory(job_name): + job_slug, job_dir = get_job_slug_and_working_directory(job_name) + os.makedirs(job_dir) + return job_dir diff --git a/tests/test_init_job.py b/tests/test_init_job.py new file mode 100644 index 0000000..3a5c5e2 --- /dev/null +++ b/tests/test_init_job.py @@ -0,0 +1,18 @@ +import os.path + +from abdiff.core import init_job + + +def test_init_job_returns_initialized_job_data(tmp_path, job_name): + job_data = init_job(job_name) + assert job_data == { + "job_name": "Large Refactor Project", + "job_slug": "large-refactor-project", # slugified form of job_name, i.e. slugify(job_name) + "working_directory": str(tmp_path / 
"output/large-refactor-project"), + } + + +def test_init_job_creates_working_directory_and_job_json(tmp_path, job_name): + init_job(job_name) + assert os.path.exists(tmp_path / "output/large-refactor-project") + assert os.path.exists(tmp_path / "output/large-refactor-project/job.json") diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..411623b --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,52 @@ +import json +import os.path +from pathlib import Path + +from abdiff.config import Config +from abdiff.core.utils import ( + get_job_slug_and_working_directory, + update_or_create_job_json, +) + +CONFIG = Config() + + +def test_job_slug_success(job_name, job_slug, tmp_path): + _job_slug, _ = get_job_slug_and_working_directory(job_name) + assert job_slug == _job_slug + + +def test_job_slug_remove_special_characters(): + job_name = "abc 123 $#$#( // :: !! def $## 456" + job_slug, _ = get_job_slug_and_working_directory(job_name) + assert job_slug == "abc-123-def-456" + + +def test_job_working_directory_success(job_name, job_slug, tmp_path): + _, job_dir = get_job_slug_and_working_directory(job_name) + assert job_dir == Path(CONFIG.data_directory) / job_slug + + +def test_create_job_json_returns_initial_data(job_name, job_working_directory): + initial_job_data = {"msg": "in a bottle"} + set_job_data = update_or_create_job_json(job_name, initial_job_data) + assert set_job_data == initial_job_data + + +def test_create_job_json_creates_file(job_name, job_working_directory): + initial_job_data = {"msg": "in a bottle"} + update_or_create_job_json(job_name, initial_job_data) + _, job_dir = get_job_slug_and_working_directory(job_name) + assert os.path.exists(job_dir / "job.json") + + +def test_update_job_json_success(job_name, job_working_directory): + # simulate pre-existing job JSON file + data + with open(job_working_directory / "job.json", "w") as f: + json.dump({"msg": "in a bottle"}, f) + + job_data = 
update_or_create_job_json(job_name, {"msg2": "still in bottle"}) + assert job_data == { + "msg": "in a bottle", + "msg2": "still in bottle", + } From b51e8fdfc70998196e08c084900e438b82b44426 Mon Sep 17 00:00:00 2001 From: Graham Hukill Date: Mon, 26 Aug 2024 14:58:33 -0400 Subject: [PATCH 2/2] Rename data_directory to root_working_directory --- README.md | 2 +- abdiff/config.py | 6 +++--- abdiff/core/init_job.py | 2 +- abdiff/core/utils.py | 6 +++--- tests/conftest.py | 2 +- tests/test_utils.py | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index d694775..c0acbc3 100644 --- a/README.md +++ b/README.md @@ -88,7 +88,7 @@ WORKSPACE=### Set to `dev` for local development, this will be set to `stage` an _Delete this section if it isn't applicable to the PR._ ```shell -=### Description for optional environment variable +ROOT_WORKING_DIRECTORY=### Location for Jobs and other working artifacts; defaults to relative `./output` ``` diff --git a/abdiff/config.py b/abdiff/config.py index d76cf65..1c13a5b 100644 --- a/abdiff/config.py +++ b/abdiff/config.py @@ -5,7 +5,7 @@ class Config: REQUIRED_ENV_VARS = ("WORKSPACE",) - OPTIONAL_ENV_VARS = ("DATA_DIRECTORY",) + OPTIONAL_ENV_VARS = ("ROOT_WORKING_DIRECTORY",) def __getattr__(self, name: str) -> Any: # noqa: ANN401 """Method to raise exception if required env vars not set.""" @@ -15,8 +15,8 @@ def __getattr__(self, name: str) -> Any: # noqa: ANN401 raise AttributeError(message) @property - def data_directory(self) -> str: - return self.DATA_DIRECTORY or "output" + def root_working_directory(self) -> str: + return self.ROOT_WORKING_DIRECTORY or "output" def configure_logger(logger: logging.Logger, *, verbose: bool) -> str: diff --git a/abdiff/core/init_job.py b/abdiff/core/init_job.py index 6aa31e0..d7b2919 100644 --- a/abdiff/core/init_job.py +++ b/abdiff/core/init_job.py @@ -23,7 +23,7 @@ def init_job(job_name: str) -> dict: job_slug, job_working_directory = 
get_job_slug_and_working_directory(job_name) os.makedirs(job_working_directory) logger.info( - f"Job '{job_slug}' initialized. Working directory: {job_working_directory}" + f"Job '{job_slug}' initialized. Job working directory: {job_working_directory}" ) job_data = { diff --git a/abdiff/core/utils.py b/abdiff/core/utils.py index c800f7c..68a1030 100644 --- a/abdiff/core/utils.py +++ b/abdiff/core/utils.py @@ -14,7 +14,7 @@ def get_job_slug_and_working_directory(job_name: str) -> tuple[str, Path]: """Create working directory for new job by slugifying job name.""" job_slug = slugify(job_name) - return job_slug, Path(CONFIG.data_directory) / job_slug + return job_slug, Path(CONFIG.root_working_directory) / job_slug def update_or_create_job_json(job_name: str, new_job_data: dict) -> dict: @@ -23,8 +23,8 @@ def update_or_create_job_json(job_name: str, new_job_data: dict) -> dict: This is helpful as a utility method, as multiple steps in the process may update the Job JSON file, with this as a standard interface. 
""" - job_slug, working_directory = get_job_slug_and_working_directory(job_name) - job_json_filepath = working_directory / "job.json" + job_slug, job_working_directory = get_job_slug_and_working_directory(job_name) + job_json_filepath = job_working_directory / "job.json" job_data = {} if os.path.exists(job_json_filepath): diff --git a/tests/conftest.py b/tests/conftest.py index 1a4952e..31a17d4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -12,7 +12,7 @@ @pytest.fixture(autouse=True) def _test_env(monkeypatch, tmp_path): monkeypatch.setenv("WORKSPACE", "test") - monkeypatch.setenv("DATA_DIRECTORY", str(tmp_path / "output")) + monkeypatch.setenv("ROOT_WORKING_DIRECTORY", str(tmp_path / "output")) @pytest.fixture diff --git a/tests/test_utils.py b/tests/test_utils.py index 411623b..c683309 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -24,7 +24,7 @@ def test_job_slug_remove_special_characters(): def test_job_working_directory_success(job_name, job_slug, tmp_path): _, job_dir = get_job_slug_and_working_directory(job_name) - assert job_dir == Path(CONFIG.data_directory) / job_slug + assert job_dir == Path(CONFIG.root_working_directory) / job_slug def test_create_job_json_returns_initial_data(job_name, job_working_directory):