Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ name = "pypi"

[packages]
click = "*"
python-slugify = "*"

[dev-packages]
black = "*"
Expand Down
74 changes: 45 additions & 29 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ WORKSPACE=### Set to `dev` for local development, this will be set to `stage` an
_Delete this section if it isn't applicable to the PR._

```shell
<OPTIONAL_ENV>=### Description for optional environment variable
ROOT_WORKING_DIRECTORY=### Location for Jobs and other working artifacts; defaults to relative `./output`
```


Expand Down
18 changes: 18 additions & 0 deletions abdiff/config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,22 @@
import logging
import os
from typing import Any


class Config:
    """Attribute-style access to the application's environment variables.

    Only names listed in ``REQUIRED_ENV_VARS`` or ``OPTIONAL_ENV_VARS`` can be
    read; each read goes to the process environment at access time, so later
    changes to the environment are visible without rebuilding the object.
    """

    REQUIRED_ENV_VARS = ("WORKSPACE",)
    OPTIONAL_ENV_VARS = ("ROOT_WORKING_DIRECTORY",)

    def __getattr__(self, name: str) -> Any:  # noqa: ANN401
        """Return the environment value for a known configuration variable.

        Python calls this only after normal attribute lookup fails, so class
        attributes and properties are not routed through here.

        Args:
            name: Attribute being looked up.

        Returns:
            The value from ``os.getenv(name)``; ``None`` when the variable is a
            known one but is not set. No check is made here that required
            variables are actually present.

        Raises:
            AttributeError: If ``name`` is neither a required nor an optional
                environment variable.
        """
        if name in self.REQUIRED_ENV_VARS or name in self.OPTIONAL_ENV_VARS:
            return os.getenv(name)
        message = f"'{name}' not a valid configuration variable"
        raise AttributeError(message)

    @property
    def root_working_directory(self) -> str:
        """Return ``ROOT_WORKING_DIRECTORY``, or ``"output"`` if unset or empty."""
        return self.ROOT_WORKING_DIRECTORY or "output"


def configure_logger(logger: logging.Logger, *, verbose: bool) -> str:
Expand Down
6 changes: 6 additions & 0 deletions abdiff/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,9 @@
All primary functions used by CLI are importable from here.
"""

from abdiff.core.init_job import init_job

__all__ = [
"init_job",
]
Comment on lines +8 to +10
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had figured we could just add functions here as they get created?

35 changes: 35 additions & 0 deletions abdiff/core/init_job.py
Original file line number Diff line number Diff line change
@@ -1 +1,36 @@
"""abdiff.core.init_job"""

import logging
import os

from abdiff.config import Config
from abdiff.core.utils import (
get_job_slug_and_working_directory,
update_or_create_job_json,
)

CONFIG = Config()

logger = logging.getLogger(__name__)


def init_job(job_name: str) -> dict:
"""Function to initialize a new Job.

1. create a working directory for job
2. initialize a job.json file
"""
job_slug, job_working_directory = get_job_slug_and_working_directory(job_name)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Building on the output_directory comment, to further minimize potential confusion, could this just be job_directory or working_directory consistently?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with @ehanson8 on choosing one naming convention and keeping it consistent across functions.

I am leaning towards job_directory. I like that it is explicitly linking the directory to a job. Related to @ehanson8's comment on output_directory: data_directory makes me think of a folder containing file extracts and transformed record JSON files, but this directory will contain more than that -- the structure of the directory is very specific to a job, containing job and run JSON files and dated 'run' folders in addition to the transformed records. 🤔

os.makedirs(job_working_directory)
logger.info(
f"Job '{job_slug}' initialized. Job working directory: {job_working_directory}"
)

job_data = {
"job_name": job_name,
"job_slug": job_slug,
"working_directory": str(job_working_directory),
}
update_or_create_job_json(job_name, job_data)

return job_data
38 changes: 38 additions & 0 deletions abdiff/core/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""abdiff.core.utils"""

import json
import os
from pathlib import Path

from slugify import slugify

from abdiff.config import Config

CONFIG = Config()


def get_job_slug_and_working_directory(job_name: str) -> tuple[str, Path]:
    """Derive a job's slug and working-directory path from its name.

    The slug is ``slugify(job_name)`` and the working directory is that slug
    joined onto ``CONFIG.root_working_directory``. This function only computes
    the path; it does not create anything on disk.

    Args:
        job_name: Human-readable job name.

    Returns:
        A ``(job_slug, job_working_directory)`` tuple.
    """
    job_slug = slugify(job_name)
    return job_slug, Path(CONFIG.root_working_directory) / job_slug


def update_or_create_job_json(job_name: str, new_job_data: dict) -> dict:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To anyone who works on functionality that will update the job's job.json, my hope is that this utility function would be reusable for that purpose.

"""Create or update a job's JSON file.

This is helpful as a utility method, as multiple steps in the process may update the
Job JSON file, with this as a standard interface.
"""
job_slug, job_working_directory = get_job_slug_and_working_directory(job_name)
job_json_filepath = job_working_directory / "job.json"

job_data = {}
if os.path.exists(job_json_filepath):
with open(job_json_filepath) as f:
job_data = json.load(f)
job_data.update(new_job_data)

with open(job_json_filepath, "w") as f:
json.dump(job_data, f, indent=2)

return job_data
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ ignore = [
"D103",
"D104",
"D415",
"G004",
"PLR0912",
"PLR0913",
"PLR0915",
Expand Down
27 changes: 26 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,37 @@
import os

import pytest
from click.testing import CliRunner
from slugify import slugify

from abdiff.core.utils import (
get_job_slug_and_working_directory,
)


@pytest.fixture(autouse=True)
def _test_env(monkeypatch, tmp_path):
    """Set the environment variables every test runs with.

    ``WORKSPACE`` is set to ``"test"`` and ``ROOT_WORKING_DIRECTORY`` to an
    ``output`` folder under the ``tmp_path`` fixture, so job directories are
    written there rather than to the default relative ``output`` location.
    """
    monkeypatch.setenv("WORKSPACE", "test")
    monkeypatch.setenv("ROOT_WORKING_DIRECTORY", str(tmp_path / "output"))


@pytest.fixture
def runner():
    """Provide a fresh click ``CliRunner`` for invoking CLI commands."""
    cli_runner = CliRunner()
    return cli_runner


@pytest.fixture
def job_name():
    """Provide the human-readable job name shared by the tests."""
    name = "Large Refactor Project"
    return name


@pytest.fixture
def job_slug(job_name):
    """Provide the slugified form of the ``job_name`` fixture."""
    slug = slugify(job_name)
    return slug


@pytest.fixture
def job_working_directory(job_name):
    """Create the job's working directory on disk and return its path."""
    # Only the directory half of the (slug, directory) pair is needed here.
    directory = get_job_slug_and_working_directory(job_name)[1]
    os.makedirs(directory)
    return directory
18 changes: 18 additions & 0 deletions tests/test_init_job.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import os.path

from abdiff.core import init_job


def test_init_job_returns_initialized_job_data(tmp_path, job_name):
    """init_job returns the job name, its slug, and the working directory."""
    expected_job_data = {
        "job_name": "Large Refactor Project",
        "job_slug": "large-refactor-project",  # slugified form of the job name
        "working_directory": str(tmp_path / "output/large-refactor-project"),
    }
    assert init_job(job_name) == expected_job_data


def test_init_job_creates_working_directory_and_job_json(tmp_path, job_name):
    """init_job creates the job directory and a job.json file inside it."""
    init_job(job_name)
    expected_directory = tmp_path / "output/large-refactor-project"
    for expected_path in (expected_directory, expected_directory / "job.json"):
        assert os.path.exists(expected_path)
52 changes: 52 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import json
import os.path
from pathlib import Path

from abdiff.config import Config
from abdiff.core.utils import (
get_job_slug_and_working_directory,
update_or_create_job_json,
)

CONFIG = Config()


def test_job_slug_success(job_name, job_slug, tmp_path):
    """The slug computed for the job name equals the ``job_slug`` fixture."""
    computed_slug = get_job_slug_and_working_directory(job_name)[0]
    assert job_slug == computed_slug


def test_job_slug_remove_special_characters():
    """Punctuation is dropped and the remaining tokens are hyphen-joined."""
    messy_name = "abc 123 $#$#( // :: !! def $## 456"
    slug = get_job_slug_and_working_directory(messy_name)[0]
    assert slug == "abc-123-def-456"


def test_job_working_directory_success(job_name, job_slug, tmp_path):
    """The working directory is the slug joined onto the configured root."""
    expected_directory = Path(CONFIG.root_working_directory) / job_slug
    computed_directory = get_job_slug_and_working_directory(job_name)[1]
    assert computed_directory == expected_directory


def test_create_job_json_returns_initial_data(job_name, job_working_directory):
    """With no existing job.json, the returned data equals the data passed in."""
    payload = {"msg": "in a bottle"}
    assert update_or_create_job_json(job_name, payload) == payload


def test_create_job_json_creates_file(job_name, job_working_directory):
    """Writing job data leaves a job.json file in the job's directory."""
    update_or_create_job_json(job_name, {"msg": "in a bottle"})
    job_dir = get_job_slug_and_working_directory(job_name)[1]
    assert os.path.exists(job_dir / "job.json")


def test_update_job_json_success(job_name, job_working_directory):
    """New keys are merged into the data already stored in job.json."""
    existing_data = {"msg": "in a bottle"}
    additional_data = {"msg2": "still in bottle"}

    # Seed the job directory with a job.json, as if an earlier step wrote it.
    with open(job_working_directory / "job.json", "w") as f:
        json.dump(existing_data, f)

    merged_data = update_or_create_job_json(job_name, additional_data)
    assert merged_data == {"msg": "in a bottle", "msg2": "still in bottle"}