Skip to content

Commit

Permalink
Encapsulate configuration functions in a class
Browse files Browse the repository at this point in the history
Why these changes are being introduced:
* Streamline logging configuration and environment variable management

How this addresses that need:
* Create Config class
   * Clearly identify required vs. optional env vars
* Update dependent modules to use Config
   * Replace os.getenv() calls with Config.<ENV-VAR>
* Update unit tests in test_config

Side effects of this change:
* None
  • Loading branch information
jonavellecuerdo committed Jan 10, 2024
1 parent 6cca678 commit 050aa01
Show file tree
Hide file tree
Showing 5 changed files with 125 additions and 48 deletions.
11 changes: 6 additions & 5 deletions harvester/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@
import click
from sickle.oaiexceptions import NoRecordsMatch

from harvester.config import configure_logger, configure_sentry
from harvester.config import Config
from harvester.oai import OAIClient, write_records, write_sets

logger = logging.getLogger(__name__)

CONFIG = Config()


@click.group()
@click.option(
Expand All @@ -40,10 +42,9 @@ def main(ctx: click.Context, host: str, output_file: str, verbose: bool) -> None
ctx.obj["START_TIME"] = perf_counter()
ctx.obj["HOST"] = host
ctx.obj["OUTPUT_FILE"] = output_file

root_logger = logging.getLogger()
logger.info(configure_logger(root_logger, verbose))
logger.info(configure_sentry())
logger.info(CONFIG.configure_logger(verbose))
logger.info(CONFIG.configure_sentry())
CONFIG.check_required_env_vars()


@main.command()
Expand Down
81 changes: 56 additions & 25 deletions harvester/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""harvester.config module."""
import logging
import os
from typing import Any

import sentry_sdk

Expand All @@ -12,30 +13,60 @@
MAX_ALLOWED_ERRORS = 10


def configure_logger(logger: logging.Logger, verbose: bool) -> str:
if verbose:
logging.basicConfig(
format="%(asctime)s %(levelname)s %(name)s.%(funcName)s() line %(lineno)d: "
"%(message)s"
)
logger.setLevel(logging.DEBUG)
class Config:
REQUIRED_ENV_VARS = ("WORKSPACE", "SENTRY_DSN")
OPTIONAL_ENV_VARS = ("RECORD_SKIP_LIST", "STATUS_UPDATE_INTERVAL")

def __init__(self, logger: logging.Logger | None = None):
"""Set root logger as default when creating class instance."""
if logger is None:
self.logger = logging.getLogger()
else:
self.logger = logger

def check_required_env_vars(self) -> None:
"""Method to raise exception if required env vars not set."""
missing_vars = [var for var in self.REQUIRED_ENV_VARS if not os.getenv(var)]
if missing_vars:
message = f"Missing required environment variables: {', '.join(missing_vars)}"
raise OSError(message)

def configure_logger(self, verbose: bool) -> str:
for handler in logging.root.handlers:
handler.addFilter(logging.Filter("harvester"))
else:
logging.basicConfig(
format=("%(asctime)s %(levelname)s %(name)s.%(funcName)s(): %(message)s")
handler.filters.clear()
if verbose:
logging.basicConfig(
format="%(asctime)s %(levelname)s %(name)s.%(funcName)s() "
"line %(lineno)d: %(message)s"
)
self.logger.setLevel(logging.DEBUG)
for handler in logging.root.handlers:
handler.addFilter(logging.Filter("harvester"))
else:
logging.basicConfig(
format=("%(asctime)s %(levelname)s %(name)s.%(funcName)s(): %(message)s")
)
self.logger.setLevel(logging.INFO)
return (
f"Logger '{self.logger.name}' configured with level="
f"{logging.getLevelName(self.logger.getEffectiveLevel())}"
)
logger.setLevel(logging.INFO)
return (
f"Logger '{logger.name}' configured with level="
f"{logging.getLevelName(logger.getEffectiveLevel())}"
)


def configure_sentry() -> str:
env = os.getenv("WORKSPACE")
sentry_dsn = os.getenv("SENTRY_DSN")
if sentry_dsn and sentry_dsn.lower() != "none":
sentry_sdk.init(sentry_dsn, environment=env)
return f"Sentry DSN found, exceptions will be sent to Sentry with env={env}"
return "No Sentry DSN found, exceptions will not be sent to Sentry"

def configure_sentry(self) -> str:
sentry_dsn = self.SENTRY_DSN
if sentry_dsn and sentry_dsn.lower() != "none":
sentry_sdk.init(sentry_dsn, environment=self.WORKSPACE)
return (
"Sentry DSN found, exceptions will be sent to Sentry with env="
f"{self.WORKSPACE}"
)
return "No Sentry DSN found, exceptions will not be sent to Sentry"

def __getattr__(self, name: str) -> Any: # noqa: ANN401
"""Provide dot notation access to configurations and env vars on this class."""
if name in self.REQUIRED_ENV_VARS or name in self.OPTIONAL_ENV_VARS:
if name == "STATUS_UPDATE_INTERVAL":
return os.getenv(name, "1000")
return os.getenv(name)
message = f"'{name}' not a valid configuration variable"
raise AttributeError(message)
6 changes: 4 additions & 2 deletions harvester/oai.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

import json
import logging
import os
from collections.abc import Iterator
from typing import Any, Literal

Expand All @@ -19,12 +18,15 @@
MAX_ALLOWED_ERRORS,
MAX_RETRIES,
RETRY_STATUS_CODES,
Config,
)
from harvester.exceptions import MaxAllowedErrorsReached
from harvester.utils import send_sentry_message

logger = logging.getLogger(__name__)

CONFIG = Config()


class OAIClient:
def __init__(
Expand Down Expand Up @@ -157,7 +159,7 @@ def write_records(records: Iterator, filepath: str) -> int:
for record in records:
file.write(" ".encode() + record.raw.encode() + "\n".encode())
count += 1
if count % int(os.getenv("STATUS_UPDATE_INTERVAL", "1000")) == 0:
if count % int(CONFIG.STATUS_UPDATE_INTERVAL) == 0:
logger.info(
"Status update: %s records written to output file so far!",
count,
Expand Down
8 changes: 8 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,20 @@
import pytest
from click.testing import CliRunner

from harvester.config import Config


@pytest.fixture(autouse=True)
def _test_env(monkeypatch):
monkeypatch.setenv("SENTRY_DSN", "None")
monkeypatch.setenv("WORKSPACE", "test")


@pytest.fixture
def config():
return Config()


@pytest.fixture
def runner():
return CliRunner()
Expand Down
67 changes: 51 additions & 16 deletions tests/test_config.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,70 @@
import logging

from harvester.config import configure_logger, configure_sentry
import pytest

from harvester.config import Config

def test_configure_logger_not_verbose():
logger = logging.getLogger(__name__)
result = configure_logger(logger, verbose=False)
assert logger.getEffectiveLevel() == int(logging.INFO)
assert result == "Logger 'tests.test_config' configured with level=INFO"

def test_configure_logger_defaults_to_root_logger():
config = Config(logger=None)
assert config.logger.name == "root"


def test_configure_logger_verbose(caplog):
def test_configure_logger_accepts_specific_logger():
logger = logging.getLogger(__name__)
result = configure_logger(logger, verbose=True)
assert logger.getEffectiveLevel() == int(logging.DEBUG)
assert result == "Logger 'tests.test_config' configured with level=DEBUG"
config = Config(logger=logger)
assert config.logger.name == "tests.test_config"


def test_configure_logger_not_verbose(config):
result = config.configure_logger(verbose=False)
assert config.logger.getEffectiveLevel() == int(logging.INFO)
assert result == "Logger 'root' configured with level=INFO"


def test_configure_sentry_no_env_variable(monkeypatch):
def test_configure_logger_verbose(config):
result = config.configure_logger(verbose=True)
assert config.logger.getEffectiveLevel() == int(logging.DEBUG)
assert result == "Logger 'root' configured with level=DEBUG"


def test_configure_sentry_no_env_variable(config, monkeypatch):
config.configure_logger(verbose=False)
monkeypatch.delenv("SENTRY_DSN", raising=False)
result = configure_sentry()
result = config.configure_sentry()
assert result == "No Sentry DSN found, exceptions will not be sent to Sentry"


def test_configure_sentry_env_variable_is_none(monkeypatch):
def test_configure_sentry_env_variable_is_none(config, monkeypatch):
monkeypatch.setenv("SENTRY_DSN", "None")
result = configure_sentry()
result = config.configure_sentry()
assert result == "No Sentry DSN found, exceptions will not be sent to Sentry"


def test_configure_sentry_env_variable_is_dsn(monkeypatch):
def test_configure_sentry_env_variable_is_dsn(config, monkeypatch):
monkeypatch.setenv("SENTRY_DSN", "https://1234567890@00000.ingest.sentry.io/123456")
result = configure_sentry()
result = config.configure_sentry()
assert result == "Sentry DSN found, exceptions will be sent to Sentry with env=test"


def test_config_check_required_env_vars_success(config):
config.check_required_env_vars()


def test_config_check_required_env_vars_error(config, monkeypatch):
monkeypatch.delenv("SENTRY_DSN")
with pytest.raises(
OSError, match="Missing required environment variables: SENTRY_DSN"
):
config.check_required_env_vars()


def test_config_env_var_access_success(config):
assert config.STATUS_UPDATE_INTERVAL == "1000"


def test_config_env_var_access_error(config):
with pytest.raises(
AttributeError, match="'DOES_NOT_EXIST' not a valid configuration variable"
):
_ = config.DOES_NOT_EXIST

0 comments on commit 050aa01

Please sign in to comment.