3 changes: 3 additions & 0 deletions Pipfile
@@ -4,12 +4,15 @@ verify_ssl = true
name = "pypi"

[packages]
boto3 = "*"
boto3-stubs = {extras = ["essential","ses"], version = "*"}
click = "*"
sentry-sdk = "*"

[dev-packages]
black = "*"
coveralls = "*"
moto = "*"
mypy = "*"
pre-commit = "*"
pytest = "*"
604 changes: 509 additions & 95 deletions Pipfile.lock

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions README.md
@@ -21,7 +21,13 @@ Description of the app
```shell
SENTRY_DSN=### If set to a valid Sentry DSN, enables Sentry exception monitoring. This is not needed for local development.
WORKSPACE=### Set to `dev` for local development, this will be set to `stage` and `prod` in those environments by Terraform.
AWS_REGION_NAME=### Default AWS region.
```

### Optional

```shell
LOG_LEVEL=### Logging level. Defaults to 'INFO'.
```


10 changes: 7 additions & 3 deletions dsc/cli.py
@@ -1,12 +1,14 @@
import logging
from datetime import timedelta
from io import StringIO
from time import perf_counter

import click

from dsc.config import configure_logger, configure_sentry
from dsc.config import Config

logger = logging.getLogger(__name__)
CONFIG = Config()


@click.command()
@@ -15,9 +17,11 @@
)
def main(*, verbose: bool) -> None:
    start_time = perf_counter()
    stream = StringIO()
    root_logger = logging.getLogger()
    logger.info(configure_logger(root_logger, verbose=verbose))
    logger.info(configure_sentry())
    logger.info(CONFIG.configure_logger(root_logger, stream, verbose=verbose))
    logger.info(CONFIG.configure_sentry())
    CONFIG.check_required_env_vars()
    logger.info("Running process")

    # Do things here!
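
For orientation, here is a minimal sketch of how this entrypoint could be exercised with Click's test runner. It is illustrative only and not part of the PR: it assumes the elided `@click.option` above wires up a `--verbose` flag, and it sets the required env vars itself so `CONFIG.check_required_env_vars()` passes.

```python
from click.testing import CliRunner

from dsc.cli import main


def test_main_smoke(monkeypatch):
    # Assumed values so CONFIG.check_required_env_vars() does not raise.
    monkeypatch.setenv("WORKSPACE", "test")
    monkeypatch.setenv("SENTRY_DSN", "None")
    monkeypatch.setenv("AWS_REGION_NAME", "us-east-1")

    result = CliRunner().invoke(main, ["--verbose"])
    assert result.exit_code == 0
```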
82 changes: 56 additions & 26 deletions dsc/config.py
@@ -1,33 +1,63 @@
import logging
import os
from collections.abc import Iterable
from io import StringIO

import sentry_sdk


def configure_logger(logger: logging.Logger, *, verbose: bool) -> str:
    if verbose:
        logging.basicConfig(
            format="%(asctime)s %(levelname)s %(name)s.%(funcName)s() line %(lineno)d: "
            "%(message)s"
        )
        logger.setLevel(logging.DEBUG)
        for handler in logging.root.handlers:
            handler.addFilter(logging.Filter("dsc"))
    else:
        logging.basicConfig(
            format="%(asctime)s %(levelname)s %(name)s.%(funcName)s(): %(message)s"
class Config:
    REQUIRED_ENV_VARS: Iterable[str] = [
        "WORKSPACE",
        "SENTRY_DSN",
        "AWS_REGION_NAME",
    ]

    OPTIONAL_ENV_VARS: Iterable[str] = ["LOG_LEVEL"]

    def __getattr__(self, name: str) -> str | None:
        """Provide dot notation access to configurations and env vars on this class."""
        if name in self.REQUIRED_ENV_VARS or name in self.OPTIONAL_ENV_VARS:
            return os.getenv(name)
        message = f"'{name}' not a valid configuration variable"
        raise AttributeError(message)

    def check_required_env_vars(self) -> None:
        """Method to raise exception if required env vars not set."""
        missing_vars = [var for var in self.REQUIRED_ENV_VARS if not os.getenv(var)]
        if missing_vars:
            message = f"Missing required environment variables: {', '.join(missing_vars)}"
            raise OSError(message)

    def configure_logger(
@ehanson8 (Contributor, Author) commented on Dec 5, 2024:

This is a combination of `configure_logger` from wiley-deposits and our template config code. There's some repetition, and I have a gut feeling there's a better way to do this but didn't land on anything specific; curious about your thoughts.

@ghukill commented on Dec 6, 2024:

Here's a possible refactor, removing some redundancies:

def configure_logger(
    self, logger: logging.Logger, stream: StringIO, *, verbose: bool
) -> str:
    logging_format_base = "%(asctime)s %(levelname)s %(name)s.%(funcName)s()"
    logger.addHandler(logging.StreamHandler(stream))

    if verbose:
        log_method, log_level = logger.debug, logging.DEBUG
        template = logging_format_base + " line %(lineno)d: %(message)s"
        for handler in logging.root.handlers:
            handler.addFilter(logging.Filter("dsc"))
    else:
        log_method, log_level = logger.info, logging.INFO
        template = logging_format_base + ": %(message)s"

    logger.setLevel(log_level)
    logging.basicConfig(format=template)
    logger.addHandler(logging.StreamHandler(stream))
    log_method(f"{logging.getLevelName(logger.getEffectiveLevel())}")

    return (
        f"Logger '{logger.name}' configured with level="
        f"{logging.getLevelName(logger.getEffectiveLevel())}"
    )

At a glance, what makes this hard to refactor are the tests:

  • test_configure_logger_not_verbose
  • test_configure_logger_verbose

which attempt to read from the stream handler that is set up. That's what the `log_method, log_level = ...` assignment above is for: it selects the appropriate logging method so that `log_method(f"{logging.getLevelName(logger.getEffectiveLevel())}")` writes the line those tests expect.

My feeling: good enough for now, but globally I think we could improve our logging setups. The presence of logging.basicConfig() is kind of a red flag unto itself. But I think this is mostly consistent with other projects, so there's value in that.

For example: I'm not sure that returning a string is ideal. Perhaps this method could just log what it has done, and return None. But this would require updating the CLI, tests, etc. that are expecting a string response. And as mentioned, this is consistent with other apps.

A contributor commented:

Still need to review this at a deeper level, but figured I'd pass along a discussion @ghukill and I had on a previous PR I worked on re: excluding configure_sentry and configure_logger from the Config class:

From @ghukill :

  • I like having a Config class that I can put complex behavior on for static-esque values I want to access through the application
  • I'm indifferent to configure_logger and configure_sentry on there / they feel somewhat disconnected from the essence of the application, which I attribute to the Config class. Especially sentry. Logging is a little more difficult for me... but then again logging is always kind of difficult. Of course, if those methods (configure_sentry and configure_logger) are on it, they don't need to be called... but if I'm not supposed to call configure_sentry from anywhere in the app, then maybe it doesn't belong on that global Config object?
    This is 5:00pm talking, not much reasoning, but just my knee-jerk thinking.

@ehanson8 (Contributor, Author) replied:

Great refactor, thanks! I was hoping this would prompt a larger conversation about the template/config class because I think it needs work as well, maybe a future DataEng meeting topic?

A follow-up reply:

I think a future DataEng topic could be good, for sure. But I think it could/should extend longer than a 30-minute meeting w/ CB.

I have two somewhat conflicting feelings at the moment re: config and logging:

  1. it's okay for projects to tweak our established approach as needed
  2. it's okay if slightly awkward or sub-optimal insofar as it's consistent with other repos

I think it'd be easy for any one project to grind to a halt trying to get it perfect, when we'll always have tension with, "but others don't do it that way..."

Until we do take a long and thoughtful look at our config + logging approach for python projects, I'm pretty okay with allowing projects to try and strike a balance between feelings #1 and #2 above.

        self, logger: logging.Logger, stream: StringIO, *, verbose: bool
    ) -> str:
        logging_format_base = "%(asctime)s %(levelname)s %(name)s.%(funcName)s()"
        logger.addHandler(logging.StreamHandler(stream))

        if verbose:
            log_method, log_level = logger.debug, logging.DEBUG
            template = logging_format_base + " line %(lineno)d: %(message)s"
            for handler in logging.root.handlers:
                handler.addFilter(logging.Filter("dsc"))
        else:
            log_method, log_level = logger.info, logging.INFO
            template = logging_format_base + ": %(message)s"

        logger.setLevel(log_level)
        logging.basicConfig(format=template)
        logger.addHandler(logging.StreamHandler(stream))
        log_method(f"{logging.getLevelName(logger.getEffectiveLevel())}")

        return (
            f"Logger '{logger.name}' configured with level="
            f"{logging.getLevelName(logger.getEffectiveLevel())}"
        )
        logger.setLevel(logging.INFO)
    return (
        f"Logger '{logger.name}' configured with level="
        f"{logging.getLevelName(logger.getEffectiveLevel())}"
    )


def configure_sentry() -> str:
    env = os.getenv("WORKSPACE")
    sentry_dsn = os.getenv("SENTRY_DSN")
    if sentry_dsn and sentry_dsn.lower() != "none":
        sentry_sdk.init(sentry_dsn, environment=env)
        return f"Sentry DSN found, exceptions will be sent to Sentry with env={env}"
    return "No Sentry DSN found, exceptions will not be sent to Sentry"

    def configure_sentry(self) -> str:
        env = self.WORKSPACE
        sentry_dsn = self.SENTRY_DSN
        if sentry_dsn and sentry_dsn.lower() != "none":
            sentry_sdk.init(sentry_dsn, environment=env)
            return f"Sentry DSN found, exceptions will be sent to Sentry with env={env}"
        return "No Sentry DSN found, exceptions will not be sent to Sentry"
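
For context, a minimal usage sketch of the new Config class as it appears in this diff; the env var values below are made up for illustration and are not part of the PR.

```python
import os

from dsc.config import Config

# Illustrative values only; in deployed environments Terraform supplies these.
os.environ["WORKSPACE"] = "dev"
os.environ["SENTRY_DSN"] = "None"
os.environ["AWS_REGION_NAME"] = "us-east-1"

config = Config()
config.check_required_env_vars()   # passes here; raises OSError if any required var is missing
print(config.WORKSPACE)            # "dev", resolved through __getattr__
print(config.LOG_LEVEL)            # None, since the optional var is unset
print(config.configure_sentry())   # "No Sentry DSN found, ..." because the DSN is "None"

try:
    config.UNKNOWN_SETTING         # not in REQUIRED_ENV_VARS or OPTIONAL_ENV_VARS
except AttributeError as exc:
    print(exc)                     # "'UNKNOWN_SETTING' not a valid configuration variable"
```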
80 changes: 80 additions & 0 deletions dsc/s3.py
@@ -0,0 +1,80 @@
from __future__ import annotations

import logging
from typing import TYPE_CHECKING

import boto3

if TYPE_CHECKING:
    from collections.abc import Iterator

    from mypy_boto3_s3.type_defs import PutObjectOutputTypeDef

logger = logging.getLogger(__name__)


class S3Client:
    """A class to perform common S3 operations for this application."""

    def __init__(self) -> None:
        self.client = boto3.client("s3")

    def archive_file_with_new_key(
A contributor commented:

Curious whether archiving the metadata file will be required for workflows other than Wiley deposits? 🤔 If an archive/ folder will exist for every workflow bucket, I think the function name is fine as-is; otherwise, it might be good to rename the function so it is more universal.

@ehanson8 (Contributor, Author) replied:

I believe so; we probably want an archived/processed path in all of the workflows to prevent re-processing of files.

        self, bucket: str, key: str, archived_key_prefix: str
    ) -> None:
        """Update the key of the specified file to archive it from processing.

        Args:
            bucket: The S3 bucket containing the files to be archived.
            key: The key of the file to archive.
            archived_key_prefix: The prefix to be applied to the archived file.
        """
        self.client.copy_object(
            Bucket=bucket,
            CopySource=f"{bucket}/{key}",
            Key=f"{archived_key_prefix}/{key}",
        )
        self.client.delete_object(
            Bucket=bucket,
            Key=key,
        )

    def put_file(
        self, file_content: str | bytes, bucket: str, key: str
    ) -> PutObjectOutputTypeDef:
        """Put a file in a specified S3 bucket with a specified key.

        Args:
            file_content: The content of the file to be uploaded.
            bucket: The S3 bucket where the file will be uploaded.
            key: The key to be used for the uploaded file.
        """
        response = self.client.put_object(
            Body=file_content,
            Bucket=bucket,
            Key=key,
        )
        logger.debug(f"'{key}' uploaded to S3")
        return response

    def get_files_iter(
        self, bucket: str, file_type: str, excluded_key_prefix: str
    ) -> Iterator[str]:
        """Retrieve file based on file type, bucket, and without excluded prefix.

        Args:
            bucket: The S3 bucket to search.
            file_type: The file type to retrieve.
            excluded_key_prefix: Files with this key prefix will not be retrieved.
        """
        paginator = self.client.get_paginator("list_objects_v2")
        page_iterator = paginator.paginate(Bucket=bucket)

        for page in page_iterator:
            files = [
                content["Key"]
                for content in page["Contents"]
                if content["Key"].endswith(file_type)
                and excluded_key_prefix not in content["Key"]
            ]
            yield from files
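
To make the intended flow concrete, here is a rough sketch of S3Client used against a moto-mocked bucket. The bucket name "awd" mirrors the conftest fixture below; the "archived" prefix, file names, and default-region setup are assumptions for illustration, not part of the PR.

```python
import os

import boto3
from moto import mock_aws

from dsc.s3 import S3Client

# Assumption: give boto3 a default region so S3Client() can build a client.
os.environ.setdefault("AWS_DEFAULT_REGION", "us-east-1")

with mock_aws():
    boto3.client("s3").create_bucket(Bucket="awd")
    s3_client = S3Client()

    s3_client.put_file("doi,title\n10.1/abc,Example", "awd", "batch-01.csv")
    print(list(s3_client.get_files_iter("awd", "csv", "archived")))  # ['batch-01.csv']

    # Move the file under the archived/ prefix so it is skipped on later runs.
    s3_client.archive_file_with_new_key("awd", "batch-01.csv", "archived")
    print(list(s3_client.get_files_iter("awd", "csv", "archived")))  # []
```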
11 changes: 5 additions & 6 deletions pyproject.toml
@@ -28,21 +28,20 @@ ignore = [
# default
"ANN101",
"ANN102",
"COM812",
"D107",
"N812",
"PTH",

# project-specific
"C90",
"COM812",
"D100",
"D101",
"D102",
"D103",
"D104",
"D107",
"G004",
"N812",
"PLR0912",
"PLR0913",
"PLR0915",
"PTH",
"S320",
"S321",
]
31 changes: 31 additions & 0 deletions tests/conftest.py
@@ -1,13 +1,44 @@
from io import StringIO

import boto3
import pytest
from click.testing import CliRunner
from moto import mock_aws

from dsc.config import Config
from dsc.s3 import S3Client


@pytest.fixture(autouse=True)
def _test_env(monkeypatch):
    monkeypatch.setenv("SENTRY_DSN", "None")
    monkeypatch.setenv("WORKSPACE", "test")
    monkeypatch.setenv("AWS_REGION_NAME", "us-east-1")


@pytest.fixture
def config_instance() -> Config:
    return Config()


@pytest.fixture
def mocked_s3(config_instance):
    with mock_aws():
        s3 = boto3.client("s3", region_name=config_instance.AWS_REGION_NAME)
        s3.create_bucket(Bucket="awd")
        yield s3


@pytest.fixture
def runner():
    return CliRunner()


@pytest.fixture
def s3_client():
    return S3Client()


@pytest.fixture
def stream():
    return StringIO()
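
No tests/test_s3.py appears in this diff, but as a sketch, the mocked_s3 and s3_client fixtures above could be combined roughly as follows. File and key names are hypothetical, and this assumes boto3 can resolve a default AWS region in the test environment (e.g. AWS_DEFAULT_REGION), which the s3_client fixture itself also requires.

```python
# Hypothetical tests/test_s3.py -- not part of this PR.
# Requesting mocked_s3 before s3_client ensures the moto mock is active
# when S3Client() creates its boto3 client.
def test_put_file_creates_object(mocked_s3, s3_client):
    s3_client.put_file("test content", "awd", "test.csv")
    keys = [obj["Key"] for obj in mocked_s3.list_objects_v2(Bucket="awd")["Contents"]]
    assert keys == ["test.csv"]


def test_archive_file_with_new_key_moves_object(mocked_s3, s3_client):
    s3_client.put_file("test content", "awd", "test.csv")
    s3_client.archive_file_with_new_key("awd", "test.csv", "archived")
    keys = [obj["Key"] for obj in mocked_s3.list_objects_v2(Bucket="awd")["Contents"]]
    assert keys == ["archived/test.csv"]
```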
38 changes: 23 additions & 15 deletions tests/test_config.py
@@ -1,37 +1,45 @@
import logging

from dsc.config import configure_logger, configure_sentry
import pytest


def test_configure_logger_not_verbose():
def test_check_required_env_vars(monkeypatch, config_instance):
    monkeypatch.delenv("WORKSPACE")
    with pytest.raises(OSError, match="Missing required environment variables:"):
        config_instance.check_required_env_vars()


def test_configure_logger_not_verbose(config_instance, stream):
    logger = logging.getLogger(__name__)
    result = configure_logger(logger, verbose=False)
    info_log_level = 20
    assert logger.getEffectiveLevel() == info_log_level
    result = config_instance.configure_logger(logger, stream, verbose=False)
    assert logger.getEffectiveLevel() == logging.INFO
    assert result == "Logger 'tests.test_config' configured with level=INFO"
    stream.seek(0)
    assert next(stream) == "INFO\n"


def test_configure_logger_verbose():
def test_configure_logger_verbose(config_instance, stream):
    logger = logging.getLogger(__name__)
    result = configure_logger(logger, verbose=True)
    debug_log_level = 10
    assert logger.getEffectiveLevel() == debug_log_level
    result = config_instance.configure_logger(logger, stream, verbose=True)
    assert logger.getEffectiveLevel() == logging.DEBUG
    assert result == "Logger 'tests.test_config' configured with level=DEBUG"
    stream.seek(0)
    assert next(stream) == "DEBUG\n"


def test_configure_sentry_no_env_variable(monkeypatch):
def test_configure_sentry_no_env_variable(monkeypatch, config_instance):
    monkeypatch.delenv("SENTRY_DSN", raising=False)
    result = configure_sentry()
    result = config_instance.configure_sentry()
    assert result == "No Sentry DSN found, exceptions will not be sent to Sentry"


def test_configure_sentry_env_variable_is_none(monkeypatch):
def test_configure_sentry_env_variable_is_none(monkeypatch, config_instance):
    monkeypatch.setenv("SENTRY_DSN", "None")
    result = configure_sentry()
    result = config_instance.configure_sentry()
    assert result == "No Sentry DSN found, exceptions will not be sent to Sentry"


def test_configure_sentry_env_variable_is_dsn(monkeypatch):
def test_configure_sentry_env_variable_is_dsn(monkeypatch, config_instance):
    monkeypatch.setenv("SENTRY_DSN", "https://1234567890@00000.ingest.sentry.io/123456")
    result = configure_sentry()
    result = config_instance.configure_sentry()
    assert result == "Sentry DSN found, exceptions will be sent to Sentry with env=test"