From 21de72095f136b0168f59321cd1ecd25b82568fc Mon Sep 17 00:00:00 2001 From: miroslavpojer Date: Mon, 8 Sep 2025 08:38:33 +0200 Subject: [PATCH 01/12] #54 - Add QA tooling - initial setup of QA tools - applied Black --- .gitignore | 1 + .pylintrc | 649 +++++++++++++++++++++++++++++++++++++++++++++++ DEVELOPER.md | 127 ++++++++++ pyproject.toml | 11 + requirements.txt | 7 + 5 files changed, 795 insertions(+) create mode 100644 .pylintrc create mode 100644 DEVELOPER.md create mode 100644 pyproject.toml create mode 100644 requirements.txt diff --git a/.gitignore b/.gitignore index dc63698..862270d 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ __pycache__ /terraform/*.tfvars /terraform/*.tfstate* /terraform/.terraform* +/.idea/ diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..a4f988c --- /dev/null +++ b/.pylintrc @@ -0,0 +1,649 @@ +[MAIN] + +# Analyse import fallback blocks. This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + +# Clear in-memory caches upon conclusion of linting. Useful if running pylint +# in a server-like mode. +clear-cache-post-run=no + +# Load and enable all available extensions. Use --list-extensions to see a list +# all available extensions. +#enable-all-extensions= + +# In error mode, messages with a category besides ERROR or FATAL are +# suppressed, and no reports are done by default. Error mode is compatible with +# disabling specific errors. +#errors-only= + +# Always return a 0 (non-error) status code, even if lint errors are found. +# This is primarily useful in continuous integration scripts. +#exit-zero= + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. 
+extension-pkg-allow-list= + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. (This is an alternative name to extension-pkg-allow-list +# for backward compatibility.) +extension-pkg-whitelist= + +# Return non-zero exit code if any of these messages/categories are detected, +# even if score is above --fail-under value. Syntax same as enable. Messages +# specified are enabled, while categories only check already-enabled messages. +fail-on= + +# Specify a score threshold under which the program will exit with error. +fail-under=10 + +# Interpret the stdin as a python script, whose filename needs to be passed as +# the module_or_package argument. +#from-stdin= + +# Files or directories to be skipped. They should be base names, not paths. +ignore=CVS + +# Add files or directories matching the regular expressions patterns to the +# ignore-list. The regex matches against paths and can be in Posix or Windows +# format. Because '\\' represents the directory delimiter on Windows systems, +# it can't be used as an escape character. +ignore-paths= + +# Files or directories matching the regular expression patterns are skipped. +# The regex matches against base names, not paths. The default value ignores +# Emacs file locks +ignore-patterns=^\.# + +# List of module names for which member attributes should not be checked and +# will not be imported (useful for modules/projects where namespaces are +# manipulated during runtime and thus existing member attributes cannot be +# deduced by static analysis). It supports qualified module names, as well as +# Unix pattern matching. +ignored-modules= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Use multiple processes to speed up Pylint. 
Specifying 0 will auto-detect the +# number of processors available to use, and will cap the count on Windows to +# avoid hangs. +jobs=1 + +# Control the amount of potential inferred values when inferring a single +# object. This can help the performance when dealing with large functions or +# complex, nested conditions. +limit-inference-results=100 + +# List of plugins (as comma separated values of python module names) to load, +# usually to register additional checkers. +load-plugins= + +# Pickle collected data for later comparisons. +persistent=yes + +# Resolve imports to .pyi stubs if available. May reduce no-member messages and +# increase not-an-iterable messages. +prefer-stubs=no + +# Minimum Python version to use for version dependent checks. Will default to +# the version used to run pylint. +py-version=3.11 + +# Discover python modules and packages in the file system subtree. +recursive=no + +# Add paths to the list of the source roots. Supports globbing patterns. The +# source root is an absolute path or a path relative to the current working +# directory used to determine a package namespace for modules located under the +# source root. +source-roots= + +# When enabled, pylint would attempt to guess common misconfiguration and emit +# user-friendly hints instead of false-positive error messages. +suggestion-mode=yes + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + +# In verbose mode, extra non-checker-related info will be displayed. +#verbose= + + +[MASTER] + +ignore-paths=tests + + +[BASIC] + +# Naming style matching correct argument names. +argument-naming-style=snake_case + +# Regular expression matching correct argument names. Overrides argument- +# naming-style. If left empty, argument names will be checked with the set +# naming style. +#argument-rgx= + +# Naming style matching correct attribute names. 
+attr-naming-style=snake_case + +# Regular expression matching correct attribute names. Overrides attr-naming- +# style. If left empty, attribute names will be checked with the set naming +# style. +#attr-rgx= + +# Bad variable names which should always be refused, separated by a comma. +bad-names=foo, + bar, + baz, + toto, + tutu, + tata + +# Bad variable names regexes, separated by a comma. If names match any regex, +# they will always be refused +bad-names-rgxs= + +# Naming style matching correct class attribute names. +class-attribute-naming-style=any + +# Regular expression matching correct class attribute names. Overrides class- +# attribute-naming-style. If left empty, class attribute names will be checked +# with the set naming style. +#class-attribute-rgx= + +# Naming style matching correct class constant names. +class-const-naming-style=UPPER_CASE + +# Regular expression matching correct class constant names. Overrides class- +# const-naming-style. If left empty, class constant names will be checked with +# the set naming style. +#class-const-rgx= + +# Naming style matching correct class names. +class-naming-style=PascalCase + +# Regular expression matching correct class names. Overrides class-naming- +# style. If left empty, class names will be checked with the set naming style. +#class-rgx= + +# Naming style matching correct constant names. +const-naming-style=UPPER_CASE + +# Regular expression matching correct constant names. Overrides const-naming- +# style. If left empty, constant names will be checked with the set naming +# style. +#const-rgx= + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + +# Naming style matching correct function names. +function-naming-style=snake_case + +# Regular expression matching correct function names. Overrides function- +# naming-style. If left empty, function names will be checked with the set +# naming style. 
+#function-rgx= + +# Good variable names which should always be accepted, separated by a comma. +good-names=i, + j, + k, + ex, + Run, + _ + +# Good variable names regexes, separated by a comma. If names match any regex, +# they will always be accepted +good-names-rgxs= + +# Include a hint for the correct naming format with invalid-name. +include-naming-hint=no + +# Naming style matching correct inline iteration names. +inlinevar-naming-style=any + +# Regular expression matching correct inline iteration names. Overrides +# inlinevar-naming-style. If left empty, inline iteration names will be checked +# with the set naming style. +#inlinevar-rgx= + +# Naming style matching correct method names. +method-naming-style=snake_case + +# Regular expression matching correct method names. Overrides method-naming- +# style. If left empty, method names will be checked with the set naming style. +#method-rgx= + +# Naming style matching correct module names. +module-naming-style=snake_case + +# Regular expression matching correct module names. Overrides module-naming- +# style. If left empty, module names will be checked with the set naming style. +#module-rgx= + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=^_ + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. +# These decorators are taken in consideration only for invalid-name. +property-classes=abc.abstractproperty + +# Regular expression matching correct type alias names. If left empty, type +# alias names will be checked with the set naming style. +#typealias-rgx= + +# Regular expression matching correct type variable names. If left empty, type +# variable names will be checked with the set naming style. 
+#typevar-rgx= + +# Naming style matching correct variable names. +variable-naming-style=snake_case + +# Regular expression matching correct variable names. Overrides variable- +# naming-style. If left empty, variable names will be checked with the set +# naming style. +#variable-rgx= + + +[CLASSES] + +# Warn about protected attribute access inside special methods +check-protected-access-in-special-methods=no + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__, + __new__, + setUp, + asyncSetUp, + __post_init__ + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict,_fields,_replace,_source,_make,os._exit + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=mcs + + +[DESIGN] + +# List of regular expressions of class ancestor names to ignore when counting +# public methods (see R0903) +exclude-too-few-public-methods= + +# List of qualified class names to ignore when counting class parents (see +# R0901) +ignored-parents= + +# Maximum number of arguments for function / method. +max-args=10 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Maximum number of boolean expressions in an if statement (see R0916). +max-bool-expr=5 + +# Maximum number of branch for function / method body. +max-branches=12 + +# Maximum number of locals for function / method body. +max-locals=15 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + +# Maximum number of return / yield for function / method body. +max-returns=6 + +# Maximum number of statements in function / method body. +max-statements=50 + +# Minimum number of public methods for a class (see R0903). 
+min-public-methods=1
+
+
+[EXCEPTIONS]
+
+# Exceptions that will emit a warning when caught.
+overgeneral-exceptions=builtins.BaseException,builtins.Exception
+
+
+[FORMAT]
+
+# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
+expected-line-ending-format=
+
+# Regexp for a line that is allowed to be longer than the limit.
+ignore-long-lines=^\s*(# )?<?https?://\S+>?$
+
+# Number of spaces of indent required inside a hanging or continued line.
+indent-after-paren=4
+
+# String used as indentation unit. This is usually "    " (4 spaces) or "\t" (1
+# tab).
+indent-string='    '
+
+# Maximum number of characters on a single line.
+max-line-length=120
+
+# Maximum number of lines in a module.
+max-module-lines=1000
+
+# Allow the body of a class to be on the same line as the declaration if body
+# contains single statement.
+single-line-class-stmt=no
+
+# Allow the body of an if to be on the same line as the test if there is no
+# else.
+single-line-if-stmt=no
+
+
+[IMPORTS]
+
+# List of modules that can be imported at any level, not just the top level
+# one.
+allow-any-import-level=
+
+# Allow explicit reexports by alias from a package __init__.
+allow-reexport-from-package=no
+
+# Allow wildcard imports from modules that define __all__.
+allow-wildcard-with-all=no
+
+# Deprecated modules which should not be used, separated by a comma.
+deprecated-modules=
+
+# Output a graph (.gv or any supported image format) of external dependencies
+# to the given file (report RP0402 must not be disabled).
+ext-import-graph=
+
+# Output a graph (.gv or any supported image format) of all (i.e. internal and
+# external) dependencies to the given file (report RP0402 must not be
+# disabled).
+import-graph=
+
+# Output a graph (.gv or any supported image format) of internal dependencies
+# to the given file (report RP0402 must not be disabled).
+int-import-graph=
+
+# Force import order to recognize a module as part of the standard
+# compatibility libraries. 
+known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant + +# Couples of modules and preferred modules, separated by a comma. +preferred-modules= + + +[LOGGING] + +# The type of string formatting that logging methods do. `old` means using % +# formatting, `new` is for `{}` formatting. +logging-format-style=old + +# Logging modules to check that the string format arguments are in logging +# function parameter format. +logging-modules=logging + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, CONTROL_FLOW, INFERENCE, INFERENCE_FAILURE, +# UNDEFINED. +confidence=HIGH, + CONTROL_FLOW, + INFERENCE, + INFERENCE_FAILURE, + UNDEFINED + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once). You can also use "--disable=all" to +# disable everything first and then re-enable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use "--disable=all --enable=classes +# --disable=W". +disable=raw-checker-failed, + bad-inline-option, + locally-disabled, + file-ignored, + suppressed-message, + useless-suppression, + deprecated-pragma, + use-symbolic-message-instead, + use-implicit-booleaness-not-comparison-to-string, + use-implicit-booleaness-not-comparison-to-zero + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). 
See also the "--disable" option for examples. +enable= + + +[METHOD_ARGS] + +# List of qualified names (i.e., library.method) which require a timeout +# parameter e.g. 'requests.api.get,requests.api.post' +timeout-methods=requests.api.delete,requests.api.get,requests.api.head,requests.api.options,requests.api.patch,requests.api.post,requests.api.put,requests.api.request + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME, + XXX, + TODO + +# Regular expression of note tags to take in consideration. +notes-rgx= + + +[REFACTORING] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + +# Complete name of functions that never returns. When checking for +# inconsistent-return-statements if a never returning function is called then +# it will be considered as an explicit return statement and no message will be +# printed. +never-returning-functions=sys.exit,argparse.parse_error + +# Let 'consider-using-join' be raised when the separator to join on would be +# non-empty (resulting in expected fixes of the type: ``"- " + " - +# ".join(items)``) +suggest-join-with-non-empty-separator=yes + + +[REPORTS] + +# Python expression which should return a score less than or equal to 10. You +# have access to the variables 'fatal', 'error', 'warning', 'refactor', +# 'convention', and 'info' which contain the number of messages in each +# category, as well as 'statement' which is the total number of statements +# analyzed. This score is used by the global evaluation report (RP0004). +evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details. +msg-template= + +# Set the output format. 
Available formats are: text, parseable, colorized, +# json2 (improved json format), json (old json format) and msvs (visual +# studio). You can also give a reporter class, e.g. +# mypackage.mymodule.MyReporterClass. +#output-format= + +# Tells whether to display a full report or only the messages. +reports=no + +# Activate the evaluation score. +score=yes + + +[SIMILARITIES] + +# Comments are removed from the similarity computation +ignore-comments=yes + +# Docstrings are removed from the similarity computation +ignore-docstrings=yes + +# Imports are removed from the similarity computation +ignore-imports=yes + +# Signatures are removed from the similarity computation +ignore-signatures=yes + +# Minimum lines number of a similarity. +min-similarity-lines=4 + + +[SPELLING] + +# Limits count of emitted suggestions for spelling mistakes. +max-spelling-suggestions=4 + +# Spelling dictionary name. No available dictionaries : You need to install +# both the python package and the system dependency for enchant to work. +spelling-dict= + +# List of comma separated words that should be considered directives if they +# appear at the beginning of a comment and should not be checked. +spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy: + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains the private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to the private dictionary (see the +# --spelling-private-dict-file option) instead of raising a message. +spelling-store-unknown-words=no + + +[STRING] + +# This flag controls whether inconsistent-quotes generates a warning when the +# character used as a quote delimiter is used inconsistently within a module. 
+check-quote-consistency=no + +# This flag controls whether the implicit-str-concat should generate a warning +# on implicit string concatenation in sequences defined over several lines. +check-str-concat-over-line-jumps=no + + +[TYPECHECK] + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators=contextlib.contextmanager + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members= + +# Tells whether to warn about missing members when the owner of the attribute +# is inferred to be None. +ignore-none=yes + +# This flag controls whether pylint should warn about no-member and similar +# checks whenever an opaque object is returned when inferring. The inference +# can return multiple potential results while evaluating a Python object, but +# some branches might not be evaluated, which results in partial inference. In +# that case, it might be useful to still emit no-member and other checks for +# the rest of the inferred objects. +ignore-on-opaque-inference=yes + +# List of symbolic message names to ignore for Mixin members. +ignored-checks-for-mixins=no-member, + not-async-context-manager, + not-context-manager, + attribute-defined-outside-init + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=optparse.Values,thread._local,_thread._local,argparse.Namespace + +# Show a hint with possible names when a member name was not found. The aspect +# of finding the hint is based on edit distance. +missing-member-hint=yes + +# The minimum edit distance a name should have in order to be considered a +# similar match for a missing member name. 
+missing-member-hint-distance=1 + +# The total number of similar names that should be taken in consideration when +# showing a hint for a missing member. +missing-member-max-choices=1 + +# Regex pattern to define which classes are considered mixins. +mixin-class-rgx=.*[Mm]ixin + +# List of decorators that change the signature of a decorated function. +signature-mutators= + + +[VARIABLES] + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid defining new builtins when possible. +additional-builtins= + +# Tells whether unused global variables should be treated as a violation. +allow-global-unused-variables=yes + +# List of names allowed to shadow builtins +allowed-redefined-builtins= + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_, + _cb + +# A regular expression matching the name of dummy variables (i.e. expected to +# not be used). +dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ + +# Argument names that match this expression will be ignored. +ignored-argument-names=_.*|^ignored_|^unused_ + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# List of qualified module names which can have objects that can redefine +# builtins. 
+redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
diff --git a/DEVELOPER.md b/DEVELOPER.md
new file mode 100644
index 0000000..8012ce9
--- /dev/null
+++ b/DEVELOPER.md
@@ -0,0 +1,127 @@
+# Event Gate — for Developers
+
+- [Get Started](#get-started)
+- [Set Up Python Environment](#set-up-python-environment)
+- [Run Static Code Analysis](#running-static-code-analysis)
+- [Run Black Tool Locally](#run-black-tool-locally)
+- [Run mypy Tool Locally](#run-mypy-tool-locally)
+- [Run Unit Test](#running-unit-test)
+- [Run Action Locally](#run-action-locally)
+
+## Get Started
+
+Clone the repository and navigate to the project directory:
+
+```shell
+git clone https://github.com/AbsaOSS/EventGate.git
+cd event-gate
+```
+
+## Set Up Python Environment
+```shell
+python3 -m venv venv
+source venv/bin/activate
+pip install -r requirements.txt
+```
+
+## Running Static Code Analysis
+
+This project uses the Pylint tool for static code analysis. Pylint analyzes your code without actually running it. It checks for errors, enforces coding standards, looks for code smells, etc.
+
+Pylint displays a global evaluation score for the code, rated out of a maximum score of 10.0. We aim to keep our code quality above 9.5.
+
+### Run Pylint
+Run Pylint on all files currently tracked by Git in the project.
+```shell
+pylint $(git ls-files '*.py')
+```
+
+To run Pylint on a specific file, follow the pattern `pylint <path_to_file>/<name_of_file>.py`.
+
+Example:
+```shell
+pylint src/writer_kafka.py
+```
+
+## Run Black Tool Locally
+This project uses the [Black](https://github.com/psf/black) tool for code formatting.
+Black aims for consistency, generality, readability, and reducing git diffs.
+The coding style used can be viewed as a strict subset of PEP 8.
+
+The project root file `pyproject.toml` defines the Black tool configuration.
+In this project, we are accepting a line length of 120 characters. 
+
+Follow these steps to format your code with Black locally:
+
+### Run Black
+Run Black on all files currently tracked by Git in the project.
+```shell
+black $(git ls-files '*.py')
+```
+
+To run Black on a specific file, follow the pattern `black <path_to_file>/<name_of_file>.py`.
+
+Example:
+```shell
+black src/writer_kafka.py
+```
+
+### Expected Output
+This is the console's expected output example after running the tool:
+```
+All done! ✨ 🍰 ✨
+1 file reformatted.
+```
+
+## Run mypy Tool Locally
+
+This project uses the [my[py]](https://mypy.readthedocs.io/en/stable/) tool, a static type checker for Python.
+
+> Type checkers help ensure that you correctly use variables and functions in your code.
+> With mypy, add type hints (PEP 484) to your Python programs,
+> and mypy will warn you when you use those types incorrectly.
+my[py] configuration is in `pyproject.toml` file.
+
+Follow these steps to format your code with my[py] locally:
+
+### Run my[py]
+
+Run my[py] on all files in the project.
+```shell
+mypy .
+```
+
+To run my[py] check on a specific file, follow the pattern `mypy <path_to_file>/<name_of_file>.py --check-untyped-defs`.
+
+Example:
+```shell
+mypy src/writer_kafka.py
+```
+
+## Running Unit Test
+
+Unit tests are written using pytest. To run the tests, use the following command:
+
+```shell
+pytest tests/
+```
+
+This will execute all tests located in the tests directory.
+
+## Code Coverage
+
+Code coverage is collected using the pytest-cov coverage tool. To run the tests and collect coverage information, use the following command:
+
+```shell
+pytest --cov=. -v tests/ --cov-fail-under=80
+```
+
+This will execute all tests in the tests directory and generate a code coverage report. 
+ +See the coverage report on the path: + +```shell +open htmlcov/index.html +``` + +## diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..85f99e1 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,11 @@ +[tool.black] +line-length = 120 +target-version = ['py311'] +force-exclude = '''test''' + +[tool.coverage.run] +omit = ["tests/*"] + +[tool.mypy] +check_untyped_defs = true +exclude = "tests" diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..12db53a --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +pytest==7.4.3 +pytest-cov==5.0.0 +pytest-mock==3.14.0 +pylint==3.2.6 +black==24.8.0 +mypy==1.15.0 +mypy-extensions==1.0.0 From c9f39ffc9a1747352c0f46aee25ab20829a6d3bd Mon Sep 17 00:00:00 2001 From: miroslavpojer Date: Mon, 8 Sep 2025 08:41:17 +0200 Subject: [PATCH 02/12] - add wokflow --- .github/CODEOWNERS | 2 +- .github/workflows/test.yml | 117 +++++++++++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/test.yml diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index b21903f..8423567 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1 +1 @@ -* @ABMC831 @OlivieFranklova @Zejnilovic @oto-macenauer-absa @petr-pokorny-absa +* @ABMC831 @Zejnilovic @oto-macenauer-absa @petr-pokorny-absa diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..920f172 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,117 @@ +name: Build and Test +on: + pull_request: + branches: + - '**' + types: [ opened, synchronize, reopened ] + +jobs: + static-code-analysis: + runs-on: ubuntu-latest + name: Pylint Static Code Analysis + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + persist-credentials: false + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + cache: 'pip' + + - name: Install dependencies + run: | + pip install -r 
requirements.txt + + - name: Analyze code with Pylint + id: analyze-code + run: | + pylint_score=$(pylint $(git ls-files '*.py')| grep 'rated at' | awk '{print $7}' | cut -d'/' -f1) + echo "PYLINT_SCORE=$pylint_score" >> $GITHUB_ENV + + - name: Check Pylint score + run: | + if (( $(echo "$PYLINT_SCORE < 9.5" | bc -l) )); then + echo "Failure: Pylint score is below 9.5 (project score: $PYLINT_SCORE)." + exit 1 + else + echo "Success: Pylint score is above 9.5 (project score: $PYLINT_SCORE)." + fi + + code-format-check: + runs-on: ubuntu-latest + name: Black Format Check + steps: + - name: Checkout repository + uses: actions/checkout@v4.1.5 + with: + persist-credentials: false + + - name: Set up Python + uses: actions/setup-python@v5.1.0 + with: + python-version: '3.11' + cache: 'pip' + + - name: Install dependencies + run: | + pip install -r requirements.txt + + - name: Check code format with Black + id: check-format + run: | + black --check $(git ls-files '*.py') + + unit-test: + name: Unit Tests + runs-on: ubuntu-latest + + defaults: + run: + shell: bash + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + persist-credentials: false + + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + cache: 'pip' + + - name: Install Python dependencies + run: | + pip install -r requirements.txt + + - name: Set PYTHONPATH environment variable + run: echo "PYTHONPATH=${GITHUB_WORKSPACE}/release_notes_generator/release_notes_generator" >> $GITHUB_ENV + + - name: Check code coverage with Pytest + run: pytest --cov=. 
-v tests/ --cov-fail-under=80 + + mypy-check: + runs-on: ubuntu-latest + name: Mypy Type Check + steps: + - name: Checkout repository + uses: actions/checkout@v4.1.5 + with: + persist-credentials: false + + - name: Set up Python + uses: actions/setup-python@v5.1.0 + with: + python-version: '3.11' + cache: 'pip' + + - name: Install dependencies + run: | + pip install -r requirements.txt + - name: Check types with Mypy + id: check-types + run: | + mypy . From fe82566df0aa87253dfea8457651e20b89d996d3 Mon Sep 17 00:00:00 2001 From: Oto Macenauer Date: Mon, 8 Sep 2025 09:13:49 +0200 Subject: [PATCH 03/12] Refactor access configuration loading and error handling in EventBridge writer for improved readability and maintainability Refactor event_gate_lambda and writer modules for improved type hinting, logging, and error handling; enhance readability and maintainability Refactor event writers to improve type hinting and logging; enhance error handling in write methods Add copyright headers and licensing information to source files Enhance error handling and logging in writers; refactor tests to use pytest Refactor code for consistency and readability; update requirements for testing and dependencies --- README.md | 264 ++++++++++++++++---------- pyproject.toml | 5 + requirements.txt | 22 ++- src/event_gate_lambda.py | 181 ++++++++++-------- src/requirements.txt | 8 - src/writer_eventbridge.py | 74 ++++++-- src/writer_kafka.py | 69 +++++-- src/writer_postgres.py | 91 ++++++--- tests/test_conf_validation.py | 94 +++++----- tests/test_event_gate_lambda.py | 316 ++++++++++++++++--------------- tests/test_writer_postgres.py | 322 ++++++++++++++++++++++---------- 11 files changed, 896 insertions(+), 550 deletions(-) delete mode 100644 src/requirements.txt diff --git a/README.md b/README.md index 0210d9f..ce1747c 100644 --- a/README.md +++ b/README.md @@ -1,109 +1,181 @@ # EventGate -Python lambda for sending well-defined messages to confluent kafka -assumes AWS Deployment with 
API Gateway exposure of endpoint + +Python AWS Lambda that exposes a simple HTTP API (via API Gateway) for validating and forwarding well-defined JSON messages to multiple backends (Kafka, EventBridge, Postgres). Designed for centralized, schema-governed event ingestion with pluggable writers. + +> Status: Internal prototype / early version -- [Lambda itself](#lambda-itself) +- [Overview](#overview) +- [Features](#features) +- [Architecture](#architecture) - [API](#api) -- [Config](#config) -- [Terraform Deplyoment](#terraform-deplyoment) +- [Configuration](#configuration) +- [Deployment](#deployment) + - [Zip Lambda Package](#zip-lambda-package) + - [Container Image Lambda](#container-image-lambda) +- [Local Development & Testing](#local-development--testing) +- [Security & Authorization](#security--authorization) +- [Writers](#writers) + - [Kafka Writer](#kafka-writer) + - [EventBridge Writer](#eventbridge-writer) + - [Postgres Writer](#postgres-writer) - [Scripts](#scripts) +- [Troubleshooting](#troubleshooting) +- [License](#license) -## Lambda itself -Hearth of the solution lives in the Src folder +## Overview +EventGate receives JSON payloads for registered topics, authorizes the caller via JWT, validates the payload against a JSON Schema, and then forwards the payload to one or more configured sinks (Kafka, EventBridge, Postgres). Schemas and access control are externally configurable (local file or S3) to allow runtime evolution without code changes. 
+
+## Features
+- Topic registry with per-topic JSON Schema validation
+- Multiple parallel writers (Kafka / EventBridge / Postgres) — failure in one does not block the others; aggregated error reporting
+- JWT-based per-topic authorization (RS256 public key fetched remotely)
+- Runtime-configurable access rules (local or S3)
+- API-discoverable schema catalogue
+- Pluggable writer initialization via `config.json`
+- Terraform IaC examples for AWS deployment (API Gateway + Lambda)
+- Supports both Zip-based and Container Image Lambda packaging (Container path enables custom `librdkafka` / SASL_SSL / Kerberos builds)
+
+## Architecture
+High-level flow:
+1. Client requests a JWT from an external token provider (link exposed via `/token`).
+2. Client submits `POST /topics/{topicName}` with `Authorization: Bearer <token>` header and JSON body.
+3. Lambda resolves topic schema, validates payload, authorizes subject (`sub`) against access map.
+4. Writers invoked (Kafka, EventBridge, Postgres). Each returns success/failure.
+5. Aggregated response returned: `202 Accepted` if all succeed; `500` with per-writer error list otherwise.
+ +Key files: +- `src/event_gate_lambda.py` – main Lambda handler and routing +- `conf/*.json` – configuration and topic schemas +- `conf/api.yaml` – OpenAPI 3 definition served at `/api` +- `writer_*.py` – individual sink implementations ## API -POST πŸ”’ method is guarded by JWT token in standard header "bearer" - -| Method | Endpoint | Info | -|---------|-----------------------|------------------------------------------------------------------------------| -| GET | `/api` | OpenAPI 3 definition | -| GET | `/token` | forwards (HTTP303) caller to where to obtain JWT token for posting to topic | -| GET | `/topics` | lists available topics | -| GET | `/topics/{topicName}` | schema for given topic | -| POST πŸ”’ | `/topics/{topicName}` | posts payload (after authorization and schema validation) into kafka topic | -| POST | `terminate` | kills lambda - useful for when forcing config reload is desired | - - -## Config -There are 3 configs for this solution (in conf folder) - - - config.json - - this one needs to live in the conf folder - - defines where are other resources/configs - - for SASL_SSL also points to required secrets - - access.json - - this one could be local or in AWS S3 - - defines who has access to post to individual topics - - topics.json - - this one could be local or in AWS S3 - - defines schema of the topics, as well as enumerates those - -## Terraform Deplyoment - -Whole solution expects to be deployed as lambda in AWS, -there are prepared terraform scripts to make initial deplyoment, and can be found in "terraform" folder - -### Zip lambda - -Designated for use without authentication towards kafka - - - create **zip** archive `scripts/prepare.deplyoment.sh` - - upload **zip** to **S3** - - provide terraform variables with tfvars - - `aws_region` - - `vpc_id` - - `vpc_endpoint` - - `resource prefix` - - all terraform resources would be prefixed my this - - `lambda_role_arn ` - - the role for the lambda - - should be able to make HTTP calls to 
wherever kafka server lives - - `lambda_vpc_subnet_ids` - - `lambda_package_type` - - `Zip` - - `lambda_src_s3_bucket ` - - the bucket where **zip** is already uploaded - - `lambda_src_s3_key` - - name of already uploaded **zip** - - `lambda_src_ecr_image` - - ignored - - `terraform apply` - -### Containerized lambda - -Designated for use with kerberizes SASL_SSL authentication towards kafka, as it requires custom librdkafka compilation - - - build docker (**[follow comments at the top of Dockerfile](./Dockerfile)**) - - upload docker **image** to **ECR** - - provide terraform variables with tfvars - - `aws_region` - - `vpc_id` - - `vpc_endpoint` - - `resource prefix` - - all terraform resources would be prefixed my this - - `lambda_role_arn ` - - the role for the lambda - - should be able to make HTTP calls to wherever kafka server lives - - `lambda_vpc_subnet_ids` - - `lambda_package_type` - - `Image` - - `lambda_src_s3_bucket ` - - ignored - - `lambda_src_s3_key` - - ignored - - `lambda_src_ecr_image` - - already uploaded **image** in **ECR** - - `terraform apply` +All responses are JSON unless otherwise noted. The POST endpoint requires a valid JWT. 
+ +| Method | Endpoint | Auth | Description | +|--------|----------|------|-------------| +| GET | `/api` | none | Returns OpenAPI 3 definition (raw YAML) | +| GET | `/token` | none | 303 redirect to external token provider | +| GET | `/topics` | none | Lists available topic names | +| GET | `/topics/{topicName}` | none | Returns JSON Schema for the topic | +| POST | `/topics/{topicName}` | JWT | Validates + forwards message to configured sinks | +| POST | `/terminate` | (internal) | Forces Lambda process exit (used to trigger cold start & config reload) | + +Status codes: +- 202 – Accepted (all writers succeeded) +- 400 – Schema validation failure +- 401 – Token missing/invalid +- 403 – Subject unauthorized for topic +- 404 – Unknown topic or route +- 500 – One or more writers failed / internal error + +## Configuration +All core runtime configuration is driven by JSON files located in `conf/` unless S3 paths are specified. + +Primary file: `conf/config.json` +Example (sanitized): +```json +{ + "access_config": "s3:///access.json", + "token_provider_url": "https://", + "token_public_key_url": "https:///public-key", + "kafka_bootstrap_server": "broker1:9092,broker2:9092", + "event_bus_arn": "arn:aws:events:region:acct:event-bus/your-bus" +} +``` +Supporting configs: +- `access.json` – map: topicName -> array of authorized subjects (JWT `sub`). May reside locally or at S3 path referenced by `access_config`. +- `topic_*.json` – each file contains a JSON Schema for a topic. In the current code these are explicitly loaded inside `event_gate_lambda.py`. (Future enhancement: auto-discover or index file.) +- `api.yaml` – OpenAPI spec served verbatim at runtime. + +Environment variables: +- `LOG_LEVEL` (optional) – defaults to `INFO`. + +## Deployment +Infrastructure-as-Code examples live in the `terraform/` directory. Variables are supplied via a `*.tfvars` file or CLI. + +### Zip Lambda Package +Use when no custom native libraries are needed. +1. 
Run packaging script: `scripts/prepare.deplyoment.sh` (downloads deps + zips sources & config) +2. Upload resulting zip to S3 +3. Provide Terraform variables: + - `aws_region` + - `vpc_id` + - `vpc_endpoint` + - `resource_prefix` (prepended to created resource names) + - `lambda_role_arn` + - `lambda_vpc_subnet_ids` + - `lambda_package_type = "Zip"` + - `lambda_src_s3_bucket` + - `lambda_src_s3_key` +4. `terraform apply` + +### Container Image Lambda +Use when Kafka access needs Kerberos / SASL_SSL or custom `librdkafka` build. +1. Build image (see comments at top of `Dockerfile`) +2. Push to ECR +3. Terraform variables: + - Same networking / role vars as above + - `lambda_package_type = "Image"` + - `lambda_src_ecr_image` (ECR image reference) +4. `terraform apply` + +## Local Development & Testing +Install Python tooling (Python 3.11 suggested) then: +``` +python -m venv .venv +source .venv/bin/activate +pip install -r requirements.txt +pytest -q +``` +To invoke handler manually: +```python +from src import event_gate_lambda as m +# craft an event similar to API Gateway proxy integration +``` + +## Security & Authorization +- JWT tokens must be RS256 signed; the public key is fetched at cold start from `token_public_key_url` (DER base64 inside JSON `{ "key": "..." }`). +- Subject claim (`sub`) is matched against `ACCESS[topicName]`. +- Authorization header forms accepted: + - `Authorization: Bearer ` (preferred) + - Legacy: `bearer: ` custom header +- No token introspection beyond signature & standard claim extraction. + +## Writers +Each writer is initialized during cold start. Failures are isolated; aggregated errors returned in a single `500` response if any writer fails. + +### Kafka Writer +Configured via `kafka_bootstrap_server`. (Future: support auth properties / TLS configuration.) + +### EventBridge Writer +Publishes events to the configured `event_bus_arn` using put events API. 
+ +### Postgres Writer +Example writer (currently a placeholder if no DSN present) demonstrating extensibility pattern. ## Scripts -Useful scripts for dev and Deployment +- `scripts/prepare.deplyoment.sh` – build Zip artifact for Lambda (typo in name retained for now; may rename later) +- `scripts/notebook.ipynb` – exploratory invocation cells per endpoint +- `scripts/get_token.http` – sample HTTP request for tooling (e.g., VSCode REST client) + +## Troubleshooting +| Symptom | Possible Cause | Action | +|---------|----------------|--------| +| 401 Unauthorized | Missing / malformed token header | Ensure `Authorization: Bearer` present | +| 403 Forbidden | Subject not listed in access map | Update `access.json` and redeploy / reload | +| 404 Topic not found | Wrong casing or not loaded in code | Verify loaded topics & file names | +| 500 Writer failure | Downstream (Kafka / EventBridge / DB) unreachable | Check network / VPC endpoints / security groups | +| Lambda keeps old config | Warm container | Call `/terminate` (internal) to force cold start | + +## License +Licensed under the Apache License, Version 2.0. See the [LICENSE](./LICENSE) file for full text. + +Copyright 2025 ABSA Group Limited. -### Notebook -Jupyter notebook, with one cell for lambda initialization and one cell per method, for testing purposes -Obviously using it requires correct configs to be in place (PUBLIC key is being loaded during initilization) +You may not use this project except in compliance with the License. Unless required by law or agreed in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. -### Preapare Deployment -Shell script for fetching python requirements and ziping it together with sources and config into lambda archive -it needs to be uploaded to s3 bucket first before running the terraform. +--- +Contributions & enhancements welcome (subject to project guidelines). 
diff --git a/pyproject.toml b/pyproject.toml index 85f99e1..cdd2cd8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,3 +9,8 @@ omit = ["tests/*"] [tool.mypy] check_untyped_defs = true exclude = "tests" +ignore_missing_imports = true +python_version = "3.11" +packages = ["src"] +explicit_package_bases = true +disable_error_code = ["import-untyped"] diff --git a/requirements.txt b/requirements.txt index 12db53a..a4ccb09 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,15 @@ -pytest==7.4.3 -pytest-cov==5.0.0 -pytest-mock==3.14.0 -pylint==3.2.6 -black==24.8.0 -mypy==1.15.0 -mypy-extensions==1.0.0 +pytest==8.4.2 +pytest-cov==6.3.0 +pytest-mock==3.15.0 +pylint==3.3.8 +black==25.1.0 +mypy==1.17.1 +mypy-extensions==1.1.0 +urllib3==2.5.0 +cryptography==45.0.7 +jsonschema==4.25.1 +PyJWT==2.10.1 +requests==2.32.5 +boto3==1.40.25 +confluent_kafka +psycopg2-binary==2.9.10 \ No newline at end of file diff --git a/src/event_gate_lambda.py b/src/event_gate_lambda.py index b5186c7..62760ca 100644 --- a/src/event_gate_lambda.py +++ b/src/event_gate_lambda.py @@ -1,5 +1,5 @@ # -# Copyright 2024 ABSA Group Limited +# Copyright 2025 ABSA Group Limited # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -18,6 +18,8 @@ import logging import os import sys +from typing import Any, Dict + import urllib3 import boto3 @@ -36,18 +38,15 @@ _CONF_DIR = CONF_DIR _INVALID_CONF_ENV = INVALID_CONF_ENV -sys.path.append(os.path.join(os.path.dirname(__file__))) - -import writer_eventbridge -import writer_kafka -import writer_postgres +from . 
import writer_eventbridge, writer_kafka, writer_postgres urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) logger = logging.getLogger(__name__) log_level = os.environ.get("LOG_LEVEL", "INFO") logger.setLevel(log_level) -logger.addHandler(logging.StreamHandler()) +if not logger.handlers: + logger.addHandler(logging.StreamHandler()) logger.debug("Initialized LOGGER") logger.debug(f"Using CONF_DIR={_CONF_DIR}") if _INVALID_CONF_ENV: @@ -55,49 +54,45 @@ f"CONF_DIR env var set to non-existent path: {_INVALID_CONF_ENV}; fell back to {_CONF_DIR}" ) -with open(os.path.join(_CONF_DIR, "api.yaml"), "r") as file: +# Resolve project root (parent directory of this file's directory) +_PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) +_CONF_DIR = os.path.join(_PROJECT_ROOT, "conf") + +with open(os.path.join(_CONF_DIR, "api.yaml"), "r", encoding="utf-8") as file: API = file.read() logger.debug("Loaded API definition") -TOPICS = {} -with open(os.path.join(_CONF_DIR, "topic_runs.json"), "r") as file: +TOPICS: Dict[str, Dict[str, Any]] = {} +with open(os.path.join(_CONF_DIR, "topic_runs.json"), "r", encoding="utf-8") as file: TOPICS["public.cps.za.runs"] = json.load(file) -with open(os.path.join(_CONF_DIR, "topic_dlchange.json"), "r") as file: +with open(os.path.join(_CONF_DIR, "topic_dlchange.json"), "r", encoding="utf-8") as file: TOPICS["public.cps.za.dlchange"] = json.load(file) -with open(os.path.join(_CONF_DIR, "topic_test.json"), "r") as file: +with open(os.path.join(_CONF_DIR, "topic_test.json"), "r", encoding="utf-8") as file: TOPICS["public.cps.za.test"] = json.load(file) logger.debug("Loaded TOPICS") -with open(os.path.join(_CONF_DIR, "config.json"), "r") as file: +with open(os.path.join(_CONF_DIR, "config.json"), "r", encoding="utf-8") as file: CONFIG = json.load(file) logger.debug("Loaded main CONFIG") -aws_s3 = boto3.Session().resource("s3", verify=False) +aws_s3 = boto3.Session().resource("s3", verify=False) # nosec Boto 
verify disabled intentionally logger.debug("Initialized AWS S3 Client") if CONFIG["access_config"].startswith("s3://"): name_parts = CONFIG["access_config"].split("/") - bucket_name = name_parts[2] - bucket_object = "/".join(name_parts[3:]) - ACCESS = json.loads( - aws_s3.Bucket(bucket_name) - .Object(bucket_object) - .get()["Body"] - .read() - .decode("utf-8") - ) + BUCKET_NAME = name_parts[2] + BUCKET_OBJECT_KEY = "/".join(name_parts[3:]) + ACCESS = json.loads(aws_s3.Bucket(BUCKET_NAME).Object(BUCKET_OBJECT_KEY).get()["Body"].read().decode("utf-8")) else: - with open(CONFIG["access_config"], "r") as file: + with open(CONFIG["access_config"], "r", encoding="utf-8") as file: ACCESS = json.load(file) logger.debug("Loaded ACCESS definitions") TOKEN_PROVIDER_URL = CONFIG["token_provider_url"] -token_public_key_encoded = requests.get( - CONFIG["token_public_key_url"], verify=False -).json()["key"] -TOKEN_PUBLIC_KEY = serialization.load_der_public_key( - base64.b64decode(token_public_key_encoded) -) +# Add timeout to avoid hanging requests +response_json = requests.get(CONFIG["token_public_key_url"], verify=False, timeout=5).json() # nosec external +token_public_key_encoded = response_json["key"] +TOKEN_PUBLIC_KEY: Any = serialization.load_der_public_key(base64.b64decode(token_public_key_encoded)) logger.debug("Loaded TOKEN_PUBLIC_KEY") writer_eventbridge.init(logger, CONFIG) @@ -105,7 +100,16 @@ writer_postgres.init(logger) -def _error_response(status, err_type, message): +def _error_response(status: int, err_type: str, message: str) -> Dict[str, Any]: + """Build a standardized JSON error response body. + + Args: + status: HTTP status code. + err_type: A short error classifier (e.g. 'auth', 'validation'). + message: Human readable error description. + Returns: + A dictionary compatible with API Gateway Lambda Proxy integration. 
+ """ return { "statusCode": status, "headers": {"Content-Type": "application/json"}, @@ -119,59 +123,69 @@ def _error_response(status, err_type, message): } -def get_api(): +def get_api() -> Dict[str, Any]: + """Return the OpenAPI specification text.""" return {"statusCode": 200, "body": API} -def get_token(): +def get_token() -> Dict[str, Any]: + """Return 303 redirect to token provider endpoint.""" logger.debug("Handling GET Token") return {"statusCode": 303, "headers": {"Location": TOKEN_PROVIDER_URL}} -def get_topics(): +def get_topics() -> Dict[str, Any]: + """Return list of available topic names.""" logger.debug("Handling GET Topics") return { "statusCode": 200, "headers": {"Content-Type": "application/json"}, - "body": json.dumps([topicName for topicName in TOPICS]), + "body": json.dumps(list(TOPICS)), } -def get_topic_schema(topicName): - logger.debug(f"Handling GET TopicSchema({topicName})") - if topicName not in TOPICS: - return _error_response(404, "topic", f"Topic '{topicName}' not found") +def get_topic_schema(topic_name: str) -> Dict[str, Any]: + """Return the JSON schema for a specific topic. + + Args: + topic_name: The topic whose schema is requested. + """ + logger.debug("Handling GET TopicSchema(%s)", topic_name) + if topic_name not in TOPICS: + return _error_response(404, "topic", f"Topic '{topic_name}' not found") + + return {"statusCode": 200, "headers": {"Content-Type": "application/json"}, "body": json.dumps(TOPICS[topic_name])} - return { - "statusCode": 200, - "headers": {"Content-Type": "application/json"}, - "body": json.dumps(TOPICS[topicName]), - } +def post_topic_message(topic_name: str, topic_message: Dict[str, Any], token_encoded: str) -> Dict[str, Any]: + """Validate auth and schema; dispatch message to all writers. -def post_topic_message(topicName, topicMessage, tokenEncoded): - logger.debug(f"Handling POST {topicName}") + Args: + topic_name: Target topic name. + topic_message: JSON message payload. 
+ token_encoded: Encoded bearer JWT token string. + """ + logger.debug("Handling POST %s", topic_name) try: - token = jwt.decode(tokenEncoded, TOKEN_PUBLIC_KEY, algorithms=["RS256"]) - except Exception: + token = jwt.decode(token_encoded, TOKEN_PUBLIC_KEY, algorithms=["RS256"]) # type: ignore[arg-type] + except jwt.PyJWTError: # type: ignore[attr-defined] return _error_response(401, "auth", "Invalid or missing token") - if topicName not in TOPICS: - return _error_response(404, "topic", f"Topic '{topicName}' not found") + if topic_name not in TOPICS: + return _error_response(404, "topic", f"Topic '{topic_name}' not found") - user = token["sub"] - if topicName not in ACCESS or user not in ACCESS[topicName]: + user = token.get("sub") + if topic_name not in ACCESS or user not in ACCESS[topic_name]: # type: ignore[index] return _error_response(403, "auth", "User not authorized for topic") try: - validate(instance=topicMessage, schema=TOPICS[topicName]) - except ValidationError as e: - return _error_response(400, "validation", e.message) + validate(instance=topic_message, schema=TOPICS[topic_name]) + except ValidationError as exc: + return _error_response(400, "validation", exc.message) - # Run all writers independently (avoid short-circuit so failures in one don't skip others) - kafka_ok, kafka_err = writer_kafka.write(topicName, topicMessage) - eventbridge_ok, eventbridge_err = writer_eventbridge.write(topicName, topicMessage) - postgres_ok, postgres_err = writer_postgres.write(topicName, topicMessage) + kafka_ok, kafka_err = writer_kafka.write(topic_name, topic_message) + eventbridge_ok, eventbridge_err = writer_eventbridge.write(topic_name, topic_message) + postgres_ok, postgres_err = writer_postgres.write(topic_name, topic_message) errors = [] if not kafka_ok: @@ -195,39 +209,46 @@ def post_topic_message(topicName, topicMessage, tokenEncoded): } -def extract_token(eventHeaders): - # Initial implementation used bearer header directly - if "bearer" in eventHeaders: - 
return eventHeaders["bearer"] +def extract_token(event_headers: Dict[str, str]) -> str: + """Extract bearer token from headers. - if "Authorization" in eventHeaders and eventHeaders["Authorization"].startswith( - "Bearer " - ): - return eventHeaders["Authorization"][len("Bearer ") :] + Supports lowercase custom 'bearer' header or standard 'Authorization: Bearer '. + Returns empty string if not present (caller handles auth error response). + """ + if "bearer" in event_headers: + return event_headers["bearer"] + auth_header = event_headers.get("Authorization", "") + if auth_header.startswith("Bearer "): + return auth_header[len("Bearer ") :] + return "" - return "" # Will result in 401 +def lambda_handler(event: Dict[str, Any], context: Any): # pylint: disable=unused-argument,too-many-return-statements + """AWS Lambda entry point. -def lambda_handler(event, context): + Dispatches based on API Gateway proxy 'resource' and 'httpMethod'. + """ try: - if event["resource"].lower() == "/api": + resource = event.get("resource", "").lower() + if resource == "/api": return get_api() - if event["resource"].lower() == "/token": + if resource == "/token": return get_token() - if event["resource"].lower() == "/topics": + if resource == "/topics": return get_topics() - if event["resource"].lower() == "/topics/{topic_name}": - if event["httpMethod"] == "GET": + if resource == "/topics/{topic_name}": + method = event.get("httpMethod") + if method == "GET": return get_topic_schema(event["pathParameters"]["topic_name"].lower()) - if event["httpMethod"] == "POST": + if method == "POST": return post_topic_message( event["pathParameters"]["topic_name"].lower(), json.loads(event["body"]), - extract_token(event["headers"]), + extract_token(event.get("headers", {})), ) - if event["resource"].lower() == "/terminate": - sys.exit("TERMINATING") + if resource == "/terminate": + sys.exit("TERMINATING") # pragma: no cover - deliberate termination path return _error_response(404, "route", 
"Resource not found") - except Exception as e: - logger.error(f"Unexpected exception: {e}") + except Exception as exc: # pylint: disable=broad-exception-caught + logger.error("Unexpected exception: %s", exc) return _error_response(500, "internal", "Unexpected server error") diff --git a/src/requirements.txt b/src/requirements.txt deleted file mode 100644 index 89ddd0e..0000000 --- a/src/requirements.txt +++ /dev/null @@ -1,8 +0,0 @@ -urllib3 -cryptography -jsonschema -PyJWT -requests -boto3 -confluent_kafka -psycopg2 \ No newline at end of file diff --git a/src/writer_eventbridge.py b/src/writer_eventbridge.py index 6ef8292..cb356a6 100644 --- a/src/writer_eventbridge.py +++ b/src/writer_eventbridge.py @@ -1,44 +1,78 @@ +"""EventBridge writer module. + +Provides initialization and write functionality for publishing events to AWS EventBridge. +""" + import json +import logging +from typing import Any, Dict, Optional, Tuple import boto3 +from botocore.exceptions import BotoCoreError, ClientError + +STATE: Dict[str, Any] = {"logger": logging.getLogger(__name__), "event_bus_arn": "", "client": None} + +def init(logger: logging.Logger, config: Dict[str, Any]) -> None: + """Initialize the EventBridge writer. -def init(logger, CONFIG): - global _logger - global EVENT_BUS_ARN - global aws_eventbridge + Args: + logger: Shared application logger. + config: Configuration dictionary (expects optional 'event_bus_arn'). + """ + STATE["logger"] = logger + STATE["client"] = boto3.client("events") + STATE["event_bus_arn"] = config.get("event_bus_arn", "") + STATE["logger"].debug("Initialized EVENTBRIDGE writer") - _logger = logger - aws_eventbridge = boto3.client("events") - EVENT_BUS_ARN = CONFIG["event_bus_arn"] if "event_bus_arn" in CONFIG else "" - _logger.debug("Initialized EVENTBRIDGE writer") +def write(topic_name: str, message: Dict[str, Any]) -> Tuple[bool, Optional[str]]: + """Publish a message to EventBridge. + Args: + topic_name: Source topic name used as event Source. 
+ message: JSON-serializable payload. + Returns: + Tuple of success flag and optional error message. + """ + logger = STATE["logger"] + event_bus_arn = STATE["event_bus_arn"] + client = STATE["client"] -def write(topicName, message): - if not EVENT_BUS_ARN: - _logger.debug("No EventBus Arn - skipping") + if not event_bus_arn: + logger.debug("No EventBus Arn - skipping") + return True, None + if client is None: # defensive + logger.debug("EventBridge client not initialized - skipping") return True, None try: - _logger.debug(f"Sending to eventBridge {topicName}") - response = aws_eventbridge.put_events( + logger.debug("Sending to eventBridge %s", topic_name) + response = client.put_events( Entries=[ { - "Source": topicName, + "Source": topic_name, "DetailType": "JSON", "Detail": json.dumps(message), - "EventBusName": EVENT_BUS_ARN, + "EventBusName": event_bus_arn, } ] ) - if response["FailedEntryCount"] > 0: + if response.get("FailedEntryCount", 0) > 0: msg = str(response) - _logger.error(msg) + logger.error(msg) return False, msg - except Exception as e: - err_msg = f"The EventBridge writer failed with unknown error: {str(e)}" - _logger.error(err_msg) + except (BotoCoreError, ClientError) as err: + err_msg = f"The EventBridge writer failed: {err}" # specific AWS error + logger.error(err_msg) + return False, err_msg + except Exception as err: # pragma: no cover - unexpected failure path + err_msg = ( + f"The EventBridge writer failed with unknown error: {err}" + if not isinstance(err, (BotoCoreError, ClientError)) + else str(err) + ) + logger.error(err_msg) return False, err_msg return True, None diff --git a/src/writer_kafka.py b/src/writer_kafka.py index a6b6a4b..e411171 100644 --- a/src/writer_kafka.py +++ b/src/writer_kafka.py @@ -1,41 +1,70 @@ +"""Kafka writer module. + +Initializes a Confluent Kafka Producer and publishes messages for a topic. 
+""" + import json +import logging +from typing import Any, Dict, Optional, Tuple -import boto3 from confluent_kafka import Producer +STATE: Dict[str, Any] = {"logger": logging.getLogger(__name__), "producer": None} + +# Module globals for typing +_logger: logging.Logger = logging.getLogger(__name__) +kafka_producer: Optional[Producer] = None -def init(logger, CONFIG): - global _logger - global kafka_producer +def init(logger: logging.Logger, config: Dict[str, Any]) -> None: + """Initialize Kafka producer. - _logger = logger + Args: + logger: Shared application logger. + config: Configuration dictionary (expects 'kafka_bootstrap_server' plus optional SASL/SSL fields). + """ + STATE["logger"] = logger - producer_config = {"bootstrap.servers": CONFIG["kafka_bootstrap_server"]} - if "kafka_sasl_kerberos_principal" in CONFIG and "kafka_ssl_key_path" in CONFIG: + producer_config: Dict[str, Any] = {"bootstrap.servers": config["kafka_bootstrap_server"]} + if "kafka_sasl_kerberos_principal" in config and "kafka_ssl_key_path" in config: producer_config.update( { "security.protocol": "SASL_SSL", "sasl.mechanism": "GSSAPI", "sasl.kerberos.service.name": "kafka", - "sasl.kerberos.keytab": CONFIG["kafka_sasl_kerberos_keytab_path"], - "sasl.kerberos.principal": CONFIG["kafka_sasl_kerberos_principal"], - "ssl.ca.location": CONFIG["kafka_ssl_ca_path"], - "ssl.certificate.location": CONFIG["kafka_ssl_cert_path"], - "ssl.key.location": CONFIG["kafka_ssl_key_path"], - "ssl.key.password": CONFIG["kafka_ssl_key_password"], + "sasl.kerberos.keytab": config["kafka_sasl_kerberos_keytab_path"], + "sasl.kerberos.principal": config["kafka_sasl_kerberos_principal"], + "ssl.ca.location": config["kafka_ssl_ca_path"], + "ssl.certificate.location": config["kafka_ssl_cert_path"], + "ssl.key.location": config["kafka_ssl_key_path"], + "ssl.key.password": config["kafka_ssl_key_password"], } ) - _logger.debug("producer will use SASL_SSL") - kafka_producer = Producer(producer_config) - 
_logger.debug("Initialized KAFKA writer") + STATE["logger"].debug("Kafka producer will use SASL_SSL") + + STATE["producer"] = Producer(producer_config) + STATE["logger"].debug("Initialized KAFKA writer") + + +def write(topic_name: str, message: Dict[str, Any]) -> Tuple[bool, Optional[str]]: + """Publish a message to Kafka. + Args: + topic_name: Kafka topic to publish to. + message: JSON-serializable payload. + Returns: + Tuple[success flag, optional error message]. + """ + logger = STATE["logger"] + producer: Optional[Producer] = STATE.get("producer") # type: ignore[assignment] -def write(topicName, message): try: - _logger.debug(f"Sending to kafka {topicName}") - errors = [] + if producer is None: + logger.debug("Kafka producer not initialized - skipping") + return True, None + _logger.debug(f"Sending to kafka {topic_name}") + errors: list[Any] = [] kafka_producer.produce( - topicName, + topic_name, key="", value=json.dumps(message).encode("utf-8"), callback=lambda err, msg: ( diff --git a/src/writer_postgres.py b/src/writer_postgres.py index 03b9fae..a9138a2 100644 --- a/src/writer_postgres.py +++ b/src/writer_postgres.py @@ -1,18 +1,33 @@ +"""Postgres writer module. + +Handles optional initialization via AWS Secrets Manager and topic-based inserts into Postgres. +""" + import json import os +import logging +from typing import Any, Dict, Tuple, Optional import boto3 -from botocore.exceptions import ClientError try: import psycopg2 # noqa: F401 except ImportError: # pragma: no cover - environment without psycopg2 - psycopg2 = None + psycopg2 = None # type: ignore + +# Module level globals for typing +_logger: logging.Logger = logging.getLogger(__name__) +POSTGRES: Dict[str, Any] = {"database": ""} -def init(logger): - global _logger - global POSTGRES +def init(logger: logging.Logger) -> None: + """Initialize Postgres credentials either from AWS Secrets Manager or fallback empty config. + + Args: + logger: Shared application logger. 
+ """ + global _logger # pylint: disable=global-statement + global POSTGRES # pylint: disable=global-statement _logger = logger @@ -33,8 +48,15 @@ def init(logger): _logger.debug("Initialized POSTGRES writer") -def postgres_edla_write(cursor, table, message): - _logger.debug(f"Sending to Postgres - {table}") +def postgres_edla_write(cursor, table: str, message: Dict[str, Any]) -> None: + """Insert a dlchange style event row. + + Args: + cursor: Database cursor. + table: Target table name. + message: Event payload. + """ + _logger.debug("Sending to Postgres - %s", table) cursor.execute( f""" INSERT INTO {table} @@ -92,8 +114,16 @@ def postgres_edla_write(cursor, table, message): ) -def postgres_run_write(cursor, table_runs, table_jobs, message): - _logger.debug(f"Sending to Postgres - {table_runs} and {table_jobs}") +def postgres_run_write(cursor, table_runs: str, table_jobs: str, message: Dict[str, Any]) -> None: + """Insert a run event row plus related job rows. + + Args: + cursor: Database cursor. + table_runs: Runs table name. + table_jobs: Jobs table name. + message: Event payload (includes jobs array). + """ + _logger.debug("Sending to Postgres - %s and %s", table_runs, table_jobs) cursor.execute( f""" INSERT INTO {table_runs} @@ -169,8 +199,15 @@ def postgres_run_write(cursor, table_runs, table_jobs, message): ) -def postgres_test_write(cursor, table, message): - _logger.debug(f"Sending to Postgres - {table}") +def postgres_test_write(cursor, table: str, message: Dict[str, Any]) -> None: + """Insert a test topic row. + + Args: + cursor: Database cursor. + table: Target table name. + message: Event payload. + """ + _logger.debug("Sending to Postgres - %s", table) cursor.execute( f""" INSERT INTO {table} @@ -206,38 +243,46 @@ def postgres_test_write(cursor, table, message): ) -def write(topicName, message): +def write(topic_name: str, message: Dict[str, Any]) -> Tuple[bool, Optional[str]]: + """Dispatch insertion for a topic into the correct Postgres table(s). 
+ + Skips if Postgres not configured or psycopg2 unavailable. Returns success flag and optional error. + + Args: + topic_name: Incoming topic identifier. + message: Event payload. + """ try: - if not POSTGRES["database"]: + if not POSTGRES.get("database"): _logger.debug("No Postgres - skipping") return True, None - if psycopg2 is None: + if psycopg2 is None: # type: ignore _logger.debug("psycopg2 not available - skipping actual Postgres write") return True, None - with psycopg2.connect( + with psycopg2.connect( # type: ignore[attr-defined] database=POSTGRES["database"], host=POSTGRES["host"], user=POSTGRES["user"], password=POSTGRES["password"], port=POSTGRES["port"], - ) as connection: - with connection.cursor() as cursor: - if topicName == "public.cps.za.dlchange": + ) as connection: # type: ignore[call-arg] + with connection.cursor() as cursor: # type: ignore + if topic_name == "public.cps.za.dlchange": postgres_edla_write(cursor, "public_cps_za_dlchange", message) - elif topicName == "public.cps.za.runs": + elif topic_name == "public.cps.za.runs": postgres_run_write( cursor, "public_cps_za_runs", "public_cps_za_runs_jobs", message ) - elif topicName == "public.cps.za.test": + elif topic_name == "public.cps.za.test": postgres_test_write(cursor, "public_cps_za_test", message) else: - msg = f"unknown topic for postgres {topicName}" + msg = f"unknown topic for postgres {topic_name}" _logger.error(msg) return False, msg - connection.commit() - except Exception as e: + connection.commit() # type: ignore + except Exception as e: # pragma: no cover - defensive (still tested though) err_msg = f"The Postgres writer with failed unknown error: {str(e)}" _logger.error(err_msg) return False, err_msg diff --git a/tests/test_conf_validation.py b/tests/test_conf_validation.py index a1ead4e..df153b4 100644 --- a/tests/test_conf_validation.py +++ b/tests/test_conf_validation.py @@ -1,7 +1,22 @@ import os +# +# Copyright 2025 ABSA Group Limited +# +# Licensed under the Apache 
License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# import json -import unittest from glob import glob +import pytest CONF_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), "conf") @@ -17,50 +32,41 @@ def load_json(path): with open(path, "r") as f: return json.load(f) -class TestConfigFiles(unittest.TestCase): - def test_config_files_have_required_keys(self): - # Pick up any config*.json (excluding access and topics) - config_files = [ - f for f in glob(os.path.join(CONF_DIR, "config*.json")) - if os.path.basename(f) not in {"access.json"} - ] - self.assertTrue(config_files, "No config files found matching pattern config*.json") - for path in config_files: - with self.subTest(config=path): - data = load_json(path) - missing = REQUIRED_CONFIG_KEYS - data.keys() - self.assertFalse(missing, f"Config {path} missing keys: {missing}") +@pytest.fixture(scope="module") +def config_files(): + files = [ + f for f in glob(os.path.join(CONF_DIR, "config*.json")) + if os.path.basename(f) not in {"access.json"} + ] + assert files, "No config files found matching pattern config*.json" + return files - def test_access_json_structure(self): - path = os.path.join(CONF_DIR, "access.json") +@pytest.mark.parametrize("key", sorted(REQUIRED_CONFIG_KEYS)) +def test_config_files_have_required_keys(config_files, key): + for path in config_files: data = load_json(path) - self.assertIsInstance(data, dict, "access.json must contain an object mapping topic -> list[user]") - for topic, users in data.items(): - with 
self.subTest(topic=topic): - self.assertIsInstance(topic, str) - self.assertIsInstance(users, list, f"Topic {topic} value must be a list of users") - self.assertTrue(all(isinstance(u, str) for u in users), f"All users for topic {topic} must be strings") + assert key in data, f"Config {path} missing key: {key}" - def test_topic_json_schemas_basic(self): - topic_files = glob(os.path.join(CONF_DIR, "topic_*.json")) - self.assertTrue(topic_files, "No topic_*.json files found") - for path in topic_files: - with self.subTest(topic_file=path): - schema = load_json(path) - # Basic required structure - self.assertEqual(schema.get("type"), "object", "Schema root 'type' must be 'object'") - props = schema.get("properties") - self.assertIsInstance(props, dict, "Schema must define 'properties' object") - required = schema.get("required") - self.assertIsInstance(required, list, "Schema must define 'required' list") - # Each required field is present in properties - missing_props = [r for r in required if r not in props] - self.assertFalse(missing_props, f"Required fields missing in properties: {missing_props}") - # Ensure property entries themselves have at least a type - for name, definition in props.items(): - self.assertIsInstance(definition, dict, f"Property {name} definition must be an object") - self.assertIn("type", definition, f"Property {name} must specify a 'type'") - -if __name__ == "__main__": # pragma: no cover - unittest.main() +def test_access_json_structure(): + path = os.path.join(CONF_DIR, "access.json") + data = load_json(path) + assert isinstance(data, dict), "access.json must contain an object mapping topic -> list[user]" + for topic, users in data.items(): + assert isinstance(topic, str) + assert isinstance(users, list), f"Topic {topic} value must be a list of users" + assert all(isinstance(u, str) for u in users), f"All users for topic {topic} must be strings" +@pytest.mark.parametrize("topic_file", glob(os.path.join(CONF_DIR, "topic_*.json"))) +def 
test_topic_json_schemas_basic(topic_file): + assert topic_file, "No topic_*.json files found" + schema = load_json(topic_file) + assert schema.get("type") == "object", "Schema root 'type' must be 'object'" + props = schema.get("properties") + assert isinstance(props, dict), "Schema must define 'properties' object" + required = schema.get("required") + assert isinstance(required, list), "Schema must define 'required' list" + missing_props = [r for r in required if r not in props] + assert not missing_props, f"Required fields missing in properties: {missing_props}" + for name, definition in props.items(): + assert isinstance(definition, dict), f"Property {name} definition must be an object" + assert "type" in definition, f"Property {name} must specify a 'type'" diff --git a/tests/test_event_gate_lambda.py b/tests/test_event_gate_lambda.py index 9a60918..67f12da 100644 --- a/tests/test_event_gate_lambda.py +++ b/tests/test_event_gate_lambda.py @@ -1,9 +1,24 @@ +# +# Copyright 2025 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# import base64 import json -import unittest import importlib import sys import types +import pytest from unittest.mock import patch, MagicMock # Inject dummy confluent_kafka if not installed so patching works @@ -21,17 +36,23 @@ def flush(self): dummy_ck.Producer = DummyProducer sys.modules['confluent_kafka'] = dummy_ck -# Inject dummy psycopg2 (not needed but prevents optional import noise) +# Inject dummy psycopg2 (optional dependency) if 'psycopg2' not in sys.modules: - dummy_pg = types.ModuleType('psycopg2') - sys.modules['psycopg2'] = dummy_pg + sys.modules['psycopg2'] = types.ModuleType('psycopg2') -def load_event_gate_module(): - # Start patches before import to intercept side effects - mock_requests_get = patch('requests.get').start() +@pytest.fixture(scope="module") +def event_gate_module(): + started_patches = [] + + def start_patch(target): + p = patch(target) + started_patches.append(p) + return p.start() + + mock_requests_get = start_patch('requests.get') mock_requests_get.return_value.json.return_value = {"key": base64.b64encode(b'dummy_der').decode('utf-8')} - mock_load_key = patch('cryptography.hazmat.primitives.serialization.load_der_public_key').start() + mock_load_key = start_patch('cryptography.hazmat.primitives.serialization.load_der_public_key') mock_load_key.return_value = object() # Mock S3 access_config retrieval @@ -51,32 +72,30 @@ def Object(self, key): class MockS3Resource: def Bucket(self, name): return MockS3Bucket() - mock_session = patch('boto3.Session').start() + + mock_session = start_patch('boto3.Session') mock_session.return_value.resource.return_value = MockS3Resource() # Mock EventBridge client - mock_boto_client = patch('boto3.client').start() + mock_boto_client = start_patch('boto3.client') mock_events_client = MagicMock() mock_events_client.put_events.return_value = {"FailedEntryCount": 0} mock_boto_client.return_value = mock_events_client # Allow kafka producer patching (already stubbed) but still patch to inspect if 
needed - patch('confluent_kafka.Producer').start() + start_patch('confluent_kafka.Producer') module = importlib.import_module('src.event_gate_lambda') - return module -class TestEventGateLambda(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.event_gate_lambda = load_event_gate_module() + yield module - @classmethod - def tearDownClass(cls): - patch.stopall() + for p in started_patches: + p.stop() + patch.stopall() - # Helper to construct API Gateway like event - def make_event(self, resource, method='GET', body=None, topic=None, headers=None): +@pytest.fixture +def make_event(): + def _make(resource, method='GET', body=None, topic=None, headers=None): return { 'resource': resource, 'httpMethod': method, @@ -84,137 +103,130 @@ def make_event(self, resource, method='GET', body=None, topic=None, headers=None 'pathParameters': {'topic_name': topic} if topic else {}, 'body': json.dumps(body) if isinstance(body, dict) else body } - - def valid_payload(self): - return {"event_id":"e1","tenant_id":"t1","source_app":"app","environment":"dev","timestamp":123} - - # --- GET flows --- - def test_get_topics(self): - event = self.make_event('/topics') - resp = self.event_gate_lambda.lambda_handler(event, None) - self.assertEqual(resp['statusCode'], 200) + return _make + +@pytest.fixture +def valid_payload(): + return {"event_id":"e1","tenant_id":"t1","source_app":"app","environment":"dev","timestamp":123} + +# --- GET flows --- + +def test_get_topics(event_gate_module, make_event): + event = make_event('/topics') + resp = event_gate_module.lambda_handler(event, None) + assert resp['statusCode'] == 200 + body = json.loads(resp['body']) + assert 'public.cps.za.test' in body + +def test_get_topic_schema_found(event_gate_module, make_event): + event = make_event('/topics/{topic_name}', method='GET', topic='public.cps.za.test') + resp = event_gate_module.lambda_handler(event, None) + assert resp['statusCode'] == 200 + schema = json.loads(resp['body']) + assert 
schema['type'] == 'object' + +def test_get_topic_schema_not_found(event_gate_module, make_event): + event = make_event('/topics/{topic_name}', method='GET', topic='no.such.topic') + resp = event_gate_module.lambda_handler(event, None) + assert resp['statusCode'] == 404 + +# --- POST auth / validation failures --- + +def test_post_missing_token(event_gate_module, make_event, valid_payload): + event = make_event('/topics/{topic_name}', method='POST', topic='public.cps.za.test', body=valid_payload, headers={}) + resp = event_gate_module.lambda_handler(event, None) + assert resp['statusCode'] == 401 + body = json.loads(resp['body']) + assert not body['success'] + assert body['errors'][0]['type'] == 'auth' + +def test_post_unauthorized_user(event_gate_module, make_event, valid_payload): + with patch.object(event_gate_module.jwt, 'decode', return_value={'sub': 'NotAllowed'}, create=True): + event = make_event('/topics/{topic_name}', method='POST', topic='public.cps.za.test', body=valid_payload, headers={'Authorization':'Bearer token'}) + resp = event_gate_module.lambda_handler(event, None) + assert resp['statusCode'] == 403 + body = json.loads(resp['body']) + assert body['errors'][0]['type'] == 'auth' + +def test_post_schema_validation_error(event_gate_module, make_event): + payload = {"event_id":"e1","tenant_id":"t1","source_app":"app","environment":"dev"} # missing timestamp + with patch.object(event_gate_module.jwt, 'decode', return_value={'sub': 'TestUser'}, create=True): + event = make_event('/topics/{topic_name}', method='POST', topic='public.cps.za.test', body=payload, headers={'Authorization':'Bearer token'}) + resp = event_gate_module.lambda_handler(event, None) + assert resp['statusCode'] == 400 + body = json.loads(resp['body']) + assert body['errors'][0]['type'] == 'validation' + +# --- POST success & failure aggregation --- + +def test_post_success_all_writers(event_gate_module, make_event, valid_payload): + with patch.object(event_gate_module.jwt, 'decode', 
return_value={'sub': 'TestUser'}, create=True), \ + patch('src.event_gate_lambda.writer_kafka.write', return_value=(True, None)), \ + patch('src.event_gate_lambda.writer_eventbridge.write', return_value=(True, None)), \ + patch('src.event_gate_lambda.writer_postgres.write', return_value=(True, None)): + event = make_event('/topics/{topic_name}', method='POST', topic='public.cps.za.test', body=valid_payload, headers={'Authorization':'Bearer token'}) + resp = event_gate_module.lambda_handler(event, None) + assert resp['statusCode'] == 202 + body = json.loads(resp['body']) + assert body['success'] + assert body['statusCode'] == 202 + +def test_post_single_writer_failure(event_gate_module, make_event, valid_payload): + with patch.object(event_gate_module.jwt, 'decode', return_value={'sub': 'TestUser'}, create=True), \ + patch('src.event_gate_lambda.writer_kafka.write', return_value=(False, 'Kafka boom')), \ + patch('src.event_gate_lambda.writer_eventbridge.write', return_value=(True, None)), \ + patch('src.event_gate_lambda.writer_postgres.write', return_value=(True, None)): + event = make_event('/topics/{topic_name}', method='POST', topic='public.cps.za.test', body=valid_payload, headers={'Authorization':'Bearer token'}) + resp = event_gate_module.lambda_handler(event, None) + assert resp['statusCode'] == 500 body = json.loads(resp['body']) - self.assertIn('public.cps.za.test', body) - - def test_get_topic_schema_found(self): - event = self.make_event('/topics/{topic_name}', method='GET', topic='public.cps.za.test') - resp = self.event_gate_lambda.lambda_handler(event, None) - self.assertEqual(resp['statusCode'], 200) - schema = json.loads(resp['body']) - self.assertEqual(schema['type'], 'object') - - def test_get_topic_schema_not_found(self): - event = self.make_event('/topics/{topic_name}', method='GET', topic='no.such.topic') - resp = self.event_gate_lambda.lambda_handler(event, None) - self.assertEqual(resp['statusCode'], 404) - - # --- POST auth / validation 
failures --- - def test_post_missing_token(self): - payload = self.valid_payload() - event = self.make_event('/topics/{topic_name}', method='POST', topic='public.cps.za.test', body=payload, headers={}) - resp = self.event_gate_lambda.lambda_handler(event, None) - self.assertEqual(resp['statusCode'], 401) + assert not body['success'] + assert len(body['errors']) == 1 + assert body['errors'][0]['type'] == 'kafka' + +def test_post_multiple_writer_failures(event_gate_module, make_event, valid_payload): + with patch.object(event_gate_module.jwt, 'decode', return_value={'sub': 'TestUser'}, create=True), \ + patch('src.event_gate_lambda.writer_kafka.write', return_value=(False, 'Kafka A')), \ + patch('src.event_gate_lambda.writer_eventbridge.write', return_value=(False, 'EB B')), \ + patch('src.event_gate_lambda.writer_postgres.write', return_value=(True, None)): + event = make_event('/topics/{topic_name}', method='POST', topic='public.cps.za.test', body=valid_payload, headers={'Authorization':'Bearer token'}) + resp = event_gate_module.lambda_handler(event, None) + assert resp['statusCode'] == 500 body = json.loads(resp['body']) - self.assertFalse(body['success']) - self.assertEqual(body['errors'][0]['type'], 'auth') - - def test_post_unauthorized_user(self): - payload = self.valid_payload() - with patch.object(self.event_gate_lambda.jwt, 'decode', return_value={'sub': 'NotAllowed'}, create=True): - event = self.make_event('/topics/{topic_name}', method='POST', topic='public.cps.za.test', body=payload, headers={'Authorization':'Bearer token'}) - resp = self.event_gate_lambda.lambda_handler(event, None) - self.assertEqual(resp['statusCode'], 403) - body = json.loads(resp['body']) - self.assertEqual(body['errors'][0]['type'], 'auth') - - def test_post_schema_validation_error(self): - payload = {"event_id":"e1","tenant_id":"t1","source_app":"app","environment":"dev"} # missing timestamp - with patch.object(self.event_gate_lambda.jwt, 'decode', return_value={'sub': 
'TestUser'}, create=True): - event = self.make_event('/topics/{topic_name}', method='POST', topic='public.cps.za.test', body=payload, headers={'Authorization':'Bearer token'}) - resp = self.event_gate_lambda.lambda_handler(event, None) - self.assertEqual(resp['statusCode'], 400) - body = json.loads(resp['body']) - self.assertEqual(body['errors'][0]['type'], 'validation') - - # --- POST success & failure aggregation --- - def test_post_success_all_writers(self): - payload = self.valid_payload() - with patch.object(self.event_gate_lambda.jwt, 'decode', return_value={'sub': 'TestUser'}, create=True), \ - patch('src.event_gate_lambda.writer_kafka.write', return_value=(True, None)), \ - patch('src.event_gate_lambda.writer_eventbridge.write', return_value=(True, None)), \ - patch('src.event_gate_lambda.writer_postgres.write', return_value=(True, None)): - event = self.make_event('/topics/{topic_name}', method='POST', topic='public.cps.za.test', body=payload, headers={'Authorization':'Bearer token'}) - resp = self.event_gate_lambda.lambda_handler(event, None) - self.assertEqual(resp['statusCode'], 202) - body = json.loads(resp['body']) - self.assertTrue(body['success']) - self.assertEqual(body['statusCode'], 202) - - def test_post_single_writer_failure(self): - payload = self.valid_payload() - with patch.object(self.event_gate_lambda.jwt, 'decode', return_value={'sub': 'TestUser'}, create=True), \ - patch('src.event_gate_lambda.writer_kafka.write', return_value=(False, 'Kafka boom')), \ - patch('src.event_gate_lambda.writer_eventbridge.write', return_value=(True, None)), \ - patch('src.event_gate_lambda.writer_postgres.write', return_value=(True, None)): - event = self.make_event('/topics/{topic_name}', method='POST', topic='public.cps.za.test', body=payload, headers={'Authorization':'Bearer token'}) - resp = self.event_gate_lambda.lambda_handler(event, None) - self.assertEqual(resp['statusCode'], 500) - body = json.loads(resp['body']) - self.assertFalse(body['success']) 
- self.assertEqual(len(body['errors']), 1) - self.assertEqual(body['errors'][0]['type'], 'kafka') - - def test_post_multiple_writer_failures(self): - payload = self.valid_payload() - with patch.object(self.event_gate_lambda.jwt, 'decode', return_value={'sub': 'TestUser'}, create=True), \ - patch('src.event_gate_lambda.writer_kafka.write', return_value=(False, 'Kafka A')), \ - patch('src.event_gate_lambda.writer_eventbridge.write', return_value=(False, 'EB B')), \ - patch('src.event_gate_lambda.writer_postgres.write', return_value=(True, None)): - event = self.make_event('/topics/{topic_name}', method='POST', topic='public.cps.za.test', body=payload, headers={'Authorization':'Bearer token'}) - resp = self.event_gate_lambda.lambda_handler(event, None) - self.assertEqual(resp['statusCode'], 500) - body = json.loads(resp['body']) - self.assertEqual(len(body['errors']), 2) - types = sorted(e['type'] for e in body['errors']) - self.assertEqual(types, ['eventbridge', 'kafka']) - - def test_token_extraction_lowercase_bearer_header(self): - payload = self.valid_payload() - with patch.object(self.event_gate_lambda.jwt, 'decode', return_value={'sub': 'TestUser'}, create=True), \ - patch('src.event_gate_lambda.writer_kafka.write', return_value=(True, None)), \ - patch('src.event_gate_lambda.writer_eventbridge.write', return_value=(True, None)), \ - patch('src.event_gate_lambda.writer_postgres.write', return_value=(True, None)): - event = self.make_event('/topics/{topic_name}', method='POST', topic='public.cps.za.test', body=payload, headers={'bearer':'token'}) - resp = self.event_gate_lambda.lambda_handler(event, None) - self.assertEqual(resp['statusCode'], 202) - - def test_unknown_resource(self): - event = self.make_event('/unknown') - resp = self.event_gate_lambda.lambda_handler(event, None) - self.assertEqual(resp['statusCode'], 404) + assert sorted(e['type'] for e in body['errors']) == ['eventbridge', 'kafka'] + +def 
test_token_extraction_lowercase_bearer_header(event_gate_module, make_event, valid_payload): + with patch.object(event_gate_module.jwt, 'decode', return_value={'sub': 'TestUser'}, create=True), \ + patch('src.event_gate_lambda.writer_kafka.write', return_value=(True, None)), \ + patch('src.event_gate_lambda.writer_eventbridge.write', return_value=(True, None)), \ + patch('src.event_gate_lambda.writer_postgres.write', return_value=(True, None)): + event = make_event('/topics/{topic_name}', method='POST', topic='public.cps.za.test', body=valid_payload, headers={'bearer':'token'}) + resp = event_gate_module.lambda_handler(event, None) + assert resp['statusCode'] == 202 + +def test_unknown_resource(event_gate_module, make_event): + event = make_event('/unknown') + resp = event_gate_module.lambda_handler(event, None) + assert resp['statusCode'] == 404 + body = json.loads(resp['body']) + assert body['errors'][0]['type'] == 'route' + +def test_get_api_endpoint(event_gate_module, make_event): + event = make_event('/api') + resp = event_gate_module.lambda_handler(event, None) + assert resp['statusCode'] == 200 + assert 'openapi' in resp['body'].lower() + +def test_get_token_endpoint(event_gate_module, make_event): + event = make_event('/token') + resp = event_gate_module.lambda_handler(event, None) + assert resp['statusCode'] == 303 + assert 'Location' in resp['headers'] + +def test_internal_error_path(event_gate_module, make_event): + with patch('src.event_gate_lambda.get_topics', side_effect=RuntimeError('boom')): + event = make_event('/topics') + resp = event_gate_module.lambda_handler(event, None) + assert resp['statusCode'] == 500 body = json.loads(resp['body']) - self.assertEqual(body['errors'][0]['type'], 'route') - - def test_get_api_endpoint(self): - event = self.make_event('/api') - resp = self.event_gate_lambda.lambda_handler(event, None) - self.assertEqual(resp['statusCode'], 200) - self.assertIn('openapi', resp['body'].lower()) # crude check - - def 
test_get_token_endpoint(self): - event = self.make_event('/token') - resp = self.event_gate_lambda.lambda_handler(event, None) - self.assertEqual(resp['statusCode'], 303) - self.assertIn('Location', resp['headers']) - - def test_internal_error_path(self): - # Force exception by patching get_topics to raise - with patch('src.event_gate_lambda.get_topics', side_effect=RuntimeError('boom')): - event = self.make_event('/topics') - resp = self.event_gate_lambda.lambda_handler(event, None) - self.assertEqual(resp['statusCode'], 500) - body = json.loads(resp['body']) - self.assertEqual(body['errors'][0]['type'], 'internal') - -if __name__ == '__main__': - unittest.main() + assert body['errors'][0]['type'] == 'internal' diff --git a/tests/test_writer_postgres.py b/tests/test_writer_postgres.py index 84ad7d7..b8b7893 100644 --- a/tests/test_writer_postgres.py +++ b/tests/test_writer_postgres.py @@ -1,9 +1,30 @@ +# +# Copyright 2025 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# import json import logging -import unittest +import os +import types +import pytest +from unittest.mock import patch from src import writer_postgres +@pytest.fixture(scope="module", autouse=True) +def init_module_logger(): + writer_postgres.init(logging.getLogger("test")) class MockCursor: def __init__(self): @@ -11,102 +32,203 @@ def __init__(self): def execute(self, sql, params): self.executions.append((sql.strip(), params)) -class TestWriterPostgres(unittest.TestCase): - @classmethod - def setUpClass(cls): - # Initialize logger and module (will skip DB because no env vars) - writer_postgres.init(logging.getLogger("test")) - - def test_postgres_edla_write_with_optional_fields(self): - cur = MockCursor() - message = { - "event_id": "e1", - "tenant_id": "t1", - "source_app": "app", - "source_app_version": "1.0.0", - "environment": "dev", - "timestamp_event": 111, - "catalog_id": "db.tbl", - "operation": "append", - "location": "s3://bucket/path", - "format": "parquet", - "format_options": {"compression": "snappy"}, - "additional_info": {"foo": "bar"} - } - writer_postgres.postgres_edla_write(cur, "table_a", message) - self.assertEqual(len(cur.executions), 1) - sql, params = cur.executions[0] - # Ensure we inserted 12 params per columns list - self.assertEqual(len(params), 12) - self.assertEqual(params[0], "e1") - self.assertEqual(params[8], "s3://bucket/path") # location - self.assertEqual(params[9], "parquet") - self.assertEqual(json.loads(params[10]), {"compression": "snappy"}) - self.assertEqual(json.loads(params[11]), {"foo": "bar"}) - - def test_postgres_edla_write_missing_optional(self): - cur = MockCursor() - message = { - "event_id": "e2", - "tenant_id": "t2", - "source_app": "app", - "source_app_version": "1.0.1", - "environment": "dev", - "timestamp_event": 222, - "catalog_id": "db.tbl2", - "operation": "overwrite", - "format": "delta" - } - writer_postgres.postgres_edla_write(cur, "table_a", message) - sql, params = cur.executions[0] - # location, 
format_options, additional_info -> None - self.assertIsNone(params[8]) - self.assertEqual(params[9], "delta") - self.assertIsNone(params[10]) - self.assertIsNone(params[11]) - - def test_postgres_run_write(self): - cur = MockCursor() - message = { - "event_id": "r1", - "job_ref": "job-123", - "tenant_id": "ten", - "source_app": "runapp", - "source_app_version": "2.0.0", - "environment": "dev", - "timestamp_start": 1000, - "timestamp_end": 2000, - "jobs": [ - {"catalog_id": "c1", "status": "succeeded", "timestamp_start": 1100, "timestamp_end": 1200}, - {"catalog_id": "c2", "status": "failed", "timestamp_start": 1300, "timestamp_end": 1400, "message": "err", "additional_info": {"k": "v"}} - ] - } - writer_postgres.postgres_run_write(cur, "runs_table", "jobs_table", message) - # 1 insert for run + 2 inserts for jobs - self.assertEqual(len(cur.executions), 3) - run_sql, run_params = cur.executions[0] - self.assertIn("source_app_version", run_sql) # ensure fixed column name present implicitly - self.assertEqual(run_params[3], "runapp") # source_app param - job2_sql, job2_params = cur.executions[2] - self.assertEqual(job2_params[5], "err") - self.assertEqual(json.loads(job2_params[6]), {"k": "v"}) - - def test_postgres_test_write(self): - cur = MockCursor() - message = { - "event_id": "t1", - "tenant_id": "tenant-x", - "source_app": "test", - "environment": "dev", - "timestamp": 999, - "additional_info": {"a": 1} - } - writer_postgres.postgres_test_write(cur, "table_test", message) - self.assertEqual(len(cur.executions), 1) - sql, params = cur.executions[0] - self.assertEqual(params[0], "t1") - self.assertEqual(params[1], "tenant-x") - self.assertEqual(json.loads(params[5]), {"a": 1}) - -if __name__ == '__main__': - unittest.main() +# --- Insert helpers --- + +def test_postgres_edla_write_with_optional_fields(): + cur = MockCursor() + message = { + "event_id": "e1", + "tenant_id": "t1", + "source_app": "app", + "source_app_version": "1.0.0", + "environment": "dev", + 
"timestamp_event": 111, + "catalog_id": "db.tbl", + "operation": "append", + "location": "s3://bucket/path", + "format": "parquet", + "format_options": {"compression": "snappy"}, + "additional_info": {"foo": "bar"} + } + writer_postgres.postgres_edla_write(cur, "table_a", message) + assert len(cur.executions) == 1 + _sql, params = cur.executions[0] + assert len(params) == 12 + assert params[0] == "e1" + assert params[8] == "s3://bucket/path" + assert params[9] == "parquet" + assert json.loads(params[10]) == {"compression": "snappy"} + assert json.loads(params[11]) == {"foo": "bar"} + +def test_postgres_edla_write_missing_optional(): + cur = MockCursor() + message = { + "event_id": "e2", + "tenant_id": "t2", + "source_app": "app", + "source_app_version": "1.0.1", + "environment": "dev", + "timestamp_event": 222, + "catalog_id": "db.tbl2", + "operation": "overwrite", + "format": "delta" + } + writer_postgres.postgres_edla_write(cur, "table_a", message) + _sql, params = cur.executions[0] + assert params[8] is None + assert params[9] == "delta" + assert params[10] is None + assert params[11] is None + +def test_postgres_run_write(): + cur = MockCursor() + message = { + "event_id": "r1", + "job_ref": "job-123", + "tenant_id": "ten", + "source_app": "runapp", + "source_app_version": "2.0.0", + "environment": "dev", + "timestamp_start": 1000, + "timestamp_end": 2000, + "jobs": [ + {"catalog_id": "c1", "status": "succeeded", "timestamp_start": 1100, "timestamp_end": 1200}, + {"catalog_id": "c2", "status": "failed", "timestamp_start": 1300, "timestamp_end": 1400, "message": "err", "additional_info": {"k": "v"}} + ] + } + writer_postgres.postgres_run_write(cur, "runs_table", "jobs_table", message) + assert len(cur.executions) == 3 + run_sql, run_params = cur.executions[0] + assert "source_app_version" in run_sql + assert run_params[3] == "runapp" + _job2_sql, job2_params = cur.executions[2] + assert job2_params[5] == "err" + assert json.loads(job2_params[6]) == {"k": "v"} + 
+def test_postgres_test_write(): + cur = MockCursor() + message = { + "event_id": "t1", + "tenant_id": "tenant-x", + "source_app": "test", + "environment": "dev", + "timestamp": 999, + "additional_info": {"a": 1} + } + writer_postgres.postgres_test_write(cur, "table_test", message) + assert len(cur.executions) == 1 + _sql, params = cur.executions[0] + assert params[0] == "t1" + assert params[1] == "tenant-x" + assert json.loads(params[5]) == {"a": 1} + +# --- write() behavioral paths --- + +@pytest.fixture +def reset_state(monkeypatch): + # Preserve psycopg2 ref + original_psycopg2 = writer_postgres.psycopg2 + writer_postgres.POSTGRES = {"database": ""} + yield + writer_postgres.psycopg2 = original_psycopg2 + writer_postgres.POSTGRES = {"database": ""} + os.environ.pop("POSTGRES_SECRET_NAME", None) + os.environ.pop("POSTGRES_SECRET_REGION", None) + +class DummyCursor: + def __init__(self, store): + self.store = store + def execute(self, sql, params): + self.store.append((sql, params)) + def __enter__(self): + return self + def __exit__(self, exc_type, exc, tb): + return False + +class DummyConnection: + def __init__(self, store): + self.commit_called = False + self.store = store + def cursor(self): + return DummyCursor(self.store) + def commit(self): + self.commit_called = True + def __enter__(self): + return self + def __exit__(self, exc_type, exc, tb): + return False + +class DummyPsycopg: + def __init__(self, store): + self.store = store + def connect(self, **kwargs): + return DummyConnection(self.store) + +def test_write_skips_when_no_database(reset_state): + writer_postgres.POSTGRES = {"database": ""} + ok, err = writer_postgres.write("public.cps.za.test", {}) + assert ok and err is None + +def test_write_skips_when_psycopg2_missing(reset_state, monkeypatch): + writer_postgres.POSTGRES = {"database": "db", "host": "h", "user": "u", "password": "p", "port": 5432} + monkeypatch.setattr(writer_postgres, "psycopg2", None) + ok, err = 
writer_postgres.write("public.cps.za.test", {}) + assert ok and err is None + +def test_write_unknown_topic_returns_false(reset_state, monkeypatch): + store = [] + monkeypatch.setattr(writer_postgres, "psycopg2", DummyPsycopg(store)) + writer_postgres.POSTGRES = {"database": "db", "host": "h", "user": "u", "password": "p", "port": 5432} + ok, err = writer_postgres.write("public.cps.za.unknown", {}) + assert not ok and "unknown topic" in err + +def test_write_success_known_topic(reset_state, monkeypatch): + store = [] + monkeypatch.setattr(writer_postgres, "psycopg2", DummyPsycopg(store)) + writer_postgres.POSTGRES = {"database": "db", "host": "h", "user": "u", "password": "p", "port": 5432} + message = {"event_id": "id", "tenant_id": "ten", "source_app": "app", "environment": "dev", "timestamp": 123} + ok, err = writer_postgres.write("public.cps.za.test", message) + assert ok and err is None and len(store) == 1 + +def test_write_exception_returns_false(reset_state, monkeypatch): + class FailingPsycopg: + def connect(self, **kwargs): + raise RuntimeError("boom") + monkeypatch.setattr(writer_postgres, "psycopg2", FailingPsycopg()) + writer_postgres.POSTGRES = {"database": "db", "host": "h", "user": "u", "password": "p", "port": 5432} + ok, err = writer_postgres.write("public.cps.za.test", {}) + assert not ok and "failed unknown error" in err + +def test_init_with_secret(monkeypatch, reset_state): + secret_dict = {"database": "db", "host": "h", "user": "u", "password": "p", "port": 5432} + os.environ["POSTGRES_SECRET_NAME"] = "mysecret" + os.environ["POSTGRES_SECRET_REGION"] = "eu-west-1" + mock_client = types.SimpleNamespace(get_secret_value=lambda SecretId: {"SecretString": json.dumps(secret_dict)}) + class MockSession: + def client(self, service_name, region_name): + return mock_client + monkeypatch.setattr(writer_postgres.boto3, "Session", lambda: MockSession()) + writer_postgres.init(logging.getLogger("test")) + assert writer_postgres.POSTGRES == secret_dict + 
+def test_write_dlchange_success(reset_state, monkeypatch): + store = [] + monkeypatch.setattr(writer_postgres, "psycopg2", DummyPsycopg(store)) + writer_postgres.POSTGRES = {"database": "db", "host": "h", "user": "u", "password": "p", "port": 5432} + message = { + "event_id":"e1","tenant_id":"t","source_app":"app","source_app_version":"1","environment":"dev", + "timestamp_event":1,"catalog_id":"c","operation":"append","format":"parquet" + } + ok, err = writer_postgres.write("public.cps.za.dlchange", message) + assert ok and err is None and len(store) == 1 + +def test_write_runs_success(reset_state, monkeypatch): + store = [] + monkeypatch.setattr(writer_postgres, "psycopg2", DummyPsycopg(store)) + writer_postgres.POSTGRES = {"database": "db", "host": "h", "user": "u", "password": "p", "port": 5432} + message = { + "event_id":"r1","job_ref":"job","tenant_id":"t","source_app":"app","source_app_version":"1","environment":"dev", + "timestamp_start":1,"timestamp_end":2,"jobs":[{"catalog_id":"c","status":"ok","timestamp_start":1,"timestamp_end":2}] + } + ok, err = writer_postgres.write("public.cps.za.runs", message) + assert ok and err is None and len(store) == 2 # run + job insert From fca8811290d44efa6c59f1e5639773d104712f63 Mon Sep 17 00:00:00 2001 From: Oto Macenauer Date: Tue, 9 Sep 2025 10:32:30 +0200 Subject: [PATCH 04/12] Add CodeRabbit recommendations --- .github/workflows/test.yml | 3 - DEVELOPER.md | 16 +- pyproject.toml | 2 +- requirements.txt | 2 +- src/event_gate_lambda.py | 43 +++-- src/writer_eventbridge.py | 31 ++-- src/writer_kafka.py | 5 +- src/writer_postgres.py | 36 +---- tests/conftest.py | 3 +- tests/test_conf_validation.py | 11 +- tests/test_event_gate_lambda.py | 274 ++++++++++++++++++++------------ tests/test_writer_postgres.py | 71 +++++++-- 12 files changed, 312 insertions(+), 185 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 920f172..1228bf8 100644 --- a/.github/workflows/test.yml +++ 
b/.github/workflows/test.yml
@@ -87,9 +87,6 @@ jobs:
         run: |
           pip install -r requirements.txt
 
-      - name: Set PYTHONPATH environment variable
-        run: echo "PYTHONPATH=${GITHUB_WORKSPACE}/release_notes_generator/release_notes_generator" >> $GITHUB_ENV
-
       - name: Check code coverage with Pytest
         run: pytest --cov=. -v tests/ --cov-fail-under=80
 
diff --git a/DEVELOPER.md b/DEVELOPER.md
index 8012ce9..fd7dbd0 100644
--- a/DEVELOPER.md
+++ b/DEVELOPER.md
@@ -14,7 +14,7 @@ Clone the repository and navigate to the project directory:
 
 ```shell
 git clone https://github.com/AbsaOSS/EventGate.git
-cd event-gate
+cd EventGate
 ```
 
 ## Set Up Python Environment
@@ -75,23 +75,23 @@ All done! ✨ 🍰 ✨
 
 ## Run mypy Tool Locally
 
-This project uses the [my[py]](https://mypy.readthedocs.io/en/stable/) tool, a static type checker for Python.
+This project uses the [mypy](https://mypy.readthedocs.io/en/stable/) tool, a static type checker for Python.
 
 > Type checkers help ensure that you correctly use variables and functions in your code.
 > With mypy, add type hints (PEP 484) to your Python programs,
 > and mypy will warn you when you use those types incorrectly.
 
-my[py] configuration is in `pyproject.toml` file.
+mypy configuration is in `pyproject.toml` file.
 
-Follow these steps to format your code with my[py] locally:
+Follow these steps to type-check your code with mypy locally:
 
-### Run my[py]
+### Run mypy
 
-Run my[py] on all files in the project.
+Run mypy on all files in the project.
 
 ```shell
 mypy .
 ```
 
-To run my[py] check on a specific file, follow the pattern `mypy <path>/<file>.py --check-untyped-defs`.
+To run mypy on a specific file, follow the pattern `mypy <path>/<file>.py --check-untyped-defs`.
Example:
 
 ```shell
@@ -124,4 +124,4 @@ See the coverage report on the path:
 open htmlcov/index.html
 ```
 
-## 
+##
diff --git a/pyproject.toml b/pyproject.toml
index cdd2cd8..1c721ab 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
 [tool.black]
 line-length = 120
 target-version = ['py311']
-force-exclude = '''test'''
+# force-exclude = '''test'''
 
 [tool.coverage.run]
 omit = ["tests/*"]
diff --git a/requirements.txt b/requirements.txt
index a4ccb09..d99961f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -11,5 +11,5 @@ jsonschema==4.25.1
 PyJWT==2.10.1
 requests==2.32.5
 boto3==1.40.25
-confluent_kafka
+confluent-kafka==2.11.1
 psycopg2-binary==2.9.10
\ No newline at end of file
diff --git a/src/event_gate_lambda.py b/src/event_gate_lambda.py
index 62760ca..5d5949c 100644
--- a/src/event_gate_lambda.py
+++ b/src/event_gate_lambda.py
@@ -50,9 +50,7 @@ logger.debug("Initialized LOGGER")
 logger.debug(f"Using CONF_DIR={_CONF_DIR}")
 
 if _INVALID_CONF_ENV:
-    logger.warning(
-        f"CONF_DIR env var set to non-existent path: {_INVALID_CONF_ENV}; fell back to {_CONF_DIR}"
-    )
+    logger.warning(f"CONF_DIR env var set to non-existent path: {_INVALID_CONF_ENV}; fell back to {_CONF_DIR}")
 
 # Resolve project root (parent directory of this file's directory)
 _PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
@@ -210,17 +208,38 @@ def post_topic_message(topic_name: str, topic_message: Dict[str, Any], token_enc
 
 
 def extract_token(event_headers: Dict[str, str]) -> str:
-    """Extract bearer token from headers.
+    """Extract bearer token from headers (case-insensitive).
 
-    Supports lowercase custom 'bearer' header or standard 'Authorization: Bearer <token>'.
-    Returns empty string if not present (caller handles auth error response).
+    Supports:
+    - Custom 'bearer' header (any casing) whose value is the raw token
+    - Standard 'Authorization: Bearer <token>' header (case-insensitive scheme & key)
+    Returns empty string if token not found or malformed.
""" - if "bearer" in event_headers: - return event_headers["bearer"] - auth_header = event_headers.get("Authorization", "") - if auth_header.startswith("Bearer "): - return auth_header[len("Bearer ") :] - return "" + if not event_headers: + return "" + + # Normalize keys to lowercase for case-insensitive lookup + lowered = {str(k).lower(): v for k, v in event_headers.items()} + + # Direct bearer header (raw token) + if "bearer" in lowered and isinstance(lowered["bearer"], str): + token_candidate = lowered["bearer"].strip() + if token_candidate: + return token_candidate + + # Authorization header with Bearer scheme + auth_val = lowered.get("authorization", "") + if not isinstance(auth_val, str): # defensive + return "" + auth_val = auth_val.strip() + if not auth_val: + return "" + + # Case-insensitive match for 'Bearer ' prefix + if not auth_val.lower().startswith("bearer "): + return "" + token_part = auth_val[7:].strip() # len('Bearer ')==7 + return token_part def lambda_handler(event: Dict[str, Any], context: Any): # pylint: disable=unused-argument,too-many-return-statements diff --git a/src/writer_eventbridge.py b/src/writer_eventbridge.py index cb356a6..13dfab5 100644 --- a/src/writer_eventbridge.py +++ b/src/writer_eventbridge.py @@ -5,7 +5,7 @@ import json import logging -from typing import Any, Dict, Optional, Tuple +from typing import Any, Dict, Optional, Tuple, List import boto3 from botocore.exceptions import BotoCoreError, ClientError @@ -26,6 +26,12 @@ def init(logger: logging.Logger, config: Dict[str, Any]) -> None: STATE["logger"].debug("Initialized EVENTBRIDGE writer") +def _format_failed_entries(entries: List[Dict[str, Any]]) -> str: + failed = [e for e in entries if "ErrorCode" in e or "ErrorMessage" in e] + # Keep message concise but informative + return json.dumps(failed) if failed else "[]" + + def write(topic_name: str, message: Dict[str, Any]) -> Tuple[bool, Optional[str]]: """Publish a message to EventBridge. 
@@ -58,21 +64,16 @@ def write(topic_name: str, message: Dict[str, Any]) -> Tuple[bool, Optional[str] } ] ) - if response.get("FailedEntryCount", 0) > 0: - msg = str(response) + failed_count = response.get("FailedEntryCount", 0) + if failed_count > 0: + entries = response.get("Entries", []) + failed_repr = _format_failed_entries(entries) + msg = f"{failed_count} EventBridge entries failed: {failed_repr}" logger.error(msg) return False, msg - except (BotoCoreError, ClientError) as err: - err_msg = f"The EventBridge writer failed: {err}" # specific AWS error - logger.error(err_msg) - return False, err_msg - except Exception as err: # pragma: no cover - unexpected failure path - err_msg = ( - f"The EventBridge writer failed with unknown error: {err}" - if not isinstance(err, (BotoCoreError, ClientError)) - else str(err) - ) - logger.error(err_msg) - return False, err_msg + except (BotoCoreError, ClientError) as err: # explicit AWS client-related errors + logger.exception("EventBridge put_events call failed") + return False, str(err) + # Let any unexpected exception propagate for upstream handler (avoids broad except BLE001 / TRY400) return True, None diff --git a/src/writer_kafka.py b/src/writer_kafka.py index e411171..e0717f2 100644 --- a/src/writer_kafka.py +++ b/src/writer_kafka.py @@ -15,6 +15,7 @@ _logger: logging.Logger = logging.getLogger(__name__) kafka_producer: Optional[Producer] = None + def init(logger: logging.Logger, config: Dict[str, Any]) -> None: """Initialize Kafka producer. 
@@ -67,9 +68,7 @@ def write(topic_name: str, message: Dict[str, Any]) -> Tuple[bool, Optional[str] topic_name, key="", value=json.dumps(message).encode("utf-8"), - callback=lambda err, msg: ( - errors.append(str(err)) if err is not None else None - ), + callback=lambda err, msg: (errors.append(str(err)) if err is not None else None), ) kafka_producer.flush() if errors: diff --git a/src/writer_postgres.py b/src/writer_postgres.py index a9138a2..14bcf93 100644 --- a/src/writer_postgres.py +++ b/src/writer_postgres.py @@ -35,12 +35,8 @@ def init(logger: logging.Logger) -> None: secret_region = os.environ.get("POSTGRES_SECRET_REGION", "") if secret_name and secret_region: - aws_secrets = boto3.Session().client( - service_name="secretsmanager", region_name=secret_region - ) - postgres_secret = aws_secrets.get_secret_value(SecretId=secret_name)[ - "SecretString" - ] + aws_secrets = boto3.Session().client(service_name="secretsmanager", region_name=secret_region) + postgres_secret = aws_secrets.get_secret_value(SecretId=secret_name)["SecretString"] POSTGRES = json.loads(postgres_secret) else: POSTGRES = {"database": ""} @@ -100,16 +96,8 @@ def postgres_edla_write(cursor, table: str, message: Dict[str, Any]) -> None: message["operation"], message.get("location"), message["format"], - ( - json.dumps(message.get("format_options")) - if "format_options" in message - else None - ), - ( - json.dumps(message.get("additional_info")) - if "additional_info" in message - else None - ), + (json.dumps(message.get("format_options")) if "format_options" in message else None), + (json.dumps(message.get("additional_info")) if "additional_info" in message else None), ), ) @@ -190,11 +178,7 @@ def postgres_run_write(cursor, table_runs: str, table_jobs: str, message: Dict[s job["timestamp_start"], job["timestamp_end"], job.get("message"), - ( - json.dumps(job.get("additional_info")) - if "additional_info" in job - else None - ), + (json.dumps(job.get("additional_info")) if "additional_info" 
in job else None), ), ) @@ -234,11 +218,7 @@ def postgres_test_write(cursor, table: str, message: Dict[str, Any]) -> None: message["source_app"], message["environment"], message["timestamp"], - ( - json.dumps(message.get("additional_info")) - if "additional_info" in message - else None - ), + (json.dumps(message.get("additional_info")) if "additional_info" in message else None), ), ) @@ -271,9 +251,7 @@ def write(topic_name: str, message: Dict[str, Any]) -> Tuple[bool, Optional[str] if topic_name == "public.cps.za.dlchange": postgres_edla_write(cursor, "public_cps_za_dlchange", message) elif topic_name == "public.cps.za.runs": - postgres_run_write( - cursor, "public_cps_za_runs", "public_cps_za_runs_jobs", message - ) + postgres_run_write(cursor, "public_cps_za_runs", "public_cps_za_runs_jobs", message) elif topic_name == "public.cps.za.test": postgres_test_write(cursor, "public_cps_za_test", message) else: diff --git a/tests/conftest.py b/tests/conftest.py index 5dd9811..3ee2e24 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -14,7 +14,8 @@ # limitations under the License. 
# import os, sys + # Ensure project root is on sys.path so 'src' package is importable during tests -PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) +PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) if PROJECT_ROOT not in sys.path: sys.path.insert(0, PROJECT_ROOT) diff --git a/tests/test_conf_validation.py b/tests/test_conf_validation.py index df153b4..01db885 100644 --- a/tests/test_conf_validation.py +++ b/tests/test_conf_validation.py @@ -1,4 +1,5 @@ import os + # # Copyright 2025 ABSA Group Limited # @@ -28,25 +29,26 @@ "event_bus_arn", } + def load_json(path): with open(path, "r") as f: return json.load(f) + @pytest.fixture(scope="module") def config_files(): - files = [ - f for f in glob(os.path.join(CONF_DIR, "config*.json")) - if os.path.basename(f) not in {"access.json"} - ] + files = [f for f in glob(os.path.join(CONF_DIR, "config*.json")) if os.path.basename(f) not in {"access.json"}] assert files, "No config files found matching pattern config*.json" return files + @pytest.mark.parametrize("key", sorted(REQUIRED_CONFIG_KEYS)) def test_config_files_have_required_keys(config_files, key): for path in config_files: data = load_json(path) assert key in data, f"Config {path} missing key: {key}" + def test_access_json_structure(): path = os.path.join(CONF_DIR, "access.json") data = load_json(path) @@ -56,6 +58,7 @@ def test_access_json_structure(): assert isinstance(users, list), f"Topic {topic} value must be a list of users" assert all(isinstance(u, str) for u in users), f"All users for topic {topic} must be strings" + @pytest.mark.parametrize("topic_file", glob(os.path.join(CONF_DIR, "topic_*.json"))) def test_topic_json_schemas_basic(topic_file): assert topic_file, "No topic_*.json files found" diff --git a/tests/test_event_gate_lambda.py b/tests/test_event_gate_lambda.py index 67f12da..fb60e2a 100644 --- a/tests/test_event_gate_lambda.py +++ b/tests/test_event_gate_lambda.py @@ -22,23 +22,28 @@ 
from unittest.mock import patch, MagicMock # Inject dummy confluent_kafka if not installed so patching works -if 'confluent_kafka' not in sys.modules: - dummy_ck = types.ModuleType('confluent_kafka') +if "confluent_kafka" not in sys.modules: + dummy_ck = types.ModuleType("confluent_kafka") + class DummyProducer: # minimal interface def __init__(self, *a, **kw): pass + def produce(self, *a, **kw): - cb = kw.get('callback') + cb = kw.get("callback") if cb: cb(None, None) + def flush(self): return None + dummy_ck.Producer = DummyProducer - sys.modules['confluent_kafka'] = dummy_ck + sys.modules["confluent_kafka"] = dummy_ck # Inject dummy psycopg2 (optional dependency) -if 'psycopg2' not in sys.modules: - sys.modules['psycopg2'] = types.ModuleType('psycopg2') +if "psycopg2" not in sys.modules: + sys.modules["psycopg2"] = types.ModuleType("psycopg2") + @pytest.fixture(scope="module") def event_gate_module(): @@ -49,43 +54,48 @@ def start_patch(target): started_patches.append(p) return p.start() - mock_requests_get = start_patch('requests.get') - mock_requests_get.return_value.json.return_value = {"key": base64.b64encode(b'dummy_der').decode('utf-8')} + mock_requests_get = start_patch("requests.get") + mock_requests_get.return_value.json.return_value = {"key": base64.b64encode(b"dummy_der").decode("utf-8")} - mock_load_key = start_patch('cryptography.hazmat.primitives.serialization.load_der_public_key') + mock_load_key = start_patch("cryptography.hazmat.primitives.serialization.load_der_public_key") mock_load_key.return_value = object() # Mock S3 access_config retrieval class MockS3ObjectBody: def read(self): - return json.dumps({ - "public.cps.za.runs": ["FooBarUser"], - "public.cps.za.dlchange": ["FooUser", "BarUser"], - "public.cps.za.test": ["TestUser"] - }).encode('utf-8') + return json.dumps( + { + "public.cps.za.runs": ["FooBarUser"], + "public.cps.za.dlchange": ["FooUser", "BarUser"], + "public.cps.za.test": ["TestUser"], + } + ).encode("utf-8") + class 
MockS3Object: def get(self): return {"Body": MockS3ObjectBody()} + class MockS3Bucket: def Object(self, key): return MockS3Object() + class MockS3Resource: def Bucket(self, name): return MockS3Bucket() - mock_session = start_patch('boto3.Session') + mock_session = start_patch("boto3.Session") mock_session.return_value.resource.return_value = MockS3Resource() # Mock EventBridge client - mock_boto_client = start_patch('boto3.client') + mock_boto_client = start_patch("boto3.client") mock_events_client = MagicMock() mock_events_client.put_events.return_value = {"FailedEntryCount": 0} mock_boto_client.return_value = mock_events_client # Allow kafka producer patching (already stubbed) but still patch to inspect if needed - start_patch('confluent_kafka.Producer') + start_patch("confluent_kafka.Producer") - module = importlib.import_module('src.event_gate_lambda') + module = importlib.import_module("src.event_gate_lambda") yield module @@ -93,140 +103,206 @@ def Bucket(self, name): p.stop() patch.stopall() + @pytest.fixture def make_event(): - def _make(resource, method='GET', body=None, topic=None, headers=None): + def _make(resource, method="GET", body=None, topic=None, headers=None): return { - 'resource': resource, - 'httpMethod': method, - 'headers': headers or {}, - 'pathParameters': {'topic_name': topic} if topic else {}, - 'body': json.dumps(body) if isinstance(body, dict) else body + "resource": resource, + "httpMethod": method, + "headers": headers or {}, + "pathParameters": {"topic_name": topic} if topic else {}, + "body": json.dumps(body) if isinstance(body, dict) else body, } + return _make + @pytest.fixture def valid_payload(): - return {"event_id":"e1","tenant_id":"t1","source_app":"app","environment":"dev","timestamp":123} + return {"event_id": "e1", "tenant_id": "t1", "source_app": "app", "environment": "dev", "timestamp": 123} + # --- GET flows --- + def test_get_topics(event_gate_module, make_event): - event = make_event('/topics') + event = 
make_event("/topics") resp = event_gate_module.lambda_handler(event, None) - assert resp['statusCode'] == 200 - body = json.loads(resp['body']) - assert 'public.cps.za.test' in body + assert resp["statusCode"] == 200 + body = json.loads(resp["body"]) + assert "public.cps.za.test" in body + def test_get_topic_schema_found(event_gate_module, make_event): - event = make_event('/topics/{topic_name}', method='GET', topic='public.cps.za.test') + event = make_event("/topics/{topic_name}", method="GET", topic="public.cps.za.test") resp = event_gate_module.lambda_handler(event, None) - assert resp['statusCode'] == 200 - schema = json.loads(resp['body']) - assert schema['type'] == 'object' + assert resp["statusCode"] == 200 + schema = json.loads(resp["body"]) + assert schema["type"] == "object" + def test_get_topic_schema_not_found(event_gate_module, make_event): - event = make_event('/topics/{topic_name}', method='GET', topic='no.such.topic') + event = make_event("/topics/{topic_name}", method="GET", topic="no.such.topic") resp = event_gate_module.lambda_handler(event, None) - assert resp['statusCode'] == 404 + assert resp["statusCode"] == 404 + # --- POST auth / validation failures --- + def test_post_missing_token(event_gate_module, make_event, valid_payload): - event = make_event('/topics/{topic_name}', method='POST', topic='public.cps.za.test', body=valid_payload, headers={}) + event = make_event( + "/topics/{topic_name}", method="POST", topic="public.cps.za.test", body=valid_payload, headers={} + ) resp = event_gate_module.lambda_handler(event, None) - assert resp['statusCode'] == 401 - body = json.loads(resp['body']) - assert not body['success'] - assert body['errors'][0]['type'] == 'auth' + assert resp["statusCode"] == 401 + body = json.loads(resp["body"]) + assert not body["success"] + assert body["errors"][0]["type"] == "auth" + def test_post_unauthorized_user(event_gate_module, make_event, valid_payload): - with patch.object(event_gate_module.jwt, 'decode', 
return_value={'sub': 'NotAllowed'}, create=True): - event = make_event('/topics/{topic_name}', method='POST', topic='public.cps.za.test', body=valid_payload, headers={'Authorization':'Bearer token'}) + with patch.object(event_gate_module.jwt, "decode", return_value={"sub": "NotAllowed"}, create=True): + event = make_event( + "/topics/{topic_name}", + method="POST", + topic="public.cps.za.test", + body=valid_payload, + headers={"Authorization": "Bearer token"}, + ) resp = event_gate_module.lambda_handler(event, None) - assert resp['statusCode'] == 403 - body = json.loads(resp['body']) - assert body['errors'][0]['type'] == 'auth' + assert resp["statusCode"] == 403 + body = json.loads(resp["body"]) + assert body["errors"][0]["type"] == "auth" + def test_post_schema_validation_error(event_gate_module, make_event): - payload = {"event_id":"e1","tenant_id":"t1","source_app":"app","environment":"dev"} # missing timestamp - with patch.object(event_gate_module.jwt, 'decode', return_value={'sub': 'TestUser'}, create=True): - event = make_event('/topics/{topic_name}', method='POST', topic='public.cps.za.test', body=payload, headers={'Authorization':'Bearer token'}) + payload = {"event_id": "e1", "tenant_id": "t1", "source_app": "app", "environment": "dev"} # missing timestamp + with patch.object(event_gate_module.jwt, "decode", return_value={"sub": "TestUser"}, create=True): + event = make_event( + "/topics/{topic_name}", + method="POST", + topic="public.cps.za.test", + body=payload, + headers={"Authorization": "Bearer token"}, + ) resp = event_gate_module.lambda_handler(event, None) - assert resp['statusCode'] == 400 - body = json.loads(resp['body']) - assert body['errors'][0]['type'] == 'validation' + assert resp["statusCode"] == 400 + body = json.loads(resp["body"]) + assert body["errors"][0]["type"] == "validation" + # --- POST success & failure aggregation --- + def test_post_success_all_writers(event_gate_module, make_event, valid_payload): - with 
patch.object(event_gate_module.jwt, 'decode', return_value={'sub': 'TestUser'}, create=True), \ - patch('src.event_gate_lambda.writer_kafka.write', return_value=(True, None)), \ - patch('src.event_gate_lambda.writer_eventbridge.write', return_value=(True, None)), \ - patch('src.event_gate_lambda.writer_postgres.write', return_value=(True, None)): - event = make_event('/topics/{topic_name}', method='POST', topic='public.cps.za.test', body=valid_payload, headers={'Authorization':'Bearer token'}) + with ( + patch.object(event_gate_module.jwt, "decode", return_value={"sub": "TestUser"}, create=True), + patch("src.event_gate_lambda.writer_kafka.write", return_value=(True, None)), + patch("src.event_gate_lambda.writer_eventbridge.write", return_value=(True, None)), + patch("src.event_gate_lambda.writer_postgres.write", return_value=(True, None)), + ): + event = make_event( + "/topics/{topic_name}", + method="POST", + topic="public.cps.za.test", + body=valid_payload, + headers={"Authorization": "Bearer token"}, + ) resp = event_gate_module.lambda_handler(event, None) - assert resp['statusCode'] == 202 - body = json.loads(resp['body']) - assert body['success'] - assert body['statusCode'] == 202 + assert resp["statusCode"] == 202 + body = json.loads(resp["body"]) + assert body["success"] + assert body["statusCode"] == 202 + def test_post_single_writer_failure(event_gate_module, make_event, valid_payload): - with patch.object(event_gate_module.jwt, 'decode', return_value={'sub': 'TestUser'}, create=True), \ - patch('src.event_gate_lambda.writer_kafka.write', return_value=(False, 'Kafka boom')), \ - patch('src.event_gate_lambda.writer_eventbridge.write', return_value=(True, None)), \ - patch('src.event_gate_lambda.writer_postgres.write', return_value=(True, None)): - event = make_event('/topics/{topic_name}', method='POST', topic='public.cps.za.test', body=valid_payload, headers={'Authorization':'Bearer token'}) + with ( + patch.object(event_gate_module.jwt, "decode", 
return_value={"sub": "TestUser"}, create=True), + patch("src.event_gate_lambda.writer_kafka.write", return_value=(False, "Kafka boom")), + patch("src.event_gate_lambda.writer_eventbridge.write", return_value=(True, None)), + patch("src.event_gate_lambda.writer_postgres.write", return_value=(True, None)), + ): + event = make_event( + "/topics/{topic_name}", + method="POST", + topic="public.cps.za.test", + body=valid_payload, + headers={"Authorization": "Bearer token"}, + ) resp = event_gate_module.lambda_handler(event, None) - assert resp['statusCode'] == 500 - body = json.loads(resp['body']) - assert not body['success'] - assert len(body['errors']) == 1 - assert body['errors'][0]['type'] == 'kafka' + assert resp["statusCode"] == 500 + body = json.loads(resp["body"]) + assert not body["success"] + assert len(body["errors"]) == 1 + assert body["errors"][0]["type"] == "kafka" + def test_post_multiple_writer_failures(event_gate_module, make_event, valid_payload): - with patch.object(event_gate_module.jwt, 'decode', return_value={'sub': 'TestUser'}, create=True), \ - patch('src.event_gate_lambda.writer_kafka.write', return_value=(False, 'Kafka A')), \ - patch('src.event_gate_lambda.writer_eventbridge.write', return_value=(False, 'EB B')), \ - patch('src.event_gate_lambda.writer_postgres.write', return_value=(True, None)): - event = make_event('/topics/{topic_name}', method='POST', topic='public.cps.za.test', body=valid_payload, headers={'Authorization':'Bearer token'}) + with ( + patch.object(event_gate_module.jwt, "decode", return_value={"sub": "TestUser"}, create=True), + patch("src.event_gate_lambda.writer_kafka.write", return_value=(False, "Kafka A")), + patch("src.event_gate_lambda.writer_eventbridge.write", return_value=(False, "EB B")), + patch("src.event_gate_lambda.writer_postgres.write", return_value=(True, None)), + ): + event = make_event( + "/topics/{topic_name}", + method="POST", + topic="public.cps.za.test", + body=valid_payload, + 
headers={"Authorization": "Bearer token"}, + ) resp = event_gate_module.lambda_handler(event, None) - assert resp['statusCode'] == 500 - body = json.loads(resp['body']) - assert sorted(e['type'] for e in body['errors']) == ['eventbridge', 'kafka'] + assert resp["statusCode"] == 500 + body = json.loads(resp["body"]) + assert sorted(e["type"] for e in body["errors"]) == ["eventbridge", "kafka"] + def test_token_extraction_lowercase_bearer_header(event_gate_module, make_event, valid_payload): - with patch.object(event_gate_module.jwt, 'decode', return_value={'sub': 'TestUser'}, create=True), \ - patch('src.event_gate_lambda.writer_kafka.write', return_value=(True, None)), \ - patch('src.event_gate_lambda.writer_eventbridge.write', return_value=(True, None)), \ - patch('src.event_gate_lambda.writer_postgres.write', return_value=(True, None)): - event = make_event('/topics/{topic_name}', method='POST', topic='public.cps.za.test', body=valid_payload, headers={'bearer':'token'}) + with ( + patch.object(event_gate_module.jwt, "decode", return_value={"sub": "TestUser"}, create=True), + patch("src.event_gate_lambda.writer_kafka.write", return_value=(True, None)), + patch("src.event_gate_lambda.writer_eventbridge.write", return_value=(True, None)), + patch("src.event_gate_lambda.writer_postgres.write", return_value=(True, None)), + ): + event = make_event( + "/topics/{topic_name}", + method="POST", + topic="public.cps.za.test", + body=valid_payload, + headers={"bearer": "token"}, + ) resp = event_gate_module.lambda_handler(event, None) - assert resp['statusCode'] == 202 + assert resp["statusCode"] == 202 + def test_unknown_resource(event_gate_module, make_event): - event = make_event('/unknown') + event = make_event("/unknown") resp = event_gate_module.lambda_handler(event, None) - assert resp['statusCode'] == 404 - body = json.loads(resp['body']) - assert body['errors'][0]['type'] == 'route' + assert resp["statusCode"] == 404 + body = json.loads(resp["body"]) + assert 
body["errors"][0]["type"] == "route" + def test_get_api_endpoint(event_gate_module, make_event): - event = make_event('/api') + event = make_event("/api") resp = event_gate_module.lambda_handler(event, None) - assert resp['statusCode'] == 200 - assert 'openapi' in resp['body'].lower() + assert resp["statusCode"] == 200 + assert "openapi" in resp["body"].lower() + def test_get_token_endpoint(event_gate_module, make_event): - event = make_event('/token') + event = make_event("/token") resp = event_gate_module.lambda_handler(event, None) - assert resp['statusCode'] == 303 - assert 'Location' in resp['headers'] + assert resp["statusCode"] == 303 + assert "Location" in resp["headers"] + def test_internal_error_path(event_gate_module, make_event): - with patch('src.event_gate_lambda.get_topics', side_effect=RuntimeError('boom')): - event = make_event('/topics') + with patch("src.event_gate_lambda.get_topics", side_effect=RuntimeError("boom")): + event = make_event("/topics") resp = event_gate_module.lambda_handler(event, None) - assert resp['statusCode'] == 500 - body = json.loads(resp['body']) - assert body['errors'][0]['type'] == 'internal' + assert resp["statusCode"] == 500 + body = json.loads(resp["body"]) + assert body["errors"][0]["type"] == "internal" diff --git a/tests/test_writer_postgres.py b/tests/test_writer_postgres.py index b8b7893..f7329d7 100644 --- a/tests/test_writer_postgres.py +++ b/tests/test_writer_postgres.py @@ -22,18 +22,23 @@ from src import writer_postgres + @pytest.fixture(scope="module", autouse=True) def init_module_logger(): writer_postgres.init(logging.getLogger("test")) + class MockCursor: def __init__(self): self.executions = [] + def execute(self, sql, params): self.executions.append((sql.strip(), params)) + # --- Insert helpers --- + def test_postgres_edla_write_with_optional_fields(): cur = MockCursor() message = { @@ -48,7 +53,7 @@ def test_postgres_edla_write_with_optional_fields(): "location": "s3://bucket/path", "format": 
"parquet", "format_options": {"compression": "snappy"}, - "additional_info": {"foo": "bar"} + "additional_info": {"foo": "bar"}, } writer_postgres.postgres_edla_write(cur, "table_a", message) assert len(cur.executions) == 1 @@ -60,6 +65,7 @@ def test_postgres_edla_write_with_optional_fields(): assert json.loads(params[10]) == {"compression": "snappy"} assert json.loads(params[11]) == {"foo": "bar"} + def test_postgres_edla_write_missing_optional(): cur = MockCursor() message = { @@ -71,7 +77,7 @@ def test_postgres_edla_write_missing_optional(): "timestamp_event": 222, "catalog_id": "db.tbl2", "operation": "overwrite", - "format": "delta" + "format": "delta", } writer_postgres.postgres_edla_write(cur, "table_a", message) _sql, params = cur.executions[0] @@ -80,6 +86,7 @@ def test_postgres_edla_write_missing_optional(): assert params[10] is None assert params[11] is None + def test_postgres_run_write(): cur = MockCursor() message = { @@ -93,8 +100,15 @@ def test_postgres_run_write(): "timestamp_end": 2000, "jobs": [ {"catalog_id": "c1", "status": "succeeded", "timestamp_start": 1100, "timestamp_end": 1200}, - {"catalog_id": "c2", "status": "failed", "timestamp_start": 1300, "timestamp_end": 1400, "message": "err", "additional_info": {"k": "v"}} - ] + { + "catalog_id": "c2", + "status": "failed", + "timestamp_start": 1300, + "timestamp_end": 1400, + "message": "err", + "additional_info": {"k": "v"}, + }, + ], } writer_postgres.postgres_run_write(cur, "runs_table", "jobs_table", message) assert len(cur.executions) == 3 @@ -105,6 +119,7 @@ def test_postgres_run_write(): assert job2_params[5] == "err" assert json.loads(job2_params[6]) == {"k": "v"} + def test_postgres_test_write(): cur = MockCursor() message = { @@ -113,7 +128,7 @@ def test_postgres_test_write(): "source_app": "test", "environment": "dev", "timestamp": 999, - "additional_info": {"a": 1} + "additional_info": {"a": 1}, } writer_postgres.postgres_test_write(cur, "table_test", message) assert 
len(cur.executions) == 1 @@ -122,8 +137,10 @@ def test_postgres_test_write(): assert params[1] == "tenant-x" assert json.loads(params[5]) == {"a": 1} + # --- write() behavioral paths --- + @pytest.fixture def reset_state(monkeypatch): # Preserve psycopg2 ref @@ -135,46 +152,60 @@ def reset_state(monkeypatch): os.environ.pop("POSTGRES_SECRET_NAME", None) os.environ.pop("POSTGRES_SECRET_REGION", None) + class DummyCursor: def __init__(self, store): self.store = store + def execute(self, sql, params): self.store.append((sql, params)) + def __enter__(self): return self + def __exit__(self, exc_type, exc, tb): return False + class DummyConnection: def __init__(self, store): self.commit_called = False self.store = store + def cursor(self): return DummyCursor(self.store) + def commit(self): self.commit_called = True + def __enter__(self): return self + def __exit__(self, exc_type, exc, tb): return False + class DummyPsycopg: def __init__(self, store): self.store = store + def connect(self, **kwargs): return DummyConnection(self.store) + def test_write_skips_when_no_database(reset_state): writer_postgres.POSTGRES = {"database": ""} ok, err = writer_postgres.write("public.cps.za.test", {}) assert ok and err is None + def test_write_skips_when_psycopg2_missing(reset_state, monkeypatch): writer_postgres.POSTGRES = {"database": "db", "host": "h", "user": "u", "password": "p", "port": 5432} monkeypatch.setattr(writer_postgres, "psycopg2", None) ok, err = writer_postgres.write("public.cps.za.test", {}) assert ok and err is None + def test_write_unknown_topic_returns_false(reset_state, monkeypatch): store = [] monkeypatch.setattr(writer_postgres, "psycopg2", DummyPsycopg(store)) @@ -182,6 +213,7 @@ def test_write_unknown_topic_returns_false(reset_state, monkeypatch): ok, err = writer_postgres.write("public.cps.za.unknown", {}) assert not ok and "unknown topic" in err + def test_write_success_known_topic(reset_state, monkeypatch): store = [] monkeypatch.setattr(writer_postgres, 
"psycopg2", DummyPsycopg(store)) @@ -190,45 +222,66 @@ def test_write_success_known_topic(reset_state, monkeypatch): ok, err = writer_postgres.write("public.cps.za.test", message) assert ok and err is None and len(store) == 1 + def test_write_exception_returns_false(reset_state, monkeypatch): class FailingPsycopg: def connect(self, **kwargs): raise RuntimeError("boom") + monkeypatch.setattr(writer_postgres, "psycopg2", FailingPsycopg()) writer_postgres.POSTGRES = {"database": "db", "host": "h", "user": "u", "password": "p", "port": 5432} ok, err = writer_postgres.write("public.cps.za.test", {}) assert not ok and "failed unknown error" in err + def test_init_with_secret(monkeypatch, reset_state): secret_dict = {"database": "db", "host": "h", "user": "u", "password": "p", "port": 5432} os.environ["POSTGRES_SECRET_NAME"] = "mysecret" os.environ["POSTGRES_SECRET_REGION"] = "eu-west-1" mock_client = types.SimpleNamespace(get_secret_value=lambda SecretId: {"SecretString": json.dumps(secret_dict)}) + class MockSession: def client(self, service_name, region_name): return mock_client + monkeypatch.setattr(writer_postgres.boto3, "Session", lambda: MockSession()) writer_postgres.init(logging.getLogger("test")) assert writer_postgres.POSTGRES == secret_dict + def test_write_dlchange_success(reset_state, monkeypatch): store = [] monkeypatch.setattr(writer_postgres, "psycopg2", DummyPsycopg(store)) writer_postgres.POSTGRES = {"database": "db", "host": "h", "user": "u", "password": "p", "port": 5432} message = { - "event_id":"e1","tenant_id":"t","source_app":"app","source_app_version":"1","environment":"dev", - "timestamp_event":1,"catalog_id":"c","operation":"append","format":"parquet" + "event_id": "e1", + "tenant_id": "t", + "source_app": "app", + "source_app_version": "1", + "environment": "dev", + "timestamp_event": 1, + "catalog_id": "c", + "operation": "append", + "format": "parquet", } ok, err = writer_postgres.write("public.cps.za.dlchange", message) assert ok and err is 
None and len(store) == 1 + def test_write_runs_success(reset_state, monkeypatch): store = [] monkeypatch.setattr(writer_postgres, "psycopg2", DummyPsycopg(store)) writer_postgres.POSTGRES = {"database": "db", "host": "h", "user": "u", "password": "p", "port": 5432} message = { - "event_id":"r1","job_ref":"job","tenant_id":"t","source_app":"app","source_app_version":"1","environment":"dev", - "timestamp_start":1,"timestamp_end":2,"jobs":[{"catalog_id":"c","status":"ok","timestamp_start":1,"timestamp_end":2}] + "event_id": "r1", + "job_ref": "job", + "tenant_id": "t", + "source_app": "app", + "source_app_version": "1", + "environment": "dev", + "timestamp_start": 1, + "timestamp_end": 2, + "jobs": [{"catalog_id": "c", "status": "ok", "timestamp_start": 1, "timestamp_end": 2}], } ok, err = writer_postgres.write("public.cps.za.runs", message) assert ok and err is None and len(store) == 2 # run + job insert From cc7fbf10d66b33a2454b4b904d716a9d99c56acd Mon Sep 17 00:00:00 2001 From: Oto Macenauer Date: Tue, 9 Sep 2025 11:51:44 +0200 Subject: [PATCH 05/12] Refactor configuration imports and enhance error handling in Kafka writer --- requirements.txt | 2 +- src/event_gate_lambda.py | 14 ++-- src/writer_kafka.py | 40 ++++++---- tests/test_conf_path.py | 128 ++++++++++++++++++++++++++++++++ tests/test_event_gate_lambda.py | 1 - 5 files changed, 162 insertions(+), 23 deletions(-) create mode 100644 tests/test_conf_path.py diff --git a/requirements.txt b/requirements.txt index d99961f..965ad80 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,4 +12,4 @@ PyJWT==2.10.1 requests==2.32.5 boto3==1.40.25 confluent-kafka==2.11.1 -psycopg2-binary==2.9.10 \ No newline at end of file +psycopg2==2.9.10 \ No newline at end of file diff --git a/src/event_gate_lambda.py b/src/event_gate_lambda.py index 5d5949c..7f08f64 100644 --- a/src/event_gate_lambda.py +++ b/src/event_gate_lambda.py @@ -29,12 +29,16 @@ from jsonschema import validate from jsonschema.exceptions import 
ValidationError +# Unified import pattern to avoid mypy no-redef on symbol names try: - from .conf_path import CONF_DIR, INVALID_CONF_ENV + from . import conf_path as _conf_mod except ImportError: # fallback when executed outside package context - from conf_path import CONF_DIR, INVALID_CONF_ENV + import conf_path as _conf_mod # type: ignore[no-redef] -# Use imported symbols for internal variables +CONF_DIR = _conf_mod.CONF_DIR +INVALID_CONF_ENV = _conf_mod.INVALID_CONF_ENV + +# Internal aliases used by rest of module _CONF_DIR = CONF_DIR _INVALID_CONF_ENV = INVALID_CONF_ENV @@ -52,10 +56,6 @@ if _INVALID_CONF_ENV: logger.warning(f"CONF_DIR env var set to non-existent path: {_INVALID_CONF_ENV}; fell back to {_CONF_DIR}") -# Resolve project root (parent directory of this file's directory) -_PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) -_CONF_DIR = os.path.join(_PROJECT_ROOT, "conf") - with open(os.path.join(_CONF_DIR, "api.yaml"), "r", encoding="utf-8") as file: API = file.read() logger.debug("Loaded API definition") diff --git a/src/writer_kafka.py b/src/writer_kafka.py index e0717f2..a96c0b6 100644 --- a/src/writer_kafka.py +++ b/src/writer_kafka.py @@ -8,12 +8,16 @@ from typing import Any, Dict, Optional, Tuple from confluent_kafka import Producer +try: # KafkaException may not exist in stubbed test module + from confluent_kafka import KafkaException # type: ignore +except Exception: # pragma: no cover - fallback for test stub + class KafkaException(Exception): # type: ignore + pass STATE: Dict[str, Any] = {"logger": logging.getLogger(__name__), "producer": None} # Module globals for typing _logger: logging.Logger = logging.getLogger(__name__) -kafka_producer: Optional[Producer] = None def init(logger: logging.Logger, config: Dict[str, Any]) -> None: @@ -22,10 +26,16 @@ def init(logger: logging.Logger, config: Dict[str, Any]) -> None: Args: logger: Shared application logger. 
config: Configuration dictionary (expects 'kafka_bootstrap_server' plus optional SASL/SSL fields). + Raises: + ValueError: if required 'kafka_bootstrap_server' is missing or empty. """ STATE["logger"] = logger - producer_config: Dict[str, Any] = {"bootstrap.servers": config["kafka_bootstrap_server"]} + if "kafka_bootstrap_server" not in config or not config.get("kafka_bootstrap_server"): + raise ValueError("Missing required config: kafka_bootstrap_server") + bootstrap = config["kafka_bootstrap_server"] + + producer_config: Dict[str, Any] = {"bootstrap.servers": bootstrap} if "kafka_sasl_kerberos_principal" in config and "kafka_ssl_key_path" in config: producer_config.update( { @@ -58,26 +68,28 @@ def write(topic_name: str, message: Dict[str, Any]) -> Tuple[bool, Optional[str] logger = STATE["logger"] producer: Optional[Producer] = STATE.get("producer") # type: ignore[assignment] + if producer is None: + logger.debug("Kafka producer not initialized - skipping") + return True, None + + errors: list[Any] = [] try: - if producer is None: - logger.debug("Kafka producer not initialized - skipping") - return True, None _logger.debug(f"Sending to kafka {topic_name}") - errors: list[Any] = [] - kafka_producer.produce( + producer.produce( topic_name, key="", value=json.dumps(message).encode("utf-8"), callback=lambda err, msg: (errors.append(str(err)) if err is not None else None), ) - kafka_producer.flush() - if errors: - msg = "; ".join(errors) - _logger.error(msg) - return False, msg - except Exception as e: + producer.flush() + except KafkaException as e: # narrow exception capture err_msg = f"The Kafka writer failed with unknown error: {str(e)}" - _logger.error(err_msg) + _logger.exception(err_msg) return False, err_msg + if errors: + msg = "; ".join(errors) + _logger.error(msg) + return False, msg + return True, None diff --git a/tests/test_conf_path.py b/tests/test_conf_path.py new file mode 100644 index 0000000..c20d999 --- /dev/null +++ b/tests/test_conf_path.py @@ 
-0,0 +1,128 @@ +import importlib.util +import os +import sys +import tempfile +from pathlib import Path + +import pytest + +import src.conf_path as conf_path_module + + +def test_env_var_valid_directory(monkeypatch): + with tempfile.TemporaryDirectory() as tmp: + custom_conf = Path(tmp) / "myconf" + custom_conf.mkdir() + monkeypatch.setenv("CONF_DIR", str(custom_conf)) + conf_dir, invalid = conf_path_module.resolve_conf_dir() + assert conf_dir == str(custom_conf) + assert invalid is None + + +def test_env_var_invalid_directory_falls_back_parent(monkeypatch): + missing_path = "/nonexistent/path/xyz_does_not_exist" + monkeypatch.setenv("CONF_DIR", missing_path) + conf_dir, invalid = conf_path_module.resolve_conf_dir() + # Should fall back to repository conf directory + assert conf_dir.endswith(os.path.join("EventGate", "conf")) + assert invalid == os.path.abspath(missing_path) + + +def _load_isolated_conf_path(structure_builder): + """Utility to create an isolated module instance of conf_path with custom directory layout. + + structure_builder receives base temp directory and returns path to module directory containing conf_path.py + and the code to write (copied from original). 
+ """ + import inspect + code = Path(conf_path_module.__file__).read_text(encoding="utf-8") + tmp = tempfile.TemporaryDirectory() + base = Path(tmp.name) + module_dir = structure_builder(base, code) + module_file = module_dir / "conf_path.py" + assert module_file.exists() + spec = importlib.util.spec_from_file_location(f"conf_path_isolated_{id(module_dir)}", module_file) + mod = importlib.util.module_from_spec(spec) + sys.modules[spec.name] = mod # ensure import works for dependencies + spec.loader.exec_module(mod) # type: ignore[attr-defined] + # Attach tempdir for cleanup reference + mod._tmp = tmp # type: ignore[attr-defined] + return mod + + +def test_current_dir_conf_used_when_parent_missing(): + def build(base: Path, code: str): + module_dir = base / "pkg" + module_dir.mkdir(parents=True) + # Create conf inside module directory (current_dir/conf) + (module_dir / "conf").mkdir() + # Intentionally DO NOT create parent conf directory (base/conf) + (module_dir / "conf_path.py").write_text(code, encoding="utf-8") + return module_dir + + mod = _load_isolated_conf_path(build) + try: + conf_dir, invalid = mod.resolve_conf_dir() + assert conf_dir.endswith("pkg/conf") # current directory conf chosen + assert invalid is None + finally: + mod._tmp.cleanup() # type: ignore[attr-defined] + + +def test_fallback_parent_conf_even_if_missing(): + def build(base: Path, code: str): + module_dir = base / "pkg2" + module_dir.mkdir(parents=True) + # No conf anywhere + (module_dir / "conf_path.py").write_text(code, encoding="utf-8") + return module_dir + + mod = _load_isolated_conf_path(build) + try: + conf_dir, invalid = mod.resolve_conf_dir() + # Parent conf path returned even though it does not exist + expected_parent_conf = (Path(mod.__file__).parent.parent / "conf").resolve() + assert Path(conf_dir).resolve() == expected_parent_conf + assert invalid is None + finally: + mod._tmp.cleanup() # type: ignore[attr-defined] + + +def 
test_invalid_env_uses_current_conf_when_parent_missing(monkeypatch): + """Invalid CONF_DIR provided, parent/conf missing, current_dir/conf present -> choose current.""" + def build(base: Path, code: str): + module_dir = base / "pkg_invalid_current" + module_dir.mkdir(parents=True) + (module_dir / "conf").mkdir() + (module_dir / "conf_path.py").write_text(code, encoding="utf-8") + return module_dir + + mod = _load_isolated_conf_path(build) + try: + bad_path = "/definitely/not/there/abc123" + monkeypatch.setenv("CONF_DIR", bad_path) + conf_dir, invalid = mod.resolve_conf_dir() + assert conf_dir.endswith("pkg_invalid_current/conf") + assert invalid == os.path.abspath(bad_path) + finally: + mod._tmp.cleanup() # type: ignore[attr-defined] + + +def test_invalid_env_all_missing_fallback_parent(monkeypatch): + """Invalid CONF_DIR provided, no parent/conf or current/conf -> fallback parent path returned.""" + def build(base: Path, code: str): + module_dir = base / "pkg_invalid_all" + module_dir.mkdir(parents=True) + (module_dir / "conf_path.py").write_text(code, encoding="utf-8") + return module_dir + + mod = _load_isolated_conf_path(build) + try: + bad_path = "/also/not/there/xyz987" + monkeypatch.setenv("CONF_DIR", bad_path) + conf_dir, invalid = mod.resolve_conf_dir() + expected_parent_conf = (Path(mod.__file__).parent.parent / "conf").resolve() + assert Path(conf_dir).resolve() == expected_parent_conf + assert invalid == os.path.abspath(bad_path) + finally: + mod._tmp.cleanup() # type: ignore[attr-defined] diff --git a/tests/test_event_gate_lambda.py b/tests/test_event_gate_lambda.py index fb60e2a..2b84da1 100644 --- a/tests/test_event_gate_lambda.py +++ b/tests/test_event_gate_lambda.py @@ -101,7 +101,6 @@ def Bucket(self, name): for p in started_patches: p.stop() - patch.stopall() @pytest.fixture From 315e2c75f64a2454460a3a0630f6f04857821b1b Mon Sep 17 00:00:00 2001 From: Oto Macenauer Date: Wed, 10 Sep 2025 10:07:58 +0200 Subject: [PATCH 06/12] Refactor import 
handling and improve error logging in Kafka writer and event gate lambda --- src/event_gate_lambda.py | 9 ++--- src/writer_kafka.py | 14 ++++---- tests/test_conf_path.py | 31 +++++++++++++++++ tests/test_event_gate_lambda.py | 61 +++++++++++++++++++-------------- 4 files changed, 77 insertions(+), 38 deletions(-) diff --git a/src/event_gate_lambda.py b/src/event_gate_lambda.py index 7f08f64..345f307 100644 --- a/src/event_gate_lambda.py +++ b/src/event_gate_lambda.py @@ -29,14 +29,11 @@ from jsonschema import validate from jsonschema.exceptions import ValidationError -# Unified import pattern to avoid mypy no-redef on symbol names +# Import configuration directory symbols with explicit ImportError fallback try: - from . import conf_path as _conf_mod + from .conf_path import CONF_DIR, INVALID_CONF_ENV # type: ignore[no-redef] except ImportError: # fallback when executed outside package context - import conf_path as _conf_mod # type: ignore[no-redef] - -CONF_DIR = _conf_mod.CONF_DIR -INVALID_CONF_ENV = _conf_mod.INVALID_CONF_ENV + from conf_path import CONF_DIR, INVALID_CONF_ENV # type: ignore[no-redef] # Internal aliases used by rest of module _CONF_DIR = CONF_DIR diff --git a/src/writer_kafka.py b/src/writer_kafka.py index a96c0b6..e04e935 100644 --- a/src/writer_kafka.py +++ b/src/writer_kafka.py @@ -8,16 +8,16 @@ from typing import Any, Dict, Optional, Tuple from confluent_kafka import Producer + try: # KafkaException may not exist in stubbed test module from confluent_kafka import KafkaException # type: ignore -except Exception: # pragma: no cover - fallback for test stub +except (ImportError, ModuleNotFoundError): # pragma: no cover - fallback for test stub + class KafkaException(Exception): # type: ignore pass -STATE: Dict[str, Any] = {"logger": logging.getLogger(__name__), "producer": None} -# Module globals for typing -_logger: logging.Logger = logging.getLogger(__name__) +STATE: Dict[str, Any] = {"logger": logging.getLogger(__name__), "producer": None} def 
init(logger: logging.Logger, config: Dict[str, Any]) -> None: @@ -74,7 +74,7 @@ def write(topic_name: str, message: Dict[str, Any]) -> Tuple[bool, Optional[str] errors: list[Any] = [] try: - _logger.debug(f"Sending to kafka {topic_name}") + logger.debug(f"Sending to kafka {topic_name}") producer.produce( topic_name, key="", @@ -84,12 +84,12 @@ def write(topic_name: str, message: Dict[str, Any]) -> Tuple[bool, Optional[str] producer.flush() except KafkaException as e: # narrow exception capture err_msg = f"The Kafka writer failed with unknown error: {str(e)}" - _logger.exception(err_msg) + logger.exception(err_msg) return False, err_msg if errors: msg = "; ".join(errors) - _logger.error(msg) + logger.error(msg) return False, msg return True, None diff --git a/tests/test_conf_path.py b/tests/test_conf_path.py index c20d999..ff97e3b 100644 --- a/tests/test_conf_path.py +++ b/tests/test_conf_path.py @@ -35,6 +35,7 @@ def _load_isolated_conf_path(structure_builder): and the code to write (copied from original). 
""" import inspect + code = Path(conf_path_module.__file__).read_text(encoding="utf-8") tmp = tempfile.TemporaryDirectory() base = Path(tmp.name) @@ -90,6 +91,7 @@ def build(base: Path, code: str): def test_invalid_env_uses_current_conf_when_parent_missing(monkeypatch): """Invalid CONF_DIR provided, parent/conf missing, current_dir/conf present -> choose current.""" + def build(base: Path, code: str): module_dir = base / "pkg_invalid_current" module_dir.mkdir(parents=True) @@ -110,6 +112,7 @@ def build(base: Path, code: str): def test_invalid_env_all_missing_fallback_parent(monkeypatch): """Invalid CONF_DIR provided, no parent/conf or current/conf -> fallback parent path returned.""" + def build(base: Path, code: str): module_dir = base / "pkg_invalid_all" module_dir.mkdir(parents=True) @@ -126,3 +129,31 @@ def build(base: Path, code: str): assert invalid == os.path.abspath(bad_path) finally: mod._tmp.cleanup() # type: ignore[attr-defined] + + +def test_module_level_constants_env_valid(monkeypatch): + """Ensure module-level CONF_DIR/INVALID_CONF_ENV reflect a valid env path at import time.""" + with tempfile.TemporaryDirectory() as tmp: + valid_conf = Path(tmp) / "conf" + valid_conf.mkdir() + monkeypatch.setenv("CONF_DIR", str(valid_conf)) + # Force fresh import under unique name + spec = importlib.util.spec_from_file_location("conf_path_valid_mod", conf_path_module.__file__) + mod = importlib.util.module_from_spec(spec) + sys.modules[spec.name] = mod + spec.loader.exec_module(mod) # type: ignore[attr-defined] + assert mod.CONF_DIR == str(valid_conf) # type: ignore[attr-defined] + assert mod.INVALID_CONF_ENV is None # type: ignore[attr-defined] + + +def test_module_level_constants_env_invalid(monkeypatch): + """Ensure module-level INVALID_CONF_ENV is populated when invalid path provided at import.""" + bad_path = "/no/such/dir/abcXYZ123" + monkeypatch.setenv("CONF_DIR", bad_path) + spec = importlib.util.spec_from_file_location("conf_path_invalid_mod", 
conf_path_module.__file__) + mod = importlib.util.module_from_spec(spec) + sys.modules[spec.name] = mod + spec.loader.exec_module(mod) # type: ignore[attr-defined] + # Module constant should fall back to repository conf directory + assert mod.CONF_DIR.endswith(os.path.join("EventGate", "conf")) # type: ignore[attr-defined] + assert mod.INVALID_CONF_ENV == os.path.abspath(bad_path) # type: ignore[attr-defined] diff --git a/tests/test_event_gate_lambda.py b/tests/test_event_gate_lambda.py index 2b84da1..65ace58 100644 --- a/tests/test_event_gate_lambda.py +++ b/tests/test_event_gate_lambda.py @@ -20,40 +20,50 @@ import types import pytest from unittest.mock import patch, MagicMock - -# Inject dummy confluent_kafka if not installed so patching works -if "confluent_kafka" not in sys.modules: - dummy_ck = types.ModuleType("confluent_kafka") - - class DummyProducer: # minimal interface - def __init__(self, *a, **kw): - pass - - def produce(self, *a, **kw): - cb = kw.get("callback") - if cb: - cb(None, None) - - def flush(self): - return None - - dummy_ck.Producer = DummyProducer - sys.modules["confluent_kafka"] = dummy_ck - -# Inject dummy psycopg2 (optional dependency) -if "psycopg2" not in sys.modules: - sys.modules["psycopg2"] = types.ModuleType("psycopg2") +import importlib.util +from contextlib import ExitStack @pytest.fixture(scope="module") def event_gate_module(): started_patches = [] + exit_stack = ExitStack() def start_patch(target): p = patch(target) started_patches.append(p) return p.start() + # Local, temporary dummy modules only if truly missing + # confluent_kafka + if importlib.util.find_spec("confluent_kafka") is None: # pragma: no cover - environment dependent + dummy_ck = types.ModuleType("confluent_kafka") + + class DummyProducer: # minimal interface + def __init__(self, *a, **kw): + pass + + def produce(self, *a, **kw): + cb = kw.get("callback") + if cb: + cb(None, None) + + def flush(self): # noqa: D401 - simple stub + return None + + 
dummy_ck.Producer = DummyProducer # type: ignore[attr-defined] + + class DummyKafkaException(Exception): + pass + + dummy_ck.KafkaException = DummyKafkaException # type: ignore[attr-defined] + exit_stack.enter_context(patch.dict(sys.modules, {"confluent_kafka": dummy_ck})) + + # psycopg2 optional dependency + if importlib.util.find_spec("psycopg2") is None: # pragma: no cover - environment dependent + dummy_pg = types.ModuleType("psycopg2") + exit_stack.enter_context(patch.dict(sys.modules, {"psycopg2": dummy_pg})) + mock_requests_get = start_patch("requests.get") mock_requests_get.return_value.json.return_value = {"key": base64.b64encode(b"dummy_der").decode("utf-8")} @@ -76,11 +86,11 @@ def get(self): return {"Body": MockS3ObjectBody()} class MockS3Bucket: - def Object(self, key): + def Object(self, key): # noqa: D401 - simple proxy return MockS3Object() class MockS3Resource: - def Bucket(self, name): + def Bucket(self, name): # noqa: D401 - simple proxy return MockS3Bucket() mock_session = start_patch("boto3.Session") @@ -101,6 +111,7 @@ def Bucket(self, name): for p in started_patches: p.stop() + exit_stack.close() @pytest.fixture From 221108cf9dc4e2c467812b8b5f0ce24bf6103e29 Mon Sep 17 00:00:00 2001 From: Oto Macenauer Date: Wed, 10 Sep 2025 10:42:12 +0200 Subject: [PATCH 07/12] Add more test --- tests/test_event_gate_lambda.py | 15 ++++ tests/test_event_gate_lambda_local_access.py | 58 ++++++++++++++++ tests/test_extract_token.py | 72 ++++++++++++++++++++ tests/test_writer_eventbridge.py | 53 ++++++++++++++ tests/test_writer_kafka.py | 62 +++++++++++++++++ 5 files changed, 260 insertions(+) create mode 100644 tests/test_event_gate_lambda_local_access.py create mode 100644 tests/test_extract_token.py create mode 100644 tests/test_writer_eventbridge.py create mode 100644 tests/test_writer_kafka.py diff --git a/tests/test_event_gate_lambda.py b/tests/test_event_gate_lambda.py index 65ace58..247a658 100644 --- a/tests/test_event_gate_lambda.py +++ 
b/tests/test_event_gate_lambda.py @@ -316,3 +316,18 @@ def test_internal_error_path(event_gate_module, make_event): assert resp["statusCode"] == 500 body = json.loads(resp["body"]) assert body["errors"][0]["type"] == "internal" + + +def test_post_invalid_json_body(event_gate_module, make_event): + # Invalid JSON triggers json.loads exception and returns 500 internal error + event = make_event( + "/topics/{topic_name}", + method="POST", + topic="public.cps.za.test", + body="{invalid json", + headers={"Authorization": "Bearer token"}, + ) + resp = event_gate_module.lambda_handler(event, None) + assert resp["statusCode"] == 500 + body = json.loads(resp["body"]) + assert any(e["type"] == "internal" for e in body["errors"]) # internal error path diff --git a/tests/test_event_gate_lambda_local_access.py b/tests/test_event_gate_lambda_local_access.py new file mode 100644 index 0000000..525cdcb --- /dev/null +++ b/tests/test_event_gate_lambda_local_access.py @@ -0,0 +1,58 @@ +import importlib +import io +import json +from unittest.mock import patch, MagicMock + + +def test_local_access_config_branch(): + import src.event_gate_lambda as egl # initial import (s3 path) just to ensure module present + + original_open = open # noqa: A001 + + local_config = { + "access_config": "conf/access.json", + "token_provider_url": "https://example/token", + "token_public_key_url": "https://example/key", + "kafka_bootstrap_server": "localhost:9092", + "event_bus_arn": "arn:aws:events:region:acct:event-bus/bus", + } + + access_payload = { + "public.cps.za.runs": ["User"], + "public.cps.za.dlchange": ["User"], + "public.cps.za.test": ["User"], + } + + def open_side_effect(path, *args, **kwargs): # noqa: D401 + p = str(path) + if p.endswith("config.json"): + return io.StringIO(json.dumps(local_config)) + if p.endswith("conf/access.json") or p == "conf/access.json": + return io.StringIO(json.dumps(access_payload)) + return original_open(path, *args, **kwargs) + + with ( + patch("requests.get") 
as mock_get, + patch("cryptography.hazmat.primitives.serialization.load_der_public_key") as mock_load_key, + patch("boto3.Session") as mock_session, + patch("boto3.client") as mock_boto_client, + patch("confluent_kafka.Producer"), + patch("builtins.open", side_effect=open_side_effect), + ): + mock_get.return_value.json.return_value = {"key": "ZHVtbXk="} # base64 for 'dummy' + mock_load_key.return_value = object() + + class MockS3: + def Bucket(self, name): # noqa: D401 + raise AssertionError("S3 branch should not be used for local access_config") + + mock_session.return_value.resource.return_value = MockS3() + mock_events = MagicMock() + mock_events.put_events.return_value = {"FailedEntryCount": 0} + mock_boto_client.return_value = mock_events + + # Force reload so import-level logic re-executes with patched open + egl_reloaded = importlib.reload(egl) + + assert not egl_reloaded.CONFIG["access_config"].startswith("s3://") # type: ignore[attr-defined] + assert egl_reloaded.ACCESS["public.cps.za.test"] == ["User"] # type: ignore[attr-defined] diff --git a/tests/test_extract_token.py b/tests/test_extract_token.py new file mode 100644 index 0000000..5a896a7 --- /dev/null +++ b/tests/test_extract_token.py @@ -0,0 +1,72 @@ +import base64 +import json +import importlib +from unittest.mock import patch, MagicMock +import pytest + + +@pytest.fixture(scope="module") +def egl_mod(): + patches = [] + + def start(target): + p = patch(target) + patches.append(p) + return p.start() + + # Patch requests.get for public key fetch + mock_get = start("requests.get") + mock_get.return_value.json.return_value = {"key": base64.b64encode(b"dummy_der").decode("utf-8")} + + # Patch crypto key loader + start_loader = start("cryptography.hazmat.primitives.serialization.load_der_public_key") + start_loader.return_value = object() + + # Patch boto3.Session resource for S3 to avoid bucket validation/network + class MockBody: + def read(self): + return json.dumps( + { + "public.cps.za.runs": 
["User"], + "public.cps.za.dlchange": ["User"], + "public.cps.za.test": ["User"], + } + ).encode("utf-8") + + class MockObject: + def get(self): + return {"Body": MockBody()} + + class MockBucket: + def Object(self, key): # noqa: D401 + return MockObject() + + class MockS3: + def Bucket(self, name): # noqa: D401 + return MockBucket() + + mock_session = start("boto3.Session") + mock_session.return_value.resource.return_value = MockS3() + + # Patch boto3.client for EventBridge + mock_client = start("boto3.client") + mock_events = MagicMock() + mock_events.put_events.return_value = {"FailedEntryCount": 0} + mock_client.return_value = mock_events + + # Patch Kafka Producer + start("confluent_kafka.Producer") + + module = importlib.import_module("src.event_gate_lambda") + yield module + + for p in patches: + p.stop() + + +def test_extract_token_empty(egl_mod): + assert egl_mod.extract_token({}) == "" + + +def test_extract_token_direct_bearer_header(egl_mod): + token = egl_mod.extract_token({"Bearer": " tok123 "}) diff --git a/tests/test_writer_eventbridge.py b/tests/test_writer_eventbridge.py new file mode 100644 index 0000000..0d67ea9 --- /dev/null +++ b/tests/test_writer_eventbridge.py @@ -0,0 +1,53 @@ +import logging +from unittest.mock import MagicMock, patch + +import src.writer_eventbridge as we + + +def test_write_skips_when_no_event_bus(): + we.STATE["logger"] = logging.getLogger("test") + we.STATE["event_bus_arn"] = "" # no bus configured + ok, err = we.write("topic", {"k": 1}) + assert ok and err is None + + +def test_write_success(): + we.STATE["logger"] = logging.getLogger("test") + we.STATE["event_bus_arn"] = "arn:aws:events:region:acct:event-bus/bus" # fake + mock_client = MagicMock() + mock_client.put_events.return_value = {"FailedEntryCount": 0, "Entries": []} + we.STATE["client"] = mock_client + ok, err = we.write("topic", {"k": 2}) + assert ok and err is None + mock_client.put_events.assert_called_once() + + +def test_write_failed_entries(): + 
we.STATE["logger"] = logging.getLogger("test") + we.STATE["event_bus_arn"] = "arn:aws:events:region:acct:event-bus/bus" + mock_client = MagicMock() + mock_client.put_events.return_value = { + "FailedEntryCount": 1, + "Entries": [ + {"EventId": "1", "ErrorCode": "Err", "ErrorMessage": "Bad"}, + {"EventId": "2"}, + ], + } + we.STATE["client"] = mock_client + ok, err = we.write("topic", {"k": 3}) + assert not ok and "EventBridge" in err + + +def test_write_client_error(): + from botocore.exceptions import BotoCoreError + + class DummyError(BotoCoreError): + pass + + we.STATE["logger"] = logging.getLogger("test") + we.STATE["event_bus_arn"] = "arn:aws:events:region:acct:event-bus/bus" + mock_client = MagicMock() + mock_client.put_events.side_effect = DummyError() + we.STATE["client"] = mock_client + ok, err = we.write("topic", {"k": 4}) + assert not ok and err is not None diff --git a/tests/test_writer_kafka.py b/tests/test_writer_kafka.py new file mode 100644 index 0000000..6017b3e --- /dev/null +++ b/tests/test_writer_kafka.py @@ -0,0 +1,62 @@ +import json +import logging +from types import SimpleNamespace +import src.writer_kafka as wk + + +class FakeProducerSuccess: + def __init__(self, *a, **kw): + self.produced = [] + + def produce(self, topic, key, value, callback): # noqa: D401 + self.produced.append((topic, key, value)) + # simulate success + callback(None, SimpleNamespace()) + + def flush(self): + return None + + +class FakeProducerError(FakeProducerSuccess): + def produce(self, topic, key, value, callback): # noqa: D401 + # simulate async error + callback("ERR", None) + + +def test_write_skips_when_producer_none(monkeypatch): + wk.STATE["logger"] = logging.getLogger("test") + wk.STATE["producer"] = None + ok, err = wk.write("topic", {"a": 1}) + assert ok and err is None + + +def test_write_success(monkeypatch): + wk.STATE["logger"] = logging.getLogger("test") + wk.STATE["producer"] = FakeProducerSuccess() + ok, err = wk.write("topic", {"b": 2}) + assert ok 
and err is None + + +def test_write_async_error(monkeypatch): + wk.STATE["logger"] = logging.getLogger("test") + wk.STATE["producer"] = FakeProducerError() + ok, err = wk.write("topic", {"c": 3}) + assert not ok and "ERR" in err + + +class DummyKafkaException(Exception): + pass + + +def test_write_kafka_exception(monkeypatch): + wk.STATE["logger"] = logging.getLogger("test") + + class RaisingProducer(FakeProducerSuccess): + def produce(self, *a, **kw): # noqa: D401 + raise DummyKafkaException("boom") + + # Monkeypatch KafkaException symbol used in except + monkeypatch.setattr(wk, "KafkaException", DummyKafkaException, raising=False) + wk.STATE["producer"] = RaisingProducer() + ok, err = wk.write("topic", {"d": 4}) + assert not ok and "boom" in err From 6f969ce97b5ef0683dfaceceaa39546a0127e3de Mon Sep 17 00:00:00 2001 From: Oto Macenauer Date: Wed, 10 Sep 2025 10:56:42 +0200 Subject: [PATCH 08/12] Enhance Kafka writer with configurable flush timeout and improved error handling --- src/writer_kafka.py | 13 ++++++++++++- tests/test_event_gate_lambda.py | 21 +++++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/src/writer_kafka.py b/src/writer_kafka.py index e04e935..a8790c1 100644 --- a/src/writer_kafka.py +++ b/src/writer_kafka.py @@ -5,6 +5,7 @@ import json import logging +import os from typing import Any, Dict, Optional, Tuple from confluent_kafka import Producer @@ -18,6 +19,8 @@ class KafkaException(Exception): # type: ignore STATE: Dict[str, Any] = {"logger": logging.getLogger(__name__), "producer": None} +# Configurable flush timeout (seconds) to avoid hanging indefinitely +_KAFKA_FLUSH_TIMEOUT_SEC = float(os.environ.get("KAFKA_FLUSH_TIMEOUT", "5")) def init(logger: logging.Logger, config: Dict[str, Any]) -> None: @@ -81,7 +84,15 @@ def write(topic_name: str, message: Dict[str, Any]) -> Tuple[bool, Optional[str] value=json.dumps(message).encode("utf-8"), callback=lambda err, msg: (errors.append(str(err)) if err is not None else 
None), ) - producer.flush() + try: + remaining = producer.flush(_KAFKA_FLUSH_TIMEOUT_SEC) # type: ignore[arg-type] + except TypeError: # Fallback for stub producers without timeout parameter + remaining = producer.flush() # type: ignore[call-arg] + # remaining can be number of undelivered messages (confluent_kafka returns int) + if not errors and isinstance(remaining, int) and remaining > 0: + timeout_msg = f"Kafka flush timeout after {_KAFKA_FLUSH_TIMEOUT_SEC}s: {remaining} message(s) still pending" + logger.error(timeout_msg) + return False, timeout_msg except KafkaException as e: # narrow exception capture err_msg = f"The Kafka writer failed with unknown error: {str(e)}" logger.exception(err_msg) diff --git a/tests/test_event_gate_lambda.py b/tests/test_event_gate_lambda.py index 247a658..13ee3a1 100644 --- a/tests/test_event_gate_lambda.py +++ b/tests/test_event_gate_lambda.py @@ -203,6 +203,27 @@ def test_post_schema_validation_error(event_gate_module, make_event): assert body["errors"][0]["type"] == "validation" +def test_post_invalid_token_decode(event_gate_module, make_event, valid_payload): + class DummyJwtError(Exception): + pass + + # Patch jwt.decode to raise PyJWTError-like exception; use existing attribute if present + with patch.object( + event_gate_module.jwt, "decode", side_effect=event_gate_module.jwt.PyJWTError("bad"), create=True + ): + event = make_event( + "/topics/{topic_name}", + method="POST", + topic="public.cps.za.test", + body=valid_payload, + headers={"Authorization": "Bearer abc"}, + ) + resp = event_gate_module.lambda_handler(event, None) + assert resp["statusCode"] == 401 + body = json.loads(resp["body"]) + assert body["errors"][0]["type"] == "auth" + + # --- POST success & failure aggregation --- From 59ba78f54597a554d9ea4801730d57a844127795 Mon Sep 17 00:00:00 2001 From: Oto Macenauer Date: Wed, 10 Sep 2025 11:04:16 +0200 Subject: [PATCH 09/12] Add QA tooling documentation and configuration for testing environment --- DEVELOPER.md 
| 52 +++++++++++++++++++++++++++++++++++++++++++++++----- README.md | 34 ++++++++++++++++++++++++++++++++-- 2 files changed, 79 insertions(+), 7 deletions(-) diff --git a/DEVELOPER.md b/DEVELOPER.md index fd7dbd0..fee972e 100644 --- a/DEVELOPER.md +++ b/DEVELOPER.md @@ -108,20 +108,62 @@ pytest tests/ This will execute all tests located in the tests directory. +### Focused / Selective Test Runs +Run a single test file: +```shell +pytest tests/test_writer_kafka.py -q +``` +Filter by keyword expression: +```shell +pytest -k kafka -q +``` +Run a single test function (node id): +```shell +pytest tests/test_event_gate_lambda.py::test_post_multiple_writer_failures -q +``` + ## Code Coverage Code coverage is collected using the pytest-cov coverage tool. To run the tests and collect coverage information, use the following command: ```shell -pytest --cov=. -v tests/ --cov-fail-under=80 +pytest --cov=. -v tests/ --cov-fail-under=80 --cov-report=term-missing ``` -This will execute all tests in the tests directory and generate a code coverage report. - -See the coverage report on the path: +This will execute all tests in the tests directory and generate a code coverage report with missing line details and enforce a minimum 80% threshold. 
+Open the HTML coverage report: ```shell open htmlcov/index.html ``` -## +### Test Environment Variables +The following environment variables influence runtime behavior under test: +- LOG_LEVEL – override logging verbosity (default INFO) +- KAFKA_FLUSH_TIMEOUT – seconds passed to confluent_kafka Producer.flush(timeout) (default 5) +- POSTGRES_SECRET_NAME / POSTGRES_SECRET_REGION – when set, writer_postgres.init() fetches secret via AWS Secrets Manager (tests usually leave unset or patch boto3) + +### Mocking / Isolation Strategy +- External network calls (requests.get for token public key, S3 object fetch, EventBridge put_events, Kafka producer) are monkeypatched or patched with unittest.mock before importing modules that perform side-effect initialization (notably src/event_gate_lambda). +- Tests that need to exercise alternate import-time paths (e.g., local vs S3 access_config) patch builtins.open and reload the module (see tests/test_event_gate_lambda_local_access.py). +- Optional dependencies (confluent_kafka, psycopg2) are stubbed only when truly absent using importlib.util.find_spec to avoid masking real installations. +- Kafka producer errors are simulated via callback error injection or custom stub classes; flush timeout path covered by setting KAFKA_FLUSH_TIMEOUT and custom producer returning non-zero remaining messages. + +### Type Checking / Lint / Format +Run static type checks: +```shell +mypy . +``` +Run lint (pylint over tracked Python files): +```shell +pylint $(git ls-files '*.py') +``` +Auto-format: +```shell +black $(git ls-files '*.py') +``` + +### Quick CI Smoke (local approximation) +```shell +mypy . && pylint $(git ls-files '*.py') && pytest --cov=. 
--cov-fail-under=80 -q +``` diff --git a/README.md b/README.md index ce1747c..6e50ade 100644 --- a/README.md +++ b/README.md @@ -128,12 +128,42 @@ Install Python tooling (Python 3.11 suggested) then: python -m venv .venv source .venv/bin/activate pip install -r requirements.txt +``` +Run full test suite (quiet): +``` pytest -q ``` -To invoke handler manually: +Run with coverage (terminal + per-file missing lines) enforcing 80% minimum: +``` +pytest --cov=. --cov-report=term-missing --cov-fail-under=80 +``` +Run a single test file or filtered tests: +``` +pytest tests/test_writer_kafka.py -q +pytest -k kafka -q +``` +Type checking & lint: +``` +mypy . +pylint $(git ls-files '*.py') +``` +Format: +``` +black $(git ls-files '*.py') +``` +Key environment variables for local runs / tests: +- LOG_LEVEL: Override logging level (default INFO) +- KAFKA_FLUSH_TIMEOUT: Seconds for Kafka producer.flush(timeout) (default 5) + +Kafka tests stub the real producer; EventBridge & S3 interactions are patched to avoid network calls. Tests that exercise module import side-effects (like loading config from S3) first patch boto3 / requests before importing the module to ensure deterministic behavior. + +To simulate local (non-S3) access config branch, see tests/test_event_gate_lambda_local_access.py which reloads the module with patched open(). 
+ +To manually invoke the handler: ```python from src import event_gate_lambda as m -# craft an event similar to API Gateway proxy integration +resp = m.lambda_handler({"resource": "/topics"}, None) +print(resp) ``` ## Security & Authorization From 00677cb332d5d89480d4da2cfc181dbd62e43f5b Mon Sep 17 00:00:00 2001 From: Oto Macenauer Date: Wed, 10 Sep 2025 11:36:39 +0200 Subject: [PATCH 10/12] Remove section on running Action locally from developer documentation --- DEVELOPER.md | 1 - 1 file changed, 1 deletion(-) diff --git a/DEVELOPER.md b/DEVELOPER.md index fee972e..55485e5 100644 --- a/DEVELOPER.md +++ b/DEVELOPER.md @@ -6,7 +6,6 @@ - [Run Black Tool Locally](#run-black-tool-locally) - [Run mypy Tool Locally](#run-mypy-tool-locally) - [Run Unit Test](#running-unit-test) -- [Run Action Locally](#run-action-locally) ## Get Started From a59748aabce282cf0996396476ce6c942651ef8e Mon Sep 17 00:00:00 2001 From: Oto Macenauer Date: Wed, 10 Sep 2025 14:43:31 +0200 Subject: [PATCH 11/12] Fixed PR comments --- DEVELOPER.md | 31 ------------------------------- src/conf_path.py | 24 +++++++++++++++++------- src/event_gate_lambda.py | 10 ++++++---- src/writer_eventbridge.py | 16 ++++++++++++++++ src/writer_kafka.py | 20 ++++++++++++++++++-- src/writer_postgres.py | 33 ++++++++++++++++++++++++++++++++- 6 files changed, 89 insertions(+), 45 deletions(-) diff --git a/DEVELOPER.md b/DEVELOPER.md index 55485e5..7811941 100644 --- a/DEVELOPER.md +++ b/DEVELOPER.md @@ -135,34 +135,3 @@ Open the HTML coverage report: ```shell open htmlcov/index.html ``` - -### Test Environment Variables -The following environment variables influence runtime behavior under test: -- LOG_LEVEL – override logging verbosity (default INFO) -- KAFKA_FLUSH_TIMEOUT – seconds passed to confluent_kafka Producer.flush(timeout) (default 5) -- POSTGRES_SECRET_NAME / POSTGRES_SECRET_REGION – when set, writer_postgres.init() fetches secret via AWS Secrets Manager (tests usually leave unset or patch boto3) - -### 
Mocking / Isolation Strategy -- External network calls (requests.get for token public key, S3 object fetch, EventBridge put_events, Kafka producer) are monkeypatched or patched with unittest.mock before importing modules that perform side-effect initialization (notably src/event_gate_lambda). -- Tests that need to exercise alternate import-time paths (e.g., local vs S3 access_config) patch builtins.open and reload the module (see tests/test_event_gate_lambda_local_access.py). -- Optional dependencies (confluent_kafka, psycopg2) are stubbed only when truly absent using importlib.util.find_spec to avoid masking real installations. -- Kafka producer errors are simulated via callback error injection or custom stub classes; flush timeout path covered by setting KAFKA_FLUSH_TIMEOUT and custom producer returning non-zero remaining messages. - -### Type Checking / Lint / Format -Run static type checks: -```shell -mypy . -``` -Run lint (pylint over tracked Python files): -```shell -pylint $(git ls-files '*.py') -``` -Auto-format: -```shell -black $(git ls-files '*.py') -``` - -### Quick CI Smoke (local approximation) -```shell -mypy . && pylint $(git ls-files '*.py') && pytest --cov=. --cov-fail-under=80 -q -``` diff --git a/src/conf_path.py b/src/conf_path.py index d150dc2..e1c37fc 100644 --- a/src/conf_path.py +++ b/src/conf_path.py @@ -13,17 +13,27 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import os -# Module providing reusable configuration directory resolution. -# Resolution order: -# 1. CONF_DIR env var if it exists and points to a directory -# 2. /conf (project_root = parent of this file's directory) -# 3. /conf (flattened deployment) -# 4. Fallback to /conf even if missing (subsequent file operations will raise) +"""Module providing reusable configuration directory resolution. +Resolution order: +1. CONF_DIR env var if it exists and points to a directory +2. 
/conf (project_root = parent of this file's directory) +3. /conf (flattened deployment) +4. Fallback to /conf even if missing (subsequent file operations will raise) +""" + +import os def resolve_conf_dir(env_var: str = "CONF_DIR"): + """Resolve the configuration directory path. + + Args: + env_var: Name of the environment variable to check first (defaults to CONF_DIR). + Returns: + Tuple (conf_dir, invalid_env) where conf_dir is the chosen directory path and + invalid_env is the rejected env var path if provided but invalid, else None. + """ project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) current_dir = os.path.dirname(__file__) diff --git a/src/event_gate_lambda.py b/src/event_gate_lambda.py index 345f307..f4eb4fd 100644 --- a/src/event_gate_lambda.py +++ b/src/event_gate_lambda.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. # + +"""Event Gate Lambda function implementation.""" import base64 import json import logging @@ -29,6 +31,8 @@ from jsonschema import validate from jsonschema.exceptions import ValidationError +from . import writer_eventbridge, writer_kafka, writer_postgres + # Import configuration directory symbols with explicit ImportError fallback try: from .conf_path import CONF_DIR, INVALID_CONF_ENV # type: ignore[no-redef] @@ -39,8 +43,6 @@ _CONF_DIR = CONF_DIR _INVALID_CONF_ENV = INVALID_CONF_ENV -from . 
import writer_eventbridge, writer_kafka, writer_postgres - urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) logger = logging.getLogger(__name__) @@ -49,9 +51,9 @@ if not logger.handlers: logger.addHandler(logging.StreamHandler()) logger.debug("Initialized LOGGER") -logger.debug(f"Using CONF_DIR={_CONF_DIR}") +logger.debug("Using CONF_DIR=%s", _CONF_DIR) if _INVALID_CONF_ENV: - logger.warning(f"CONF_DIR env var set to non-existent path: {_INVALID_CONF_ENV}; fell back to {_CONF_DIR}") + logger.warning("CONF_DIR env var set to non-existent path: %s; fell back to %s", _INVALID_CONF_ENV, _CONF_DIR) with open(os.path.join(_CONF_DIR, "api.yaml"), "r", encoding="utf-8") as file: API = file.read() diff --git a/src/writer_eventbridge.py b/src/writer_eventbridge.py index 13dfab5..eae6332 100644 --- a/src/writer_eventbridge.py +++ b/src/writer_eventbridge.py @@ -1,3 +1,19 @@ +# +# Copyright 2025 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + """EventBridge writer module. Provides initialization and write functionality for publishing events to AWS EventBridge. diff --git a/src/writer_kafka.py b/src/writer_kafka.py index a8790c1..692aeaa 100644 --- a/src/writer_kafka.py +++ b/src/writer_kafka.py @@ -1,3 +1,19 @@ +# +# Copyright 2025 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + """Kafka writer module. Initializes a Confluent Kafka Producer and publishes messages for a topic. @@ -15,7 +31,7 @@ except (ImportError, ModuleNotFoundError): # pragma: no cover - fallback for test stub class KafkaException(Exception): # type: ignore - pass + """Fallback KafkaException if confluent_kafka is not installed.""" STATE: Dict[str, Any] = {"logger": logging.getLogger(__name__), "producer": None} @@ -77,7 +93,7 @@ def write(topic_name: str, message: Dict[str, Any]) -> Tuple[bool, Optional[str] errors: list[Any] = [] try: - logger.debug(f"Sending to kafka {topic_name}") + logger.debug("Sending to kafka %s", topic_name) producer.produce( topic_name, key="", diff --git a/src/writer_postgres.py b/src/writer_postgres.py index 14bcf93..28289e8 100644 --- a/src/writer_postgres.py +++ b/src/writer_postgres.py @@ -1,3 +1,19 @@ +# +# Copyright 2025 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + """Postgres writer module. Handles optional initialization via AWS Secrets Manager and topic-based inserts into Postgres. 
@@ -15,6 +31,21 @@ except ImportError: # pragma: no cover - environment without psycopg2 psycopg2 = None # type: ignore +# Define a unified psycopg2 error base for safe exception handling even if psycopg2 missing +if psycopg2 is not None: # type: ignore + try: # pragma: no cover - attribute presence depends on installed psycopg2 variant + PsycopgError = psycopg2.Error # type: ignore[attr-defined] + except AttributeError: # pragma: no cover + + class PsycopgError(Exception): # type: ignore + """Shim psycopg2 error base when psycopg2 provides no Error attribute.""" + +else: # fallback shim when psycopg2 absent + + class PsycopgError(Exception): # type: ignore + """Shim psycopg2 error base when psycopg2 is not installed.""" + + # Module level globals for typing _logger: logging.Logger = logging.getLogger(__name__) POSTGRES: Dict[str, Any] = {"database": ""} @@ -260,7 +291,7 @@ def write(topic_name: str, message: Dict[str, Any]) -> Tuple[bool, Optional[str] return False, msg connection.commit() # type: ignore - except Exception as e: # pragma: no cover - defensive (still tested though) + except (RuntimeError, PsycopgError) as e: # narrowed exception set err_msg = f"The Postgres writer with failed unknown error: {str(e)}" _logger.error(err_msg) return False, err_msg From 78fbc13f348c66738a3a6aceef9a3fcfa9c432e8 Mon Sep 17 00:00:00 2001 From: Oto Macenauer Date: Wed, 10 Sep 2025 14:51:37 +0200 Subject: [PATCH 12/12] Update Python environment setup and enhance local development documentation --- DEVELOPER.md | 4 ++-- README.md | 50 +++++++++--------------------------------------- requirements.txt | 2 +- 3 files changed, 12 insertions(+), 44 deletions(-) diff --git a/DEVELOPER.md b/DEVELOPER.md index 7811941..df23aca 100644 --- a/DEVELOPER.md +++ b/DEVELOPER.md @@ -18,8 +18,8 @@ cd EventGate ## Set Up Python Environment ```shell -python3 -m venv venv -source venv/bin/activate +python3 -m venv .venv +source .venv/bin/activate pip install -r requirements.txt ``` diff --git 
a/README.md b/README.md index 6e50ade..b404b26 100644 --- a/README.md +++ b/README.md @@ -123,48 +123,16 @@ Use when Kafka access needs Kerberos / SASL_SSL or custom `librdkafka` build. 4. `terraform apply` ## Local Development & Testing -Install Python tooling (Python 3.11 suggested) then: -``` -python -m venv .venv -source .venv/bin/activate -pip install -r requirements.txt -``` -Run full test suite (quiet): -``` -pytest -q -``` -Run with coverage (terminal + per-file missing lines) enforcing 80% minimum: -``` -pytest --cov=. --cov-report=term-missing --cov-fail-under=80 -``` -Run a single test file or filtered tests: -``` -pytest tests/test_writer_kafka.py -q -pytest -k kafka -q -``` -Type checking & lint: -``` -mypy . -pylint $(git ls-files '*.py') -``` -Format: -``` -black $(git ls-files '*.py') -``` -Key environment variables for local runs / tests: -- LOG_LEVEL: Override logging level (default INFO) -- KAFKA_FLUSH_TIMEOUT: Seconds for Kafka producer.flush(timeout) (default 5) -Kafka tests stub the real producer; EventBridge & S3 interactions are patched to avoid network calls. Tests that exercise module import side-effects (like loading config from S3) first patch boto3 / requests before importing the module to ensure deterministic behavior. - -To simulate local (non-S3) access config branch, see tests/test_event_gate_lambda_local_access.py which reloads the module with patched open(). 
- -To manually invoke the handler: -```python -from src import event_gate_lambda as m -resp = m.lambda_handler({"resource": "/topics"}, None) -print(resp) -``` +| Purpose | Relative link | +|---------|---------------| +| Get started | [Get Started](./DEVELOPER.md#get-started) | +| Python environment setup | [Set Up Python Environment](./DEVELOPER.md#set-up-python-environment) | +| Static code analysis (Pylint) | [Running Static Code Analysis](./DEVELOPER.md#running-static-code-analysis) | +| Formatting (Black) | [Run Black Tool Locally](./DEVELOPER.md#run-black-tool-locally) | +| Type checking (mypy) | [Run mypy Tool Locally](./DEVELOPER.md#run-mypy-tool-locally) | +| Unit tests | [Running Unit Test](./DEVELOPER.md#running-unit-test) | +| Code coverage | [Code Coverage](./DEVELOPER.md#code-coverage) | ## Security & Authorization - JWT tokens must be RS256 signed; the public key is fetched at cold start from `token_public_key_url` (DER base64 inside JSON `{ "key": "..." }`). diff --git a/requirements.txt b/requirements.txt index 965ad80..36aba72 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,4 +12,4 @@ PyJWT==2.10.1 requests==2.32.5 boto3==1.40.25 confluent-kafka==2.11.1 -psycopg2==2.9.10 \ No newline at end of file +psycopg2==2.9.10