From 6388c9b92c4f8f6f9dc636637f81f246b79205d1 Mon Sep 17 00:00:00 2001 From: jonavellecuerdo Date: Tue, 22 Aug 2023 15:57:39 -0400 Subject: [PATCH 1/2] IN-886 Refactor Carbon app to use new Data Warehouse secret Why these changes are being introduced: * Use the JSON formatted secret in Secrets Manager to obtain credentials for the Data Warehouse and remove redundant secrets. How this addresses that need: * Configure the engine with 'CONNECTION_STRING' * Create database connection test and add CLI option to run test * Add Make command to run database connection test * Remove unused 'click.echo' commands * Add error handling when retrieving database versions Side effects of this change: * None Relevant ticket(s): * https://mitlibraries.atlassian.net/browse/IN-886 --- Makefile | 3 +++ README.md | 15 ++++++++++++++- carbon/cli.py | 47 ++++++++++++++++++++++++++--------------------- carbon/db.py | 26 ++++++++++++++++++++++++-- tests/test_cli.py | 7 +++++++ 5 files changed, 74 insertions(+), 24 deletions(-) diff --git a/Makefile b/Makefile index de8ee2b..146bbb8 100644 --- a/Makefile +++ b/Makefile @@ -96,3 +96,6 @@ dist-stage: docker login -u AWS -p $$(aws ecr get-login-password --region us-east-1) $(ECR_URL_STAGE) docker push $(ECR_URL_STAGE):latest docker push $(ECR_URL_STAGE):`git describe --always` + +database-connection-test-stage: # use after the Data Warehouse password is changed every year to confirm that the new password works. + aws ecs run-task --cluster carbon-ecs-stage --task-definition carbon-ecs-stage-people --launch-type="FARGATE" --region us-east-1 --network-configuration '{"awsvpcConfiguration": {"subnets": ["subnet-05df31ac28dd1a4b0","subnet-04cfa272d4f41dc8a"], "securityGroups": ["sg-0f11e2619db7da196"],"assignPublicIp": "DISABLED"}}' --overrides '{"containerOverrides": [ {"name": "carbon-ecs-stage", "command": ["--database_connection_test"]}]}' diff --git a/README.md b/README.md index 20bbdea..3c67f2e 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,20 @@ From the project folder: 1. Download Oracle Instant Client (basiclite is sufficient) and set the ORACLE_LIB_DIR env variable. -2. Run pipenv run patronload. +2. Run `pipenv run carbon`. + +## Connecting to the Data Warehouse + +The password for the Data Warehouse is updated each year. To verify that the updated password works, the app must be run as an ECS task in the stage environment because Cloudconnector is not enabled in `dev1`. The app can run a database connection test when called with the flag, `--database_connection_test`. + +1. Export stage credentials and set `ECR_NAME_STAGE` and `ECR_URL_STAGE` env variables. +2. Run `make install`. +3. Run `make database-connection-test-stage`. +4. View the logs from the ECS task run on CloudWatch. + * On CloudWatch, select the `carbon-ecs-stage` log group. + * Select the most recent log stream. + * Verify that the following log is included: + > Successfully connected to the Data Warehouse: \ ## Deploying diff --git a/carbon/cli.py b/carbon/cli.py index 1f01278..0dfe298 100644 --- a/carbon/cli.py +++ b/carbon/cli.py @@ -12,7 +12,8 @@ @click.command() @click.version_option() -def main() -> None: +@click.option("--database_connection_test", is_flag=True) +def main(*, database_connection_test: bool) -> None: """Generate feeds for Symplectic Elements. Specify which FEED_TYPE should be generated. This should be either @@ -33,23 +34,27 @@ def main() -> None: --ftp should be used. """ config_values = load_config_values() - sns_log(config_values=config_values, status="start") - - try: - root_logger = logging.getLogger() - logger.info(configure_logger(root_logger, os.getenv("LOG_LEVEL", "INFO"))) - configure_sentry() - logger.info( - "Carbon config settings loaded for environment: %s", - config_values["WORKSPACE"], - ) - - engine.configure(config_values["CONNECTION_STRING"]) - - click.echo(f"Starting carbon run for {config_values['FEED_TYPE']}") - FTPFeeder({"feed_type": config_values["FEED_TYPE"]}, config_values).run() - click.echo(f"Finished carbon run for {config_values['FEED_TYPE']}") - except RuntimeError: - sns_log(config_values=config_values, status="fail") - else: - sns_log(config_values=config_values, status="success") + # [TEMP]: The connection string must use 'oracle+oracledb' to differentiate + # between the cx_Oracle and python-oracledb drivers + config_values["CONNECTION_STRING"] = config_values["CONNECTION_STRING"].replace( + "oracle", "oracle+oracledb" + ) + root_logger = logging.getLogger() + logger.info(configure_logger(root_logger, os.getenv("LOG_LEVEL", "INFO"))) + configure_sentry() + logger.info( + "Carbon config settings loaded for environment: %s", + config_values["WORKSPACE"], + ) + + engine.configure(config_values["CONNECTION_STRING"], thick_mode=True) + engine.run_connection_test() + + if not database_connection_test: + sns_log(config_values=config_values, status="start") + try: + FTPFeeder({"feed_type": config_values["FEED_TYPE"]}, config_values).run() + except Exception as error: # noqa: BLE001 + sns_log(config_values=config_values, status="fail", error=error) + else: + sns_log(config_values=config_values, status="success") diff --git a/carbon/db.py b/carbon/db.py index 3b5b996..a17032d 100644 --- a/carbon/db.py +++ b/carbon/db.py @@ -1,4 +1,6 @@ +import logging import os +from typing import Any from sqlalchemy import ( Column, @@ -14,6 +16,8 @@ create_engine, ) +logger = logging.getLogger(__name__) + os.environ["NLS_LANG"] = "AMERICAN_AMERICA.UTF8" metadata = MetaData() @@ -101,8 +105,26 @@ def __call__(self) -> Engine: ) raise AttributeError(nonconfigured_engine_error_message) - def configure(self, conn: str) -> None: - self._engine = self._engine or create_engine(conn) + def configure(self, conn: str, **kwargs: Any) -> None: # noqa: ANN401 + self._engine = self._engine or create_engine(conn, **kwargs) + + def run_connection_test(self) -> None: + logger.info("Testing connection to the Data Warehouse") + try: + connection = self._engine.connect() # type: ignore[union-attr] + except Exception as error: + error_message = f"Failed to connect to the Data Warehouse: {error}" + logger.exception(error_message) + else: + dbapi_connection = connection.connection + version = ( + dbapi_connection.version if hasattr(dbapi_connection, "version") else "" + ) + logger.info( + "Successfully connected to the Data Warehouse: %s", + version, # type: ignore[union-attr] + ) + connection.close() # create the database engine diff --git a/tests/test_cli.py b/tests/test_cli.py index 37dc617..36d1675 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -122,3 +122,10 @@ def test_file_is_ftped( assert result.exit_code == 0 assert os.path.exists(os.path.join(ftp_directory, "people.xml")) + + +def test_cli_database_connection_success(caplog, runner): + result = runner.invoke(main, ["--database_connection_test"]) + assert result.exit_code == 0 + assert "Testing connection to the Data Warehouse" in caplog.text + assert "Successfully connected to the Data Warehouse" in caplog.text From 055cc63215bd3ebc03f39627c6c927e7a9c248ac Mon Sep 17 00:00:00 2001 From: jonavellecuerdo Date: Wed, 23 Aug 2023 14:57:28 -0400 Subject: [PATCH 2/2] Address comments * Update README * Clean up variable name 'conn' * Add docstring to DatabaseEngine.run_connection_test --- README.md | 23 ++++++++++++++--------- carbon/app.py | 8 ++++---- carbon/db.py | 9 +++++++-- tests/conftest.py | 30 +++++++++++++++--------------- 4 files changed, 40 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index 3c67f2e..ac837b3 100644 --- a/README.md +++ b/README.md @@ -10,31 +10,36 @@ Carbon is a tool for generating a feed of people that can be loaded into Symplec * To lint the repo: `make lint` * To run the app: `pipenv run carbon --help` -The Data Warehouse runs on a older version of Oracle that necessitates the thick mode of python-oracledb which requires the Oracle Instant Client Library (this app was developed with version 21.9.0.0.0.). The test suite uses SQLite, so you can develop and test without connecting to the Data Warehouse. - -If you do need to connect to the data warehouse, you have two options, one using Docker and one without. +The Data Warehouse runs on an older version of Oracle that necessitates the `thick` mode of python-oracledb, which requires the Oracle Instant Client Library (this app was developed with version 21.9.0.0.0). The test suite uses SQLite, so you can develop and test without connecting to the Data Warehouse. ### With Docker -Note: as of this writing, the Apple M1 Macs cannot run Oracle Instant Client, so Docker is the only option for development on those machines. +Note: As of this writing, the Apple M1 Macs cannot run Oracle Instant Client, so Docker is the only option for development on those machines. From the project folder: -1. Run `make dependencies` with appropriate AWS credentials. +1. Export AWS credentials for the `dev1` environment. + +2. Run `make dependencies` to download the Oracle Instant Client from S3. + +3. Run `make dist-dev` to build the Docker container image. + +4. Run `make publish-dev` to push the Docker container image to ECR for the `dev1` environment. + +5. Run any `make` commands for testing the application. -2. Run `make dist-dev` to build the container. +Any tests that involve connecting to the Data Warehouse will need to be run as an ECS task in `stage`, which requires building and publishing the Docker container image to ECR for the `stage` environment. As noted in step 1, the appropriate AWS credentials for the `stage` must be set to run the commands for building and publishing the Docker container image. The `ECR_NAME_STAGE` and `ECR_URL_STAGE` environment variables must also be set; the values correspond to the 'Repository name' and 'URI' indicated on ECR for the container image, respectively. -3. Run `docker run carbon-dev:latest`. ### Without Docker -1. Download Oracle Instant Client (basiclite is sufficient) and set the ORACLE_LIB_DIR env variable. +1. Download Oracle Instant Client (basiclite is sufficient) and set the `ORACLE_LIB_DIR` env variable. 2. Run `pipenv run carbon`. ## Connecting to the Data Warehouse -The password for the Data Warehouse is updated each year. To verify that the updated password works, the app must be run as an ECS task in the stage environment because Cloudconnector is not enabled in `dev1`. The app can run a database connection test when called with the flag, `--database_connection_test`. +The password for the Data Warehouse is updated each year. To verify that the updated password works, the app must be run as an ECS task in the `stage` environment because Cloudconnector is not enabled in `dev1`. The app can run a database connection test when called with the flag, `--database_connection_test`. 1. Export stage credentials and set `ECR_NAME_STAGE` and `ECR_URL_STAGE` env variables. 2. Run `make install`. diff --git a/carbon/app.py b/carbon/app.py index 471689a..dac9f44 100644 --- a/carbon/app.py +++ b/carbon/app.py @@ -147,8 +147,8 @@ def people() -> Generator[dict[str, Any], Any, None]: .where(persons.c.PERSONNEL_SUBAREA_CODE.in_(PS_CODES)) .where(func.upper(persons.c.JOB_TITLE).in_(TITLES)) ) - with closing(engine().connect()) as conn: - result = conn.execute(sql) + with closing(engine().connect()) as connection: + result = connection.execute(sql) for row in result: yield dict(zip(result.keys(), row, strict=True)) @@ -165,8 +165,8 @@ def articles() -> Generator[dict[str, Any], Any, None]: .where(aa_articles.c.DOI.is_not(None)) .where(aa_articles.c.MIT_ID.is_not(None)) ) - with closing(engine().connect()) as conn: - result = conn.execute(sql) + with closing(engine().connect()) as connection: + result = connection.execute(sql) for row in result: yield dict(zip(result.keys(), row, strict=True)) diff --git a/carbon/db.py b/carbon/db.py index a17032d..7702268 100644 --- a/carbon/db.py +++ b/carbon/db.py @@ -105,10 +105,15 @@ def __call__(self) -> Engine: ) raise AttributeError(nonconfigured_engine_error_message) - def configure(self, conn: str, **kwargs: Any) -> None: # noqa: ANN401 - self._engine = self._engine or create_engine(conn, **kwargs) + def configure(self, connection_string: str, **kwargs: Any) -> None: # noqa: ANN401 + self._engine = self._engine or create_engine(connection_string, **kwargs) def run_connection_test(self) -> None: + """Test connection to the Data Warehouse. + + Verify that the provided Data Warehouse credentials can be used + to successfully connect to the Data Warehouse. + """ logger.info("Testing connection to the Data Warehouse") try: connection = self._engine.connect() # type: ignore[union-attr] diff --git a/tests/conftest.py b/tests/conftest.py index 973d38e..757eccf 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -95,23 +95,23 @@ def ftp_server_wrapper(ftp_server): @pytest.fixture def _load_data(records, aa_data): - with closing(engine().connect()) as conn: - conn.execute(persons.delete()) - conn.execute(orcids.delete()) - conn.execute(dlcs.delete()) - conn.execute(aa_articles.delete()) + with closing(engine().connect()) as connection: + connection.execute(persons.delete()) + connection.execute(orcids.delete()) + connection.execute(dlcs.delete()) + connection.execute(aa_articles.delete()) for r in records: - conn.execute(persons.insert(), r["person"]) - conn.execute(orcids.insert(), r["orcid"]) - conn.execute(dlcs.insert(), r["dlc"]) - conn.execute(aa_articles.insert(), aa_data) - conn.commit() + connection.execute(persons.insert(), r["person"]) + connection.execute(orcids.insert(), r["orcid"]) + connection.execute(dlcs.insert(), r["dlc"]) + connection.execute(aa_articles.insert(), aa_data) + connection.commit() yield - with closing(engine().connect()) as conn: - conn.execute(persons.delete()) - conn.execute(orcids.delete()) - conn.execute(dlcs.delete()) - conn.execute(aa_articles.delete()) + with closing(engine().connect()) as connection: + connection.execute(persons.delete()) + connection.execute(orcids.delete()) + connection.execute(dlcs.delete()) + connection.execute(aa_articles.delete()) @pytest.fixture