3 changes: 3 additions & 0 deletions Makefile
@@ -96,3 +96,6 @@ dist-stage:
docker login -u AWS -p $$(aws ecr get-login-password --region us-east-1) $(ECR_URL_STAGE)
docker push $(ECR_URL_STAGE):latest
docker push $(ECR_URL_STAGE):`git describe --always`

database-connection-test-stage: # run after the annual Data Warehouse password change to confirm that the new password works
aws ecs run-task --cluster carbon-ecs-stage --task-definition carbon-ecs-stage-people --launch-type="FARGATE" --region us-east-1 --network-configuration '{"awsvpcConfiguration": {"subnets": ["subnet-05df31ac28dd1a4b0","subnet-04cfa272d4f41dc8a"], "securityGroups": ["sg-0f11e2619db7da196"],"assignPublicIp": "DISABLED"}}' --overrides '{"containerOverrides": [ {"name": "carbon-ecs-stage", "command": ["--database_connection_test"]}]}'
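
For reference, the same one-off task can be launched from Python with boto3 rather than the AWS CLI. A minimal sketch, assuming `stage` AWS credentials are already exported; the cluster, task definition, subnets, and security group mirror the Makefile target above:

```python
# Sketch: the database connection test as a boto3 run_task call.
# Assumes stage AWS credentials are available to boto3.
import boto3

ecs = boto3.client("ecs", region_name="us-east-1")
ecs.run_task(
    cluster="carbon-ecs-stage",
    taskDefinition="carbon-ecs-stage-people",
    launchType="FARGATE",
    networkConfiguration={
        "awsvpcConfiguration": {
            "subnets": ["subnet-05df31ac28dd1a4b0", "subnet-04cfa272d4f41dc8a"],
            "securityGroups": ["sg-0f11e2619db7da196"],
            "assignPublicIp": "DISABLED",
        }
    },
    overrides={
        "containerOverrides": [
            {"name": "carbon-ecs-stage", "command": ["--database_connection_test"]}
        ]
    },
)
```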
36 changes: 27 additions & 9 deletions README.md
@@ -10,27 +10,45 @@ Carbon is a tool for generating a feed of people that can be loaded into Symplec
* To lint the repo: `make lint`
* To run the app: `pipenv run carbon --help`

The Data Warehouse runs on a older version of Oracle that necessitates the thick mode of python-oracledb which requires the Oracle Instant Client Library (this app was developed with version 21.9.0.0.0.). The test suite uses SQLite, so you can develop and test without connecting to the Data Warehouse.

If you do need to connect to the data warehouse, you have two options, one using Docker and one without.
The Data Warehouse runs on an older version of Oracle that necessitates the `thick` mode of python-oracledb, which requires the Oracle Instant Client Library (this app was developed with version 21.9.0.0.0). The test suite uses SQLite, so you can develop and test without connecting to the Data Warehouse.
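
A minimal sketch of what enabling `thick` mode looks like with SQLAlchemy and python-oracledb — the DSN and `lib_dir` below are illustrative placeholders, not the app's real values:

```python
# Sketch: a SQLAlchemy engine running python-oracledb in thick mode.
# The connection string and lib_dir are placeholders; omit lib_dir when
# the Instant Client is already on the default library search path.
from sqlalchemy import create_engine

engine = create_engine(
    "oracle+oracledb://user:password@warehouse.example.edu:1521/?service_name=dwrhs",
    thick_mode={"lib_dir": "/opt/oracle/instantclient_21_9"},
)
```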

### With Docker

Note: as of this writing, the Apple M1 Macs cannot run Oracle Instant Client, so Docker is the only option for development on those machines.
Note: As of this writing, Apple M1 Macs cannot run the Oracle Instant Client, so Docker is the only option for development on those machines.

From the project folder:

1. Run `make dependencies` with appropriate AWS credentials.
1. Export AWS credentials for the `dev1` environment.

2. Run `make dependencies` to download the Oracle Instant Client from S3.

3. Run `make dist-dev` to build the Docker container image.

4. Run `make publish-dev` to push the Docker container image to ECR for the `dev1` environment.

2. Run `make dist-dev` to build the container.
5. Run any `make` commands for testing the application.

Any tests that involve connecting to the Data Warehouse must be run as an ECS task in `stage`, which requires building the Docker container image and publishing it to ECR for the `stage` environment. As noted in step 1, the appropriate AWS credentials for `stage` must be set to run these commands. The `ECR_NAME_STAGE` and `ECR_URL_STAGE` environment variables must also be set; their values correspond to the 'Repository name' and 'URI' shown on ECR for the container image, respectively.
> **Contributor:** Excellent description
3. Run `docker run carbon-dev:latest`.

### Without Docker

1. Download Oracle Instant Client (basiclite is sufficient) and set the ORACLE_LIB_DIR env variable.
1. Download Oracle Instant Client (basiclite is sufficient) and set the `ORACLE_LIB_DIR` env variable.

2. Run `pipenv run carbon`.

## Connecting to the Data Warehouse

The password for the Data Warehouse is updated each year. To verify that the updated password works, the app must be run as an ECS task in the `stage` environment because Cloudconnector is not enabled in `dev1`. The app runs a database connection test when called with the `--database_connection_test` flag (see the sketch after the steps below).

2. Run pipenv run patronload.
1. Export stage credentials and set `ECR_NAME_STAGE` and `ECR_URL_STAGE` env variables.
2. Run `make install`.
3. Run `make database-connection-test-stage`.
4. View the logs from the ECS task run on CloudWatch.
* On CloudWatch, select the `carbon-ecs-stage` log group.
* Select the most recent log stream.
* Verify that the following log is included:
> Successfully connected to the Data Warehouse: \<VERSION NUMBER\>
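
Under the hood, the flag exercises the two calls this PR adds to `carbon/db.py`. A minimal sketch using those names (the connection string is a placeholder):

```python
# Sketch: what --database_connection_test runs (names from carbon/db.py).
from carbon.db import engine

engine.configure("oracle+oracledb://user:password@host:1521/sid", thick_mode=True)
engine.run_connection_test()  # logs success (with the Oracle version) or failure
```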

## Deploying

8 changes: 4 additions & 4 deletions carbon/app.py
@@ -147,8 +147,8 @@ def people() -> Generator[dict[str, Any], Any, None]:
.where(persons.c.PERSONNEL_SUBAREA_CODE.in_(PS_CODES))
.where(func.upper(persons.c.JOB_TITLE).in_(TITLES))
)
with closing(engine().connect()) as conn:
result = conn.execute(sql)
with closing(engine().connect()) as connection:
result = connection.execute(sql)
for row in result:
yield dict(zip(result.keys(), row, strict=True))

@@ -165,8 +165,8 @@ def articles() -> Generator[dict[str, Any], Any, None]:
.where(aa_articles.c.DOI.is_not(None))
.where(aa_articles.c.MIT_ID.is_not(None))
)
with closing(engine().connect()) as conn:
result = conn.execute(sql)
with closing(engine().connect()) as connection:
result = connection.execute(sql)
for row in result:
yield dict(zip(result.keys(), row, strict=True))

47 changes: 26 additions & 21 deletions carbon/cli.py
@@ -12,7 +12,8 @@

@click.command()
@click.version_option()
def main() -> None:
@click.option("--database_connection_test", is_flag=True)
def main(*, database_connection_test: bool) -> None:
"""Generate feeds for Symplectic Elements.

Specify which FEED_TYPE should be generated. This should be either
@@ -33,23 +34,27 @@ def main() -> None:
--ftp should be used.
"""
config_values = load_config_values()
sns_log(config_values=config_values, status="start")

try:
root_logger = logging.getLogger()
logger.info(configure_logger(root_logger, os.getenv("LOG_LEVEL", "INFO")))
configure_sentry()
logger.info(
"Carbon config settings loaded for environment: %s",
config_values["WORKSPACE"],
)

engine.configure(config_values["CONNECTION_STRING"])

click.echo(f"Starting carbon run for {config_values['FEED_TYPE']}")
FTPFeeder({"feed_type": config_values["FEED_TYPE"]}, config_values).run()
click.echo(f"Finished carbon run for {config_values['FEED_TYPE']}")
except RuntimeError:
sns_log(config_values=config_values, status="fail")
else:
sns_log(config_values=config_values, status="success")
# [TEMP]: The connection string must use 'oracle+oracledb' to differentiate
# between the cx_Oracle and python-oracledb drivers
config_values["CONNECTION_STRING"] = config_values["CONNECTION_STRING"].replace(
"oracle", "oracle+oracledb"
)
root_logger = logging.getLogger()
logger.info(configure_logger(root_logger, os.getenv("LOG_LEVEL", "INFO")))
configure_sentry()
logger.info(
"Carbon config settings loaded for environment: %s",
config_values["WORKSPACE"],
)

engine.configure(config_values["CONNECTION_STRING"], thick_mode=True)
engine.run_connection_test()

if not database_connection_test:
sns_log(config_values=config_values, status="start")
try:
FTPFeeder({"feed_type": config_values["FEED_TYPE"]}, config_values).run()
except Exception as error: # noqa: BLE001
sns_log(config_values=config_values, status="fail", error=error)
else:
sns_log(config_values=config_values, status="success")
31 changes: 29 additions & 2 deletions carbon/db.py
@@ -1,4 +1,6 @@
import logging
import os
from typing import Any

from sqlalchemy import (
Column,
@@ -14,6 +16,8 @@
create_engine,
)

logger = logging.getLogger(__name__)

os.environ["NLS_LANG"] = "AMERICAN_AMERICA.UTF8"

metadata = MetaData()
@@ -101,8 +105,31 @@ def __call__(self) -> Engine:
)
raise AttributeError(nonconfigured_engine_error_message)

def configure(self, conn: str) -> None:
self._engine = self._engine or create_engine(conn)
def configure(self, connection_string: str, **kwargs: Any) -> None: # noqa: ANN401
self._engine = self._engine or create_engine(connection_string, **kwargs)

def run_connection_test(self) -> None:
"""Test connection to the Data Warehouse.

Verify that the provided Data Warehouse credentials can be used
to successfully connect to the Data Warehouse.
"""
logger.info("Testing connection to the Data Warehouse")
try:
connection = self._engine.connect() # type: ignore[union-attr]
except Exception as error:
error_message = f"Failed to connect to the Data Warehouse: {error}"
logger.exception(error_message)
else:
dbapi_connection = connection.connection
version = (
dbapi_connection.version if hasattr(dbapi_connection, "version") else ""
)
logger.info(
"Successfully connected to the Data Warehouse: %s",
version, # type: ignore[union-attr]
)
connection.close()


# create the database engine
30 changes: 15 additions & 15 deletions tests/conftest.py
@@ -95,23 +95,23 @@ def ftp_server_wrapper(ftp_server):

@pytest.fixture
def _load_data(records, aa_data):
with closing(engine().connect()) as conn:
conn.execute(persons.delete())
conn.execute(orcids.delete())
conn.execute(dlcs.delete())
conn.execute(aa_articles.delete())
with closing(engine().connect()) as connection:
connection.execute(persons.delete())
connection.execute(orcids.delete())
connection.execute(dlcs.delete())
connection.execute(aa_articles.delete())
for r in records:
conn.execute(persons.insert(), r["person"])
conn.execute(orcids.insert(), r["orcid"])
conn.execute(dlcs.insert(), r["dlc"])
conn.execute(aa_articles.insert(), aa_data)
conn.commit()
connection.execute(persons.insert(), r["person"])
connection.execute(orcids.insert(), r["orcid"])
connection.execute(dlcs.insert(), r["dlc"])
connection.execute(aa_articles.insert(), aa_data)
connection.commit()
yield
with closing(engine().connect()) as conn:
conn.execute(persons.delete())
conn.execute(orcids.delete())
conn.execute(dlcs.delete())
conn.execute(aa_articles.delete())
with closing(engine().connect()) as connection:
connection.execute(persons.delete())
connection.execute(orcids.delete())
connection.execute(dlcs.delete())
connection.execute(aa_articles.delete())


@pytest.fixture
7 changes: 7 additions & 0 deletions tests/test_cli.py
@@ -122,3 +122,10 @@ def test_file_is_ftped(
assert result.exit_code == 0

assert os.path.exists(os.path.join(ftp_directory, "people.xml"))


def test_cli_database_connection_success(caplog, runner):
result = runner.invoke(main, ["--database_connection_test"])
assert result.exit_code == 0
assert "Testing connection to the Data Warehouse" in caplog.text
assert "Successfully connected to the Data Warehouse" in caplog.text