Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ The test suite uses SQLite, so you can develop and test without connecting to th
**Note**: As of this writing, Apple M1 Macs cannot run Oracle Instant Client.
* If you are on a machine that can run Oracle Instant Client, follow the steps outlined in [Without Docker](#without-docker).

The data retrieved by the Carbon application contains personally identifiable information (PII), so downloading the files is not recommended. However, if examining the files created by Carbon is **absolutely necessary** for testing purposes, this can be done on your local machine via a Docker container. For more information, please refer to the Confluence document: [How to download files from an application that connects to the Data Warehouse](https://mitlibraries.atlassian.net/wiki/spaces/~712020c6fcb37f39c94e54a6bfd09607f29eb4/pages/3469443097/Running+applications+in+a+local+Docker+Container).
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great warning that I should reuse in alma-patronload


**Note:** Any downloaded files or `.env` files must be **immediately deleted** after testing is complete.

#### With Docker

1. Run `make dependencies` to download the Oracle Instant Client from S3.
Expand All @@ -47,6 +51,7 @@ The test suite uses SQLite, so you can develop and test without connecting to th

4. Run any `make` commands for testing the application. In the Makefile, the names of relevant make commands will contain the suffix '-with-docker'.


#### Without Docker

1. Download [Oracle Instant Client](https://www.oracle.com/database/technologies/instant-client/downloads.html) (basiclite is sufficient) and set the `ORACLE_LIB_DIR` env variable.
Expand Down
38 changes: 15 additions & 23 deletions carbon/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,26 +46,6 @@ def ntransfercmd(self, cmd: str, rest: str | int | None = None) -> tuple[socket,
)
return conn, size

def storbinary(
self,
cmd: str,
fp: IO, # type: ignore[override]
blocksize: int = 8192,
callback: Callable | None = None,
rest: str | None = None, # type: ignore[override]
) -> str:
"""Store a file in binary mode."""
self.voidcmd("TYPE I")
with self.transfercmd(cmd, rest) as conn:
while 1:
buf = fp.read(blocksize)
if not buf:
break
conn.sendall(buf)
if callback:
callback(buf)
return self.voidresp()


class FileWriter:
"""A writer that outputs normalized XML strings to a specified file.
Expand All @@ -91,8 +71,11 @@ def write(self, feed_type: str) -> None:
elif feed_type == "articles":
xml_feed = ArticlesXmlFeed(engine=self.engine, output_file=self.output_file)
xml_feed.run()

logger.info(
"The feed has collected %s '%s' records", xml_feed.record_count, feed_type
"The '%s' feed has processed %s records.",
feed_type,
xml_feed.processed_record_count,
)


Expand Down Expand Up @@ -164,8 +147,17 @@ def __call__(self) -> None:
ftps.connect(host=self.host, port=self.port)
ftps.login(user=self.user, passwd=self.password)
ftps.prot_p()
ftps.storbinary(cmd=f"STOR {self.path}", fp=self.content_feed)
ftps.quit()
try:
ftps.storbinary(cmd=f"STOR {self.path}", fp=self.content_feed)
except TimeoutError:
logger.warning(
"Timeout occurred but XML file may have been uploaded anyway. \
Please check the Elements FTP server to confirm."
)
except Exception:
raise
else:
ftps.quit()


class DatabaseToFilePipe:
Expand Down
4 changes: 3 additions & 1 deletion carbon/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,9 @@ def main(*, output_file: IO, run_connection_tests: bool, use_sns_logging: bool)
pipe.run_connection_test()

if not run_connection_tests:
logger.info("Carbon run has started.")
logger.info(
"Carbon run for the '%s' feed has started.", config_values["FEED_TYPE"]
)
if use_sns_logging:
sns_log(config_values=config_values, status="start")
try:
Expand Down
4 changes: 2 additions & 2 deletions carbon/feed.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class BaseXmlFeed(ABC):

root_element_name: str = ""
query: Select = select()
record_count: int = 0
processed_record_count: int = 0

def __init__(self, engine: DatabaseEngine, output_file: IO):
self.engine = engine
Expand All @@ -52,7 +52,6 @@ def records(self) -> Generator[dict[str, Any], Any, None]:
with closing(self.engine().connect()) as connection:
result = connection.execute(self.query)
for row in result:
self.record_count += 1
yield dict(zip(result.keys(), row, strict=True))

@abstractmethod
Expand Down Expand Up @@ -101,6 +100,7 @@ def run(self, **kwargs: dict[str, Any]) -> None:
for record in self.records:
element = self._add_element(record)
xml_file.write(element)
self.processed_record_count += 1


class ArticlesXmlFeed(BaseXmlFeed):
Expand Down