From c6556bcddf957751dc84556da5417ec4b9fd8fac Mon Sep 17 00:00:00 2001 From: Rodrigo Tobar Date: Wed, 2 Sep 2020 16:28:12 +0800 Subject: [PATCH] Deal correctly with errors when archiving logfiles Before logfiles are rotated (i.e., removed) a series of processing tasks are performed onto them. Among these, the first one is to archive the logfile into the NGAS server itself, if the server has been configured for this. During normal operations this is not a problem, but exactly on the first try, when the janitor process has just been created and the HTTP server might not be bound yet, it might result on an ECONNREFUSED error. This produced big error messages on the logs, while in reality this is a transient error that should disappear on the next try. A second, more general problem, was found while inspecting this code: logfiles were not renamed back to have their original ".unsaved" extensions when errors happened. This meant that when errors in general were found (and in particular when ECONNREFUSED was raised) logfiles were not picked up by successive janitor cycles. This commit acknowledges these problems, improving the handling of the ECONNREFUSED error in particular, and of errors in general. On the on hand, when the ECONNREFUSED error is encountered we simply issue a warning log statement instead of letting the exception to propagate up through the stack. On the other hand, if *any* error happens during archiving we rename the file back to its original *.unsaved name so it gets picked up again in the next janitor cycle. To make code and error handling a bit simpler I took the chance of moving the archiving of files into a separate try_archiving() function, whose invocation is then surrounded by the error handling block. Additionally I also added a sorted() call to process unsaved logfiles in time order, which until now wasn't guaranteed (and is a nice property to have). This commit addresses #26. Signed-off-by: Rodrigo Tobar --- .../janitor/rotated_logfiles_handler.py | 35 +++++++++++++------ 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/src/ngamsServer/ngamsServer/janitor/rotated_logfiles_handler.py b/src/ngamsServer/ngamsServer/janitor/rotated_logfiles_handler.py index c6e4f0aa..1c9055fe 100644 --- a/src/ngamsServer/ngamsServer/janitor/rotated_logfiles_handler.py +++ b/src/ngamsServer/ngamsServer/janitor/rotated_logfiles_handler.py @@ -21,10 +21,12 @@ # """Deals with logging rotation files, archiving them, and removing old ones""" +import errno import functools import glob import logging import os +import socket from ngamsLib.ngamsCore import rmFile, mvFile, loadPlugInEntryPoint from ngamsLib import ngamsLib @@ -52,6 +54,17 @@ def get_logfile_handler_plugins(cfg): return _lh_plugins + +def try_archiving(cfg, srv, fname): + if not cfg.getArchiveRotatedLogfiles(): + return + file_uri = "file://" + fname + host, port = srv.get_self_endpoint() + proto = srv.get_server_access_proto() + ngamsPClient.ngamsPClient(host, port, proto=proto).archive( + file_uri, 'ngas/nglog') + + def run(srvObj, stopEvt): cfg = srvObj.getCfg() @@ -61,19 +74,21 @@ def run(srvObj, stopEvt): # ngamsServer.NgasRotatingFileHandler class, which is the file handler # attached to the root logger in the server process logger.debug("Checking if there are unsaved rotated logfiles") - for unsaved in glob.glob(os.path.join(logdir, 'LOG-ROTATE-*.nglog.unsaved')): + for unsaved in sorted(glob.glob(os.path.join(logdir, 'LOG-ROTATE-*.nglog.unsaved'))): # Remove the .unsaved bit, leave the rest fname = '.'.join(unsaved.split('.')[:-1]) - mvFile(unsaved, fname) - - # Connect to the server and send a pull ARCHIVE request - if cfg.getArchiveRotatedLogfiles(): - file_uri = "file://" + fname - host, port = srvObj.get_self_endpoint() - proto = srvObj.get_server_access_proto() - ngamsPClient.ngamsPClient(host, port, proto=proto).archive( - file_uri, 'ngas/nglog') + try: + mvFile(unsaved, fname) + try_archiving(cfg, srvObj, fname) + except Exception as e: + mvFile(fname, unsaved) + if isinstance(e, socket.error) and e.errno == errno.ECONNREFUSED: + # this is expected when the server is just starting up, + # let's ignore for now + logger.warning("Server not up yet, postponing processing of %s", unsaved) + continue + raise # Do additional things with our logfiles for plugin in get_logfile_handler_plugins(cfg):