Skip to content

Commit

Permalink
Use secure logging for exceptions (#1645)
Browse files Browse the repository at this point in the history
  • Loading branch information
YuanTingHsieh committed Apr 4, 2023
1 parent 8c79b70 commit f96cfa7
Show file tree
Hide file tree
Showing 25 changed files with 105 additions and 71 deletions.
3 changes: 2 additions & 1 deletion nvflare/apis/utils/fl_context_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from nvflare.apis.shareable import Shareable
from nvflare.fuel.sec.audit import AuditService
from nvflare.fuel.utils import fobs
from nvflare.security.logging import secure_format_exception

logger = logging.getLogger("fl_context_utils")

Expand All @@ -31,7 +32,7 @@ def get_serializable_data(fl_ctx: FLContext):
fobs.dumps(v)
new_fl_ctx.props[k] = v
except BaseException as e:
msg = f"Object in FLContext with key {k} and type {type(v)} is not serializable (discarded): {e}"
msg = f"Object in FLContext with key {k} and type {type(v)} is not serializable (discarded): {secure_format_exception(e)}"
logger.warning(generate_log_message(fl_ctx, msg))

return new_fl_ctx
Expand Down
9 changes: 5 additions & 4 deletions nvflare/app_common/executors/splitnn_learner_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from nvflare.apis.signal import Signal
from nvflare.app_common.abstract.learner_spec import Learner
from nvflare.app_common.workflows.splitnn_workflow import SplitNNConstants
from nvflare.security.logging import secure_format_exception


class SplitNNLearnerExecutor(Executor):
Expand Down Expand Up @@ -51,7 +52,7 @@ def handle_event(self, event_type: str, fl_ctx: FLContext):
if self.learner:
self.learner.abort(fl_ctx)
except Exception as e:
self.log_exception(fl_ctx, f"learner abort exception: {e}")
self.log_exception(fl_ctx, f"learner abort exception: {secure_format_exception(e)}")
elif event_type == EventType.END_RUN:
self.finalize(fl_ctx)

Expand All @@ -63,7 +64,7 @@ def initialize(self, fl_ctx: FLContext):
raise TypeError(f"learner must be Learner type. Got: {type(self.learner)}")
self.learner.initialize(engine.get_all_components(), fl_ctx)
except Exception as e:
self.log_exception(fl_ctx, f"learner initialize exception: {e}")
self.log_exception(fl_ctx, f"learner initialize exception: {secure_format_exception(e)}")

def execute(self, task_name: str, shareable: Shareable, fl_ctx: FLContext, abort_signal: Signal) -> Shareable:
self.log_info(fl_ctx, f"Client trainer got task: {task_name}")
Expand All @@ -81,12 +82,12 @@ def execute(self, task_name: str, shareable: Shareable, fl_ctx: FLContext, abort
return make_reply(ReturnCode.TASK_UNKNOWN)
except Exception as e:
# Task execution error, return EXECUTION_EXCEPTION Shareable
self.log_exception(fl_ctx, f"learner execute exception: {e}")
self.log_exception(fl_ctx, f"learner execute exception: {secure_format_exception(e)}")
return make_reply(ReturnCode.EXECUTION_EXCEPTION)

def finalize(self, fl_ctx: FLContext):
try:
if self.learner:
self.learner.finalize(fl_ctx)
except Exception as e:
self.log_exception(fl_ctx, f"learner finalize exception: {e}")
self.log_exception(fl_ctx, f"learner finalize exception: {secure_format_exception(e)}")
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from nvflare.app_common.statistics.statisitcs_objects_decomposer import fobs_registration
from nvflare.app_common.statistics.statistics_config_utils import get_feature_bin_range
from nvflare.fuel.utils import fobs
from nvflare.security.logging import secure_format_exception


class StatisticsTaskHandler(TaskHandler):
Expand Down Expand Up @@ -110,7 +111,7 @@ def _populate_result_statistics(self, statistics_result, ds_features, tm: Statis
self.log_exception(
fl_ctx,
f"Failed to populate result statistics of dataset {ds_name}"
f" and feature {feature.feature_name} with exception: {e}",
f" and feature {feature.feature_name} with exception: {secure_format_exception(e)}",
)

def get_numeric_features(self) -> Dict[str, List[Feature]]:
Expand Down
5 changes: 3 additions & 2 deletions nvflare/app_common/workflows/splitnn_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from nvflare.app_common.abstract.shareable_generator import ShareableGenerator
from nvflare.app_common.app_constant import AppConstants
from nvflare.app_common.app_event_type import AppEventType
from nvflare.security.logging import secure_format_exception
from nvflare.widgets.info_collector import GroupInfoCollector, InfoCollector


Expand Down Expand Up @@ -254,9 +255,9 @@ def control_flow(self, abort_signal: Signal, fl_ctx: FLContext):
self._phase = AppConstants.PHASE_FINISHED
self.log_debug(fl_ctx, "SplitNN training ended.")
except BaseException as e:
error_msg = f"SplitNN control_flow exception {e}"
error_msg = f"SplitNN control_flow exception {secure_format_exception(e)}"
self.log_error(fl_ctx, error_msg)
self.system_panic(str(e), fl_ctx)
self.system_panic(error_msg, fl_ctx)

def stop_controller(self, fl_ctx: FLContext):
self._phase = AppConstants.PHASE_FINISHED
Expand Down
15 changes: 11 additions & 4 deletions nvflare/fuel/f3/cellnet/cell.py
Original file line number Diff line number Diff line change
Expand Up @@ -605,7 +605,10 @@ def drop_connectors(self):
self.communicator.remove_connector(self.bb_ext_connector.handle)
self.communicator.remove_endpoint(FQCN.ROOT_SERVER)
except Exception as ex:
self.log_error(msg=None, log_text=f"{self.my_info.fqcn}: error removing bb_ext_connector {ex}")
self.log_error(
msg=None,
log_text=f"{self.my_info.fqcn}: error removing bb_ext_connector {secure_format_exception(ex)}",
)
self.bb_ext_connector = None

# drop ad-hoc connectors to cells on server
Expand Down Expand Up @@ -1058,7 +1061,7 @@ def _send_to_endpoint(self, to_endpoint: Endpoint, message: Message) -> str:
category=self._stats_category(message), value=self._msg_size_mbs(message)
)
except Exception as ex:
err_text = f"Failed to send message to {to_endpoint.name}: {ex}"
err_text = f"Failed to send message to {to_endpoint.name}: {secure_format_exception(ex)}"
self.log_error(err_text, message)
self.logger.debug(secure_format_traceback())
err = ReturnCode.COMM_ERROR
Expand Down Expand Up @@ -1912,7 +1915,9 @@ def state_change(self, endpoint: Endpoint):
self.logger.debug(f"{self.my_info.fqcn}: calling cell_connected_cb")
self.cell_connected_cb(agent, *self.cell_connected_cb_args, **self.cell_connected_cb_kwargs)
except Exception as ex:
self.log_error(f"exception in cell_connected_cb: {ex}", None, log_except=True)
self.log_error(
f"exception in cell_connected_cb: {secure_format_exception(ex)}", None, log_except=True
)

elif endpoint.state in [EndpointState.CLOSING, EndpointState.DISCONNECTED, EndpointState.IDLE]:
# remove this agent
Expand All @@ -1926,7 +1931,9 @@ def state_change(self, endpoint: Endpoint):
agent, *self.cell_disconnected_cb_args, **self.cell_disconnected_cb_kwargs
)
except Exception as ex:
self.log_error(f"exception in cell_disconnected_cb: {ex}", None, log_except=True)
self.log_error(
f"exception in cell_disconnected_cb: {secure_format_exception(ex)}", None, log_except=True
)

def get_sub_cell_names(self) -> Tuple[List[str], List[str]]:
"""
Expand Down
8 changes: 4 additions & 4 deletions nvflare/fuel/f3/cellnet/connector_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,14 @@
import logging
import os
import time
import traceback
from typing import Union

from nvflare.fuel.common.excepts import ConfigError
from nvflare.fuel.f3.cellnet.defs import ConnectorRequirementKey
from nvflare.fuel.f3.cellnet.fqcn import FqcnInfo
from nvflare.fuel.f3.comm_config import CommConfigurator
from nvflare.fuel.f3.communicator import CommError, Communicator, Mode
from nvflare.security.logging import secure_format_exception, secure_format_traceback

_KEY_RESOURCES = "resources"
_KEY_INT = "internal"
Expand Down Expand Up @@ -209,11 +209,11 @@ def _get_connector(

return ConnectorData(handle, connect_url, active)
except CommError as ex:
self.logger.error(f"Failed to get connector: {ex}")
self.logger.error(f"Failed to get connector: {secure_format_exception(ex)}")
return None
except Exception as ex:
self.logger.error(f"Unexpected exception: {ex}")
self.logger.error(traceback.format_exc())
self.logger.error(f"Unexpected exception: {secure_format_exception(ex)}")
self.logger.error(secure_format_traceback())
return None

def get_external_listener(self, url: str, adhoc: bool) -> Union[None, ConnectorData]:
Expand Down
3 changes: 2 additions & 1 deletion nvflare/fuel/f3/cellnet/net_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@
from nvflare.fuel.hci.reg import CommandModule, CommandModuleSpec, CommandSpec
from nvflare.fuel.hci.server.constants import ConnProps
from nvflare.fuel.utils.stats_utils import VALID_HIST_MODES, parse_hist_mode
from nvflare.security.logging import secure_format_exception


def _to_int(s: str):
try:
return int(s)
except Exception as ex:
return f"'{s}' is not a valid number: {ex}"
return f"'{s}' is not a valid number: {secure_format_exception(ex)}"


class NetManager(CommandModule):
Expand Down
3 changes: 2 additions & 1 deletion nvflare/fuel/f3/comm_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

from nvflare.fuel.f3.drivers.net_utils import MAX_PAYLOAD_SIZE
from nvflare.fuel.utils.config_service import ConfigService
from nvflare.security.logging import secure_format_exception

_comm_config_files = ["comm_config.json", "comm_config.json.default"]

Expand Down Expand Up @@ -46,7 +47,7 @@ def __init__(self):
self.logger.debug(f"config file {file_name} not found from config path")
config = None
except Exception as ex:
self.logger.error(f"failed to load config file {file_name}: {ex}")
self.logger.error(f"failed to load config file {file_name}: {secure_format_exception(ex)}")
config = None
self.config = config

Expand Down
5 changes: 4 additions & 1 deletion nvflare/fuel/f3/communicator.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from nvflare.fuel.f3.endpoint import Endpoint, EndpointMonitor
from nvflare.fuel.f3.message import Message, MessageReceiver
from nvflare.fuel.f3.sfm.conn_manager import ConnManager, Mode
from nvflare.security.logging import secure_format_exception

log = logging.getLogger(__name__)
_running_instances = weakref.WeakSet()
Expand Down Expand Up @@ -77,7 +78,9 @@ def stop(self):
try:
_running_instances.remove(self)
except KeyError as ex:
log.error(f"Logical error, communicator {self.local_endpoint.name} is not started: {ex}")
log.error(
f"Logical error, communicator {self.local_endpoint.name} is not started: {secure_format_exception(ex)}"
)

log.debug(f"Communicator endpoint: {self.local_endpoint.name} has stopped")

Expand Down
7 changes: 4 additions & 3 deletions nvflare/fuel/f3/drivers/aio_conn.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from nvflare.fuel.f3.drivers.net_utils import MAX_FRAME_SIZE
from nvflare.fuel.f3.sfm.prefix import PREFIX_LEN, Prefix
from nvflare.fuel.hci.security import get_certificate_common_name
from nvflare.security.logging import secure_format_exception

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -59,7 +60,7 @@ def send_frame(self, frame: BytesAlike):
try:
self.aio_ctx.run_coro(self._async_send_frame(frame))
except Exception as ex:
log.error(f"Error calling send coroutine for connection {self}: {ex}")
log.error(f"Error calling send coroutine for connection {self}: {secure_format_exception(ex)}")

async def read_loop(self):
try:
Expand All @@ -74,7 +75,7 @@ async def read_loop(self):
except CancelledError as error:
log.debug(f"Connection {self} is closed by peer: {error}")
except Exception as ex:
log.error(f"Read error for connection {self}: {ex}")
log.error(f"Read error for connection {self}: {secure_format_exception(ex)}")

# Internal methods

Expand All @@ -83,7 +84,7 @@ async def _async_send_frame(self, frame: BytesAlike):
self.writer.write(frame)
await self.writer.drain()
except Exception as ex:
log.error(f"Error sending frame for connection {self}: {ex}")
log.error(f"Error sending frame for connection {self}: {secure_format_exception(ex)}")

async def _async_read_frame(self):

Expand Down
6 changes: 4 additions & 2 deletions nvflare/fuel/f3/drivers/aio_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
import threading
import time

from nvflare.security.logging import secure_format_exception


class AioContext:
"""Asyncio context. Used to share the asyncio event loop among multiple classes"""
Expand Down Expand Up @@ -51,7 +53,7 @@ def run_aio_loop(self):
self.loop.run_forever()
self.loop.run_until_complete(self.loop.shutdown_asyncgens())
except Exception as ex:
self.logger.error(f"error running aio loop: {ex}")
self.logger.error(f"error running aio loop: {secure_format_exception(ex)}")
raise ex
finally:
self.logger.debug(f"{self.name}: AIO Loop run done!")
Expand Down Expand Up @@ -85,7 +87,7 @@ def stop_aio_loop(self, grace=1.0):
try:
self.loop.call_soon_threadsafe(self.loop.stop)
except Exception as ex:
self.logger.debug(f"Loop stopping error: {ex}")
self.logger.debug(f"Loop stopping error: {secure_format_exception(ex)}")

start = time.time()
while self.loop.is_running():
Expand Down
Loading

0 comments on commit f96cfa7

Please sign in to comment.