Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 97 additions & 18 deletions services/logging_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,30 +5,109 @@
import logging
import json
import sys
from datetime import datetime, timezone
from typing import Any, Dict, Optional, Tuple


class JsonFormatter(logging.Formatter):
    """Formatter that outputs JSON lines with a minimal structured schema.

    It safely serializes record attributes and includes any extra attributes
    supplied via the ``extra`` parameter on logging calls. Extras may never
    overwrite the core payload keys (``level``, ``time``, ``logger``,
    ``message``, ``exc_text``) or the standard ``LogRecord`` attributes.
    """

    # Standard LogRecord attributes plus the names already emitted in the
    # core payload; attributes with these names are never copied as extras,
    # so user-supplied context cannot clobber the structured schema.
    _RESERVED_KEYS = frozenset({
        "level", "time", "logger", "message", "exc_text",
        "name", "msg", "args", "levelname", "levelno", "pathname", "filename",
        "module", "exc_info", "stack_info", "lineno", "funcName",
        "created", "msecs", "relativeCreated", "thread", "threadName",
        "processName", "process", "taskName",
    })

    def formatTime(self, record: logging.LogRecord, datefmt: Optional[str] = None) -> str:
        """Render the record timestamp as UTC ISO-8601 with millisecond precision.

        ``datefmt`` is accepted for interface compatibility but ignored: the
        output format is deliberately fixed so every log line is uniform.
        """
        dt = datetime.fromtimestamp(record.created, tz=timezone.utc)
        return dt.isoformat(timespec="milliseconds")

    def format(self, record: logging.LogRecord) -> str:  # type: ignore[override]
        """Serialize *record* to a single compact JSON line.

        Returns:
            A JSON string with the core schema, rendered exception text when
            ``exc_info`` is set, and any JSON-serializable extras (falling
            back to ``str()`` for non-serializable values).
        """
        log_record: Dict[str, Any] = {
            "level": record.levelname,
            "time": self.formatTime(record, self.datefmt),
            "logger": record.name,
            "message": record.getMessage(),
        }

        # Include rendered traceback text when exception info is attached.
        if record.exc_info:
            log_record["exc_text"] = self.formatException(record.exc_info)

        # Attach any extra attributes provided via ``extra={...}``.
        for key, value in record.__dict__.items():
            if key in self._RESERVED_KEYS:
                continue
            # Only include JSON-serializable extras; fall back to str().
            # Narrow catch: these are the errors json.dumps can raise.
            try:
                json.dumps(value)
            except (TypeError, ValueError, OverflowError, RecursionError):
                log_record[key] = str(value)
            else:
                log_record[key] = value

        # Compact separators keep each line short; ensure_ascii=False keeps
        # non-ASCII message text readable in the emitted logs.
        try:
            return json.dumps(log_record, ensure_ascii=False, separators=(",", ":"))
        except (TypeError, ValueError):
            # Defensive fallback: emit a minimal but valid payload rather
            # than let a logging call raise into application code.
            return json.dumps({"level": record.levelname, "message": record.getMessage()})


def setup_logging(level: int = logging.INFO, *, force: bool = False) -> None:
    """Configure the root logger to emit JSON logs to stdout.

    Args:
        level: Logging level for the root logger.
        force: If True, replace existing StreamHandlers. Default False (adds a
            handler only if no stdout stream handler is present).

    Notes:
        Call this once at application startup (for example from ``main.py``).
        Idempotent by default: repeated calls do not stack duplicate handlers.
    """
    root_logger = logging.getLogger()
    root_logger.setLevel(level)

    # Detect existing StreamHandler(s) that write to stdout specifically.
    stdout_handlers = [
        h for h in root_logger.handlers
        if isinstance(h, logging.StreamHandler) and getattr(h, "stream", None) is sys.stdout
    ]

    # If a stdout handler already exists and we're not forcing an update, do nothing.
    if stdout_handlers and not force:
        return

    # If forcing, remove existing StreamHandler(s) but leave file/syslog
    # handlers intact so other configured sinks keep working.
    if force:
        root_logger.handlers = [
            h for h in root_logger.handlers if not isinstance(h, logging.StreamHandler)
        ]

    # Only add the stdout JSON handler if one isn't already present (avoid duplicates).
    stdout_already = any(
        isinstance(h, logging.StreamHandler) and getattr(h, "stream", None) is sys.stdout
        for h in root_logger.handlers
    )
    if not stdout_already:
        handler = logging.StreamHandler(sys.stdout)
        handler.setFormatter(JsonFormatter())
        root_logger.addHandler(handler)


class RequestLoggerAdapter(logging.LoggerAdapter):
    """Convenience adapter to attach contextual info (like request_id) to logs.

    Usage:
        logger = RequestLoggerAdapter(logging.getLogger(__name__), {"request_id": rid})
        logger.info("started")
    """

    def process(self, msg: str, kwargs: Dict[str, Any]) -> Tuple[str, Dict[str, Any]]:
        """Merge the adapter's context into ``kwargs['extra']``.

        Caller-supplied keys always win; the adapter's context only fills
        in the gaps, and the existing ``extra`` dict is updated in place.
        """
        context = kwargs.setdefault("extra", {})
        adapter_context = self.extra or {}
        for key in adapter_context:
            if key not in context:
                context[key] = adapter_context[key]
        return msg, kwargs


# Example: Call `setup_logging()` at app startup (e.g., in main.py or api/__init__.py)