Issues/104 (#105)

* Run black + remove unused imports. * Log request at different levels depending on request status. Fixes #104. * Add tests for new feature. Related to #104. * Cache request's body to make it accessible after processing. * Apply right log level to multipart log as well. Co-authored-by: sebastien <s.gelis@ksd.fr>
Rhumbix · Aug 28, 2020 · 9bfe9c9 · 9bfe9c9
1 parent 089f843
commit 9bfe9c9
Show file tree

Hide file tree

Showing 6 changed files with 264 additions and 211 deletions.
diff --git a/request_logging/decorators.py b/request_logging/decorators.py
@@ -1,4 +1,3 @@
-from functools import wraps
 from .middleware import NO_LOGGING_ATTR, NO_LOGGING_MSG_ATTR, NO_LOGGING_MSG
 
 
@@ -7,4 +6,5 @@ def wrapper(func):
         setattr(func, NO_LOGGING_ATTR, True)
         setattr(func, NO_LOGGING_MSG_ATTR, (msg if msg else NO_LOGGING_MSG) if not silent else None)
         return func
+
     return wrapper
diff --git a/request_logging/middleware.py b/request_logging/middleware.py
@@ -2,6 +2,7 @@
 import re
 
 from django.conf import settings
+
 try:
     # Django >= 1.10
     from django.urls import resolve, Resolver404
@@ -14,28 +15,28 @@
 DEFAULT_HTTP_4XX_LOG_LEVEL = logging.ERROR
 DEFAULT_COLORIZE = True
 DEFAULT_MAX_BODY_LENGTH = 50000  # log no more than 3k bytes of content
-DEFAULT_SENSITIVE_HEADERS = ['HTTP_AUTHORIZATION', 'HTTP_PROXY_AUTHORIZATION']
+DEFAULT_SENSITIVE_HEADERS = ["HTTP_AUTHORIZATION", "HTTP_PROXY_AUTHORIZATION"]
 SETTING_NAMES = {
-    'log_level': 'REQUEST_LOGGING_DATA_LOG_LEVEL',
-    'http_4xx_log_level': 'REQUEST_LOGGING_HTTP_4XX_LOG_LEVEL',
-    'legacy_colorize': 'REQUEST_LOGGING_DISABLE_COLORIZE',
-    'colorize': 'REQUEST_LOGGING_ENABLE_COLORIZE',
-    'max_body_length': 'REQUEST_LOGGING_MAX_BODY_LENGTH',
-    'sensitive_headers': 'REQUEST_LOGGING_SENSITIVE_HEADERS',
+    "log_level": "REQUEST_LOGGING_DATA_LOG_LEVEL",
+    "http_4xx_log_level": "REQUEST_LOGGING_HTTP_4XX_LOG_LEVEL",
+    "legacy_colorize": "REQUEST_LOGGING_DISABLE_COLORIZE",
+    "colorize": "REQUEST_LOGGING_ENABLE_COLORIZE",
+    "max_body_length": "REQUEST_LOGGING_MAX_BODY_LENGTH",
+    "sensitive_headers": "REQUEST_LOGGING_SENSITIVE_HEADERS",
 }
-BINARY_REGEX = re.compile(r'(.+Content-Type:.*?)(\S+)/(\S+)(?:\r\n)*(.+)', re.S | re.I)
-BINARY_TYPES = ('image', 'application')
-NO_LOGGING_ATTR = 'no_logging'
-NO_LOGGING_MSG_ATTR = 'no_logging_msg'
-NO_LOGGING_MSG = 'No logging for this endpoint'
-request_logger = logging.getLogger('django.request')
+BINARY_REGEX = re.compile(r"(.+Content-Type:.*?)(\S+)/(\S+)(?:\r\n)*(.+)", re.S | re.I)
+BINARY_TYPES = ("image", "application")
+NO_LOGGING_ATTR = "no_logging"
+NO_LOGGING_MSG_ATTR = "no_logging_msg"
+NO_LOGGING_MSG = "No logging for this endpoint"
+request_logger = logging.getLogger("django.request")
 
 
 class Logger:
     def log(self, level, msg, logging_context):
-        args = logging_context['args']
-        kwargs = logging_context['kwargs']
-        for line in re.split(r'\r?\n', str(msg)):
+        args = logging_context["args"]
+        kwargs = logging_context["kwargs"]
+        for line in re.split(r"\r?\n", str(msg)):
             request_logger.log(level, line, *args, **kwargs)
 
     def log_error(self, level, msg, logging_context):
@@ -56,9 +57,9 @@ def log_error(self, level, msg, logging_context):
         self._log(level, msg, self.log_error_colour, logging_context)
 
     def _log(self, level, msg, colour, logging_context):
-        args = logging_context['args']
-        kwargs = logging_context['kwargs']
-        for line in re.split(r'\r?\n', str(msg)):
+        args = logging_context["args"]
+        kwargs = logging_context["kwargs"]
+        for line in re.split(r"\r?\n", str(msg)):
             line = colorize(line, fg=colour)
             request_logger.log(level, line, *args, **kwargs)
 
@@ -67,59 +68,67 @@ class LoggingMiddleware(object):
     def __init__(self, get_response=None):
         self.get_response = get_response
 
-        self.log_level = getattr(settings, SETTING_NAMES['log_level'], DEFAULT_LOG_LEVEL)
-        self.http_4xx_log_level = getattr(settings, SETTING_NAMES['http_4xx_log_level'], DEFAULT_HTTP_4XX_LOG_LEVEL)
-        self.sensitive_headers = getattr(settings, SETTING_NAMES['sensitive_headers'], DEFAULT_SENSITIVE_HEADERS)
+        self.log_level = getattr(settings, SETTING_NAMES["log_level"], DEFAULT_LOG_LEVEL)
+        self.http_4xx_log_level = getattr(settings, SETTING_NAMES["http_4xx_log_level"], DEFAULT_HTTP_4XX_LOG_LEVEL)
+        self.sensitive_headers = getattr(settings, SETTING_NAMES["sensitive_headers"], DEFAULT_SENSITIVE_HEADERS)
         if not isinstance(self.sensitive_headers, list):
             raise ValueError(
-                "{} should be list. {} is not list.".format(SETTING_NAMES['sensitive_headers'], self.sensitive_headers)
+                "{} should be list. {} is not list.".format(SETTING_NAMES["sensitive_headers"], self.sensitive_headers)
             )
 
-        for log_attr in ('log_level', 'http_4xx_log_level'):
+        for log_attr in ("log_level", "http_4xx_log_level"):
             level = getattr(self, log_attr)
-            if level not in [logging.NOTSET, logging.DEBUG, logging.INFO,
-                             logging.WARNING, logging.ERROR, logging.CRITICAL]:
+            if level not in [
+                logging.NOTSET,
+                logging.DEBUG,
+                logging.INFO,
+                logging.WARNING,
+                logging.ERROR,
+                logging.CRITICAL,
+            ]:
                 raise ValueError("Unknown log level({}) in setting({})".format(level, SETTING_NAMES[log_attr]))
 
         # TODO: remove deprecated legacy settings
-        enable_colorize = getattr(settings, SETTING_NAMES['legacy_colorize'], None)
+        enable_colorize = getattr(settings, SETTING_NAMES["legacy_colorize"], None)
         if enable_colorize is None:
-            enable_colorize = getattr(settings, SETTING_NAMES['colorize'], DEFAULT_COLORIZE)
+            enable_colorize = getattr(settings, SETTING_NAMES["colorize"], DEFAULT_COLORIZE)
 
         if not isinstance(enable_colorize, bool):
             raise ValueError(
-                "{} should be boolean. {} is not boolean.".format(SETTING_NAMES['colorize'], enable_colorize)
+                "{} should be boolean. {} is not boolean.".format(SETTING_NAMES["colorize"], enable_colorize)
             )
 
-        self.max_body_length = getattr(settings, SETTING_NAMES['max_body_length'], DEFAULT_MAX_BODY_LENGTH)
+        self.max_body_length = getattr(settings, SETTING_NAMES["max_body_length"], DEFAULT_MAX_BODY_LENGTH)
         if not isinstance(self.max_body_length, int):
             raise ValueError(
-                "{} should be int. {} is not int.".format(SETTING_NAMES['max_body_length'], self.max_body_length)
+                "{} should be int. {} is not int.".format(SETTING_NAMES["max_body_length"], self.max_body_length)
             )
 
         self.logger = ColourLogger("cyan", "magenta") if enable_colorize else Logger()
-        self.boundary = ''
+        self.boundary = ""
+        self.cached_request_body = None
 
     def __call__(self, request):
-        self.process_request( request )
-        response = self.get_response( request )
-        self.process_response( request, response )
+        self.cached_request_body = request.body
+        response = self.get_response(request)
+        self.process_request(request, response)
+        self.process_response(request, response)
         return response
 
-    def process_request(self, request):
+    def process_request(self, request, response=None):
         skip_logging, because = self._should_log_route(request)
         if skip_logging:
             if because is not None:
                 return self._skip_logging_request(request, because)
         else:
-            return self._log_request(request)
+            return self._log_request(request, response)
 
     def _should_log_route(self, request):
         # request.urlconf may be set by middleware or application level code.
         # Use this urlconf if present or default to None.
         # https://docs.djangoproject.com/en/2.1/topics/http/urls/#how-django-processes-a-request
         # https://docs.djangoproject.com/en/2.1/ref/request-response/#attributes-set-by-middleware
-        urlconf = getattr(request, 'urlconf', None)
+        urlconf = getattr(request, "urlconf", None)
 
         try:
             route_match = resolve(request.path, urlconf=urlconf)
@@ -130,15 +139,15 @@ def _should_log_route(self, request):
         view = route_match.func
         func = view
         # This is for "django rest framework"
-        if hasattr(view, 'cls'):
-            if hasattr(view, 'actions'):
+        if hasattr(view, "cls"):
+            if hasattr(view, "actions"):
                 actions = view.actions
                 method_name = actions.get(method)
                 if method_name:
                     func = getattr(view.cls, view.actions[method], None)
             else:
                 func = getattr(view.cls, method, None)
-        elif hasattr(view, 'view_class'):
+        elif hasattr(view, "view_class"):
             # This is for django class-based views
             func = getattr(view.view_class, method, None)
         no_logging = getattr(func, NO_LOGGING_ATTR, False)
@@ -148,46 +157,56 @@ def _should_log_route(self, request):
     def _skip_logging_request(self, request, reason):
         method_path = "{} {}".format(request.method, request.get_full_path())
         no_log_context = {
-            'args': (),
-            'kwargs': {
-                'extra': {
-                    'no_logging': reason
-                },
-            },
+            "args": (),
+            "kwargs": {"extra": {"no_logging": reason}},
         }
         self.logger.log(logging.INFO, method_path + " (not logged because '" + reason + "')", no_log_context)
 
-    def _log_request(self, request):
+    def _log_request(self, request, response):
         method_path = "{} {}".format(request.method, request.get_full_path())
-
         logging_context = self._get_logging_context(request, None)
-        self.logger.log(logging.INFO, method_path, logging_context)
-        self._log_request_headers(request, logging_context)
-        self._log_request_body(request, logging_context)
 
-    def _log_request_headers(self, request, logging_context):
-        headers = {k: v if k not in self.sensitive_headers else '*****' for k, v in request.META.items() if k.startswith('HTTP_')}
+        # Determine log level depending on response status
+        log_level = self.log_level
+        if response is not None:
+            if response.status_code in range(400, 500):
+                log_level = self.http_4xx_log_level
+            elif response.status_code in range(500, 600):
+                log_level = logging.ERROR
+
+        self.logger.log(logging.INFO, method_path, logging_context)
+        self._log_request_headers(request, logging_context, log_level)
+        self._log_request_body(request, logging_context, log_level)
+
+    def _log_request_headers(self, request, logging_context, log_level):
+        headers = {
+            k: v if k not in self.sensitive_headers else "*****"
+            for k, v in request.META.items()
+            if k.startswith("HTTP_")
+        }
 
         if headers:
-            self.logger.log(self.log_level, headers, logging_context)
+            self.logger.log(log_level, headers, logging_context)
 
-    def _log_request_body(self, request, logging_context):
-        if request.body:
-            content_type = request.META.get('CONTENT_TYPE', '')
-            is_multipart = content_type.startswith('multipart/form-data')
+    def _log_request_body(self, request, logging_context, log_level):
+        if self.cached_request_body is not None:
+            content_type = request.META.get("CONTENT_TYPE", "")
+            is_multipart = content_type.startswith("multipart/form-data")
             if is_multipart:
-                self.boundary = '--' + content_type[30:]  # First 30 characters are "multipart/form-data; boundary="
+                self.boundary = "--" + content_type[30:]  # First 30 characters are "multipart/form-data; boundary="
             if is_multipart:
-                self._log_multipart(self._chunked_to_max(request.body), logging_context)
+                self._log_multipart(self._chunked_to_max(self.cached_request_body), logging_context, log_level)
             else:
-                self.logger.log(self.log_level, self._chunked_to_max(request.body), logging_context)
+                self.logger.log(log_level, self._chunked_to_max(self.cached_request_body), logging_context)
 
     def process_response(self, request, response):
         resp_log = "{} {} - {}".format(request.method, request.get_full_path(), response.status_code)
         skip_logging, because = self._should_log_route(request)
         if skip_logging:
             if because is not None:
-                self.logger.log_error(logging.INFO, resp_log, {'args': {}, 'kwargs': { 'extra' :  { 'no_logging': because } }})
+                self.logger.log_error(
+                    logging.INFO, resp_log, {"args": {}, "kwargs": {"extra": {"no_logging": because}}}
+                )
             return response
         logging_context = self._get_logging_context(request, response)
 
@@ -214,16 +233,11 @@ def _get_logging_context(self, request, response):
         This allows the logging context to be created per process request/response call.
         """
         return {
-            'args': (),
-            'kwargs': {
-                'extra': {
-                    'request': request,
-                    'response': response,
-                },
-            },
+            "args": (),
+            "kwargs": {"extra": {"request": request, "response": response}},
         }
 
-    def _log_multipart(self, body, logging_context):
+    def _log_multipart(self, body, logging_context, log_level):
         """
         Splits multipart body into parts separated by "boundary", then matches each part to BINARY_REGEX
         which searches for existence of "Content-Type" and capture of what type is this part.
@@ -233,34 +247,33 @@ def _log_multipart(self, body, logging_context):
         try:
             body_str = body.decode()
         except UnicodeDecodeError:
-            self.logger.log(self.log_level, "(multipart/form)", logging_context)
+            self.logger.log(log_level, "(multipart/form)", logging_context)
             return
 
         parts = body_str.split(self.boundary)
         last = len(parts) - 1
         for i, part in enumerate(parts):
-            if 'Content-Type:' in part:
+            if "Content-Type:" in part:
                 match = BINARY_REGEX.search(part)
-                if match and match.group(2) in BINARY_TYPES and not match.group(4) in ('', '\r\n'):
-                    part = match.expand(r'\1\2/\3\r\n\r\n(binary data)\r\n')
+                if match and match.group(2) in BINARY_TYPES and not match.group(4) in ("", "\r\n"):
+                    part = match.expand(r"\1\2/\3\r\n\r\n(binary data)\r\n")
 
             if i != last:
                 part = part + self.boundary
 
-            self.logger.log(self.log_level, part, logging_context)
+            self.logger.log(log_level, part, logging_context)
 
     def _log_resp(self, level, response, logging_context):
-        if re.match('^application/json', response.get('Content-Type', ''), re.I):
+        if re.match("^application/json", response.get("Content-Type", ""), re.I):
             self.logger.log(level, response._headers, logging_context)
             if response.streaming:
                 # There's a chance that if it's streaming it's because large and it might hit
                 # the max_body_length very often. Not to mention that StreamingHttpResponse
                 # documentation advises to iterate only once on the content.
                 # So the idea here is to just _not_ log it.
-                self.logger.log(level, '(data_stream)', logging_context)
+                self.logger.log(level, "(data_stream)", logging_context)
             else:
-                self.logger.log(level, self._chunked_to_max(response.content),
-                                logging_context)
+                self.logger.log(level, self._chunked_to_max(response.content), logging_context)
 
     def _chunked_to_max(self, msg):
         return msg[0:self.max_body_length]
diff --git a/setup.py b/setup.py
@@ -10,21 +10,21 @@
 
 
 def read(f):
-    return open(f, 'r', encoding='utf-8').read()
+    return open(f, "r", encoding="utf-8").read()
 
 
 def get_version(package):
     """
     Return package version as listed in `__version__` in `init.py`.
     """
-    init_py = open(os.path.join(package, '__init__.py')).read()
+    init_py = open(os.path.join(package, "__init__.py")).read()
     return re.search("__version__ = ['\"]([^'\"]+)['\"]", init_py).group(1)
 
 
-version = get_version('request_logging')
+version = get_version("request_logging")
 
 
-if sys.argv[-1] == 'publish':
+if sys.argv[-1] == "publish":
     if os.system("pip freeze | grep twine"):
         print("twine not installed.\nUse `pip install twine`.\nExiting.")
         sys.exit()
@@ -33,24 +33,22 @@ def get_version(package):
     print("You probably want to also tag the version now:")
     print("  git tag -a %s -m 'version %s'" % (version, version))
     print("  git push --tags")
-    shutil.rmtree('dist')
-    shutil.rmtree('build')
+    shutil.rmtree("dist")
+    shutil.rmtree("build")
     sys.exit()
 
 
 setup(
-    name='django-request-logging',
+    name="django-request-logging",
     version=version,
-    description='Django middleware that logs http request body.',
-    long_description=read('README.md'),
-    long_description_content_type='text/markdown',
-    url='https://github.com/Rhumbix/django-request-logging.git',
-    author='Rhumbix',
-    author_email='dev@rhumbix.com',
-    license='MIT',
-    packages=['request_logging'],
-    install_requires=[
-        'Django',
-    ],
-    zip_safe=False
+    description="Django middleware that logs http request body.",
+    long_description=read("README.md"),
+    long_description_content_type="text/markdown",
+    url="https://github.com/Rhumbix/django-request-logging.git",
+    author="Rhumbix",
+    author_email="dev@rhumbix.com",
+    license="MIT",
+    packages=["request_logging"],
+    install_requires=["Django"],
+    zip_safe=False,
 )