Skip to content

Commit

Permalink
Issues/104 (#105)
Browse files Browse the repository at this point in the history
* Run black + remove unused imports.

* Log request at different levels depending on request status. Fixes #104.

* Add tests for new feature. Related to #104.

* Cache request's body to make it accessible after processing.

* Apply right log level to multipart log as well.

Co-authored-by: sebastien <s.gelis@ksd.fr>
  • Loading branch information
KSD-France and sebastien committed Aug 28, 2020
1 parent 089f843 commit 9bfe9c9
Show file tree
Hide file tree
Showing 6 changed files with 264 additions and 211 deletions.
2 changes: 1 addition & 1 deletion request_logging/decorators.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from functools import wraps
from .middleware import NO_LOGGING_ATTR, NO_LOGGING_MSG_ATTR, NO_LOGGING_MSG


Expand All @@ -7,4 +6,5 @@ def wrapper(func):
setattr(func, NO_LOGGING_ATTR, True)
setattr(func, NO_LOGGING_MSG_ATTR, (msg if msg else NO_LOGGING_MSG) if not silent else None)
return func

return wrapper
173 changes: 93 additions & 80 deletions request_logging/middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import re

from django.conf import settings

try:
# Django >= 1.10
from django.urls import resolve, Resolver404
Expand All @@ -14,28 +15,28 @@
DEFAULT_HTTP_4XX_LOG_LEVEL = logging.ERROR
DEFAULT_COLORIZE = True
DEFAULT_MAX_BODY_LENGTH = 50000 # log no more than 3k bytes of content
DEFAULT_SENSITIVE_HEADERS = ['HTTP_AUTHORIZATION', 'HTTP_PROXY_AUTHORIZATION']
DEFAULT_SENSITIVE_HEADERS = ["HTTP_AUTHORIZATION", "HTTP_PROXY_AUTHORIZATION"]
SETTING_NAMES = {
'log_level': 'REQUEST_LOGGING_DATA_LOG_LEVEL',
'http_4xx_log_level': 'REQUEST_LOGGING_HTTP_4XX_LOG_LEVEL',
'legacy_colorize': 'REQUEST_LOGGING_DISABLE_COLORIZE',
'colorize': 'REQUEST_LOGGING_ENABLE_COLORIZE',
'max_body_length': 'REQUEST_LOGGING_MAX_BODY_LENGTH',
'sensitive_headers': 'REQUEST_LOGGING_SENSITIVE_HEADERS',
"log_level": "REQUEST_LOGGING_DATA_LOG_LEVEL",
"http_4xx_log_level": "REQUEST_LOGGING_HTTP_4XX_LOG_LEVEL",
"legacy_colorize": "REQUEST_LOGGING_DISABLE_COLORIZE",
"colorize": "REQUEST_LOGGING_ENABLE_COLORIZE",
"max_body_length": "REQUEST_LOGGING_MAX_BODY_LENGTH",
"sensitive_headers": "REQUEST_LOGGING_SENSITIVE_HEADERS",
}
BINARY_REGEX = re.compile(r'(.+Content-Type:.*?)(\S+)/(\S+)(?:\r\n)*(.+)', re.S | re.I)
BINARY_TYPES = ('image', 'application')
NO_LOGGING_ATTR = 'no_logging'
NO_LOGGING_MSG_ATTR = 'no_logging_msg'
NO_LOGGING_MSG = 'No logging for this endpoint'
request_logger = logging.getLogger('django.request')
BINARY_REGEX = re.compile(r"(.+Content-Type:.*?)(\S+)/(\S+)(?:\r\n)*(.+)", re.S | re.I)
BINARY_TYPES = ("image", "application")
NO_LOGGING_ATTR = "no_logging"
NO_LOGGING_MSG_ATTR = "no_logging_msg"
NO_LOGGING_MSG = "No logging for this endpoint"
request_logger = logging.getLogger("django.request")


class Logger:
def log(self, level, msg, logging_context):
args = logging_context['args']
kwargs = logging_context['kwargs']
for line in re.split(r'\r?\n', str(msg)):
args = logging_context["args"]
kwargs = logging_context["kwargs"]
for line in re.split(r"\r?\n", str(msg)):
request_logger.log(level, line, *args, **kwargs)

def log_error(self, level, msg, logging_context):
Expand All @@ -56,9 +57,9 @@ def log_error(self, level, msg, logging_context):
self._log(level, msg, self.log_error_colour, logging_context)

def _log(self, level, msg, colour, logging_context):
args = logging_context['args']
kwargs = logging_context['kwargs']
for line in re.split(r'\r?\n', str(msg)):
args = logging_context["args"]
kwargs = logging_context["kwargs"]
for line in re.split(r"\r?\n", str(msg)):
line = colorize(line, fg=colour)
request_logger.log(level, line, *args, **kwargs)

Expand All @@ -67,59 +68,67 @@ class LoggingMiddleware(object):
def __init__(self, get_response=None):
self.get_response = get_response

self.log_level = getattr(settings, SETTING_NAMES['log_level'], DEFAULT_LOG_LEVEL)
self.http_4xx_log_level = getattr(settings, SETTING_NAMES['http_4xx_log_level'], DEFAULT_HTTP_4XX_LOG_LEVEL)
self.sensitive_headers = getattr(settings, SETTING_NAMES['sensitive_headers'], DEFAULT_SENSITIVE_HEADERS)
self.log_level = getattr(settings, SETTING_NAMES["log_level"], DEFAULT_LOG_LEVEL)
self.http_4xx_log_level = getattr(settings, SETTING_NAMES["http_4xx_log_level"], DEFAULT_HTTP_4XX_LOG_LEVEL)
self.sensitive_headers = getattr(settings, SETTING_NAMES["sensitive_headers"], DEFAULT_SENSITIVE_HEADERS)
if not isinstance(self.sensitive_headers, list):
raise ValueError(
"{} should be list. {} is not list.".format(SETTING_NAMES['sensitive_headers'], self.sensitive_headers)
"{} should be list. {} is not list.".format(SETTING_NAMES["sensitive_headers"], self.sensitive_headers)
)

for log_attr in ('log_level', 'http_4xx_log_level'):
for log_attr in ("log_level", "http_4xx_log_level"):
level = getattr(self, log_attr)
if level not in [logging.NOTSET, logging.DEBUG, logging.INFO,
logging.WARNING, logging.ERROR, logging.CRITICAL]:
if level not in [
logging.NOTSET,
logging.DEBUG,
logging.INFO,
logging.WARNING,
logging.ERROR,
logging.CRITICAL,
]:
raise ValueError("Unknown log level({}) in setting({})".format(level, SETTING_NAMES[log_attr]))

# TODO: remove deprecated legacy settings
enable_colorize = getattr(settings, SETTING_NAMES['legacy_colorize'], None)
enable_colorize = getattr(settings, SETTING_NAMES["legacy_colorize"], None)
if enable_colorize is None:
enable_colorize = getattr(settings, SETTING_NAMES['colorize'], DEFAULT_COLORIZE)
enable_colorize = getattr(settings, SETTING_NAMES["colorize"], DEFAULT_COLORIZE)

if not isinstance(enable_colorize, bool):
raise ValueError(
"{} should be boolean. {} is not boolean.".format(SETTING_NAMES['colorize'], enable_colorize)
"{} should be boolean. {} is not boolean.".format(SETTING_NAMES["colorize"], enable_colorize)
)

self.max_body_length = getattr(settings, SETTING_NAMES['max_body_length'], DEFAULT_MAX_BODY_LENGTH)
self.max_body_length = getattr(settings, SETTING_NAMES["max_body_length"], DEFAULT_MAX_BODY_LENGTH)
if not isinstance(self.max_body_length, int):
raise ValueError(
"{} should be int. {} is not int.".format(SETTING_NAMES['max_body_length'], self.max_body_length)
"{} should be int. {} is not int.".format(SETTING_NAMES["max_body_length"], self.max_body_length)
)

self.logger = ColourLogger("cyan", "magenta") if enable_colorize else Logger()
self.boundary = ''
self.boundary = ""
self.cached_request_body = None

def __call__(self, request):
self.process_request( request )
response = self.get_response( request )
self.process_response( request, response )
self.cached_request_body = request.body
response = self.get_response(request)
self.process_request(request, response)
self.process_response(request, response)
return response

def process_request(self, request):
def process_request(self, request, response=None):
skip_logging, because = self._should_log_route(request)
if skip_logging:
if because is not None:
return self._skip_logging_request(request, because)
else:
return self._log_request(request)
return self._log_request(request, response)

def _should_log_route(self, request):
# request.urlconf may be set by middleware or application level code.
# Use this urlconf if present or default to None.
# https://docs.djangoproject.com/en/2.1/topics/http/urls/#how-django-processes-a-request
# https://docs.djangoproject.com/en/2.1/ref/request-response/#attributes-set-by-middleware
urlconf = getattr(request, 'urlconf', None)
urlconf = getattr(request, "urlconf", None)

try:
route_match = resolve(request.path, urlconf=urlconf)
Expand All @@ -130,15 +139,15 @@ def _should_log_route(self, request):
view = route_match.func
func = view
# This is for "django rest framework"
if hasattr(view, 'cls'):
if hasattr(view, 'actions'):
if hasattr(view, "cls"):
if hasattr(view, "actions"):
actions = view.actions
method_name = actions.get(method)
if method_name:
func = getattr(view.cls, view.actions[method], None)
else:
func = getattr(view.cls, method, None)
elif hasattr(view, 'view_class'):
elif hasattr(view, "view_class"):
# This is for django class-based views
func = getattr(view.view_class, method, None)
no_logging = getattr(func, NO_LOGGING_ATTR, False)
Expand All @@ -148,46 +157,56 @@ def _should_log_route(self, request):
def _skip_logging_request(self, request, reason):
method_path = "{} {}".format(request.method, request.get_full_path())
no_log_context = {
'args': (),
'kwargs': {
'extra': {
'no_logging': reason
},
},
"args": (),
"kwargs": {"extra": {"no_logging": reason}},
}
self.logger.log(logging.INFO, method_path + " (not logged because '" + reason + "')", no_log_context)

def _log_request(self, request):
def _log_request(self, request, response):
method_path = "{} {}".format(request.method, request.get_full_path())

logging_context = self._get_logging_context(request, None)
self.logger.log(logging.INFO, method_path, logging_context)
self._log_request_headers(request, logging_context)
self._log_request_body(request, logging_context)

def _log_request_headers(self, request, logging_context):
headers = {k: v if k not in self.sensitive_headers else '*****' for k, v in request.META.items() if k.startswith('HTTP_')}
# Determine log level depending on response status
log_level = self.log_level
if response is not None:
if response.status_code in range(400, 500):
log_level = self.http_4xx_log_level
elif response.status_code in range(500, 600):
log_level = logging.ERROR

self.logger.log(logging.INFO, method_path, logging_context)
self._log_request_headers(request, logging_context, log_level)
self._log_request_body(request, logging_context, log_level)

def _log_request_headers(self, request, logging_context, log_level):
headers = {
k: v if k not in self.sensitive_headers else "*****"
for k, v in request.META.items()
if k.startswith("HTTP_")
}

if headers:
self.logger.log(self.log_level, headers, logging_context)
self.logger.log(log_level, headers, logging_context)

def _log_request_body(self, request, logging_context):
if request.body:
content_type = request.META.get('CONTENT_TYPE', '')
is_multipart = content_type.startswith('multipart/form-data')
def _log_request_body(self, request, logging_context, log_level):
if self.cached_request_body is not None:
content_type = request.META.get("CONTENT_TYPE", "")
is_multipart = content_type.startswith("multipart/form-data")
if is_multipart:
self.boundary = '--' + content_type[30:] # First 30 characters are "multipart/form-data; boundary="
self.boundary = "--" + content_type[30:] # First 30 characters are "multipart/form-data; boundary="
if is_multipart:
self._log_multipart(self._chunked_to_max(request.body), logging_context)
self._log_multipart(self._chunked_to_max(self.cached_request_body), logging_context, log_level)
else:
self.logger.log(self.log_level, self._chunked_to_max(request.body), logging_context)
self.logger.log(log_level, self._chunked_to_max(self.cached_request_body), logging_context)

def process_response(self, request, response):
resp_log = "{} {} - {}".format(request.method, request.get_full_path(), response.status_code)
skip_logging, because = self._should_log_route(request)
if skip_logging:
if because is not None:
self.logger.log_error(logging.INFO, resp_log, {'args': {}, 'kwargs': { 'extra' : { 'no_logging': because } }})
self.logger.log_error(
logging.INFO, resp_log, {"args": {}, "kwargs": {"extra": {"no_logging": because}}}
)
return response
logging_context = self._get_logging_context(request, response)

Expand All @@ -214,16 +233,11 @@ def _get_logging_context(self, request, response):
This allows the logging context to be created per process request/response call.
"""
return {
'args': (),
'kwargs': {
'extra': {
'request': request,
'response': response,
},
},
"args": (),
"kwargs": {"extra": {"request": request, "response": response}},
}

def _log_multipart(self, body, logging_context):
def _log_multipart(self, body, logging_context, log_level):
"""
Splits multipart body into parts separated by "boundary", then matches each part to BINARY_REGEX
which searches for existence of "Content-Type" and capture of what type is this part.
Expand All @@ -233,34 +247,33 @@ def _log_multipart(self, body, logging_context):
try:
body_str = body.decode()
except UnicodeDecodeError:
self.logger.log(self.log_level, "(multipart/form)", logging_context)
self.logger.log(log_level, "(multipart/form)", logging_context)
return

parts = body_str.split(self.boundary)
last = len(parts) - 1
for i, part in enumerate(parts):
if 'Content-Type:' in part:
if "Content-Type:" in part:
match = BINARY_REGEX.search(part)
if match and match.group(2) in BINARY_TYPES and not match.group(4) in ('', '\r\n'):
part = match.expand(r'\1\2/\3\r\n\r\n(binary data)\r\n')
if match and match.group(2) in BINARY_TYPES and not match.group(4) in ("", "\r\n"):
part = match.expand(r"\1\2/\3\r\n\r\n(binary data)\r\n")

if i != last:
part = part + self.boundary

self.logger.log(self.log_level, part, logging_context)
self.logger.log(log_level, part, logging_context)

def _log_resp(self, level, response, logging_context):
if re.match('^application/json', response.get('Content-Type', ''), re.I):
if re.match("^application/json", response.get("Content-Type", ""), re.I):
self.logger.log(level, response._headers, logging_context)
if response.streaming:
# There's a chance that if it's streaming it's because large and it might hit
# the max_body_length very often. Not to mention that StreamingHttpResponse
# documentation advises to iterate only once on the content.
# So the idea here is to just _not_ log it.
self.logger.log(level, '(data_stream)', logging_context)
self.logger.log(level, "(data_stream)", logging_context)
else:
self.logger.log(level, self._chunked_to_max(response.content),
logging_context)
self.logger.log(level, self._chunked_to_max(response.content), logging_context)

def _chunked_to_max(self, msg):
return msg[0:self.max_body_length]
36 changes: 17 additions & 19 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,21 +10,21 @@


def read(f):
return open(f, 'r', encoding='utf-8').read()
return open(f, "r", encoding="utf-8").read()


def get_version(package):
"""
Return package version as listed in `__version__` in `init.py`.
"""
init_py = open(os.path.join(package, '__init__.py')).read()
init_py = open(os.path.join(package, "__init__.py")).read()
return re.search("__version__ = ['\"]([^'\"]+)['\"]", init_py).group(1)


version = get_version('request_logging')
version = get_version("request_logging")


if sys.argv[-1] == 'publish':
if sys.argv[-1] == "publish":
if os.system("pip freeze | grep twine"):
print("twine not installed.\nUse `pip install twine`.\nExiting.")
sys.exit()
Expand All @@ -33,24 +33,22 @@ def get_version(package):
print("You probably want to also tag the version now:")
print(" git tag -a %s -m 'version %s'" % (version, version))
print(" git push --tags")
shutil.rmtree('dist')
shutil.rmtree('build')
shutil.rmtree("dist")
shutil.rmtree("build")
sys.exit()


setup(
name='django-request-logging',
name="django-request-logging",
version=version,
description='Django middleware that logs http request body.',
long_description=read('README.md'),
long_description_content_type='text/markdown',
url='https://github.com/Rhumbix/django-request-logging.git',
author='Rhumbix',
author_email='dev@rhumbix.com',
license='MIT',
packages=['request_logging'],
install_requires=[
'Django',
],
zip_safe=False
description="Django middleware that logs http request body.",
long_description=read("README.md"),
long_description_content_type="text/markdown",
url="https://github.com/Rhumbix/django-request-logging.git",
author="Rhumbix",
author_email="dev@rhumbix.com",
license="MIT",
packages=["request_logging"],
install_requires=["Django"],
zip_safe=False,
)
Loading

0 comments on commit 9bfe9c9

Please sign in to comment.