Skip to content
This repository has been archived by the owner on Jan 12, 2022. It is now read-only.

Send structured logs to Cloud Logging #18

Merged
merged 1 commit into from
Jul 23, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 74 additions & 0 deletions multicore_runtime/cloud_logging.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Provide a handler to log to Cloud Logging in JSON."""

import json
import logging
import math
import os

LOG_PATH_TEMPLATE = '/var/log/app_engine/app.{}.json'


class CloudLoggingHandler(logging.FileHandler):
"""A handler that emits logs to Cloud Logging.

Writes to the Cloud Logging directory, wrapped in JSON and with appropriate
metadata. The process of converting the user's formatted logs into a JSON
payload for Cloud Logging consumption is implemented as part of the handler
itself, and not as a formatting step, so as not to interfere with user-defined
logging formats.

The handler will look for HTTP header information in the environment (which
will be present in the GAE 1.0-compatible runtime) and, if it exists, will
parse the X-Cloud-Trace-Context header to add a Trace ID to each log record.

Logging calls can also alternatively 'trace_id' in as a field in the 'extra'
dict, which will be used preferentially to fill out the Trace ID metadata.
"""

def __init__(self):
# Large log entries will get mangled if multiple workers write to the same
# file simultaneously, so we'll use the worker's PID to pick a log filename.
filename = LOG_PATH_TEMPLATE.format(os.getpid())
super(CloudLoggingHandler, self).__init__(filename, delay=True)

def format(self, record):
"""Format the specified record default behavior, plus JSON and metadata."""
# First, let's just get the log string as it would normally be formatted.
message = super(CloudLoggingHandler, self).format(record)

# Now assemble a dictionary with the log string as the message.
payload = {
'message': message,
'timestamp': {'seconds': int(record.created),
'nanos': int(math.modf(record.created)[0] * 1000000000)},
'thread': record.thread,
'severity': record.levelname,}

# Now make a best effort to add the trace id.
# First, try to get the trace id from the 'extras' of the record.
trace_id = getattr(record, 'trace_id', None)

# If that didn't work, check if HTTP headers are present in the
# environment (GAE 1.0-style), and use them to parse out the Trace ID.
if not trace_id:
trace_id = os.getenv('X-Cloud-Trace-Context', '')[:16]

# Now add a traceID key to the payload, if one was found.
if trace_id:
payload['traceId'] = trace_id

return json.dumps(payload)
78 changes: 78 additions & 0 deletions multicore_runtime/cloud_logging_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import json
import logging
import mock
import os
import unittest

import cloud_logging


class CloudLoggingTestCase(unittest.TestCase):

EXPECTED_TRACE_ID = None
EXPECTED_OVERRIDDEN_TRACE_ID = '1234123412341234'
EXPECTED_MESSAGE = 'test message'
TEST_TIME = 1437589520.830589056
EXPECTED_SECONDS = 1437589520
EXPECTED_NANOS = 830589056

def setUp(self):
self.handler = cloud_logging.CloudLoggingHandler()
with mock.patch('time.time', return_value=self.TEST_TIME):
self.record = logging.makeLogRecord({'msg': self.EXPECTED_MESSAGE,
'levelname': 'INFO'})
self.record_with_extra = logging.makeLogRecord(
{'msg': self.EXPECTED_MESSAGE,
'levelname': 'INFO',
'trace_id': self.EXPECTED_OVERRIDDEN_TRACE_ID,})

def test_file_name_is_correct(self):
self.assertTrue(self.handler.baseFilename.startswith(
'/var/log/app_engine/app.'))
self.assertTrue(self.handler.baseFilename.endswith('.json'))

def test_format(self):
msg = self.handler.format(self.record)
payload = json.loads(msg)
if self.EXPECTED_TRACE_ID:

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do you really need self.EXPECTED_TRACE_ID since it is None?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I use the two-class system as here, then yes. This method is run twice, once in each class with a different env setup and a different expected result. I think this is idiomatic for unittest style.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, that's right. Sorry, I missed the two-class part. It is fine.

self.assertEquals(payload['traceId'], self.EXPECTED_TRACE_ID)
else:
self.assertNotIn('traceId', payload)

def test_format_with_trace_id_as_extra(self):
msg = self.handler.format(self.record_with_extra)
payload = json.loads(msg)
self.assertEquals(payload['traceId'], self.EXPECTED_OVERRIDDEN_TRACE_ID)

def test_format_timestamp(self):
msg = self.handler.format(self.record)
payload = json.loads(msg)
self.assertEquals(payload['timestamp']['seconds'], self.EXPECTED_SECONDS)
self.assertEquals(payload['timestamp']['nanos'], self.EXPECTED_NANOS)


class CloudLoggingTestCaseWithTraceIdEnv(CloudLoggingTestCase):

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do you really need a separate class for Env?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I need a separate class because the environment variable needs to be torn down even on test failure. The safe way to do this is to set it in setUp and remove it in tearDown, which is per-class.

An alternative would be to mock os.environ instead of actually setting the env variable, but that seemed more complex than necessary, and running the same tests under two different conditions via class inheritance is idiomatic for unittest.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since the setup and teardown for the second test class is simple, we could do it in the test function. Anyway, up to you.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the test fails or errors out the method will terminate immediately, without running the following code. tearDown reliably executes even in failure or error cases.


EXPECTED_TRACE_ID = '0101010102020202'

def setUp(self):
super(CloudLoggingTestCaseWithTraceIdEnv, self).setUp()
os.environ['X-Cloud-Trace-Context'] = '{}/12345;o=1'.format(
self.EXPECTED_TRACE_ID)

def tearDown(self):
os.unsetenv('X-Cloud-Trace-Context')
7 changes: 6 additions & 1 deletion multicore_runtime/wsgi.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import logging
import os

import cloud_logging
from dispatcher import dispatcher
from middleware import health_check_middleware
from middleware import reset_environment_middleware
Expand All @@ -36,8 +37,12 @@
from google.appengine.ext.vmruntime import vmconfig
from google.appengine.ext.vmruntime import vmstub

logging.basicConfig(level=logging.INFO)
# Configure logging to output structured JSON to Cloud Logging.
handler = cloud_logging.CloudLoggingHandler()
handler.setLevel(logging.INFO)
logging.getLogger('').addHandler(handler)

# Fetch application configuration via the config file.
appinfo = get_module_config(get_module_config_filename())
env_config = vmconfig.BuildVmAppengineEnvConfig()

Expand Down