Skip to content

Commit

Permalink
Recover the compressed LZ4 strings for journald events (log2timeline#…
Browse files Browse the repository at this point in the history
  • Loading branch information
rgayon authored and Onager committed Jul 19, 2018
1 parent 631a476 commit 519dbf1
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 8 deletions.
12 changes: 11 additions & 1 deletion plaso/formatters/systemd_journal.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,14 @@ class SystemdJournalEventFormatter(interface.ConditionalEventFormatter):
SOURCE_SHORT = 'LOG'


manager.FormattersManager.RegisterFormatter(SystemdJournalEventFormatter)
# TODO: remove when PR #2004 is pushed
class SystemdJournalDirtyEventFormatter(SystemdJournalEventFormatter):
  """Formatter for Systemd journal events flagged as dirty.

  Subclasses SystemdJournalEventFormatter and only overrides the data type
  and the long source name.
  """

  SOURCE_LONG = 'systemd-journal-dirty'

  DATA_TYPE = 'systemd:journal:dirty'


# Register both the regular and the 'dirty' Systemd journal event formatters
# with the formatters manager.
manager.FormattersManager.RegisterFormatters([
SystemdJournalEventFormatter, SystemdJournalDirtyEventFormatter])
44 changes: 37 additions & 7 deletions plaso/parsers/systemd_journal.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,24 @@ def __init__(self):
self.reporter = None


# TODO: remove once #2004 is pushed
class SystemdDirtyJournalEventData(SystemdJournalEventData):
  """Event data for a 'dirty' Systemd journal entry.

  Used instead of SystemdJournalEventData when a decompression error was
  encountered while parsing the entry.
  """

  DATA_TYPE = 'systemd:journal:dirty'


class SystemdJournalParser(interface.FileObjectParser):
"""Parses Systemd Journal files."""

NAME = 'systemd_journal'

DESCRIPTION = 'Parser for Systemd Journal files.'

_OBJECT_COMPRESSED_FLAG = 0x00000001
_OBJECT_COMPRESSED_FLAG_XZ = 1
_OBJECT_COMPRESSED_FLAG_LZ4 = 2

# Unfortunately this doesn't help us knowing about the "dirtiness" or
# "corrupted" file state.
Expand Down Expand Up @@ -190,7 +200,7 @@ def _ParseItem(self, file_object, offset):
offset (int): offset to the DATA object.
Returns:
tuple[str, str]: key and value of this item.
tuple[str, str, bool]: key and value of this item, with a 'dirty' bit.
Raises:
ParseError: When an unexpected object type is parsed.
Expand All @@ -203,13 +213,23 @@ def _ParseItem(self, file_object, offset):
'Expected an object of type DATA, but got {0:s}'.format(
object_header.type))

dirty = False
event_data = file_object.read(payload_size - self._DATA_OBJECT_SIZE)
if object_header.flags & self._OBJECT_COMPRESSED_FLAG:
if object_header.flags & self._OBJECT_COMPRESSED_FLAG_XZ:
event_data = lzma.decompress(event_data)
event_string = event_data.decode('utf-8')
elif object_header.flags & self._OBJECT_COMPRESSED_FLAG_LZ4:
# TODO: implement proper LZ4 decompression (see PR #2004)
dirty = True
event_string = event_data.decode('utf-8', 'ignore')
pos = event_string.index('MESSAGE=')
if pos >= 0:
event_string = 'MESSAGE=' + event_string[pos+8:]
else:
event_string = event_data.decode('utf-8')

event_string = event_data.decode('utf-8')
event_key, event_value = event_string.split('=', 1)
return (event_key, event_value)
return (event_key, event_value, dirty)

def _ParseJournalEntry(self, parser_mediator, file_object, offset):
"""Parses a Systemd journal ENTRY object.
Expand All @@ -235,12 +255,17 @@ def _ParseJournalEntry(self, parser_mediator, file_object, offset):
object_header.type))

fields = {}
dirty = False
for item in entry_object.object_items:
if item.object_offset < self._max_journal_file_offset:
raise errors.ParseError(
'object offset should be after hash tables ({0:d} < {1:d})'.format(
offset, self._max_journal_file_offset))
key, value = self._ParseItem(file_object, item.object_offset)
# TODO: remove the dirty variable once #2004 is pushed
key, value, _dirty = self._ParseItem(file_object, item.object_offset)
# We parse many items for a single event, so we need to remember whether any
# of the parsed items was dirty.
dirty = dirty or _dirty
fields[key] = value

reporter = fields.get('SYSLOG_IDENTIFIER', None)
Expand All @@ -249,7 +274,12 @@ def _ParseJournalEntry(self, parser_mediator, file_object, offset):
else:
pid = None

event_data = SystemdJournalEventData()
# TODO: remove the dirty variable once #2004 is pushed
if dirty:
event_data = SystemdDirtyJournalEventData()
else:
event_data = SystemdJournalEventData()

event_data.body = fields['MESSAGE']
event_data.hostname = fields['_HOSTNAME']
event_data.pid = pid
Expand Down
Binary file added test_data/systemd/journal/system.journal.lz4
Binary file not shown.
34 changes: 34 additions & 0 deletions tests/parsers/systemd_journal.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,40 @@ def testParse(self):
expected_short_message = '{0:s}...'.format(expected_message[:77])
self._TestGetMessageStrings(event, expected_message, expected_short_message)

def testParseLZ4(self):
  """Tests parsing a journal file that contains LZ4 compressed events."""
  test_file_path = ['systemd', 'journal', 'system.journal.lz4']
  storage_writer = self._ParseFile(
      test_file_path, systemd_journal.SystemdJournalParser())

  self.assertEqual(storage_writer.number_of_events, 85)

  events = list(storage_writer.GetEvents())

  # Check the first event of the journal.
  first_event = events[0]
  self.CheckTimestamp(first_event.timestamp, '2018-07-03 15:00:16.682340')

  expected_message = 'testlol [systemd, pid: 822] Reached target Paths.'
  self._TestGetMessageStrings(
      first_event, expected_message, expected_message)

  # The last event uses LZ4 compressed data.
  last_event = events[84]
  self.CheckTimestamp(last_event.timestamp, '2018-07-03 15:19:04.667807')

  # We don't support LZ4 decompression yet, so the output is slightly borked.
  # Source of the message text: https://github.com/systemd/systemd/issues/6237
  expected_message = (
      'testlol [test, pid: 34757] textual user names. Yes, as you found out'
      ' 0day is not a valid username. I wonder which tool permitted you to '
      'create it in the first place. Note thatc\x00\x03\x3d\x00ing'
      ' numeric.\x00,characters is done on purpose: to avoid ambiguities '
      'betweenJ\x00\x7fUID and\x00Pames.')
  expected_short_message = '{0:s}...'.format(expected_message[:77])
  self._TestGetMessageStrings(
      last_event, expected_message, expected_short_message)

def testParseDirty(self):
"""Tests the Parse function on a 'dirty' journal file."""
storage_writer = self._CreateStorageWriter()
Expand Down

0 comments on commit 519dbf1

Please sign in to comment.