Skip to content

Commit

Permalink
Code review: 343810043: Initial work on defining text formats
Browse files Browse the repository at this point in the history
  • Loading branch information
joachimmetz committed Apr 27, 2018
1 parent f91b5f0 commit 90ca998
Show file tree
Hide file tree
Showing 6 changed files with 105 additions and 81 deletions.
2 changes: 1 addition & 1 deletion config/dpkg/changelog
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ plaso (20180427-1) unstable; urgency=low

* Auto-generated

-- Log2Timeline <log2timeline-dev@googlegroups.com> Fri, 27 Apr 2018 05:33:48 +0200
-- Log2Timeline <log2timeline-dev@googlegroups.com> Fri, 27 Apr 2018 05:45:29 +0200
18 changes: 9 additions & 9 deletions plaso/engine/extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,11 @@ def __init__(self, parser_filter_expression=None):
super(EventExtractor, self).__init__()
self._file_scanner = None
self._filestat_parser = None
self._formats_with_signatures = None
self._mft_parser = None
self._non_sigscan_parser_names = None
self._parsers = None
self._specification_store = None
self._parsers_profiler = None
self._usnjrnl_parser = None

self._InitializeParserObjects(
Expand Down Expand Up @@ -93,7 +94,7 @@ def _GetSignatureMatchParserNames(self, file_object):

for scan_result in iter(scan_state.scan_results):
format_specification = (
self._specification_store.GetSpecificationBySignature(
self._formats_with_signatures.GetSpecificationBySignature(
scan_result.identifier))

if format_specification.identifier not in parser_names:
Expand All @@ -117,18 +118,17 @@ def _InitializeParserObjects(self, parser_filter_expression=None):
* A name of a single parser (case insensitive), e.g. msiecf.
* A glob name for a single parser, e.g. '*msie*' (case insensitive).
"""
self._specification_store, non_sigscan_parser_names = (
parsers_manager.ParsersManager.GetSpecificationStore(
self._formats_with_signatures, non_sigscan_parser_names = (
parsers_manager.ParsersManager.GetFormatsWithSignatures(
parser_filter_expression=parser_filter_expression))

self._non_sigscan_parser_names = []
for parser_name in non_sigscan_parser_names:
if parser_name in ('filestat', 'usnjrnl'):
continue
self._non_sigscan_parser_names.append(parser_name)
if parser_name not in ('filestat', 'usnjrnl'):
self._non_sigscan_parser_names.append(parser_name)

self._file_scanner = parsers_manager.ParsersManager.GetScanner(
self._specification_store)
self._file_scanner = parsers_manager.ParsersManager.CreateSignatureScanner(
self._formats_with_signatures)

self._parsers = parsers_manager.ParsersManager.GetParserObjects(
parser_filter_expression=parser_filter_expression)
Expand Down
22 changes: 16 additions & 6 deletions plaso/lib/specification.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ def __init__(self, pattern, offset=None):
and that all of the data is scanned.
Args:
pattern (byte): pattern of the signature. Wildcards or regular pattern
(regexp) are not supported.
pattern (bytes): pattern of the signature. Wildcards or regular
expressions (regexp) are not supported.
offset (int): offset of the signature. None is used to indicate
the signature has no offset. A positive offset is relative from
the start of the data; a negative offset is relative from the end
Expand All @@ -47,13 +47,16 @@ def SetIdentifier(self, identifier):
class FormatSpecification(object):
"""The format specification."""

def __init__(self, identifier):
def __init__(self, identifier, text_format=False):
"""Initializes a format specification.
Args:
identifier (str): unique name for the format.
text_format (Optional[bool]): True if the format is a text format,
False otherwise.
"""
super(FormatSpecification, self).__init__()
self._text_format = text_format
self.identifier = identifier
self.signatures = []

Expand All @@ -69,6 +72,14 @@ def AddNewSignature(self, pattern, offset=None):
"""
self.signatures.append(Signature(pattern, offset=offset))

def IsTextFormat(self):
    """Reports whether this format specification describes a text format.

    Returns:
      bool: True if the format is a text format, False otherwise.
    """
    is_text_format = self._text_format
    return is_text_format


class FormatSpecificationStore(object):
"""The store for format specifications."""
Expand Down Expand Up @@ -131,9 +142,8 @@ def AddSpecification(self, specification):
specification.identifier, signature_index)

if signature_identifier in self._signature_map:
raise KeyError(
'Signature {0:s} is already defined in map.'.format(
signature_identifier))
raise KeyError('Signature {0:s} is already defined in map.'.format(
signature_identifier))

signature.SetIdentifier(signature_identifier)
self._signature_map[signature_identifier] = specification
Expand Down
10 changes: 10 additions & 0 deletions plaso/parsers/dsv_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from plaso.lib import errors
from plaso.lib import line_reader_file
from plaso.lib import py2to3
from plaso.lib import specification
from plaso.parsers import interface


Expand Down Expand Up @@ -117,6 +118,15 @@ def _CreateDictReader(self, parser_mediator, line_reader):
quotechar=quotechar, restkey=magic_test_string,
restval=magic_test_string)

@classmethod
def GetFormatSpecification(cls):
    """Retrieves the format specification of the parser.

    The specification is identified by the parser name (cls.NAME) and is
    flagged as a text format; no signatures are added to it here.

    Returns:
      FormatSpecification: format specification.
    """
    format_specification = specification.FormatSpecification(
        cls.NAME, text_format=True)
    return format_specification

def ParseFileObject(self, parser_mediator, file_object, **unused_kwargs):
"""Parses a DSV text file-like object.
Expand Down
126 changes: 64 additions & 62 deletions plaso/parsers/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,37 @@ def _ReduceParserFilters(cls, includes, excludes):
for parser_name in parsers_to_pop:
excludes.pop(parser_name)

@classmethod
def CreateSignatureScanner(cls, specification_store):
    """Creates a signature scanner for format specifications with signatures.

    Every signature of every format specification in the store is added
    to a new pysigscan scanner under the identifier of the signature.

    Args:
      specification_store (FormatSpecificationStore): format specifications
          with signatures.

    Returns:
      pysigscan.scanner: signature scanner.
    """
    scanner = pysigscan.scanner()

    # Lazily walk all signatures, in the order the store yields them.
    all_signatures = (
        signature
        for format_specification in specification_store.specifications
        for signature in format_specification.signatures)

    for signature in all_signatures:
        offset = signature.offset

        if offset is None:
            flags = pysigscan.signature_flags.NO_OFFSET
        elif offset < 0:
            # A negative offset is relative from the end of the data; the
            # scanner is given its magnitude together with the matching flag.
            flags = pysigscan.signature_flags.RELATIVE_FROM_END
            offset = -offset
        else:
            flags = pysigscan.signature_flags.RELATIVE_FROM_START

        scanner.add_signature(
            signature.identifier, offset, signature.pattern, flags)

    return scanner

@classmethod
def DeregisterParser(cls, parser_class):
"""Deregisters a parser class.
Expand All @@ -167,6 +198,39 @@ def DeregisterParser(cls, parser_class):

del cls._parser_classes[parser_name]

@classmethod
def GetFormatsWithSignatures(cls, parser_filter_expression=None):
    """Retrieves the format specifications that have signatures.

    Parsers whose format specification defines at least one signature are
    collected in a specification store; the names of all other parsers
    (no format specification, or one without signatures) are returned in
    a separate list.

    Args:
      parser_filter_expression (Optional[str]): parser filter expression,
          where None represents all parsers and plugins.

    Returns:
      tuple: contains:

        * FormatSpecificationStore: format specifications with signatures.
        * list[str]: remaining parser names that do not have a format
            specification with signatures.
    """
    formats_with_signatures = specification.FormatSpecificationStore()
    parser_names_without_signatures = []

    parsers = cls.GetParsers(
        parser_filter_expression=parser_filter_expression)
    for parser_name, parser_class in parsers:
        format_specification = parser_class.GetFormatSpecification()

        # Nothing to scan for: remember the parser name only.
        if not format_specification or not format_specification.signatures:
            parser_names_without_signatures.append(parser_name)
            continue

        formats_with_signatures.AddSpecification(format_specification)

    return formats_with_signatures, parser_names_without_signatures

@classmethod
def GetNamesOfParsersWithPlugins(cls):
"""Retrieves the names of all parsers with plugins.
Expand Down Expand Up @@ -394,68 +458,6 @@ def GetPresetForOperatingSystem(

return None

@classmethod
def GetScanner(cls, specification_store):
    """Initializes a signature scanner from a specification store.

    Args:
      specification_store (FormatSpecificationStore): specification store.

    Returns:
      pysigscan.scanner: signature scanner.
    """
    signature_scanner = pysigscan.scanner()

    for specification_entry in specification_store.specifications:
        for signature in specification_entry.signatures:
            offset = signature.offset

            if offset is None:
                offset_flags = pysigscan.signature_flags.NO_OFFSET
            elif offset < 0:
                # Offsets relative from the end are passed as a positive
                # value accompanied by the RELATIVE_FROM_END flag.
                offset = -offset
                offset_flags = pysigscan.signature_flags.RELATIVE_FROM_END
            else:
                offset_flags = pysigscan.signature_flags.RELATIVE_FROM_START

            signature_scanner.add_signature(
                signature.identifier, offset, signature.pattern,
                offset_flags)

    return signature_scanner

@classmethod
def GetSpecificationStore(cls, parser_filter_expression=None):
    """Retrieves the specification store for the parsers.

    Parsers that define a format specification have it added to a new
    specification store; the names of parsers that return None instead
    are collected in a separate list.

    Args:
      parser_filter_expression (Optional[str]): parser filter expression,
          where None represents all parsers and plugins.

    Returns:
      tuple: contains:

        * FormatSpecificationStore: format specification store.
        * list[str]: remaining parser names that do not have a format
            specification.
    """
    store = specification.FormatSpecificationStore()
    parser_names_without_specification = []

    parsers = cls.GetParsers(
        parser_filter_expression=parser_filter_expression)
    for parser_name, parser_class in parsers:
        format_specification = parser_class.GetFormatSpecification()

        if format_specification is None:
            parser_names_without_specification.append(parser_name)
            continue

        store.AddSpecification(format_specification)

    return store, parser_names_without_specification

@classmethod
def RegisterParser(cls, parser_class):
"""Registers a parser class.
Expand Down
8 changes: 5 additions & 3 deletions tests/parsers/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def testGetParsersFromPresetCategory(self):
self.assertEqual(parser_names, [])

def testReduceParserFilters(self):
"""Tests the ReduceParserFilters function."""
"""Tests the _ReduceParserFilters function."""
includes = {}
excludes = {}

Expand Down Expand Up @@ -168,6 +168,8 @@ def testReduceParserFilters(self):
self.assertEqual(includes, {'test': ['include']})
self.assertEqual(excludes, {'test': ['exclude', 'intersection']})

# TODO: add tests for CreateSignatureScanner.

def testParserRegistration(self):
"""Tests the RegisterParser and DeregisterParser functions."""
number_of_parsers = len(manager.ParsersManager._parser_classes)
Expand All @@ -185,6 +187,8 @@ def testParserRegistration(self):
len(manager.ParsersManager._parser_classes),
number_of_parsers)

# TODO: add tests for GetFormatsWithSignatures.

def testGetNamesOfParsersWithPlugins(self):
"""Tests the GetNamesOfParsersWithPlugins function."""
parsers_names = manager.ParsersManager.GetNamesOfParsersWithPlugins()
Expand Down Expand Up @@ -371,8 +375,6 @@ def testGetPlugins(self):
TestParserWithPlugins.DeregisterPlugin(TestPlugin)

# TODO: add tests for GetPresetForOperatingSystem.
# TODO: add tests for GetScanner.
# TODO: add tests for GetSpecificationStore.

def testPluginRegistration(self):
"""Tests the RegisterPlugin and DeregisterPlugin functions."""
Expand Down

0 comments on commit 90ca998

Please sign in to comment.