Improved python3 compatibility of lib tests log2timeline#1932 (log2timeline#1934)

* Improved python3 compatibility of lib tests log2timeline#1927
Onager committed Jun 11, 2018
1 parent 5fe5562 commit e8f0df9
Showing 16 changed files with 201 additions and 121 deletions.
2 changes: 1 addition & 1 deletion plaso/engine/filter_file.py
@@ -66,7 +66,7 @@ def BuildFindSpecs(self, environment_variables=None):
path_attributes[attribute_name] = attribute_value

find_specs = []
with open(self._path, 'rb') as file_object:
with open(self._path, 'r') as file_object:
for line in file_object:
line = line.strip()
if line.startswith('#'):
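Note: the hunk above switches open() from binary to text mode so that iterating the filter file yields str on Python 3 and the startswith('#') comment check keeps working. A minimal standalone sketch of the difference (the file name is hypothetical):

with open('filter_file.txt', 'rb') as file_object:
  for line in file_object:
    # On Python 3 each line is bytes here, so a str prefix test such as
    # line.strip().startswith('#') raises TypeError.
    pass

with open('filter_file.txt', 'r') as file_object:
  for line in file_object:
    # In text mode each line is already str on both Python 2 and 3.
    if line.strip().startswith('#'):
      continue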
158 changes: 102 additions & 56 deletions plaso/lib/binary.py
@@ -3,7 +3,8 @@

from __future__ import unicode_literals

import binascii
import codecs
import itertools
import logging
import os

@@ -14,11 +15,11 @@ def ByteArrayCopyToString(byte_array, codepage='utf-8'):
"""Copies a UTF-8 encoded byte array into a Unicode string.
Args:
byte_array: A byte array containing an UTF-8 encoded string.
codepage: The codepage of the byte stream.
byte_array (bytes): byte stream containing an UTF-8 encoded string.
codepage (Optional[str]): codepage of the byte stream.
Returns:
A Unicode string.
str: Unicode string.
"""
byte_stream = b''.join(map(chr, byte_array))
return ByteStreamCopyToString(byte_stream, codepage=codepage)
@@ -28,44 +29,44 @@ def ByteStreamCopyToString(byte_stream, codepage='utf-8'):
"""Copies a UTF-8 encoded byte stream into a Unicode string.
Args:
byte_stream: A byte stream containing an UTF-8 encoded string.
codepage: The codepage of the byte stream.
byte_stream (bytes): byte stream containing an UTF-8 encoded string.
codepage (Optional[str]): codepage of the byte stream.
Returns:
A Unicode string.
str: Unicode string.
"""
try:
string = byte_stream.decode(codepage)
string = codecs.decode(byte_stream, codepage)
except UnicodeDecodeError:
logging.warning(
'Unable to decode {0:s} formatted byte stream.'.format(codepage))
string = byte_stream.decode(codepage, errors='ignore')
string = codecs.decode(byte_stream, codepage, errors='ignore')

string, _, _ = string.partition('\x00')
return string
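Note: the decode calls above follow a try-strict, fall-back-to-ignore pattern and truncate at the first NUL character. A minimal standalone sketch of that pattern (the sample value is illustrative, not from the commit):

import codecs
import logging

byte_stream = b'caf\xc3\xa9\x00trailing data'
try:
  string = codecs.decode(byte_stream, 'utf-8')
except UnicodeDecodeError:
  logging.warning('Unable to decode utf-8 formatted byte stream.')
  string = codecs.decode(byte_stream, 'utf-8', errors='ignore')

string, _, _ = string.partition('\x00')
print(string)  # café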


def ByteStreamCopyToUTF16Stream(byte_stream, byte_stream_size=None):
def ByteStreamCopyToUTF16String(byte_stream, byte_stream_size=None):
"""Reads an UTF-16 formatted stream from a byte stream.
The UTF-16 formatted stream should be terminated by an end-of-string
character (\x00\x00). Otherwise the function reads up to the byte stream size.
Args:
byte_stream: The byte stream that contains the UTF-16 formatted stream.
byte_stream_size: Optional byte stream size or None if the entire
byte stream should be read.
byte_stream (bytes): byte stream that contains the UTF-16 formatted
stream.
byte_stream_size (Optional[int]): byte stream size or None if the entire
byte stream should be read.
Returns:
String containing the UTF-16 formatted stream.
bytes: UTF-16 formatted stream.
"""
byte_stream_index = 0
if not byte_stream_size:
byte_stream_size = len(byte_stream)

while byte_stream_index + 1 < byte_stream_size:
if (byte_stream[byte_stream_index] == b'\x00' and
byte_stream[byte_stream_index + 1] == b'\x00'):
if _StreamContainsUTF16NullTerminator(byte_stream, byte_stream_index):
break

byte_stream_index += 2
@@ -80,14 +81,14 @@ def ReadUTF16Stream(file_object, offset=None, byte_size=0):
an end-of-string character (\x00\x00) or up to the byte size.
Args:
file_object: A file-like object to read the data from.
offset: An offset into the file object data, if -1 or not set
the current location into the file object data is used.
byte_size: Maximum number of bytes to read or 0 if the function
should keep reading up to the end of file.
file_object (file): file-like object to read the data from.
offset (Optional[int]): offset into the file object data, if -1 or not set
the current location into the file object data is used.
byte_size (Optional[int]): maximum number of bytes to read or 0 if the
function should keep reading up to the end of file.
Returns:
An Unicode string.
str: Unicode string.
"""
if offset is not None:
file_object.seek(offset, os.SEEK_SET)
@@ -116,18 +117,19 @@ def UTF16StreamCopyToString(byte_stream, byte_stream_size=None):
character (\x00\x00). Otherwise the function reads up to the byte stream size.
Args:
byte_stream: The UTF-16 formatted byte stream.
byte_stream_size: The byte stream size or None if the entire byte stream
should be used.
byte_stream (bytes): UTF-16 formatted byte stream.
byte_stream_size (Optional[int]): byte stream size or None if the entire
byte stream should be used.
Returns:
An Unicode string.
str: Unicode string.
"""
utf16_stream = ByteStreamCopyToUTF16Stream(
utf16_stream = ByteStreamCopyToUTF16String(
byte_stream, byte_stream_size=byte_stream_size)

try:
return utf16_stream.decode('utf-16-le')
string = codecs.decode(utf16_stream, 'utf-16-le')
return string
except (UnicodeDecodeError, UnicodeEncodeError) as exception:
logging.error('Unable to decode string: {0:s} with error: {1!s}'.format(
HexifyBuffer(utf16_stream), exception))
@@ -142,12 +144,12 @@ def ArrayOfUTF16StreamCopyToString(byte_stream, byte_stream_size=None):
character (\x00\x00). Otherwise the function reads up to the byte stream size.
Args:
byte_stream: The UTF-16 formatted byte stream.
byte_stream_size: The byte stream size or None if the entire byte stream
should be used.
byte_stream (str): UTF-16 formatted byte stream.
byte_stream_size (Optional[int]): byte stream size or None if the entire
byte stream should be used.
Returns:
An array of Unicode strings.
list[str]: Unicode strings.
"""
array_of_strings = []
utf16_stream_start = 0
@@ -156,15 +158,13 @@ def ArrayOfUTF16StreamCopyToString(byte_stream, byte_stream_size=None):
byte_stream_size = len(byte_stream)

while byte_stream_index + 1 < byte_stream_size:
if (byte_stream[byte_stream_index] == b'\x00' and
byte_stream[byte_stream_index + 1] == b'\x00'):

if _StreamContainsUTF16NullTerminator(byte_stream, byte_stream_index):
if byte_stream_index - utf16_stream_start <= 2:
break

array_of_strings.append(
byte_stream[utf16_stream_start:byte_stream_index].decode(
'utf-16-le'))
utf16_stream = byte_stream[utf16_stream_start:byte_stream_index]
string = codecs.decode(utf16_stream, 'utf-16-le')
array_of_strings.append(string)
utf16_stream_start = byte_stream_index + 2

byte_stream_index += 2
@@ -180,12 +180,13 @@ def ArrayOfUTF16StreamCopyToStringTable(byte_stream, byte_stream_size=None):
character (\x00\x00). Otherwise the function reads up to the byte stream size.
Args:
byte_stream: The UTF-16 formatted byte stream.
byte_stream_size: The byte stream size or None if the entire byte stream
should be used.
byte_stream (bytes): The UTF-16 formatted byte stream.
byte_stream_size (int): The byte stream size or None if the entire byte
stream should be used.
Returns:
A dict of Unicode strings with the byte offset as their key.
dict[int, str]: Unicode strings with their offset in the byte stream as
their key.
"""
string_table = {}
utf16_stream_start = 0
@@ -194,14 +195,13 @@ def ArrayOfUTF16StreamCopyToStringTable(byte_stream, byte_stream_size=None):
byte_stream_size = len(byte_stream)

while byte_stream_index + 1 < byte_stream_size:
if (byte_stream[byte_stream_index] == b'\x00' and
byte_stream[byte_stream_index + 1] == b'\x00'):

if _StreamContainsUTF16NullTerminator(byte_stream, byte_stream_index):
if byte_stream_index - utf16_stream_start <= 2:
break

string = byte_stream[utf16_stream_start:byte_stream_index].decode(
'utf-16-le')
utf16_stream = byte_stream[utf16_stream_start:byte_stream_index]
string = codecs.decode(utf16_stream, 'utf-16-le')
string_table[utf16_stream_start] = string
utf16_stream_start = byte_stream_index + 2

@@ -211,31 +211,77 @@


def ReadUTF16(string_buffer):
"""Returns a decoded UTF-16 string from a string buffer."""
"""Returns a decoded UTF-16 string from a string buffer.
Args:
string_buffer(bytes): byte string.
Returns:
str: Unicode string.
"""
if isinstance(string_buffer, (list, tuple)):
use_buffer = ''.join(string_buffer)
else:
use_buffer = string_buffer

if not isinstance(use_buffer, py2to3.STRING_TYPES):
if not isinstance(use_buffer, py2to3.BYTES_TYPE):
return ''

try:
return use_buffer.decode('utf-16').replace('\x00', '')
return codecs.decode(use_buffer, 'utf-16').replace('\x00', '')
except SyntaxError as exception:
logging.error('Unable to decode string: {0:s} with error: {1!s}.'.format(
HexifyBuffer(string_buffer), exception))
except (UnicodeDecodeError, UnicodeEncodeError) as exception:
logging.error('Unable to decode string: {0:s} with error: {1!s}'.format(
HexifyBuffer(string_buffer), exception))

return use_buffer.decode('utf-16', errors='ignore').replace('\x00', '')
return codecs.decode(
use_buffer, 'utf-16', errors='ignore').replace('\x00', '')


def HexifyBuffer(byte_sequence):
"""Returns an hexadecimal representation of a byte sequence.
Args:
byte_sequence (bytes): byte sequence.
Returns:
str: hexadecimal representation of the byte stream.
"""
hex_bytes = codecs.encode(byte_sequence, 'hex')
output_string = codecs.decode(hex_bytes, 'utf-8')
string_iterators = [iter(output_string)] * 2

# pylint: disable=no-member
if py2to3.PY_2:
iterators = itertools.izip_longest(*string_iterators)
else:
iterators = itertools.zip_longest(*string_iterators)
groups = list(iterators)
output_string = ''.join(
['\\x{0:s}{1:s}'.format(group[0], group[1]) for group in groups])
return output_string
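Note: the rewritten HexifyBuffer hex-encodes the buffer and then pairs the hex digits using the shared-iterator idiom ([iter(s)] * 2 with zip_longest); the previous binascii-based implementation, removed by this commit, appears further down in the diff. A minimal sketch of the grouping, assuming Python 3 (the committed code falls back to itertools.izip_longest on Python 2):

import codecs
import itertools

byte_sequence = b'\x00ab'
hex_digits = codecs.decode(codecs.encode(byte_sequence, 'hex'), 'utf-8')  # '006162'
# Passing the same iterator twice consumes the hex digits two at a time.
pairs = itertools.zip_longest(*([iter(hex_digits)] * 2))
print(''.join('\\x{0:s}{1:s}'.format(*pair) for pair in pairs))  # \x00\x61\x62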


def HexifyBuffer(string_buffer):
"""Return a string with the hex representation of a string buffer."""
chars = []
for char in string_buffer:
chars.append(binascii.hexlify(char))

return '\\x{0:s}'.format('\\x'.join(chars))


def _StreamContainsUTF16NullTerminator(byte_stream, offset):
"""Checks if the given byte stream has a UTF-16 null character at the offset.
This is a little complicated because of the necessity of supporting Python 2
and 3.
Args:
byte_stream (bytes): byte string.
offset (int): byte stream offset to check.
Returns:
bool: whether there's a UTF-16 null terminator in the stream at the given
offset.
"""
byte_1 = byte_stream[offset]
byte_2 = byte_stream[offset + 1]
if py2to3.PY_2 and byte_1 == b'\x00' and byte_2 == b'\x00':
return True
if py2to3.PY_3 and byte_1 == 0 and byte_2 == 0:
return True
return False
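Note: _StreamContainsUTF16NullTerminator exists because indexing a bytes object yields a one-character string on Python 2 but an int on Python 3, hence the two branches in the helper above. A short illustration (the sample value is made up):

byte_stream = b'A\x00\x00\x00'
# Python 3: indexing bytes yields an int, so the terminator check compares against 0.
print(byte_stream[1], byte_stream[1] == 0)  # 0 True
# Python 2: the same index yields b'\x00', a one-character string, so the
# check compares against b'\x00' instead.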
4 changes: 2 additions & 2 deletions plaso/lib/lexer.py
@@ -473,7 +473,7 @@ def _CombineBinaryExpressions(self, operator):
self.stack[i-1] = None
self.stack[i+1] = None

self.stack = filter(None, self.stack)
self.stack = list(filter(None, self.stack))

def _CombineParenthesis(self):
"""Combine parenthesis."""
@@ -483,7 +483,7 @@ def _CombineParenthesis(self):
self.stack[i] = None
self.stack[i+2] = None

self.stack = filter(None, self.stack)
self.stack = list(filter(None, self.stack))

def Reduce(self):
"""Reduce the token stack into an AST."""
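Note: both lexer hunks wrap filter() in list() because on Python 3 filter() returns a lazy iterator, while the surrounding code indexes and re-scans self.stack. A short illustration (the stack contents are made up):

stack = ['token', None, 'other', None]
filtered = filter(None, stack)
# On Python 3 this is a one-shot iterator: len(filtered) raises TypeError and
# filtered[0] is not supported.
stack = list(filter(None, stack))
print(len(stack), stack[0])  # 2 token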
23 changes: 10 additions & 13 deletions plaso/lib/loggers.py
@@ -6,18 +6,22 @@
import gzip
import logging

from plaso.lib import py2to3


class CompressedFileHandler(logging.FileHandler):
"""Compressed file handler for logging."""

def __init__(self, filename, mode='a', encoding=None):
def __init__(self, filename, mode='a', encoding='utf-8'):
"""Initializes a compressed file logging handler.
Args:
filename (str): name of the log file.
mode (Optional[str]): file access mode.
encoding (Optional[str]): encoding of the log lines.
"""
if 't' not in mode and encoding and py2to3.PY_3:
mode = '{0:s}t'.format(mode)
super(CompressedFileHandler, self).__init__(
filename, mode=mode, encoding=encoding, delay=True)

@@ -28,19 +32,12 @@ def _open(self):
file: file-like object of the resulting stream.
"""
# The gzip module supports directly setting encoding as of Python 3.3.
return gzip.open(self.baseFilename, self.mode)

def emit(self, record):
"""Emits a record.
Args:
record (logging.LogRecord): log record.
"""
if self.encoding:
record = record.encode(self.encoding)

super(CompressedFileHandler, self).emit(record)
# pylint: disable=unexpected-keyword-arg
if py2to3.PY_3:
return gzip.open(
self.baseFilename, mode=self.mode, encoding=self.encoding)

return gzip.open(self.baseFilename, self.mode)

def ConfigureLogging(
debug_output=False, filename=None, mode='w', quiet_mode=False):
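Note: on Python 3 the handler appends 't' to the file mode and passes the encoding through to gzip.open (supported since Python 3.3), so log records are written as text inside the compressed stream. A small usage sketch, assuming Python 3 and a hypothetical log file name:

import gzip

with gzip.open('plaso.log.gz', mode='at', encoding='utf-8') as log_file:
  log_file.write('compressed log line\n')

with gzip.open('plaso.log.gz', mode='rt', encoding='utf-8') as log_file:
  print(log_file.read())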
