diff --git a/rtfdump.py b/rtfdump.py index e1b6399..d4cb7a8 100644 --- a/rtfdump.py +++ b/rtfdump.py @@ -2,8 +2,8 @@ __description__ = 'Analyze RTF files' __author__ = 'Didier Stevens' -__version__ = '0.0.9' -__date__ = '2018/12/09' +__version__ = '0.0.10' +__date__ = '2020/12/23' """ @@ -35,6 +35,14 @@ 2017/12/24: 0.0.7 made changes level 0 -> remainder 2018/12/07: 0.0.8 added support for -s a; added selection warning; added option -A; added yara #x# #r#; updated ParseCutTerm; added --jsonoutput 2018/12/09: 0.0.9 changed extra output for remainder + 2019/03/13: 0.0.10 bug fixes + 2019/03/15: continue; package; filter h>int + 2019/10/09: bug fix + 2019/10/27: introduced environment variable DSS_DEFAULT_HASH_ALGORITHMS + 2020/12/06: Python 3, option -n + 2020/12/07: Bug fixes, special characters, removed option -n; added options -v -T; cutdata update + 2020/12/08: added option -I + 2020/12/23: added option -O Todo: """ @@ -43,17 +51,22 @@ import sys import os import zipfile -import cStringIO import binascii import textwrap import re import string import hashlib import json +import zlib +import struct if sys.version_info[0] >= 3: from io import StringIO else: from cStringIO import StringIO +if sys.version_info[0] >= 3: + from io import BytesIO as BytesIO +else: + from cStringIO import StringIO as BytesIO try: import yara @@ -67,6 +80,15 @@ def PrintManual(): manual = ''' Manual: +This manual is a work in progress. + +By default, this tool uses the MD5 hash in reports, but this can be changed by setting environment variable DSS_DEFAULT_HASH_ALGORITHMS. +Like this: set DSS_DEFAULT_HASH_ALGORITHMS=SHA256 + +With option -T (--headtail), output can be truncated to the first 10 lines and last 10 lines of output. + +With option -j, oledump will output the content of the ole file as a JSON object that can be piped into other tools that support this JSON format. + ''' for line in manual.split('\n'): print(textwrap.fill(line)) @@ -164,14 +186,14 @@ def HexAsciiDump(self, rle=False): if i % self.dumplinelength == self.dumplinelength / 2: hexDump += ' ' hexDump += ' %02X' % b - asciiDump += IFF(b >= 32 and b < 128, chr(b), '.') + asciiDump += IFF(b >= 32 and b < 127, chr(b), '.') if countRLE > 0: oDumpStream.Addline('* %d 0x%02x' % (countRLE, countRLE * self.dumplinelength)) oDumpStream.Addline(self.CombineHexAscii(position + hexDump, asciiDump)) return oDumpStream.Content() def Base64Dump(self, nowhitespace=False): - encoded = binascii.b2a_base64(self.data) + encoded = binascii.b2a_base64(self.data).decode().strip() if nowhitespace: return encoded oDumpStream = self.cDumpStream(self.prefix) @@ -205,15 +227,44 @@ def HexDump(data): def HexAsciiDump(data, rle=False): return cDump(data, dumplinelength=dumplinelength).HexAsciiDump(rle=rle) +def RemoveLeadingEmptyLines(data): + if data[0] == '': + return RemoveLeadingEmptyLines(data[1:]) + else: + return data + +def RemoveTrailingEmptyLines(data): + if data[-1] == '': + return RemoveTrailingEmptyLines(data[:-1]) + else: + return data + +def HeadTail(data, apply): + count = 10 + if apply: + lines = RemoveTrailingEmptyLines(RemoveLeadingEmptyLines(data.split('\n'))) + if len(lines) <= count * 2: + return data + else: + return '\n'.join(lines[0:count] + ['...'] + lines[-count:]) + else: + return data + #Fix for http://bugs.python.org/issue11395 def StdoutWriteChunked(data): - while data != '': - sys.stdout.write(data[0:10000]) - try: - sys.stdout.flush() - except IOError: - return - data = data[10000:] + if sys.version_info[0] > 2: + if isinstance(data, str): + sys.stdout.write(data) + else: + sys.stdout.buffer.write(data) + else: + while data != '': + sys.stdout.write(data[0:10000]) + try: + sys.stdout.flush() + except IOError: + return + data = data[10000:] def IfWIN32SetBinary(io): if sys.platform == 'win32': @@ -242,86 +293,9 @@ def ProcessAt(argument): else: return [argument] -def AddPlugin(cClass): - global plugins - - plugins.append(cClass) - def ExpandFilenameArguments(filenames): return list(collections.OrderedDict.fromkeys(sum(map(glob.glob, sum(map(ProcessAt, filenames), [])), []))) -class cPluginParent(): - macroOnly = False - -def LoadPlugins(plugins, verbose): - if plugins == '': - return - scriptPath = os.path.dirname(sys.argv[0]) - for plugin in sum(map(ProcessAt, plugins.split(',')), []): - try: - if not plugin.lower().endswith('.py'): - plugin += '.py' - if os.path.dirname(plugin) == '': - if not os.path.exists(plugin): - scriptPlugin = os.path.join(scriptPath, plugin) - if os.path.exists(scriptPlugin): - plugin = scriptPlugin - exec open(plugin, 'r') in globals(), globals() - except Exception as e: - print('Error loading plugin: %s' % plugin) - if verbose: - raise e - -def AddDecoder(cClass): - global decoders - - decoders.append(cClass) - -class cDecoderParent(): - pass - -def LoadDecoders(decoders, verbose): - if decoders == '': - return - scriptPath = os.path.dirname(sys.argv[0]) - for decoder in sum(map(ProcessAt, decoders.split(',')), []): - try: - if not decoder.lower().endswith('.py'): - decoder += '.py' - if os.path.dirname(decoder) == '': - if not os.path.exists(decoder): - scriptDecoder = os.path.join(scriptPath, decoder) - if os.path.exists(scriptDecoder): - decoder = scriptDecoder - exec open(decoder, 'r') in globals(), globals() - except Exception as e: - print('Error loading decoder: %s' % decoder) - if verbose: - raise e - -class cIdentity(cDecoderParent): - name = 'Identity function decoder' - - def __init__(self, stream, options): - self.stream = stream - self.options = options - self.available = True - - def Available(self): - return self.available - - def Decode(self): - self.available = False - return self.stream - - def Name(self): - return '' - -def DecodeFunction(decoders, options, stream): - if decoders == []: - return stream - return decoders[0](stream, options.decoderoptions).Decode() - class cIteminfo(): def __init__(self, level, beginPosition, endPosition, countChildren): self.level = level @@ -334,7 +308,7 @@ def BuildTree(rtfdata, level, index, sequence, options): level += 1 if index >= len(rtfdata): return None - if rtfdata[index] != '{': + if rtfdata[index:index+1] != b'{': error = 'Parser error: Expected character {' print(error) raise Exception(error) @@ -343,10 +317,10 @@ def BuildTree(rtfdata, level, index, sequence, options): sequence.append(oIteminfo) index += 1 while index < len(rtfdata): - if rtfdata[index] == '{' and (index == 0 or rtfdata[index - 1] != '\\'): + if rtfdata[index:index+1] == b'{' and (index == 0 or rtfdata[index - 1:index] != b'\\'): index = BuildTree(rtfdata, level, index, sequence, options) children += 1 - elif rtfdata[index] == '}' and (index == 0 or rtfdata[index - 1] != '\\'): + elif rtfdata[index:index+1] == b'}' and (index == 0 or rtfdata[index - 1:index] != b'\\'): oIteminfo.endPosition = index oIteminfo.countChildren = children return index + 1 @@ -359,77 +333,94 @@ def BuildTree(rtfdata, level, index, sequence, options): return index def Trimdde(data): - if not data.startswith('\r\n\\dde'): + if not data.startswith(b'\r\n\\dde'): return data data = data[6:] counter = 0 - while data[counter] == '0' and counter < 250: + while data[counter] == b'0' and counter < 250: counter += 1 return data[counter:] -def ExtractHex(data): +def ExtractHex(data, controlwordsToIgnore): + # special characters http://latex2rtf.sourceforge.net/rtfspec_7.html#rtfspec_specialchar + # sample 5c3d12b29a1bb9fb775bb6d862a32ae8e89af943b6337c71fe2268dee70055e9 + control_words_special_characters = [b'\\qmspace'] + data = Trimdde(data) - if data.startswith('\r\n\\dde'): + if data.startswith(b'\r\n\\dde'): print(repr(data[0:40])) backslash = False - backslashtext = '' - hexstring = [cStringIO.StringIO()] + backslashtext = b'' + hexstring = [StringIO()] countUnexpectedCharacters = 0 binstatus = 0 - binnumber = '' - bintext = '' + binnumber = b'' + bintext = b'' + specialcharacterStatus = False i = 0 while i < len(data): - char = data[i] - if binstatus > 0: + if sys.version_info[0] >= 3: + byte = bytes([data[i]]) + else: + byte = data[i] + if specialcharacterStatus: + if byte == b'}': + specialcharacterStatus = False + else: + pass + elif binstatus > 0: if binstatus == 1: - if char in string.digits: - binnumber += char + if byte in string.digits.encode(): + binnumber += byte else: binstatus = 2 - if binnumber == '': + if binnumber == b'': binint = 0 else: binint = int(binnumber) & 0xFFFFFFFF - bintext = '' + bintext = b'' if binint == 0: binstatus = 0 - if not char in string.whitespace: + if not byte in string.whitespace.encode(): i -= 1 elif binstatus == 2: - bintext += char + bintext += byte binint -= 1 if binint == 0: binstatus = 0 hexstring.append(['bin', bintext]) - hexstring.append(cStringIO.StringIO()) - binnumber = '' - bintext = '' + hexstring.append(StringIO()) + binnumber = b'' + bintext = b'' elif backslash: - if char in string.letters or char in string.digits or char == '-': - backslashtext += char - if backslashtext == '\\bin': + if byte in string.ascii_letters.encode() or byte in string.digits.encode() or byte == b'-': + backslashtext += byte + if backslashtext == b'\\bin': binstatus = 1 - binnumber = '' + binnumber = b'' backslash = False - backslashtext = '' - elif backslashtext == '\\': + backslashtext = b'' + if backslashtext in control_words_special_characters + controlwordsToIgnore: + specialcharacterStatus = True + elif backslashtext == b'\\': backslash = False - backslashtext = '' + backslashtext = b'' else: # if backslashtext != '\\-': # print(repr(backslashtext)) backslash = False - backslashtext = '' + backslashtext = b'' i -= 1 - elif char == '\\': + elif byte == b'\\': backslash = True - backslashtext = char - elif char in string.hexdigits: - hexstring[-1].write(char) - elif char in string.whitespace: + backslashtext = byte + elif byte in string.hexdigits.encode(): + hexstring[-1].write(byte.decode()) + elif byte in string.whitespace.encode(): + pass + elif byte == b'{': pass - elif char in ['{', '}']: + elif byte == b'}': pass else: countUnexpectedCharacters += 1 @@ -438,59 +429,149 @@ def ExtractHex(data): # if not char in ['\0']: # raise('xxx') i += 1 - return [IFF(isinstance(x, list), x, lambda: x.getvalue()) for x in hexstring], max([''] + re.findall('[0-9a-f]+', data, re.I), key=len), countUnexpectedCharacters + return [IFF(isinstance(x, list), x, lambda: x.getvalue()) for x in hexstring], max([b''] + re.findall(b'[0-9a-f]+', data, re.I), key=len), countUnexpectedCharacters + +def ReadWORD(data): + format = ' 0 and word3 <= len(data): - name = data[:word3] - data = data[word3:] - else: - return [] - word4, data = ReadDWORD(data) - if word4 == None or word4 != 0x00000000: + topicname, data = ReadDWORDString(data) + if topicname == None: return [] - word5, data = ReadDWORD(data) - if word5 == None or word5 != 0x00000000: + itemname, data = ReadDWORDString(data) + if itemname == None: return [] sizeEmbedded, data = ReadDWORD(data) if sizeEmbedded == None: return [] - position = 6*4 + word3 - return [name, position, sizeEmbedded, hashlib.md5(dataSave[position:position + sizeEmbedded]).hexdigest(), binascii.b2a_hex(dataSave[position:position + 4])] + position = len(dataSave) - len(data) + content = dataSave[position:position + sizeEmbedded] + + if classname == b'Package\x00': + result = ExtractPackage00(content) + if result != []: + classname = classname + b':' + result[0] + position = position + result[3] + sizeEmbedded = result[4] + content = result[5] + + return [classname, position, sizeEmbedded, CalculateChosenHash(content), binascii.b2a_hex(content[:4]), content] def Info(data): result = ExtractOleInfo(data) if result == []: return 'Error: extraction failed' - return 'Name: %s\nPosition embedded: %08x\nSize embedded: %08x\nmd5: %s\nmagic: %s\n' % (repr(result[0]), result[1], result[2], result[3], result[4]) + return 'Name: %s\nPosition embedded: %08x\nSize embedded: %08x\n%s: %s\nmagic: %s\n' % (repr(result[0]), result[1], result[2], result[3][1], result[3][0], result[4]) + +def ExtractPackage(data): + result = ExtractOleInfo(data) + return data[result[1]:result[1] + result[2]] CUTTERM_NOTHING = 0 CUTTERM_POSITION = 1 CUTTERM_FIND = 2 CUTTERM_LENGTH = 3 -def ExtractPackage(data): - result = ExtractOleInfo(data) - return data[result[1]:result[1] + result[2]] - def Replace(string, dReplacements): if string in dReplacements: return dReplacements[string] @@ -528,7 +609,7 @@ def ParseCutTerm(argument): value = -value return CUTTERM_POSITION, value, argument[len(oMatch.group(0)):] if oMatch == None: - oMatch = re.match(r"\[\'(.+?)\'\](\d+)?([+-](?:0x[0-9a-f]+|\d+))?", argument) + oMatch = re.match(r"\[u?\'(.+?)\'\](\d+)?([+-](?:0x[0-9a-f]+|\d+))?", argument) else: if len(oMatch.group(1)) % 2 == 1: raise Exception("Uneven length hexadecimal string") @@ -537,7 +618,12 @@ def ParseCutTerm(argument): if oMatch == None: return None, None, argument else: - return CUTTERM_FIND, (oMatch.group(1), int(Replace(oMatch.group(2), {None: '1'})), ParseInteger(Replace(oMatch.group(3), {None: '0'}))), argument[len(oMatch.group(0)):] + if argument.startswith("[u'"): + # convert ascii to unicode 16 byte sequence + searchtext = oMatch.group(1).decode('unicode_escape').encode('utf16')[2:] + else: + searchtext = oMatch.group(1) + return CUTTERM_FIND, (searchtext, int(Replace(oMatch.group(2), {None: '1'})), ParseInteger(Replace(oMatch.group(3), {None: '0'}))), argument[len(oMatch.group(0)):] def ParseCutArgument(argument): type, value, remainder = ParseCutTerm(argument.strip()) @@ -571,8 +657,8 @@ def ParseCutArgument(argument): else: return typeLeft, valueLeft, type, value -def Find(data, value, nth): - position = -1 +def Find(data, value, nth, startposition=-1): + position = startposition while nth > 0: position = data.find(value, position + 1) if position == -1: @@ -582,12 +668,12 @@ def Find(data, value, nth): def CutData(stream, cutArgument): if cutArgument == '': - return stream + return [stream, None, None] typeLeft, valueLeft, typeRight, valueRight = ParseCutArgument(cutArgument) if typeLeft == None: - return stream + return [stream, None, None] if typeLeft == CUTTERM_NOTHING: positionBegin = 0 @@ -596,7 +682,7 @@ def CutData(stream, cutArgument): elif typeLeft == CUTTERM_FIND: positionBegin = Find(stream, valueLeft[0], valueLeft[1]) if positionBegin == -1: - return '' + return ['', None, None] positionBegin += valueLeft[2] else: raise Exception("Unknown value typeLeft") @@ -610,21 +696,21 @@ def CutData(stream, cutArgument): elif typeRight == CUTTERM_LENGTH: positionEnd = positionBegin + valueRight elif typeRight == CUTTERM_FIND: - positionEnd = Find(stream, valueRight[0], valueRight[1]) + positionEnd = Find(stream, valueRight[0], valueRight[1], positionBegin) if positionEnd == -1: - return '' + return ['', None, None] else: positionEnd += len(valueRight[0]) positionEnd += valueRight[2] else: raise Exception("Unknown value typeRight") - return stream[positionBegin:positionEnd] + return [stream[positionBegin:positionEnd], positionBegin, positionEnd] def HexDecode(hexstream, options): if hexstream == None: - return '' - result = '' + return b'' + result = b'' for entry in hexstream: if isinstance(entry, str): if len(entry) % 2 == 1: @@ -676,24 +762,36 @@ def __init__(self, index, leader, level, beginPosition, endPosition, countChildr self.oleInfo = oleInfo def GenerateMAGIC(data): - return binascii.b2a_hex(data) + ' ' + ''.join([IFF(ord(c) >= 32, c, '.') for c in data]) + if sys.version_info[0] > 2: + return binascii.b2a_hex(data).decode() + ' ' + ''.join([IFF(c >= 32 and c < 127, chr(c), '.') for c in data]) + else: + return binascii.b2a_hex(data) + ' ' + ''.join([IFF(ord(c) >= 32 and c < 127, c, '.') for c in data]) -def RTFSub(oStringIO, prefix, rules, options): - global plugins - global decoders +def RTFSub(oBytesIO, prefix, rules, options): + returnCode = 0 if options.filter != '': - if not options.filter in ['O', 'h']: + options.filter = options.filter.replace(' ', '') + if options.filter == 'O': + options.filter = ['O'] + elif options.filter == 'h': + options.filter = ['h', 0] + elif options.filter.startswith('h>'): + options.filter = ['h', int(options.filter[2:])] + else: print('Unknown filter: %s' % options.filter) - return - - returnCode = 0 + return returnCode sys.setrecursionlimit(options.recursionlimit) + if options.ignore != '': + controlwordsToIgnore = options.ignore.encode().split(b',') + else: + controlwordsToIgnore = [] + counter = 1 - rtfdata = oStringIO.read() - if not rtfdata.startswith('{'): + rtfdata = oBytesIO.read() + if not rtfdata.startswith(b'{'): print('This file does not start with an opening brace: {\nCheck if it is an RTF file.\nMAGIC: %s' % GenerateMAGIC(rtfdata[0:4])) return -1 sequence = [] @@ -703,9 +801,9 @@ def RTFSub(oStringIO, prefix, rules, options): sequence.append(cIteminfo(0, sequence[0].endPosition + 1, len(rtfdata) - 1, 0)) dAnalysis = {} for oIteminfo in sequence: - controlWord = '' + controlWord = b'' if oIteminfo.level != 0: - oMatch = re.match(r'(\\\*)?\\[a-z]+(-?[0-9]+)? ?', rtfdata[oIteminfo.beginPosition + 1:]) + oMatch = re.match(r'(\\\*)?\\[a-z]+(-?[0-9]+)? ?'.encode(), rtfdata[oIteminfo.beginPosition + 1:]) if oMatch != None: controlWord = oMatch.group(0) beginContent = oIteminfo.beginPosition + 1 + len(controlWord) @@ -713,10 +811,10 @@ def RTFSub(oStringIO, prefix, rules, options): if beginContent < endContent: content = rtfdata[beginContent:endContent + 1] else: - content = '' + content = b'' else: content = rtfdata[oIteminfo.beginPosition:] - hexstring, longestContiguousHexstring, countUnexpectedCharacters = ExtractHex(content) + hexstring, longestContiguousHexstring, countUnexpectedCharacters = ExtractHex(content, controlwordsToIgnore) if oIteminfo.level == 0: leader = 'Remainder ' else: @@ -732,96 +830,128 @@ def RTFSub(oStringIO, prefix, rules, options): try: data = ExtractPackage(data) except: - data = '' - object.append({'id': counter, 'name': str(counter), 'content': binascii.b2a_base64(data).strip('\n')}) + data = b'' + object.append({'id': counter, 'name': str(counter), 'content': binascii.b2a_base64(data).decode().strip('\n')}) print(json.dumps({'version': 2, 'id': 'didierstevens.com', 'type': 'content', 'fields': ['id', 'name', 'content'], 'items': object})) return - if options.select == '': - for counter in range(1, len(dAnalysis) + 1): - hexcount, bincount = HexBinCount(dAnalysis[counter].hexstring) - if options.filter == '' or options.filter == 'O' and dAnalysis[counter].oleInfo != [] or options.filter == 'h' and hexcount > 0: - line = '%5d %s c=%5d p=%08x l=%8d h=%8d;%8d b=%8d %s u=%8d %s' % (counter, dAnalysis[counter].leader[0:15], dAnalysis[counter].countChildren, dAnalysis[counter].beginPosition, dAnalysis[counter].endPosition - dAnalysis[counter].beginPosition, hexcount, len(dAnalysis[counter].longestContiguousHexstring), bincount, IFF(dAnalysis[counter].oleInfo != [], 'O', ' '), dAnalysis[counter].countUnexpectedCharacters, dAnalysis[counter].controlWord.strip()) - if dAnalysis[counter].controlWord.strip() == '\\*\\objclass': - line += ' ' + dAnalysis[counter].content - linePrinted = False - if options.yara == None: - print(line) - if dAnalysis[counter].oleInfo != []: - print(' Name: %s Size: %d md5: %s magic: %s' % (repr(dAnalysis[counter].oleInfo[0]), dAnalysis[counter].oleInfo[2], dAnalysis[counter].oleInfo[3], dAnalysis[counter].oleInfo[4])) - if dAnalysis[counter].level == 0: - message = [] - countWhitespace = len([c for c in dAnalysis[counter].content if c in string.whitespace]) - countNull = dAnalysis[counter].content.count('\x00') - if countWhitespace == len(dAnalysis[counter].content): - message.append('Only whitespace = %d' % countWhitespace) - elif countNull == len(dAnalysis[counter].content): - message.append('Only NULL bytes = %d' % countNull) - elif countWhitespace + countNull == len(dAnalysis[counter].content): - message.append('Only whitespace = %d and NULL bytes = %d' % (countWhitespace, countNull)) - else: - if countWhitespace > 0: - message.append('Whitespace = %d' % countWhitespace) - if countNull > 0: - message.append('NULL bytes = %d' % countNull) - if dAnalysis[counter].content.count('{') > 0: - message.append('Left curly braces = %d' % dAnalysis[counter].content.count('{')) - if dAnalysis[counter].content.count('}') > 0: - message.append('Right curly braces = %d' % dAnalysis[counter].content.count('}')) - print(' ' + ' '.join(message)) - linePrinted = True - elif dAnalysis[counter].content != None: - stream = HexDecodeIfRequested(dAnalysis[counter], options) - oDecoders = [cIdentity(stream, None)] - for cDecoder in decoders: - try: - oDecoder = cDecoder(stream, options.decoderoptions) - oDecoders.append(oDecoder) - except Exception as e: - print('Error instantiating decoder: %s' % cDecoder.name) - if options.verbose: - raise e - return returnCode - for oDecoder in oDecoders: - while oDecoder.Available(): - for result in rules.match(data=oDecoder.Decode()): - if not linePrinted: - print(line) - linePrinted = True - print(' YARA rule%s: %s' % (IFF(oDecoder.Name() == '', '', ' (stream decoder: %s)' % oDecoder.Name()), result.rule)) - if options.yarastrings: - for stringdata in result.strings: - print(' %06x %s:' % (stringdata[0], stringdata[1])) - print(' %s' % binascii.hexlify(C2BIP3(stringdata[2]))) - print(' %s' % repr(stringdata[2])) - else: - if len(decoders) > 1: - print('Error: provide only one decoder when using option select') - return returnCode - if options.dump: - DumpFunction = lambda x:x - IfWIN32SetBinary(sys.stdout) - elif options.hexdump: - DumpFunction = HexDump - elif options.info: - DumpFunction = Info - elif options.asciidumprle: - DumpFunction = lambda x: HexAsciiDump(x, True) + if options.objects: + dObjects = {} + dHashes = {} + counter = 1 + for analysis in dAnalysis.values(): + if analysis.oleInfo != []: + if not analysis.oleInfo[3][0] in dHashes: + dHashes[analysis.oleInfo[3][0]] = counter + dObjects[counter] = analysis + counter += 1 + + if options.select == '': + for key in sorted(dObjects.keys()): + oleInfo = dObjects[key].oleInfo + print('%d: Name: %s' % (key, oleInfo[0])) + print(' Magic: %s' % oleInfo[4]) + print(' Size: %d' % oleInfo[2]) + print(' Hash: %s %s' % (oleInfo[3][1], oleInfo[3][0])) else: - DumpFunction = HexAsciiDump - if options.extract: - ExtractFunction = ExtractPackage + if options.dump: + DumpFunction = lambda x:x + IfWIN32SetBinary(sys.stdout) + elif options.hexdump: + DumpFunction = HexDump + elif options.info: + DumpFunction = Info + elif options.asciidumprle: + DumpFunction = lambda x: HexAsciiDump(x, True) + else: + DumpFunction = HexAsciiDump + if options.extract: + ExtractFunction = ExtractPackage + else: + ExtractFunction = lambda x:x + + keys = list(dObjects.keys()) + for key in keys: + if options.select != 'a' and options.select != str(key): + del dObjects[key] + if len(dObjects) == 0: + print('Warning: no item was selected with expression %s' % options.select) + return + for key in sorted(dObjects.keys()): + StdoutWriteChunked(HeadTail(DumpFunction(CutData(dObjects[key].oleInfo[5], options.cut)[0]), options.headtail)) + else: + if options.select == '': + for counter in range(1, len(dAnalysis) + 1): + hexcount, bincount = HexBinCount(dAnalysis[counter].hexstring) + if options.filter == '' or options.filter[0] == 'O' and dAnalysis[counter].oleInfo != [] or options.filter[0] == 'h' and hexcount > options.filter[1]: + length = dAnalysis[counter].endPosition - dAnalysis[counter].beginPosition - IFF(dAnalysis[counter].level == 0, 0, 1) + line = '%5d %s c=%5d p=%08x l=%8d h=%8d;%8d b=%8d %s u=%8d %s' % (counter, dAnalysis[counter].leader[0:15], dAnalysis[counter].countChildren, dAnalysis[counter].beginPosition, length, hexcount, len(dAnalysis[counter].longestContiguousHexstring), bincount, IFF(dAnalysis[counter].oleInfo != [], 'O', ' '), dAnalysis[counter].countUnexpectedCharacters, dAnalysis[counter].controlWord.decode().strip()) + if dAnalysis[counter].controlWord.strip() == b'\\*\\objclass': + line += ' ' + dAnalysis[counter].content.decode() + linePrinted = False + if options.yara == None: + print(line) + if dAnalysis[counter].oleInfo != []: + print(' Name: %s Size: %d %s: %s magic: %s' % (repr(dAnalysis[counter].oleInfo[0]), dAnalysis[counter].oleInfo[2], dAnalysis[counter].oleInfo[3][1], dAnalysis[counter].oleInfo[3][0], dAnalysis[counter].oleInfo[4].decode())) + if dAnalysis[counter].level == 0: + message = [] + countWhitespace = len([c for c in dAnalysis[counter].content if chr(c) in string.whitespace]) + countNull = dAnalysis[counter].content.count(b'\x00') + if countWhitespace == len(dAnalysis[counter].content): + message.append('Only whitespace = %d' % countWhitespace) + elif countNull == len(dAnalysis[counter].content): + message.append('Only NULL bytes = %d' % countNull) + elif countWhitespace + countNull == len(dAnalysis[counter].content): + message.append('Only whitespace = %d and NULL bytes = %d' % (countWhitespace, countNull)) + else: + if countWhitespace > 0: + message.append('Whitespace = %d' % countWhitespace) + if countNull > 0: + message.append('NULL bytes = %d' % countNull) + if dAnalysis[counter].content.count(b'{') > 0: + message.append('Left curly braces = %d' % dAnalysis[counter].content.count(b'{')) + if dAnalysis[counter].content.count(b'}') > 0: + message.append('Right curly braces = %d' % dAnalysis[counter].content.count(b'}')) + print(' ' + ' '.join(message)) + linePrinted = True + elif dAnalysis[counter].content != None: + stream = HexDecodeIfRequested(dAnalysis[counter], options) + for result in rules.match(data=stream): + if not linePrinted: + print(line) + linePrinted = True + print(' YARA rule: %s' % result.rule) + if options.yarastrings: + for stringdata in result.strings: + print(' %06x %s:' % (stringdata[0], stringdata[1])) + print(' %s' % binascii.hexlify(C2BIP3(stringdata[2]))) + print(' %s' % repr(stringdata[2])) else: - ExtractFunction = lambda x:x - - for key in dAnalysis.keys(): - if options.select != 'a' and options.select != str(key): - del dAnalysis[key] - if len(dAnalysis) == 0: - print('Warning: no item was selected with expression %s' % options.select) - return - for key in sorted(dAnalysis.keys()): - StdoutWriteChunked(DumpFunction(ExtractFunction(DecodeFunction(decoders, options, CutData(HexDecodeIfRequested(dAnalysis[key], options), options.cut))))) + if options.dump: + DumpFunction = lambda x:x + IfWIN32SetBinary(sys.stdout) + elif options.hexdump: + DumpFunction = HexDump + elif options.info: + DumpFunction = Info + elif options.asciidumprle: + DumpFunction = lambda x: HexAsciiDump(x, True) + else: + DumpFunction = HexAsciiDump + if options.extract: + ExtractFunction = ExtractPackage + else: + ExtractFunction = lambda x:x + + keys = list(dAnalysis.keys()) + for key in keys: + if options.select != 'a' and options.select != str(key): + del dAnalysis[key] + if len(dAnalysis) == 0: + print('Warning: no item was selected with expression %s' % options.select) + return + for key in sorted(dAnalysis.keys()): + StdoutWriteChunked(HeadTail(DumpFunction(ExtractFunction(CutData(HexDecodeIfRequested(dAnalysis[key], options), options.cut)[0])), options.headtail)) return returnCode @@ -841,7 +971,7 @@ def YARACompile(ruledata): rule = 'rule regex {strings: $a = /%s/ ascii wide nocase condition: $a}' % ruledata[3:] else: rule = ruledata[1:] - return yara.compile(source=rule, externals={'streamname': '', 'VBA': False}) + return yara.compile(source=rule, externals={'streamname': '', 'VBA': False}), rule else: dFilepaths = {} if os.path.isdir(ruledata): @@ -852,17 +982,10 @@ def YARACompile(ruledata): else: for filename in ProcessAt(ruledata): dFilepaths[filename] = filename - return yara.compile(filepaths=dFilepaths, externals={'streamname': '', 'VBA': False}) - -def RTFDump(filename, options): - global plugins - plugins = [] - LoadPlugins(options.plugins, True) + return yara.compile(filepaths=dFilepaths, externals={'streamname': '', 'VBA': False}), ','.join(dFilepaths.values()) - global decoders - decoders = [] - LoadDecoders(options.decoders, True) +def RTFDump(filename, options): returnCode = 0 rules = None @@ -870,48 +993,51 @@ def RTFDump(filename, options): if not 'yara' in sys.modules: print('Error: option yara requires the YARA Python module.') return returnCode - rules = YARACompile(options.yara) + rules, rulesVerbose = YARACompile(options.yara) + if options.verbose: + print(rulesVerbose) if filename == '': IfWIN32SetBinary(sys.stdin) - oStringIO = cStringIO.StringIO(sys.stdin.read()) + if sys.version_info[0] > 2: + oBytesIO = BytesIO(sys.stdin.buffer.read()) + else: + oBytesIO = BytesIO(sys.stdin.read()) elif filename.lower().endswith('.zip'): oZipfile = zipfile.ZipFile(filename, 'r') oZipContent = oZipfile.open(oZipfile.infolist()[0], 'r', C2BIP3(MALWARE_PASSWORD)) - oStringIO = cStringIO.StringIO(oZipContent.read()) + oBytesIO = BytesIO(oZipContent.read()) oZipContent.close() oZipfile.close() else: - oStringIO = cStringIO.StringIO(open(filename, 'rb').read()) + oBytesIO = BytesIO(open(filename, 'rb').read()) - returnCode = RTFSub(oStringIO, '', rules, options) + returnCode = RTFSub(oBytesIO, '', rules, options) return returnCode def Main(): oParser = optparse.OptionParser(usage='usage: %prog [options] [file]\n' + __description__, version='%prog ' + __version__) oParser.add_option('-m', '--man', action='store_true', default=False, help='Print manual') + oParser.add_option('-O', '--objects', action='store_true', default=False, help='produce overview of objects') oParser.add_option('-s', '--select', default='', help='select item nr for dumping (a for all)') oParser.add_option('-d', '--dump', action='store_true', default=False, help='perform dump') oParser.add_option('-x', '--hexdump', action='store_true', default=False, help='perform hex dump') oParser.add_option('-a', '--asciidump', action='store_true', default=False, help='perform ascii dump') oParser.add_option('-A', '--asciidumprle', action='store_true', default=False, help='perform ascii dump with RLE') + oParser.add_option('-T', '--headtail', action='store_true', default=False, help='do head & tail') oParser.add_option('-H', '--hexdecode', action='store_true', default=False, help='decode hexadecimal data; append 0 in case of uneven number of hexadecimal digits') oParser.add_option('-S', '--hexshift', action='store_true', default=False, help='shift one nibble') - oParser.add_option('-p', '--plugins', type=str, default='', help='plugins to load (separate plugins with a comma , ; @file supported)') - oParser.add_option('--pluginoptions', type=str, default='', help='options for the plugin') - oParser.add_option('-q', '--quiet', action='store_true', default=False, help='only print output from plugins') oParser.add_option('-y', '--yara', help="YARA rule-file, @file or directory to check streams (YARA search doesn't work with -s option)") - oParser.add_option('-D', '--decoders', type=str, default='', help='decoders to load (separate decoders with a comma , ; @file supported)') - oParser.add_option('--decoderoptions', type=str, default='', help='options for the decoder') oParser.add_option('--yarastrings', action='store_true', default=False, help='Print YARA strings') - oParser.add_option('-V', '--verbose', action='store_true', default=False, help='verbose output with decoder errors') oParser.add_option('-c', '--cut', type=str, default='', help='cut data') oParser.add_option('-i', '--info', action='store_true', default=False, help='print extra info for selected item') oParser.add_option('-E', '--extract', action='store_true', default=False, help='extract package') oParser.add_option('-f', '--filter', type=str, default='', help='filter') + oParser.add_option('-I', '--ignore', type=str, default='', help='control words to ignore') oParser.add_option('--recursionlimit', type=int, default=2000, help='set recursionlimit for Python (default 2000)') oParser.add_option('-j', '--jsonoutput', action='store_true', default=False, help='produce json output') + oParser.add_option('-V', '--verbose', action='store_true', default=False, help='verbose output with decoder errors and YARA rules') (options, args) = oParser.parse_args() if options.man: