Permalink
Switch branches/tags
Nothing to show
Find file Copy path
f07efda Jul 29, 2018
1 contributor

Users who have contributed to this file

586 lines (511 sloc) 23.4 KB
#!/usr/bin/env python
from __future__ import print_function
__description__ = "Tool to instantiate variables"
__author__ = 'Didier Stevens'
__version__ = '0.0.1'
__date__ = '2018/07/29'
"""
Source code put in the public domain by Didier Stevens, no Copyright
https://DidierStevens.com
Use at your own risk
History:
2018/07/29: start
Todo:
"""
import optparse
import glob
import collections
import time
import sys
import textwrap
import os
import gzip
import re
import fnmatch
from contextlib import contextmanager
def PrintManual():
    """Print the built-in manual to stdout.

    Every line of the manual text is wrapped separately with textwrap.fill
    (default width), so the line breaks of the manual are preserved.
    """
    manual = '''
Manual:
Errors occuring when opening a file are reported (and logged if logging is turned on), and the program moves on to the next file.
Errors occuring when reading & processing a file are reported (and logged if logging is turned on), and the program stops unless option ignoreprocessingerrors is used.
Option --grep can be used to select (grep) lines that have to be processed.
If this option is not used, all lines will be processed.
To select particular lines to be processed, used option --grep and provide a regular expression. All lines matching this regular expression will be processed.
You can also use a capture group in your regular expression. The line to be processed will become the content of the first capture group (and not the complete line).
The regular expression matching operation is case sensitive. Use option --grepoptions i to make the matching operation case insensitive.
Use option --grepoptions v to invert the selection.
Use option --grepoptions F to match a fixed string in stead of a regular expression.
Option --begingrep can be used to select the first line from which on lines have to be processed.
If this option is not used, all lines will be processed.
To select the first line to be processed, used option --begingrep and provide a regular expression. The line matching this regular expression and all following lines will be processed (depending on --grep and --endgrep).
The regular expression matching operation is case sensitive. Use option --begingrepoptions i to make the matching operation case insensitive.
Use option --begingrepoptions v to invert the selection.
Use option --begingrepoptions F to match a fixed string in stead of a regular expression.
Option --endgrep can be used to select the last line to be processed.
If this option is not used, all lines will be processed.
To select the last line to be processed, used option --endgrep and provide a regular expression. The line matching this regular expression will be the last line to be processed (depending on --grep).
The regular expression matching operation is case sensitive. Use option --endgrepoptions i to make the matching operation case insensitive.
Use option --endgrepoptions v to invert the selection.
Use option --endgrepoptions F to match a fixed string in stead of a regular expression.
When combining --begingrep and --endgrep, make sure that --endgrep does not match a line before --begingrep does.
The lines are written to standard output, except when option -o is used. When option -o is used, the lines are written to the filename specified by option -o.
Filenames used with option -o starting with # have special meaning.
#c#example.txt will write output both to the console (stdout) and file example.txt.
#g# will write output to a file with a filename generated by the tool like this: toolname-date-time.txt.
#g#KEYWORD will write output to a file with a filename generated by the tool like this: toolname-KEYWORD-date-time.txt.
Use #p#filename to display execution progress.
To process several files while creating seperate output files for each input file, use -o #s#%f%.result *.
This will create output files with the name of the inputfile and extension .result.
There are several variables available when creating separate output files:
%f% is the full filename (with directory if present)
%b% is the base name: the filename without directory
%d% is the directory
%r% is the root: the filename without extension
%e% is the extension
Most options can be combined, like #ps# for example.
#l# is used for literal filenames: if the output filename has to start with # (#example.txt for example), use filename #l##example.txt for example.
'''
    # Wrap line by line, then emit everything with one print call
    wrapped = [textwrap.fill(paragraph) for paragraph in manual.split('\n')]
    print('\n'.join(wrapped))
# Default separator: used by cVariables to split a variables string and by
# cLogfile to join the fields of a log line
DEFAULT_SEPARATOR = ','
# Quote character passed to MakeCSVLine when cLogfile writes a log line
QUOTE = '"'
def PrintError(*args, **kwargs):
    """Print to standard error; takes the same arguments as print (except file)."""
    stream = sys.stderr
    print(*args, file=stream, **kwargs)
def File2Strings(filename):
    """Read a text file and return its lines as a list, without trailing newlines.

    Returns None when the file can not be opened or read.

    A real list is returned (not a lazy map object, which is what map()
    produces on Python 3), because callers concatenate the result with other
    lists.
    """
    try:
        # The with statement closes the file, also when reading fails
        with open(filename, 'r') as f:
            return [line.rstrip('\n') for line in f]
    except (IOError, OSError, ValueError):
        # ValueError also covers decoding errors (UnicodeDecodeError)
        return None
def ProcessAt(argument):
    """Expand a here-file argument.

    An argument starting with @ names a text file: the lines of that file are
    returned. Any other argument is returned as a single-item list.
    Raises Exception when the here-file can not be read.
    """
    if not argument.startswith('@'):
        return [argument]
    strings = File2Strings(argument[1:])
    if strings is None:
        raise Exception('Error reading %s' % argument)
    return strings
# CIC: Call If Callable
def CIC(expression):
    """Return expression(), or expression itself when it is not callable."""
    return expression() if callable(expression) else expression
# IFF: IF Function
def IFF(expression, valueTrue, valueFalse):
    """Return valueTrue when expression is true, else valueFalse.

    Only the selected value is passed through CIC, so a callable value is
    called only when it is the one that is chosen.
    """
    chosen = valueTrue if expression else valueFalse
    return CIC(chosen)
class cVariables():
    """Set of named variables that can be substituted into %name% placeholders."""

    def __init__(self, variablesstring='', separator=DEFAULT_SEPARATOR):
        """Start empty, or parse variablesstring (items separated by separator)."""
        self.dVariables = {}
        if variablesstring != '':
            # NOTE(review): VariableNameValue is not defined in the visible
            # part of this file - confirm it exists before passing a
            # non-empty variablesstring
            for assignment in variablesstring.split(separator):
                name, value = VariableNameValue(assignment)
                self.dVariables[name] = value

    def SetVariable(self, name, value):
        """Define (or overwrite) variable name with value."""
        self.dVariables[name] = value

    def Instantiate(self, astring):
        """Return astring with every %name% replaced by the value of variable name."""
        result = astring
        for name in self.dVariables:
            result = result.replace('%' + name + '%', self.dVariables[name])
        return result
class cOutput():
    """Line-oriented output to the console and/or a file.

    filenameOption is the value of option -o. A value of the form
    #switches#filename selects special behaviour:
      s: separate output file per input file (filename is a template)
      p: report progress on stderr
      c: write to the console and to the file
      l: literal filename (no special behaviour)
      g: generate the filename (toolname-[keyword-]date-time.txt)
    Any other value is used as a plain filename. Without a value, lines are
    printed to stdout.
    """

    def __init__(self, filenameOption=None):
        self.filenameOption = filenameOption
        self.separateFiles = False
        self.progress = False
        self.console = False
        self.fOut = None
        # Always defined, so that the attribute exists even when no # option is parsed
        self.filename = ''
        if self.filenameOption:
            if self.ParseHash(self.filenameOption):
                if not self.separateFiles and self.filename != '':
                    self.fOut = open(self.filename, 'w')
            elif self.filenameOption != '':
                self.fOut = open(self.filenameOption, 'w')

    def ParseHash(self, option):
        """Parse a #switches#filename option.

        Returns True and updates the switches and self.filename when option is
        a valid # option. Returns False, leaving the object untouched, when
        option is not a # option or contains an unknown switch.
        """
        if not option.startswith('#'):
            return False
        position = option.find('#', 1)
        if position <= 1:
            return False
        switches = option[1:position]
        filename = option[position + 1:]
        # Collect into locals first: nothing is applied when a switch is unknown
        separateFiles = False
        progress = False
        console = False
        for switch in switches:
            if switch == 's':
                separateFiles = True
            elif switch == 'p':
                progress = True
            elif switch == 'c':
                console = True
            elif switch == 'l':
                pass
            elif switch == 'g':
                if filename != '':
                    extra = filename + '-'
                else:
                    extra = ''
                filename = '%s-%s%s.txt' % (os.path.splitext(os.path.basename(sys.argv[0]))[0], extra, self.FormatTime())
            else:
                return False
        self.filename = filename
        if separateFiles:
            self.separateFiles = True
        if progress:
            self.progress = True
        if console:
            self.console = True
        return True

    @staticmethod
    def FormatTime(epoch=None):
        """Format epoch (default: now) as local time YYYYMMDD-HHMMSS."""
        if epoch is None:
            epoch = time.time()
        return '%04d%02d%02d-%02d%02d%02d' % time.localtime(epoch)[0:6]

    def Line(self, line):
        """Write one line to the console (no file, or switch c) and to the file (if open)."""
        if self.fOut is None or self.console:
            try:
                print(line)
            except UnicodeEncodeError:
                # Console can not represent the line: escape the offending characters
                encoding = sys.stdout.encoding
                print(line.encode(encoding, errors='backslashreplace').decode(encoding))
#            sys.stdout.flush()
        if self.fOut is not None:
            self.fOut.write(line + '\n')
            self.fOut.flush()

    def LineTimestamped(self, line):
        """Write line prefixed with the current local time."""
        self.Line('%s: %s' % (self.FormatTime(), line))

    def Filename(self, filename, index, total):
        """Announce the input file that is about to be processed.

        Reports progress on stderr (switch p) and, with switch s, closes the
        current output file and opens the one derived from the input filename.
        """
        self.separateFilename = filename
        if self.progress:
            PrintError('%d/%d %s' % (index + 1, total, self.separateFilename))
        if self.separateFiles and self.filename != '':
            oFilenameVariables = cVariables()
            oFilenameVariables.SetVariable('f', self.separateFilename)
            basename = os.path.basename(self.separateFilename)
            oFilenameVariables.SetVariable('b', basename)
            oFilenameVariables.SetVariable('d', os.path.dirname(self.separateFilename))
            root, extension = os.path.splitext(basename)
            oFilenameVariables.SetVariable('r', root)
            oFilenameVariables.SetVariable('e', extension)
            self.Close()
            self.fOut = open(oFilenameVariables.Instantiate(self.filename), 'w')

    def Close(self):
        """Close the output file, if one is open."""
        if self.fOut is not None:
            self.fOut.close()
            self.fOut = None
class cExpandFilenameArguments():
    """Expand command-line filename arguments into [filename, expression] pairs.

    After construction:
      filenameexpressions: list of [filename, expression] pairs
      warning / message:   set when wildcards matched nothing or, with
                           checkfilenames, when files were skipped
    """
    def __init__(self, filenames, literalfilenames=False, recursedir=False, checkfilenames=False, expressionprefix=None):
        """Build the list of files to process.

        filenames:        the raw arguments
        literalfilenames: use the arguments as they are (no @file, no wildcards)
        recursedir:       walk directories recursively
        checkfilenames:   drop entries that do not exist or are not regular files
        expressionprefix: arguments starting with this prefix are not files; the
                          remainder of the argument becomes the expression that
                          is attached to the files that follow
        """
        self.containsUnixShellStyleWildcards = False
        self.warning = False
        self.message = ''
        self.filenameexpressions = []
        self.expressionprefix = expressionprefix
        self.literalfilenames = literalfilenames
        # Current expression: attached to every file added until the next prefixed argument
        expression = ''
        if len(filenames) == 0:
            # No arguments: one entry with an empty filename
            self.filenameexpressions = [['', '']]
        elif literalfilenames:
            self.filenameexpressions = [[filename, ''] for filename in filenames]
        elif recursedir:
            for dirwildcard in filenames:
                if expressionprefix != None and dirwildcard.startswith(expressionprefix):
                    expression = dirwildcard[len(expressionprefix):]
                else:
                    if dirwildcard.startswith('@'):
                        # Here-file: every line is taken as a filename
                        for filename in ProcessAt(dirwildcard):
                            self.filenameexpressions.append([filename, expression])
                    elif os.path.isfile(dirwildcard):
                        self.filenameexpressions.append([dirwildcard, expression])
                    else:
                        if os.path.isdir(dirwildcard):
                            # A directory: take all files below it
                            dirname = dirwildcard
                            basename = '*'
                        else:
                            # Otherwise: directory to walk + pattern for the file names
                            dirname, basename = os.path.split(dirwildcard)
                            if dirname == '':
                                dirname = '.'
                        for path, dirs, files in os.walk(dirname):
                            for filename in fnmatch.filter(files, basename):
                                self.filenameexpressions.append([os.path.join(path, filename), expression])
        else:
            # Expand @files, then wildcards, flatten, and remove duplicates while keeping the order
            for filename in list(collections.OrderedDict.fromkeys(sum(map(self.Glob, sum(map(ProcessAt, filenames), [])), []))):
                if expressionprefix != None and filename.startswith(expressionprefix):
                    expression = filename[len(expressionprefix):]
                else:
                    self.filenameexpressions.append([filename, expression])
            self.warning = self.containsUnixShellStyleWildcards and len(self.filenameexpressions) == 0
            if self.warning:
                self.message = "Your filename argument(s) contain Unix shell-style wildcards, but no files were matched.\nCheck your wildcard patterns or use option literalfilenames if you don't want wildcard pattern matching."
                # Nothing matched: skip the remaining steps
                return
        # Only an expression was given: pair it with an empty filename
        if self.filenameexpressions == [] and expression != '':
            self.filenameexpressions = [['', expression]]
        if checkfilenames:
            self.CheckIfFilesAreValid()
    def Glob(self, filename):
        """Return the files matching filename, or [filename] when it contains no wildcards.

        Records in containsUnixShellStyleWildcards that a wildcard was seen.
        """
        if not ('?' in filename or '*' in filename or ('[' in filename and ']' in filename)):
            return [filename]
        self.containsUnixShellStyleWildcards = True
        return glob.glob(filename)
    def CheckIfFilesAreValid(self):
        """Keep only usable entries; report the skipped ones via warning/message."""
        valid = []
        doesnotexist = []
        isnotafile = []
        for filename, expression in self.filenameexpressions:
            hashfile = False
            # NOTE(review): FilenameCheckHash and FCH_DATA are not defined in the
            # visible part of this file; if they are missing, the bare except
            # below hides the NameError and hashfile stays False - confirm
            try:
                hashfile = FilenameCheckHash(filename, self.literalfilenames)[0] == FCH_DATA
            except:
                pass
            if filename == '' or hashfile:
                valid.append([filename, expression])
            elif not os.path.exists(filename):
                doesnotexist.append(filename)
            elif not os.path.isfile(filename):
                isnotafile.append(filename)
            else:
                valid.append([filename, expression])
        self.filenameexpressions = valid
        if len(doesnotexist) > 0:
            self.warning = True
            self.message += 'The following files do not exist and will be skipped: ' + ' '.join(doesnotexist) + '\n'
        if len(isnotafile) > 0:
            self.warning = True
            self.message += 'The following files are not regular files and will be skipped: ' + ' '.join(isnotafile) + '\n'
    def Filenames(self):
        """Return the filenames, or the [filename, expression] pairs when an expression prefix is in use."""
        if self.expressionprefix == None:
            return [filename for filename, expression in self.filenameexpressions]
        else:
            return self.filenameexpressions
def ToString(value):
    """Return value unchanged when it is a str, else its str() conversion."""
    return value if isinstance(value, str) else str(value)
def Quote(value, separator, quote):
    """Return value as one CSV field, quoted when necessary.

    The field is quoted when it is empty or contains the separator, the quote
    character or a line break. Quote characters inside a quoted field are
    doubled, so that the field can be parsed back unambiguously.
    """
    value = ToString(value)
    needsquoting = (
        value == ''
        or separator in value
        or quote in value
        or '\n' in value
        or '\r' in value
    )
    if not needsquoting:
        return value
    return quote + value.replace(quote, quote + quote) + quote
def MakeCSVLine(row, separator, quote):
    """Join the values of row into one line; each value goes through Quote first."""
    fields = []
    for value in row:
        fields.append(Quote(value, separator, quote))
    return separator.join(fields)
class cLogfile():
    """Optional CSV logfile of a run: header lines, error count and finish line."""

    def __init__(self, keyword, comment):
        """Create the logfile (toolname-keyword-timestamp.log) unless keyword is empty.

        Without a keyword nothing is written, but errors are still counted.
        """
        self.starttime = time.time()
        self.errors = 0
        self.oOutput = None
        if keyword != '':
            toolname = os.path.splitext(os.path.basename(sys.argv[0]))[0]
            self.oOutput = cOutput('%s-%s-%s.log' % (toolname, keyword, self.FormatTime()))
        # Header: describes the circumstances of this run
        self.Line('Start')
        self.Line('UTC', '%04d%02d%02d-%02d%02d%02d' % time.gmtime(time.time())[0:6])
        self.Line('Comment', comment)
        self.Line('Args', repr(sys.argv))
        self.Line('Version', __version__)
        self.Line('Python', repr(sys.version_info))
        self.Line('Platform', sys.platform)
        self.Line('CWD', repr(os.getcwd()))

    @staticmethod
    def FormatTime(epoch=None):
        """Format epoch (default: now) as local time YYYYMMDD-HHMMSS."""
        moment = time.time() if epoch == None else epoch
        return '%04d%02d%02d-%02d%02d%02d' % time.localtime(moment)[0:6]

    def Line(self, *line):
        """Write the values as one timestamped CSV line (no-op without logfile)."""
        if self.oOutput == None:
            return
        fields = (self.FormatTime(), ) + line
        self.oOutput.Line(MakeCSVLine(fields, DEFAULT_SEPARATOR, QUOTE))

    def LineError(self, *line):
        """Log an error line and count the error."""
        self.Line('Error', *line)
        self.errors += 1

    def Close(self):
        """Write the finish line (error count, elapsed seconds) and close the logfile."""
        if self.oOutput == None:
            return
        self.Line('Finish', '%d error(s)' % self.errors, '%d second(s)' % (time.time() - self.starttime))
        self.oOutput.Close()
class cGrep():
    """Line selector for options --grep, --begingrep and --endgrep.

    expression: regular expression (or fixed string with option F); an empty
                expression disables the selector (dogrep is False)
    options:    string of option letters: i (case insensitive), v (invert),
                F (fixed string in stead of regular expression)
    Raises Exception for options without expression or for an unknown option.
    """

    def __init__(self, expression, options):
        self.expression = expression
        self.options = options
        if self.expression == '' and self.options != '':
            raise Exception('Option --grepoptions can not be used without option --grep')
        self.dogrep = self.expression != ''
        self.oRE = None
        self.invert = False
        self.caseinsensitive = False
        self.fixedstring = False
        if self.dogrep:
            flags = 0
            for option in self.options:
                if option == 'i':
                    flags |= re.IGNORECASE
                    self.caseinsensitive = True
                elif option == 'v':
                    self.invert = True
                elif option == 'F':
                    self.fixedstring = True
                else:
                    raise Exception('Unknown grep option: %s' % option)
            # A fixed string is never used as a regular expression, so it must
            # not be compiled: it may not be valid regex syntax (e.g. "a(")
            if not self.fixedstring:
                self.oRE = re.compile(self.expression, flags)

    def Grep(self, line):
        """Test line against the expression.

        Returns a tuple (selected, line). For a regular expression with at
        least one capture group (and no inversion), the returned line is the
        content of the first capture group of the match.
        """
        if not self.dogrep:
            # No expression: every line is selected, unchanged
            return True, line
        if self.fixedstring:
            if self.caseinsensitive:
                found = self.expression.lower() in line.lower()
            else:
                found = self.expression in line
            if self.invert:
                return not found, line
            return found, line
        oMatch = self.oRE.search(line)
        if self.invert:
            return oMatch is None, line
        if oMatch is not None and len(oMatch.groups()) > 0:
            line = oMatch.groups()[0]
        return oMatch is not None, line
def ProcessFile(fIn, oBeginGrep, oGrep, oEndGrep, fullread):
    """Generate the lines of fIn that have to be processed.

    fIn:        open text file (or None: nothing is generated)
    oBeginGrep: selects the first line to process (None or disabled: start
                with the first line)
    oGrep:      selects the lines to process (None or disabled: all lines)
    oEndGrep:   selects the last line to process (None or disabled: continue
                until the end of the file)
    fullread:   when true, the complete content is generated as one item and
                the selectors are not used

    Lines are generated without trailing newline characters. The line matching
    oEndGrep is the last line considered: it is generated if oGrep selects it,
    and no line after it is ever generated.
    """
    if fIn is None:
        return
    if fullread:
        yield fIn.read()
        return
    begin = oBeginGrep is None or not oBeginGrep.dogrep
    checkend = oEndGrep is not None and oEndGrep.dogrep
    dogrep = oGrep is not None and oGrep.dogrep
    for line in fIn:
        line = line.rstrip('\n\r')
        if not begin:
            begin, line = oBeginGrep.Grep(line)
            if not begin:
                continue
        isendline = False
        if checkend:
            isendline, line = oEndGrep.Grep(line)
        selected = True
        if dogrep:
            selected, line = oGrep.Grep(line)
        if selected:
            yield line
        if isendline:
            # Stop here, also when the end line itself was not selected
            break
def AnalyzeFileError(filename):
    """Report on stderr that filename could not be opened, with a likely reason.

    Prints the exception currently being handled, followed by a diagnosis
    (missing file, directory, not a regular file) when one can be made.
    """
    PrintError('Error opening file %s' % filename)
    PrintError(sys.exc_info()[1])
    try:
        if not os.path.exists(filename):
            diagnosis = 'The file does not exist'
        elif os.path.isdir(filename):
            diagnosis = 'The file is a directory'
        elif not os.path.isfile(filename):
            diagnosis = 'The file is not a regular file'
        else:
            diagnosis = None
        if diagnosis is not None:
            PrintError(diagnosis)
    except:
        # The diagnosis is best effort: it must never raise an error itself
        pass
@contextmanager
def TextFile(filename, oLogfile):
    """Context manager that opens filename for reading text.

    filename: '' for stdin, a name with extension .gz for a gzip compressed
              text file, anything else for a plain text file
    oLogfile: log object (methods Line and LineError)

    Yields the open file, or None when the file could not be opened (the
    error is then reported and logged). The file is always closed when the
    with block is left, also when it is left with an exception; stdin is
    never closed.
    """
    fIn = None
    if filename == '':
        fIn = sys.stdin
    else:
        try:
            if os.path.splitext(filename)[1].lower() == '.gz':
                if sys.version_info[0] >= 3:
                    # Text mode: lines are str, like for plain text files
                    fIn = gzip.open(filename, 'rt')
                else:
                    fIn = gzip.GzipFile(filename, 'rb')
            else:
                fIn = open(filename, 'r')
        except Exception:
            AnalyzeFileError(filename)
            oLogfile.LineError('Opening file %s %s' % (filename, repr(sys.exc_info()[1])))
            fIn = None
    if fIn is not None:
        oLogfile.Line('Success', 'Opening file %s' % filename)
    try:
        yield fIn
        if fIn is not None and sys.exc_info()[1] is not None:
            oLogfile.LineError('Reading file %s %s' % (filename, repr(sys.exc_info()[1])))
    finally:
        # Release the file handle, also when the with block raised
        if fIn is not None and fIn is not sys.stdin:
            fIn.close()
def ReplaceFunction(oMatch):
    """Replacement callback for re.sub: return the value of the matched variable.

    The first capture group of oMatch is the variable name, looked up in the
    global dVariables. A reference to a variable that is not defined is left
    unchanged (the complete match is returned) in stead of raising KeyError.
    """
    global dVariables
    return dVariables.get(oMatch.groups()[0], oMatch.group(0))
def ProcessTextFile(filename, oBeginGrep, oGrep, oEndGrep, oOutput, oLogfile, options):
    """Collect the variable assignments of one file and output the instantiated values.

    First pass: every match of options.assignment (case insensitive) in a
    selected line defines a variable (group 1: name, group 2: value) in the
    global dVariables, which is reset for each file.
    Second pass: the references matched by options.instantiation are replaced
    in each value; values changed by this replacement are written to oOutput.

    Processing errors are logged; they are raised again unless
    options.ignoreprocessingerrors is set.
    """
    global dVariables
    dVariables = {}
    with TextFile(filename, oLogfile) as fIn:
        try:
            for line in ProcessFile(fIn, oBeginGrep, oGrep, oEndGrep, False):
                # ----- Put your line processing code here -----
                for oAssignment in re.finditer(options.assignment, line, re.I):
                    captured = oAssignment.groups()
                    dVariables[captured[0]] = captured[1]
                # ----------------------------------------------
        except:
            oLogfile.LineError('Processing file %s %s' % (filename, repr(sys.exc_info()[1])))
            if not options.ignoreprocessingerrors:
                raise
            if sys.version_info[0] < 3:
                sys.exc_clear()
    for original in dVariables.values():
        instantiated = re.sub(options.instantiation, ReplaceFunction, original)
        # Only values that actually contained a reference are of interest
        if instantiated != original:
            oOutput.Line(instantiated)
def InstantiateCOutput(options):
    """Create the cOutput object for option -o (no option value: no filename)."""
    return cOutput(options.output if options.output != '' else None)
def ProcessTextFiles(filenames, oLogfile, options):
    """Process all files, writing the results to the output selected by the options.

    filenames: list of filenames ('' is stdin)
    oLogfile:  log object
    options:   parsed command-line options
    Raises Exception when the grep options are invalid (see cGrep).
    """
    oGrep = cGrep(options.grep, options.grepoptions)
    oBeginGrep = cGrep(options.begingrep, options.begingrepoptions)
    oEndGrep = cGrep(options.endgrep, options.endgrepoptions)
    oOutput = InstantiateCOutput(options)
    try:
        for index, filename in enumerate(filenames):
            oOutput.Filename(filename, index, len(filenames))
            ProcessTextFile(filename, oBeginGrep, oGrep, oEndGrep, oOutput, oLogfile, options)
    finally:
        # Close the output file, also when processing stops on an error
        oOutput.Close()
def Main():
    """Parse the command line and process the files given as arguments.

    With option --man, the help and the manual are printed and nothing is
    processed. The logfile (if any) is closed also when processing stops on
    an error.
    """
    moredesc = '''
Arguments:
@file: process each file listed in the text file specified
wildcards are supported
Source code put in the public domain by Didier Stevens, no Copyright
Use at your own risk
https://DidierStevens.com'''
    oParser = optparse.OptionParser(usage='usage: %prog [options] [[@]file ...]\n' + __description__ + moredesc, version='%prog ' + __version__)
    oParser.add_option('-m', '--man', action='store_true', default=False, help='Print manual')
    oParser.add_option('-o', '--output', type=str, default='', help='Output to file (# supported)')
    # Raw string: \s is a regular expression escape, not a string escape
    oParser.add_option('-a', '--assignment', type=str, default=r'set\s+([^=]+)=([^&]*)', help='Assignment regex')
    oParser.add_option('-i', '--instantiation', type=str, default='%([^%]+)%', help='Instantiation regex')
    oParser.add_option('--grep', type=str, default='', help='Grep expression')
    oParser.add_option('--grepoptions', type=str, default='', help='grep options (ivF)')
    oParser.add_option('--begingrep', type=str, default='', help='Grep expression for begin line')
    oParser.add_option('--begingrepoptions', type=str, default='', help='begingrep options (ivF)')
    oParser.add_option('--endgrep', type=str, default='', help='Grep expression for end line')
    oParser.add_option('--endgrepoptions', type=str, default='', help='endgrep options (ivF)')
    oParser.add_option('--literalfilenames', action='store_true', default=False, help='Do not interpret filenames')
    oParser.add_option('--recursedir', action='store_true', default=False, help='Recurse directories (wildcards and here files (@...) allowed)')
    oParser.add_option('--checkfilenames', action='store_true', default=False, help='Perform check if files exist prior to file processing')
    oParser.add_option('--logfile', type=str, default='', help='Create logfile with given keyword')
    oParser.add_option('--logcomment', type=str, default='', help='A string with comments to be included in the log file')
    oParser.add_option('--ignoreprocessingerrors', action='store_true', default=False, help='Ignore errors during file processing')
    (options, args) = oParser.parse_args()
    if options.man:
        oParser.print_help()
        PrintManual()
        return
    oLogfile = cLogfile(options.logfile, options.logcomment)
    try:
        oExpandFilenameArguments = cExpandFilenameArguments(args, options.literalfilenames, options.recursedir, options.checkfilenames)
        oLogfile.Line('FilesCount', str(len(oExpandFilenameArguments.Filenames())))
        oLogfile.Line('Files', repr(oExpandFilenameArguments.Filenames()))
        if oExpandFilenameArguments.warning:
            PrintError('\nWarning:')
            PrintError(oExpandFilenameArguments.message)
            oLogfile.Line('Warning', repr(oExpandFilenameArguments.message))
        ProcessTextFiles(oExpandFilenameArguments.Filenames(), oLogfile, options)
        if oLogfile.errors > 0:
            PrintError('Number of errors: %d' % oLogfile.errors)
    finally:
        # Write the finish line and close the logfile, whatever happened
        oLogfile.Close()
# Run the tool only when this file is executed as a script (not when it is imported)
if __name__ == '__main__':
    Main()