Skip to content

Commit

Permalink
major library update to avoid re.error: global flags not at the start…
Browse files Browse the repository at this point in the history
… of the expression at position ...
  • Loading branch information
HariSekhon committed Jun 14, 2023
1 parent 86634b0 commit 4f8d913
Showing 1 changed file with 22 additions and 21 deletions.
43 changes: 22 additions & 21 deletions anonymize.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@
sys.exit(4)

__author__ = 'Hari Sekhon'
__version__ = '0.10.12'
__version__ = '0.11.0'

ip_regex = r'(?!127\.0\.0\.)' + ip_regex
subnet_mask_regex = r'(?!127\.0\.0\.)' + subnet_mask_regex
Expand Down Expand Up @@ -792,46 +792,47 @@ def run(self):
# allows pre-compilation to be easily switched on/off for testing
# testing on a moderately sized file shows that using pre-compiled regex is a couple of seconds quicker
def compile(self, name, regex):
    """Pre-compile a regex and register it under the given name.

    The pattern string is logged at debug level, compiled case-insensitively
    and the compiled pattern is assigned to self.regex[name].
    """
    log.debug(f"compiling regex '{name}' = '{regex}'")
    compiled_pattern = re.compile(regex, flags=re.IGNORECASE)
    self.regex[name] = compiled_pattern
    # to switch pre-compilation off for testing, store the raw pattern string instead:
    #self.regex[name] = regex

def prepare_regex(self):
self.compile('hostname',
r'(?<!\w\]\s)' + \
r'(?<!\.)' + \
r'(?<!\w\]\s)' +
r'(?<!\.)' +
# ignore Java methods such as SomeClass$method:20
r'(?<!\$)' + \
r'(?<!\$)' +
# ignore Java stack traces e.g. at SomeClass(Thread.java:789)
r'(?!\(\w+\.java:\d+\))' + \
r'(?!\(\w+\.java:\d+\))' +
# don't match 2018-01-01T00:00:00 => 2018-01-<hostname>:00:00
r'(?!\d+T\d+:\d+)' + \
r'(?!\d+[^A-Za-z0-9]|' + \
self.custom_ignores_raw + ')' + \
'(' + hostname_regex + ')' + \
r'(?!\d+T\d+:\d+)' +
r'(?!\d+[^A-Za-z0-9]|' +
self.custom_ignores_raw + ')' +
'(' + hostname_regex + ')' +
self.negative_host_lookbehind + r':(\d{1,5}(?!\.?\w))',
)
self.compile('domain',
# don't match java -some.net.property
#r'(?<!-)' + \
r'(?!' + self.custom_ignores_raw + ')' + \
domain_regex_strict + \
#r'(?<!-)' +
r'(?!' + self.custom_ignores_raw + ')' +
domain_regex_strict +
# don't match java -Dsome.net.property=
r'(?!=)' + \
r'(?!\.[A-Za-z])(\b|$)' + \
r'(?!=)' +
r'(?!\.[A-Za-z])(\b|$)' +
# ignore Java stack traces e.g. at SomeClass(Thread.java:789)
r'(?!\(\w+\.java:\d+\))' + \
r'(?!\(\w+\.java:\d+\))' +
self.negative_host_lookbehind
)
self.compile('fqdn',
# don't match java -some.net.property
#r'(?<!-)' + \
r'(?!' + self.custom_ignores_raw + ')' + \
'(' + fqdn_regex + ')' + \
#r'(?<!-)' +
r'(?!' + self.custom_ignores_raw + ')' +
'(' + fqdn_regex + ')' +
# don't match java -Dsome.net.property=
r'(?!=)' + \
r'(?!\.[A-Za-z])(\b|$)' + \
r'(?!=)' +
r'(?!\.[A-Za-z])(\b|$)' +
# ignore Java stack traces e.g. at SomeClass(Thread.java:789)
r'(?!\(\w+\.java:\d+\))' + \
r'(?!\(\w+\.java:\d+\))' +
self.negative_host_lookbehind
)
re_regex_ending = re.compile('_regex$')
Expand Down

0 comments on commit 4f8d913

Please sign in to comment.