Skip to content

Commit

Permalink
Group Positions for identical keyword --autopull
Browse files Browse the repository at this point in the history
  • Loading branch information
iBug committed Sep 7, 2018
1 parent 17c5035 commit 950e04a
Showing 1 changed file with 13 additions and 1 deletion.
14 changes: 13 additions & 1 deletion findspam.py
Expand Up @@ -1738,7 +1738,19 @@ def generate_why(compiled_regex, matched_text, type_of_text, is_regex_check):
if not is_regex_check:
return ""
matches = compiled_regex.finditer(matched_text)
return type_of_text + " - " + ", ".join([FindSpam.match_info(m) for m in matches])
spans = {}
for match in matches:
group = match.group().replace("\n", "")
if group not in spans:
spans[group] = [match.span()]
else:
spans[group].append(match.span())
infos = [(word, sorted(spans[word])) for word in spans]
infos.sort(key=lambda info: info[1][0][0]) # Sort by left boundary of first appearance
return type_of_text + " - " + ", ".join(
["Position {}: {}".format(
", ".join(["{}-{}".format(a + 1, b) for a, b in span]), word)
for word, span in infos])


FindSpam.reload_blacklists()

0 comments on commit 950e04a

Please sign in to comment.