New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add option to exclude outputs with cli and config #207
Changes from 7 commits
cffdc77
23285f0
bf5d3ea
33e4a77
66a6c40
0a7f3d8
41b2f82
3670e58
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -90,6 +90,11 @@ def get_arguments() -> Namespace: | |
default=None, | ||
dest="config_path", | ||
metavar="PATH") | ||
parser.add_argument("--denylist", | ||
help="path to a plain text file with lines or secrets to ignore", | ||
default=None, | ||
dest="denylist_path", | ||
metavar="PATH") | ||
parser.add_argument("--find-by-ext", | ||
help="find files by predefined extension.", | ||
dest="find_by_ext", | ||
|
@@ -198,6 +203,11 @@ def scan(args: Namespace, content_provider: FilesProvider, json_filename: Option | |
|
||
""" | ||
try: | ||
if args.denylist_path is not None: | ||
denylist = [line for line in Util.read_file(args.denylist_path) if line] | ||
else: | ||
denylist = [] | ||
|
||
credsweeper = CredSweeper(rule_path=args.rule_path, | ||
config_path=args.config_path, | ||
api_validation=args.api_validation, | ||
|
@@ -208,7 +218,9 @@ def scan(args: Namespace, content_provider: FilesProvider, json_filename: Option | |
ml_threshold=args.ml_threshold, | ||
find_by_ext=args.find_by_ext, | ||
depth=args.depth, | ||
size_limit=args.size_limit) | ||
size_limit=args.size_limit, | ||
exclude_lines=denylist, | ||
exclude_values=denylist) | ||
Comment on lines
+222
to
+223
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. deny_lines
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. with single 'denylist' we will have double work. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
@babenek We are taking about O(1) average difficultly at the search operation There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. x N lines |
||
return credsweeper.run(content_provider=content_provider) | ||
except Exception as exc: | ||
logger.critical(exc, exc_info=True) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -52,7 +52,9 @@ def __init__(self, | |
ml_threshold: Union[float, ThresholdPreset] = ThresholdPreset.medium, | ||
find_by_ext: bool = False, | ||
depth: int = 0, | ||
size_limit: Optional[str] = None) -> None: | ||
size_limit: Optional[str] = None, | ||
exclude_lines: Optional[List[str]] = None, | ||
exclude_values: Optional[List[str]] = None) -> None: | ||
Comment on lines
+56
to
+57
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if the lines inserted before e.g. find_by_ext - then 55line is kept untouched :) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agree, but it have a chance to brake code that passes arguments by order rather than by name |
||
"""Initialize Advanced credential scanner. | ||
|
||
Args: | ||
|
@@ -73,6 +75,8 @@ def __init__(self, | |
find_by_ext: boolean - files will be reported by extension | ||
depth: int - how deep container files will be scanned | ||
size_limit: optional string integer or human-readable format to skip oversize files | ||
exclude_lines: lines to omit in scan. Will be added to the lines already in config | ||
exclude_values: values to omit in scan. Will be added to the values already in config | ||
|
||
""" | ||
self.pool_count: int = int(pool_count) if int(pool_count) > 1 else 1 | ||
|
@@ -88,6 +92,10 @@ def __init__(self, | |
config_dict["find_by_ext"] = find_by_ext | ||
config_dict["size_limit"] = size_limit | ||
config_dict["depth"] = depth | ||
if exclude_lines is not None: | ||
config_dict["exclude"]["lines"] = config_dict["exclude"].get("lines", []) + exclude_lines | ||
if exclude_values is not None: | ||
config_dict["exclude"]["values"] = config_dict["exclude"].get("values", []) + exclude_values | ||
Comment on lines
+95
to
+98
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Probably the feature is overhead |
||
|
||
self.config = Config(config_dict) | ||
self.credential_manager = CredentialManager() | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
from typing import Dict, List, Optional | ||
from typing import Dict, List, Optional, Set | ||
|
||
from humanfriendly import parse_size | ||
from regex import regex | ||
|
@@ -20,6 +20,8 @@ def __init__(self, config: Dict) -> None: | |
] | ||
self.exclude_paths: List[str] = config["exclude"]["path"] | ||
self.exclude_extensions: List[str] = config["exclude"]["extension"] | ||
self.exclude_lines: Set[str] = set(config["exclude"].get("lines", [])) | ||
self.exclude_values: Set[str] = set(config["exclude"].get("values", [])) | ||
self.source_extensions: List[str] = config["source_ext"] | ||
self.source_quote_ext: List[str] = config["source_quote_ext"] | ||
self.find_by_ext_list: List[str] = config["find_by_ext_list"] | ||
|
@@ -41,3 +43,7 @@ def __init__(self, config: Dict) -> None: | |
self.exclude_extensions.remove(".zip") | ||
if ".gz" in self.exclude_extensions: | ||
self.exclude_extensions.remove(".gz") | ||
|
||
# Trim exclude patterns from space like characters | ||
self.exclude_lines = set(line.strip() for line in self.exclude_lines) | ||
self.exclude_values = set(line.strip() for line in self.exclude_values) | ||
Comment on lines
+48
to
+49
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. denylist is the same |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -154,11 +154,16 @@ def _get_candidate(cls, config: Config, rule: Rule, target: AnalysisTarget) -> O | |
remove current line. None otherwise | ||
|
||
""" | ||
if target.line.strip() in config.exclude_lines: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. then, here may be extra work when user suggests to skip values only There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok, agree, will update There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
return None | ||
|
||
line_data = cls.get_line_data(config, target.line, target.line_num, target.file_path, rule.patterns[0], | ||
rule.filters) | ||
|
||
if line_data is None: | ||
return None | ||
if line_data.value.strip() in config.exclude_values: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. double work in case of user 'denylist' There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Not the case |
||
return None | ||
|
||
return Candidate([line_data], rule.patterns, rule.rule_name, rule.severity, config, rule.validations, | ||
rule.use_ml) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -69,7 +69,9 @@ | |
"/node_modules/", | ||
"/target/", | ||
"/venv/" | ||
] | ||
], | ||
"lines": [], | ||
"values": [] | ||
}, | ||
"source_ext": [ | ||
".aspx", | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
skip user lines with whitespaces only?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Huuuuuh
@babenek
Good, point!
However, IMO it's irrelevant because:
set
function that removes all duplicates and creates hash based data-structureSo after the steps 2 all space-only lines would be converted to
""
And step 3 would guarantee absence of duplicates
So not doing
if line.strip()
would increase search in cache by only 1 data pointThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes, only 1 empty check
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would be 1 more check IF we would perform linear search
However,
x in y
for sets doing hash map based search that is O(1) in average difficulty - meaning it does not depend on the set sizeSo adding
""
to set would increase set size by 1, but not the average number of operations during checkingx in y
In worst case
x in y
would be O(n), so it would increase operations by 1. But it's really an insanely rare worst case