Samsung · meanrin · Sep 5, 2022 · Aug 31, 2022 · Aug 31, 2022 · Aug 31, 2022
@@ -90,6 +90,11 @@ def get_arguments() -> Namespace:
                         default=None,
                         dest="config_path",
                         metavar="PATH")
+    parser.add_argument("--denylist",
+                        help="path to a plain text file with lines or secrets to ignore",
+                        default=None,
+                        dest="denylist_path",
+                        metavar="PATH")
     parser.add_argument("--find-by-ext",
                         help="find files by predefined extension.",
                         dest="find_by_ext",
@@ -198,6 +203,11 @@ def scan(args: Namespace, content_provider: FilesProvider, json_filename: Option
 
     """
     try:
+        if args.denylist_path is not None:
+            denylist = [line for line in Util.read_file(args.denylist_path) if line]
-            denylist = [line for line in Util.read_file(args.denylist_path) if line]
+            denylist = [line for line in Util.read_file(args.denylist_path) if line.strip()]
-            denylist = [line for line in Util.read_file(args.denylist_path) if line]
+            denylist = [line for line in Util.read_file(args.denylist_path) if line.strip()]
+        else:
+            denylist = []
+
         credsweeper = CredSweeper(rule_path=args.rule_path,
                                   config_path=args.config_path,
                                   api_validation=args.api_validation,
@@ -208,7 +218,9 @@ def scan(args: Namespace, content_provider: FilesProvider, json_filename: Option
                                   ml_threshold=args.ml_threshold,
                                   find_by_ext=args.find_by_ext,
                                   depth=args.depth,
-                                  size_limit=args.size_limit)
+                                  size_limit=args.size_limit,
+                                  exclude_lines=denylist,
+                                  exclude_values=denylist)
         return credsweeper.run(content_provider=content_provider)
     except Exception as exc:
         logger.critical(exc, exc_info=True)

@@ -52,7 +52,9 @@ def __init__(self,
                  ml_threshold: Union[float, ThresholdPreset] = ThresholdPreset.medium,
                  find_by_ext: bool = False,
                  depth: int = 0,
-                 size_limit: Optional[str] = None) -> None:
+                 size_limit: Optional[str] = None,
+                 exclude_lines: Optional[List[str]] = None,
+                 exclude_values: Optional[List[str]] = None) -> None:
         """Initialize Advanced credential scanner.
 
         Args:
@@ -73,6 +75,8 @@ def __init__(self,
             find_by_ext: boolean - files will be reported by extension
             depth: int - how deep container files will be scanned
             size_limit: optional string integer or human-readable format to skip oversize files
+            exclude_lines: lines to omit in scan. Will be added to the lines already in config
+            exclude_values: values to omit in scan. Will be added to the values already in config
 
         """
         self.pool_count: int = int(pool_count) if int(pool_count) > 1 else 1
@@ -88,6 +92,10 @@ def __init__(self,
         config_dict["find_by_ext"] = find_by_ext
         config_dict["size_limit"] = size_limit
         config_dict["depth"] = depth
+        if exclude_lines is not None:
+            config_dict["exclude"]["lines"] = config_dict["exclude"].get("lines", []) + exclude_lines
+        if exclude_values is not None:
+            config_dict["exclude"]["values"] = config_dict["exclude"].get("values", []) + exclude_values
 
         self.config = Config(config_dict)
         self.credential_manager = CredentialManager()

@@ -1,4 +1,4 @@
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Set
 
 from humanfriendly import parse_size
 from regex import regex
@@ -20,6 +20,8 @@ def __init__(self, config: Dict) -> None:
         ]
         self.exclude_paths: List[str] = config["exclude"]["path"]
         self.exclude_extensions: List[str] = config["exclude"]["extension"]
+        self.exclude_lines: Set[str] = set(config["exclude"].get("lines", []))
+        self.exclude_values: Set[str] = set(config["exclude"].get("values", []))
         self.source_extensions: List[str] = config["source_ext"]
         self.source_quote_ext: List[str] = config["source_quote_ext"]
         self.find_by_ext_list: List[str] = config["find_by_ext_list"]
@@ -41,3 +43,7 @@ def __init__(self, config: Dict) -> None:
                 self.exclude_extensions.remove(".zip")
             if ".gz" in self.exclude_extensions:
                 self.exclude_extensions.remove(".gz")
+
+        # Trim exclude patterns from space like characters
+        self.exclude_lines = set(line.strip() for line in self.exclude_lines)
+        self.exclude_values = set(line.strip() for line in self.exclude_values)
@@ -154,11 +154,16 @@ def _get_candidate(cls, config: Config, rule: Rule, target: AnalysisTarget) -> O
             remove current line. None otherwise
 
         """
+        if target.line.strip() in config.exclude_lines:
+            return None
+
         line_data = cls.get_line_data(config, target.line, target.line_num, target.file_path, rule.patterns[0],
                                       rule.filters)
 
         if line_data is None:
             return None
+        if line_data.value.strip() in config.exclude_values:
+            return None
 
         return Candidate([line_data], rule.patterns, rule.rule_name, rule.severity, config, rule.validations,
                          rule.use_ml)
@@ -69,7 +69,9 @@
             "/node_modules/",
             "/target/",
             "/venv/"
-        ]
+        ],
+        "lines": [],
+        "values": []
     },
     "source_ext": [
         ".aspx",

@@ -13,9 +13,9 @@ Get all argument list:
 
 .. code-block:: text
 
-    usage: python -m credsweeper [-h] (--path PATH [PATH ...] | --diff_path PATH [PATH ...] | --export_config [PATH]) [--rules [PATH]] [--config [PATH]] [--find-by-ext] [--depth POSITIVE_INT]
-                                 [--ml_threshold FLOAT_OR_STR] [--ml_batch_size POSITIVE_INT] [--api_validation] [--jobs POSITIVE_INT] [--skip_ignored] [--save-json [PATH]] [--save-xlsx [PATH]]
-                                 [--log LOG_LEVEL] [--size_limit SIZE_LIMIT] [--version]
+    usage: python -m credsweeper [-h] (--path PATH [PATH ...] | --diff_path PATH [PATH ...] | --export_config [PATH]) [--rules [PATH]] [--config [PATH]] [--denylist PATH] [--find-by-ext]
+                                 [--depth POSITIVE_INT] [--ml_threshold FLOAT_OR_STR] [--ml_batch_size POSITIVE_INT] [--api_validation] [--jobs POSITIVE_INT] [--skip_ignored]
+                                 [--save-json [PATH]] [--save-xlsx [PATH]] [--log LOG_LEVEL] [--size_limit SIZE_LIMIT] [--version]
     optional arguments:
       -h, --help            show this help message and exit
       --path PATH [PATH ...]
@@ -26,6 +26,7 @@ Get all argument list:
                             exporting default config to file (default: config.json)
       --rules [PATH]        path of rule config file (default: credsweeper/rules/config.yaml)
       --config [PATH]       use custom config (default: built-in)
+      --denylist PATH       path to a plain text file with lines or secrets to ignore
       --find-by-ext         find files by predefined extension.
       --depth POSITIVE_INT  recursive search in files which are zip archives.
       --ml_threshold FLOAT_OR_STR
@@ -104,6 +105,46 @@ Get CLI output only:
 
     rule: Password / severity: medium / line_data_list: [line : 'password = "cackle!"' / line_num : 1 / path : tests/samples/password / entropy_validation: False] / api_validation: NOT_AVAILABLE / ml_validation: VALIDATED_KEY
 
+
+Exclude outputs using CLI:
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If you want to remove some values from the report (e.g. known public secrets),
+create a text file with the lines or values you want to remove and pass it with the ``--denylist`` argument.
+Leading and trailing whitespace in each entry is ignored.
+
+.. code-block:: bash
+
+    $ python -m credsweeper --path tests/samples/password --denylist list.txt
+    Detected Credentials: 0
+    Time Elapsed: 0.07523202896118164s
+    $ cat list.txt
+    cackle!
+      password = "cackle!"
+
+Exclude outputs using config:
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Edit the ``exclude`` section of the config file.
+The default config can be generated using ``python -m credsweeper --export_config place_to_save.json``
+or can be found in ``credsweeper/secret/config.json``.
+Leading and trailing whitespace in each entry is ignored.
+
+.. code-block:: json
+
+    "exclude": {
+        "lines": ["   password = \"cackle!\" "],
+        "values": ["cackle!"]
+    }
+
+Then specify your config in the CLI:
+
+.. code-block:: bash
+
+    $ python -m credsweeper --path tests/samples/password --config my_cfg.json
+    Detected Credentials: 0
+    Time Elapsed: 0.07152628898620605s
+
 Use as a python library
 -----------------------
 

@@ -187,6 +187,7 @@ def test_it_works_n(self) -> None:
                    ")" \
                    " [--rules [PATH]]" \
                    " [--config [PATH]]" \
+                   " [--denylist PATH]" \
                    " [--find-by-ext]" \
                    " [--depth POSITIVE_INT]" \
                    " [--ml_threshold FLOAT_OR_STR]" \
@@ -447,3 +448,65 @@ def test_zip_p(self) -> None:
                 assert len(report) == SAMPLES_POST_CRED_COUNT + SAMPLES_IN_DEEP_1 - SAMPLES_FILTERED_BY_POST_COUNT
 
     # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
+
+    def test_denylist_value_p(self) -> None:
+        """A secret value listed in the ``--denylist`` file must be dropped from the JSON report."""
+        target_path = str(SAMPLES_DIR / "password")
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            json_filename = os.path.join(tmp_dir, f"{__name__}.json")
+            denylist_filename = os.path.join(tmp_dir, "list.txt")  # plain literal: nothing to interpolate
+            with open(denylist_filename, "w") as f:
+                f.write("cackle!")
+            _stdout, _stderr = self._m_credsweeper([
+                "--path", target_path, "--denylist", denylist_filename, "--save-json", json_filename, "--log", "silence"
+            ])
+            with open(json_filename, "r") as json_file:
+                assert len(json.load(json_file)) == 0
+
+    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
+
+    def test_denylist_value_n(self) -> None:
+        """A denylist entry that matches no value must leave the single credential in the JSON report."""
+        target_path = str(SAMPLES_DIR / "password")
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            json_filename = os.path.join(tmp_dir, f"{__name__}.json")
+            denylist_filename = os.path.join(tmp_dir, "list.txt")  # plain literal: nothing to interpolate
+            with open(denylist_filename, "w") as f:
+                f.write("abc")
+            _stdout, _stderr = self._m_credsweeper([
+                "--path", target_path, "--denylist", denylist_filename, "--save-json", json_filename, "--log", "silence"
+            ])
+            with open(json_filename, "r") as json_file:
+                assert len(json.load(json_file)) == 1
+
+    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
+
+    def test_denylist_line_p(self) -> None:
+        """A whole line listed in the denylist (padded with spaces) must be dropped from the JSON report."""
+        target_path = str(SAMPLES_DIR / "password")
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            json_filename = os.path.join(tmp_dir, f"{__name__}.json")
+            denylist_filename = os.path.join(tmp_dir, "list.txt")  # plain literal: nothing to interpolate
+            with open(denylist_filename, "w") as f:
+                f.write('  password = "cackle!" ')
+            _stdout, _stderr = self._m_credsweeper([
+                "--path", target_path, "--denylist", denylist_filename, "--save-json", json_filename, "--log", "silence"
+            ])
+            with open(json_filename, "r") as json_file:
+                assert len(json.load(json_file)) == 0
+
+    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
+
+    def test_denylist_line_n(self) -> None:
+        """A denylist entry that matches no scanned line must leave the single credential in the JSON report."""
+        target_path = str(SAMPLES_DIR / "password")
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            json_filename = os.path.join(tmp_dir, f"{__name__}.json")
+            denylist_filename = os.path.join(tmp_dir, "list.txt")  # plain literal: nothing to interpolate
+            with open(denylist_filename, "w") as f:
+                f.write("abc")
+            _stdout, _stderr = self._m_credsweeper([
+                "--path", target_path, "--denylist", denylist_filename, "--save-json", json_filename, "--log", "silence"
+            ])
+            with open(json_filename, "r") as json_file:
+                assert len(json.load(json_file)) == 1
@@ -164,7 +164,8 @@ def test_binary_patch_n(self, mock_get_arguments: Mock()) -> None:
                          ml_threshold=0.0,
                          depth=1,
                          size_limit="1G",
-                         api_validation=False)
+                         api_validation=False,
+                         denylist_path=None)
         mock_get_arguments.return_value = args_mock
         with patch('logging.Logger.warning') as mocked_logger:
             app_main.main()
@@ -192,7 +193,8 @@ def test_report_p(self, mock_get_arguments: Mock()) -> None:
                              depth=0,
                              size_limit="1G",
                              find_by_ext=False,
-                             api_validation=False)
+                             api_validation=False,
+                             denylist_path=None)
             mock_get_arguments.return_value = args_mock
             app_main.main()
             assert os.path.exists(xlsx_filename)
@@ -346,3 +348,40 @@ def test_zip_p(self) -> None:
         cred_sweeper.config.depth = 0
         cred_sweeper.run(content_provider=content_provider)
         assert len(cred_sweeper.credential_manager.get_credentials()) == SAMPLES_POST_CRED_COUNT
+
+    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
+
+    def test_exclude_value_p(self) -> None:
+        """A value passed through ``exclude_values`` must leave no credential after the scan."""
+        sweeper = CredSweeper(use_filters=True, exclude_values=["cackle!"])
+        providers = [TextContentProvider(SAMPLES_DIR / "password")]
+        sweeper.scan(providers)
+        assert len(sweeper.credential_manager.get_credentials()) == 0
+
+    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
+
+    def test_exclude_value_n(self) -> None:
+        """An ``exclude_values`` entry that matches nothing must keep the single credential."""
+        sweeper = CredSweeper(use_filters=True, exclude_values=["abc"])
+        providers = [TextContentProvider(SAMPLES_DIR / "password")]
+        sweeper.scan(providers)
+        assert len(sweeper.credential_manager.get_credentials()) == 1
+
+    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
+
+    @pytest.mark.parametrize("line", ['  password = "cackle!" ', 'password = "cackle!"'])
+    def test_exclude_line_p(self, line: str) -> None:
+        """An excluded line must leave no credential, with or without surrounding spaces."""
+        sweeper = CredSweeper(use_filters=True, exclude_lines=[line])
+        providers = [TextContentProvider(SAMPLES_DIR / "password")]
+        sweeper.scan(providers)
+        assert len(sweeper.credential_manager.get_credentials()) == 0
+
+    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
+
+    def test_exclude_line_n(self) -> None:
+        """An ``exclude_lines`` entry that matches no line must keep the single credential."""
+        sweeper = CredSweeper(use_filters=True, exclude_lines=["abc"])
+        providers = [TextContentProvider(SAMPLES_DIR / "password")]
+        sweeper.scan(providers)
+        assert len(sweeper.credential_manager.get_credentials()) == 1