diff --git a/.gitguardian.yaml b/.gitguardian.yaml index 23d47417a3..141587a038 100644 --- a/.gitguardian.yaml +++ b/.gitguardian.yaml @@ -20,6 +20,8 @@ secret: name: - match: 1a56aa9f3d76450ec4731ba7ae247176dfb460dca2989c40ed634eb9a2c1c262 name: Generic High Entropy Secret - tests/output/snapshots/snap_test_text_output.py + - match: 6a1d11d5bbbb48f044bedfc2db5749e7fde3ad7e3f6a79601db034478a95f88a + name: Facebook Access Token - tests/output/snapshots/snap_test_text_output.py - match: 793865a3729a2c5d38ef874a270accf407cd993af32acefb4e13fc1ea242e9cb name: Generic High Entropy Secret - tests/cassettes/test_scan_file_secret-False.yaml - match: 56c126cef75e3d17c3de32dac60bab688ecc384a054c2c85b688c1dd7ac4eefd diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 55d1895a22..0158e2249b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -27,7 +27,7 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install pipenv pre-commit - pipenv install --dev --skip-lock + pipenv install --dev - uses: actions/cache@v3 with: diff --git a/Pipfile b/Pipfile index e8ff2d51ed..85d04575c2 100644 --- a/Pipfile +++ b/Pipfile @@ -13,7 +13,7 @@ click = ">=8.1,<8.2" marshmallow = ">=3.18.0,<3.19.0" marshmallow-dataclass = ">=8.5.8,<8.6.0" oauthlib = ">=3.2.1,<3.3.0" -pygitguardian = ">=1.3.6,<1.4.0" +pygitguardian = {git = "https://github.com/GitGuardian/py-gitguardian.git", editable = true} python-dotenv = ">=0.21.0,<0.22.0" pyyaml = ">=6.0,<6.1" rich = ">=12.5.1,<12.6.0" diff --git a/Pipfile.lock b/Pipfile.lock index 87fcde6224..6357881014 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "e805427b9bd6c434dc2fcca1a7a910baae2ecbff0c350d2bd6210cf13aeb755d" + "sha256": "755688435972b39e265f6b31aadd7bb9e19eeb5ae33745f55d3b5b8707bbc399" }, "pipfile-spec": 6, "requires": {}, @@ -105,12 +105,13 @@ "version": "==21.3" }, "pygitguardian": { + "editable": true, + "git": "https://github.com/GitGuardian/py-gitguardian.git", "hashes": [ "sha256:0fe3285f8dce9ec66efcb26b769f93fb71c9b2c57b2d635a968ac713c5242ae7", "sha256:621b9e8cb01cd9fd13a0db3daf9484ad4ce684c3b2a3aea7645728713d7b90bd" ], - "index": "pypi", - "version": "==1.3.6" + "ref": "98c3007f03379a36f054709f403badb94d02aaa1" }, "pygments": { "hashes": [ @@ -187,7 +188,7 @@ "sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983", "sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349" ], - "markers": "python_version >= '3.7' and python_full_version < '4.0.0'", + "markers": "python_version >= '3.7' and python_version < '4'", "version": "==2.28.1" }, "rich": { @@ -218,7 +219,7 @@ "sha256:3fa96cf423e6987997fc326ae8df396db2a8b7c667747d47ddd8ecba91f4a74e", "sha256:b930dd878d5a8afb066a637fbb35144fe7901e3b209d1cd4f524bd0e9deee997" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5' and python_full_version < '4.0.0'", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5' and python_version < '4'", "version": "==1.26.12" } }, @@ -233,10 +234,10 @@ }, "asttokens": { "hashes": [ - "sha256:c61e16246ecfb2cde2958406b4c8ebc043c9e6d73aaa83c941673b35e5d3a76b", - "sha256:e3305297c744ae53ffa032c45dc347286165e4ffce6875dc662b205db0623d86" + "sha256:1b28ed85e254b724439afc783d4bee767f780b936c3fe8b3275332f42cf5f561", + "sha256:4aa76401a151c8cc572d906aad7aea2a841780834a19d780f4321c0fe1b54635" ], - "version": "==2.0.8" + "version": "==2.1.0" }, "attrs": { "hashes": [ @@ -371,18 +372,18 @@ }, "exceptiongroup": { "hashes": [ - "sha256:2ac84b496be68464a2da60da518af3785fff8b7ec0d090a581604bc870bdee41", - "sha256:affbabf13fb6e98988c38d9c5650e701569fe3c1de3233cfb61c5f33774690ad" + "sha256:4d6c0aa6dd825810941c792f53d7b8d71da26f5e5f84f20f9508e8f2d33b140a", + "sha256:73866f7f842ede6cb1daa42c4af078e2035e5f7607f0e2c762cc51bb31bbe7b2" ], "markers": "python_version < '3.11'", - "version": "==1.0.0" + "version": "==1.0.1" }, "executing": { "hashes": [ - "sha256:236ea5f059a38781714a8bfba46a70fad3479c2f552abee3bbafadc57ed111b8", - "sha256:b0d7f8dcc2bac47ce6e39374397e7acecea6fdc380a6d5323e26185d70f38ea8" + "sha256:0314a69e37426e3608aada02473b4161d4caf5a4b244d1d0c48072b8fee7bacc", + "sha256:19da64c18d2d851112f09c287f8d3dbbdf725ab0e569077efb6cdcbd3497c107" ], - "version": "==1.1.1" + "version": "==1.2.0" }, "fastdiff": { "hashes": [ @@ -454,11 +455,11 @@ }, "ipython": { "hashes": [ - "sha256:097bdf5cd87576fd066179c9f7f208004f7a6864ee1b20f37d346c0bcb099f84", - "sha256:6f090e29ab8ef8643e521763a4f1f39dc3914db643122b1e9d3328ff2e43ada2" + "sha256:7c959e3dedbf7ed81f9b9d8833df252c430610e2a4a6464ec13cd20975ce20a5", + "sha256:91ef03016bcf72dd17190f863476e7c799c6126ec7e8be97719d1bc9a78a59a4" ], "markers": "python_version >= '3.8'", - "version": "==8.5.0" + "version": "==8.6.0" }, "isort": { "hashes": [ @@ -642,11 +643,11 @@ }, "platformdirs": { "hashes": [ - "sha256:027d8e83a2d7de06bbac4e5ef7e023c02b863d7ea5d079477e722bb41ab25788", - "sha256:58c8abb07dcb441e6ee4b11d8df0ac856038f944ab98b7be6b27b2a3c7feef19" + "sha256:0cb405749187a194f444c25c82ef7225232f11564721eabffc6ec70df83b11cb", + "sha256:6e52c21afff35cb659c6e52d8b4d61b9bd544557180440538f255d9382c8cbe0" ], "markers": "python_version >= '3.7'", - "version": "==2.5.2" + "version": "==2.5.3" }, "pluggy": { "hashes": [ @@ -666,11 +667,11 @@ }, "prompt-toolkit": { "hashes": [ - "sha256:9696f386133df0fc8ca5af4895afe5d78f5fcfe5258111c2a79a1c3e41ffa96d", - "sha256:9ada952c9d1787f52ff6d5f3484d0b4df8952787c087edf6a1f7c2cb1ea88148" + "sha256:24becda58d49ceac4dc26232eb179ef2b21f133fecda7eed6018d341766ed76e", + "sha256:e7f2129cba4ff3b3656bbdda0e74ee00d2f874a8bcdb9dd16f5fec7b3e173cae" ], "markers": "python_full_version >= '3.6.2'", - "version": "==3.0.31" + "version": "==3.0.32" }, "ptyprocess": { "hashes": [ @@ -813,11 +814,11 @@ }, "setuptools": { "hashes": [ - "sha256:512e5536220e38146176efb833d4a62aa726b7bbff82cfbc8ba9eaa3996e0b17", - "sha256:f62ea9da9ed6289bfe868cd6845968a2c854d1427f8548d52cae02a42b4f0356" + "sha256:d0b9a8433464d5800cbe05094acf5c6d52a91bfac9b52bcfc4d41382be5d5d31", + "sha256:e197a19aa8ec9722928f2206f8de752def0e4c9fc6953527360d1c36d94ddb2f" ], "markers": "python_version >= '3.7'", - "version": "==65.5.0" + "version": "==65.5.1" }, "six": { "hashes": [ @@ -837,18 +838,18 @@ }, "stack-data": { "hashes": [ - "sha256:5120731a18ba4c82cefcf84a945f6f3e62319ef413bfc210e32aca3a69310ba2", - "sha256:95eb784942e861a3d80efd549ff9af6cf847d88343a12eb681d7157cfcb6e32b" + "sha256:8e515439f818efaa251036af72d89e4026e2b03993f3453c000b200fb4f2d6aa", + "sha256:b92d206ef355a367d14316b786ab41cb99eb453a21f2cb216a4204625ff7bc07" ], - "version": "==0.5.1" + "version": "==0.6.0" }, "termcolor": { "hashes": [ - "sha256:6b2cf769e93364a2676e1de56a7c0cff2cf5bd07f37e9cc80b0dd6320ebfe388", - "sha256:7e597f9de8e001a3208c4132938597413b9da45382b6f1d150cff8d062b7aaa3" + "sha256:91dd04fdf661b89d7169cefd35f609b19ca931eb033687eaa647cef1ff177c49", + "sha256:b80df54667ce4f48c03fe35df194f052dc27a541ebbf2544e4d6b47b5d6949c4" ], "markers": "python_version >= '3.7'", - "version": "==2.0.1" + "version": "==2.1.0" }, "toml": { "hashes": [ diff --git a/ggshield/cmd/secret/scan/__init__.py b/ggshield/cmd/secret/scan/__init__.py index 8bc97cad3f..d0ae04b8fa 100644 --- a/ggshield/cmd/secret/scan/__init__.py +++ b/ggshield/cmd/secret/scan/__init__.py @@ -90,6 +90,12 @@ is_flag=True, hidden=True, ) +@click.option( + "--ignore-known-secrets", + is_flag=True, + default=None, + help="Ignore", +) @click.pass_context def scan_group( ctx: click.Context, @@ -102,6 +108,7 @@ def scan_group( banlist_detector: Optional[List[str]] = None, exclude: Optional[List[str]] = None, ignore_default_excludes: bool = False, + ignore_known_secrets: bool = False, ) -> int: """Commands to scan various contents.""" return scan_group_impl( @@ -113,6 +120,7 @@ def scan_group( output, banlist_detector, exclude, + ignore_known_secrets, ) @@ -125,6 +133,7 @@ def scan_group_impl( output: Optional[str], banlist_detector: Optional[List[str]] = None, exclude: Optional[List[str]] = None, + ignore_known_secrets: bool = False, ) -> int: """Implementation for scan_group(). Must be a separate function so that its code can be reused from the deprecated `cmd.scan` package.""" @@ -152,6 +161,9 @@ def scan_group_impl( if banlist_detector: config.secret.ignored_detectors.update(banlist_detector) + if ignore_known_secrets: + config.ignore_known_secrets = ignore_known_secrets + max_commits = get_max_commits_for_hook() if max_commits: config.max_commits_for_hook = max_commits diff --git a/ggshield/cmd/secret/scan/archive.py b/ggshield/cmd/secret/scan/archive.py index db79ff7862..6663adbf8e 100644 --- a/ggshield/cmd/secret/scan/archive.py +++ b/ggshield/cmd/secret/scan/archive.py @@ -53,6 +53,7 @@ def archive_cmd(ctx: click.Context, path: str) -> int: # pragma: no cover scan_context=scan_context, ignored_matches=config.secret.ignored_matches, ignored_detectors=config.secret.ignored_detectors, + ignore_known_secrets=config.ignore_known_secrets, ) results = scanner.scan( files.files, diff --git a/ggshield/cmd/secret/scan/docker.py b/ggshield/cmd/secret/scan/docker.py index 0b7a92e601..210f132d4b 100644 --- a/ggshield/cmd/secret/scan/docker.py +++ b/ggshield/cmd/secret/scan/docker.py @@ -51,6 +51,7 @@ def docker_name_cmd(ctx: click.Context, name: str, docker_timeout: int) -> int: scan_context=scan_context, matches_ignore=config.secret.ignored_matches, ignored_detectors=config.secret.ignored_detectors, + ignore_known_secrets=config.ignore_known_secrets, ) return output_handler.process_scan(scan) diff --git a/ggshield/cmd/secret/scan/dockerarchive.py b/ggshield/cmd/secret/scan/dockerarchive.py index e7bd91b7c1..c89c046a0e 100644 --- a/ggshield/cmd/secret/scan/dockerarchive.py +++ b/ggshield/cmd/secret/scan/dockerarchive.py @@ -38,6 +38,7 @@ def docker_archive_cmd( matches_ignore=config.secret.ignored_matches, scan_context=scan_context, ignored_detectors=config.secret.ignored_detectors, + ignore_known_secrets=config.ignore_known_secrets, ) return output_handler.process_scan(scan) diff --git a/ggshield/cmd/secret/scan/docset.py b/ggshield/cmd/secret/scan/docset.py index f602c80808..0bd3eafeec 100644 --- a/ggshield/cmd/secret/scan/docset.py +++ b/ggshield/cmd/secret/scan/docset.py @@ -68,6 +68,7 @@ def docset_cmd(ctx: click.Context, files: List[TextIO]) -> int: # pragma: no co ignored_matches=config.secret.ignored_matches, scan_context=scan_context, ignored_detectors=config.secret.ignored_detectors, + ignore_known_secrets=config.ignore_known_secrets, ) scans = create_scans_from_docset_files( diff --git a/ggshield/cmd/secret/scan/path.py b/ggshield/cmd/secret/scan/path.py index d65063b72d..4f0e1fb610 100644 --- a/ggshield/cmd/secret/scan/path.py +++ b/ggshield/cmd/secret/scan/path.py @@ -53,6 +53,7 @@ def path_cmd( ignored_matches=config.secret.ignored_matches, scan_context=scan_context, ignored_detectors=config.secret.ignored_detectors, + ignore_known_secrets=config.ignore_known_secrets, ) results = scanner.scan( files.files, diff --git a/ggshield/cmd/secret/scan/precommit.py b/ggshield/cmd/secret/scan/precommit.py index ab2bee15ce..63bca206a4 100644 --- a/ggshield/cmd/secret/scan/precommit.py +++ b/ggshield/cmd/secret/scan/precommit.py @@ -50,6 +50,7 @@ def precommit_cmd( scan_context=scan_context, ignored_matches=config.secret.ignored_matches, ignored_detectors=config.secret.ignored_detectors, + ignore_known_secrets=config.ignore_known_secrets, ) results = scanner.scan(commit.files) diff --git a/ggshield/cmd/secret/scan/prepush.py b/ggshield/cmd/secret/scan/prepush.py index 84491f01d0..9fe5621428 100644 --- a/ggshield/cmd/secret/scan/prepush.py +++ b/ggshield/cmd/secret/scan/prepush.py @@ -120,6 +120,7 @@ def prepush_cmd(ctx: click.Context, prepush_args: List[str]) -> int: matches_ignore=config.secret.ignored_matches, scan_context=scan_context, ignored_detectors=config.secret.ignored_detectors, + ignore_known_secrets=config.ignore_known_secrets, ) if return_code: click.echo( diff --git a/ggshield/cmd/secret/scan/prereceive.py b/ggshield/cmd/secret/scan/prereceive.py index 995b25050d..fdf39abc8b 100644 --- a/ggshield/cmd/secret/scan/prereceive.py +++ b/ggshield/cmd/secret/scan/prereceive.py @@ -201,6 +201,7 @@ def prereceive_cmd(ctx: click.Context, web: bool, prereceive_args: List[str]) -> matches_ignore=config.secret.ignored_matches, scan_context=scan_context, ignored_detectors=config.secret.ignored_detectors, + ignore_known_secrets=config.ignore_known_secrets, ) if return_code: click.echo( diff --git a/ggshield/cmd/secret/scan/pypi.py b/ggshield/cmd/secret/scan/pypi.py index beb57723ef..c8ffdd3725 100644 --- a/ggshield/cmd/secret/scan/pypi.py +++ b/ggshield/cmd/secret/scan/pypi.py @@ -115,6 +115,7 @@ def pypi_cmd(ctx: click.Context, package_name: str) -> int: # pragma: no cover ignored_matches=config.secret.ignored_matches, scan_context=scan_context, ignored_detectors=config.secret.ignored_detectors, + ignore_known_secrets=config.ignore_known_secrets, ) results = scanner.scan( files.files, diff --git a/ggshield/cmd/secret/scan/range.py b/ggshield/cmd/secret/scan/range.py index c3bb82e40c..556bbe95eb 100644 --- a/ggshield/cmd/secret/scan/range.py +++ b/ggshield/cmd/secret/scan/range.py @@ -38,6 +38,7 @@ def range_cmd(ctx: click.Context, commit_range: str) -> int: # pragma: no cover matches_ignore=config.secret.ignored_matches, scan_context=scan_context, ignored_detectors=config.secret.ignored_detectors, + ignore_known_secrets=config.ignore_known_secrets, ) except Exception as error: return handle_exception(error, config.verbose) diff --git a/ggshield/core/config/user_config.py b/ggshield/core/config/user_config.py index 44ebd5dbe7..a213ac0f5f 100644 --- a/ggshield/core/config/user_config.py +++ b/ggshield/core/config/user_config.py @@ -91,6 +91,7 @@ class UserConfig(FilteredConfig): allow_self_signed: bool = False max_commits_for_hook: int = 50 secret: SecretConfig = field(default_factory=SecretConfig) + ignore_known_secrets: bool = False debug: bool = False # If we hit any deprecated syntax when loading a configuration file, we do not diff --git a/ggshield/core/text_utils.py b/ggshield/core/text_utils.py index 4b0d07a76a..b3ce6d4bfc 100644 --- a/ggshield/core/text_utils.py +++ b/ggshield/core/text_utils.py @@ -37,12 +37,6 @@ "ignore_sha": {"fg": "bright_yellow", "bold": True}, } -_DISPLAY_NAME_FOR_POLICY = {"secrets detection": "Secret detected"} - - -def get_display_name_for_policy(policy: str) -> str: - return _DISPLAY_NAME_FOR_POLICY.get(policy.lower(), policy) - class LineCategory(Enum): addition = auto() diff --git a/ggshield/output/json/json_output_handler.py b/ggshield/output/json/json_output_handler.py index fbaccde8e8..482c38fbb0 100644 --- a/ggshield/output/json/json_output_handler.py +++ b/ggshield/output/json/json_output_handler.py @@ -117,6 +117,9 @@ def flattened_policy_break( if policy_breaks[0].validity: flattened_dict["validity"] = policy_breaks[0].validity + if policy_breaks[0].known_secret: + flattened_dict["known_secret"] = policy_breaks[0].known_secret + for policy_break in policy_breaks: matches = JSONOutputHandler.make_matches( policy_break.matches, lines, is_patch diff --git a/ggshield/output/json/schemas.py b/ggshield/output/json/schemas.py index c35a15bf2d..142c8fce32 100644 --- a/ggshield/output/json/schemas.py +++ b/ggshield/output/json/schemas.py @@ -77,6 +77,7 @@ class FlattenedPolicyBreak(BaseSchema): validity = fields.String(required=False, allow_none=True) ignore_sha = fields.String(required=True) total_occurrences = fields.Integer(required=True) + known_secret = fields.Bool(required=False) class JSONResultSchema(BaseSchema): diff --git a/ggshield/output/output_handler.py b/ggshield/output/output_handler.py index 79c0c9e6d3..1430953289 100644 --- a/ggshield/output/output_handler.py +++ b/ggshield/output/output_handler.py @@ -49,8 +49,8 @@ def _process_scan_impl(self, scan: ScanCollection) -> str: @staticmethod def _get_exit_code(scan: ScanCollection) -> int: - if scan.has_results or scan.has_iac_result: + if scan.has_iac_result: return 1 - if scan.scans and any(x.has_results for x in scan.scans): + if scan.has_new_secrets: return 1 return 0 diff --git a/ggshield/output/text/message.py b/ggshield/output/text/message.py index ce05dd72f6..0c664ec1a3 100644 --- a/ggshield/output/text/message.py +++ b/ggshield/output/text/message.py @@ -10,7 +10,6 @@ STYLE, Line, format_text, - get_display_name_for_policy, pluralize, translate_validity, ) @@ -211,7 +210,9 @@ def iac_vulnerability_location_failed( return f"\nFailed to read from the original file.\nThe incident was found between lines {line_start} and {line_end}\n" # noqa: E501 -def policy_break_header(policy_breaks: List[PolicyBreak], ignore_sha: str) -> str: +def policy_break_header( + policy_breaks: List[PolicyBreak], ignore_sha: str, known_secret: bool = False +) -> str: """ Build a header for the policy break. """ @@ -222,7 +223,7 @@ def policy_break_header(policy_breaks: List[PolicyBreak], ignore_sha: str) -> st ) start_line = format_text(">>", STYLE["detector_line_start"]) - policy_name = get_display_name_for_policy(policy_breaks[0].policy) + policy_header = "Known secret" if known_secret else "Secret detected" policy_break_type = format_text( policy_breaks[0].break_type, STYLE["policy_break_type"] ) @@ -230,7 +231,7 @@ def policy_break_header(policy_breaks: List[PolicyBreak], ignore_sha: str) -> st ignore_sha = format_text(ignore_sha, STYLE["ignore_sha"]) return f""" -{start_line} {policy_name}: {policy_break_type}{validity_msg} +{start_line} {policy_header}: {policy_break_type}{validity_msg} Occurrences: {number_occurrences} Ignore with SHA: {ignore_sha} @@ -404,13 +405,6 @@ def file_info(filename: str, nb_secrets: int) -> str: ) -def no_leak_message() -> str: - """ - Build a message if no secret is found. - """ - return format_text("\nNo secrets have been found\n", STYLE["no_secret"]) - - def no_iac_vulnerabilities() -> str: """ Build a message if no IaC vulnerabilities were found. diff --git a/ggshield/output/text/text_output_handler.py b/ggshield/output/text/text_output_handler.py index 86ec97172d..e5e8b490d8 100644 --- a/ggshield/output/text/text_output_handler.py +++ b/ggshield/output/text/text_output_handler.py @@ -1,20 +1,20 @@ +from copy import deepcopy from io import StringIO -from typing import ClassVar, List, cast +from typing import ClassVar, List import click from pygitguardian.models import Match from ggshield.core.filter import censor_content, leak_dictionary_by_ignore_sha -from ggshield.core.text_utils import Line +from ggshield.core.text_utils import Line, pluralize from ggshield.core.utils import Filemode, find_match_indices, get_lines_from_content from ggshield.output.output_handler import OutputHandler -from ggshield.scan import Result, Results, ScanCollection +from ggshield.scan import Result, ScanCollection from .message import ( file_info, flatten_policy_breaks_by_line, leak_message_located, - no_leak_message, policy_break_header, secrets_engine_version, ) @@ -26,31 +26,58 @@ class TextOutputHandler(OutputHandler): def _process_scan_impl(self, scan: ScanCollection, top: bool = True) -> str: scan_buf = StringIO() - if scan.optional_header and (scan.has_results or self.verbose): - scan_buf.write(scan.optional_header) - if top and self.verbose: + if self.verbose: scan_buf.write(secrets_engine_version()) - if scan.has_results: - for result in cast(Results, scan.results).results: - scan_buf.write(self.process_result(result)) - else: - has_results = False - if scan.scans: - has_results = any(x.has_results for x in scan.scans) + scan_buf.write(self.process_scan_results(scan)) + + if scan.known_secrets_count > 0: + scan_buf.write( + f"\nWarning: {scan.known_secrets_count} {pluralize('secret', scan.known_secrets_count)} ignored " + f"because {pluralize('it is', scan.known_secrets_count, 'they are')} already known by your " + f"GitGuardian dashboard and you used the `--ignore-known-secrets` option.\n" + ) + + if self.verbose: + scan_buf.write(self.process_scan_results(scan, True)) + else: + scan_buf.write("Use `--verbose` for more details.\n") + + return scan_buf.getvalue() + + def process_scan_results( + self, scan: ScanCollection, show_only_known_secrets: bool = False + ) -> str: + results_buf = StringIO() + if scan.results: + current_result_buf = StringIO() + for result in scan.results.results: + current_result_buf.write( + self.process_result(result, show_only_known_secrets) + ) + current_result_string = current_result_buf.getvalue() + + # We want to show header in the verbose mode for new secrets or when at least one result is not empty + if scan.optional_header and ( + current_result_string or (self.verbose and not show_only_known_secrets) + ): + results_buf.write(scan.optional_header) - if top and not has_results: - scan_buf.write(no_leak_message()) + results_buf.write(current_result_string) if scan.scans: for sub_scan in scan.scans: - inner_scan_str = self._process_scan_impl(sub_scan, top=False) - scan_buf.write(inner_scan_str) + inner_scan_str = self.process_scan_results( + sub_scan, show_only_known_secrets + ) + results_buf.write(inner_scan_str) - return scan_buf.getvalue() + return results_buf.getvalue() - def process_result(self, result: Result) -> str: + def process_result( + self, result: Result, show_only_known_secrets: bool = False + ) -> str: """ Build readable message on the found incidents. @@ -58,17 +85,22 @@ def process_result(self, result: Result) -> str: :param nb_lines: The number of lines to display before and after a secret in the patch :param show_secrets: Option to show secrets value + :param show_only_known_secrets: If True, display only known secrets, and only new secrets otherwise :return: The formatted message to display """ result_buf = StringIO() - policy_breaks = result.scan.policy_breaks + + # policy breaks and matches are modified in the functions leak_dictionary_by_ignore_sha and censor_content. + # Previously process_result was executed only once, so it did not create any issue. + # In the future we could rework those functions such that they do not change what is in the result. + policy_breaks = deepcopy(result.scan.policy_breaks) is_patch = result.filemode != Filemode.FILE sha_dict = leak_dictionary_by_ignore_sha(policy_breaks) if self.show_secrets: content = result.content else: - content = censor_content(result.content, result.scan.policy_breaks) + content = censor_content(result.content, policy_breaks) lines = get_lines_from_content(content, result.filemode, is_patch) padding = get_padding(lines) @@ -77,16 +109,23 @@ def process_result(self, result: Result) -> str: if len(lines) == 0: raise click.ClickException("Parsing of scan result failed.") - result_buf.write(file_info(result.filename, len(sha_dict))) - + number_of_displayed_secrets = 0 for ignore_sha, policy_breaks in sha_dict.items(): - result_buf.write(policy_break_header(policy_breaks, ignore_sha)) - for policy_break in policy_breaks: - policy_break.matches = TextOutputHandler.make_matches( - policy_break.matches, lines, is_patch + known_secret = policy_breaks[0].known_secret + if (not known_secret and not show_only_known_secrets) or ( + known_secret and show_only_known_secrets + ): + number_of_displayed_secrets += 1 + + result_buf.write( + policy_break_header(policy_breaks, ignore_sha, known_secret) ) - if policy_breaks[0].policy == "Secrets detection": + for policy_break in policy_breaks: + policy_break.matches = TextOutputHandler.make_matches( + policy_break.matches, lines, is_patch + ) + result_buf.write( leak_message_located( flatten_policy_breaks_by_line(policy_breaks), @@ -98,7 +137,11 @@ def process_result(self, result: Result) -> str: ) ) - return result_buf.getvalue() + file_info_line = "" + if number_of_displayed_secrets > 0: + file_info_line = file_info(result.filename, number_of_displayed_secrets) + + return file_info_line + result_buf.getvalue() @staticmethod def make_matches( diff --git a/ggshield/scan/docker.py b/ggshield/scan/docker.py index 6241d4f64e..4f7e81f57e 100644 --- a/ggshield/scan/docker.py +++ b/ggshield/scan/docker.py @@ -251,6 +251,7 @@ def docker_scan_archive( matches_ignore: Iterable[IgnoredMatch], scan_context: ScanContext, ignored_detectors: Optional[Set[str]] = None, + ignore_known_secrets: Optional[bool] = None, ) -> ScanCollection: files = get_files_from_docker_archive(archive) @@ -265,6 +266,7 @@ def docker_scan_archive( scan_context=scan_context, ignored_matches=matches_ignore, ignored_detectors=ignored_detectors, + ignore_known_secrets=ignore_known_secrets, ) results = scanner.scan( files.files, diff --git a/ggshield/scan/repo.py b/ggshield/scan/repo.py index ff8c06d806..3fa6200a0c 100644 --- a/ggshield/scan/repo.py +++ b/ggshield/scan/repo.py @@ -57,6 +57,7 @@ def scan_repo_path( matches_ignore=config.secret.ignored_matches, scan_context=scan_context, ignored_detectors=config.secret.ignored_detectors, + ignore_known_secrets=config.ignore_known_secrets, ) except Exception as error: return handle_exception(error, config.verbose) @@ -70,6 +71,7 @@ def scan_commits_content( scan_context: ScanContext, progress_callback: Callable[..., None], ignored_detectors: Optional[Set[str]] = None, + ignore_known_secrets: bool = False, ) -> ScanCollection: # pragma: no cover try: commit_files = list(itertools.chain.from_iterable(c.files for c in commits)) @@ -80,6 +82,7 @@ def scan_commits_content( scan_context=scan_context, ignored_matches=matches_ignore, ignored_detectors=ignored_detectors, + ignore_known_secrets=ignore_known_secrets, ) results = scanner.scan( commit_files, @@ -148,6 +151,7 @@ def scan_commit_range( matches_ignore: Iterable[IgnoredMatch], scan_context: ScanContext, ignored_detectors: Optional[Set[str]] = None, + ignore_known_secrets: bool = False, ) -> int: # pragma: no cover """ Scan every commit in a range. @@ -184,6 +188,7 @@ def scan_commit_range( scan_context, partial(progress.update, task_scan), ignored_detectors, + ignore_known_secrets, ) ) diff --git a/ggshield/scan/scanner.py b/ggshield/scan/scanner.py index 2c2e20bc70..d94d2f350c 100644 --- a/ggshield/scan/scanner.py +++ b/ggshield/scan/scanner.py @@ -12,6 +12,7 @@ from ggshield.core.cache import Cache from ggshield.core.filter import ( + leak_dictionary_by_ignore_sha, remove_ignored_from_result, remove_results_from_ignore_detectors, ) @@ -68,15 +69,46 @@ def from_exception(exc: Exception) -> "Results": return Results(results=[], errors=[error]) -class ScanCollection(NamedTuple): +class ScanCollection: id: str type: str results: Optional[Results] = None - scans: Optional[List["ScanCollection"]] = None # type: ignore[misc] + scans: Optional[List["ScanCollection"]] = None iac_result: Optional[IaCScanResult] = None optional_header: Optional[str] = None # To be printed in Text Output extra_info: Optional[Dict[str, str]] = None # To be included in JSON Output + def __init__( + self, + id: str, + type: str, + results: Optional[Results] = None, + scans: Optional[List["ScanCollection"]] = None, + iac_result: Optional[IaCScanResult] = None, + optional_header: Optional[str] = None, + extra_info: Optional[Dict[str, str]] = None, + ): + self.id = id + self.type = type + self.results = results + self.scans = scans + self.iac_result = iac_result + self.optional_header = optional_header + self.extra_info = extra_info + + ( + self.known_secrets_count, + self.new_secrets_count, + ) = self._get_known_new_secrets_count() + + @property + def has_new_secrets(self) -> bool: + return self.new_secrets_count > 0 + + @property + def has_known_secrets(self) -> bool: + return self.known_secrets_count > 0 + @property def scans_with_results(self) -> List["ScanCollection"]: if self.scans: @@ -91,13 +123,32 @@ def has_iac_result(self) -> bool: def has_results(self) -> bool: return bool(self.results and self.results.results) + def _get_known_new_secrets_count(self) -> Tuple[int, int]: + policy_breaks = [] + for result in self.get_all_results(): + for policy_break in result.scan.policy_breaks: + policy_breaks.append(policy_break) + + known_secrets_count = 0 + new_secrets_count = 0 + sha_dict = leak_dictionary_by_ignore_sha(policy_breaks) + + for ignore_sha, policy_breaks in sha_dict.items(): + if policy_breaks[0].known_secret: + known_secrets_count += 1 + else: + new_secrets_count += 1 + + return known_secrets_count, new_secrets_count + def get_all_results(self) -> Iterable[Result]: """Returns an iterable on all results and sub-scan results""" if self.results: yield from self.results.results if self.scans: for scan in self.scans: - yield from scan.results.results + if scan.results: + yield from scan.results.results class SecretScanner: @@ -112,6 +163,7 @@ def __init__( scan_context: ScanContext, ignored_matches: Optional[Iterable[IgnoredMatch]] = None, ignored_detectors: Optional[Set[str]] = None, + ignore_known_secrets: Optional[bool] = None, ): self.client = client self.cache = cache @@ -119,6 +171,7 @@ def __init__( self.ignored_detectors = ignored_detectors self.headers = scan_context.get_http_headers() self.command_id = scan_context.command_id + self.ignore_known_secrets = ignore_known_secrets def scan( self, @@ -158,10 +211,12 @@ def _scan_chunk( {"document": x.document, "filename": x.filename[-_API_PATH_MAX_LENGTH:]} for x in chunk ] + return executor.submit( self.client.multi_content_scan, documents, self.headers, + ignore_known_secrets=self.ignore_known_secrets, ) def _start_scans( diff --git a/setup.py b/setup.py index 3e1a7c4bca..4aa688f3f0 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,7 @@ def get_version() -> str: "marshmallow>=3.18.0,<3.19.0", "marshmallow-dataclass>=8.5.8,<8.6.0", "oauthlib>=3.2.1,<3.3.0", - "pygitguardian>=1.3.6,<1.4.0", + "pygitguardian @ git+https://github.com/GitGuardian/py-gitguardian.git", # TODO fix after pygitguardian merge "python-dotenv>=0.21.0,<0.22.0", "pyyaml>=6.0,<6.1", "rich>=12.5.1,<12.6.0", diff --git a/tests/unit/cmd/scan/test_path.py b/tests/unit/cmd/scan/test_path.py index 11d736422d..462702767e 100644 --- a/tests/unit/cmd/scan/test_path.py +++ b/tests/unit/cmd/scan/test_path.py @@ -45,7 +45,6 @@ def test_scan_file(self, cli_fs_runner, verbose): result = cli_fs_runner.invoke(cli, ["secret", "scan", "path", "file"]) assert result.exit_code == 0, result.output assert not result.exception - assert "No secrets have been found" in result.output @pytest.mark.parametrize("use_deprecated_syntax", [False, True]) def test_scan_file_secret(self, cli_fs_runner, use_deprecated_syntax): @@ -173,7 +172,6 @@ def test_files_verbose(self, cli_fs_runner: CliRunner): assert not result.exception assert "file1\n" in result.output assert "file2\n" in result.output - assert "No secrets have been found" in result.output def test_files_verbose_abort(self, cli_fs_runner): self.create_files() @@ -193,7 +191,6 @@ def test_files_verbose_yes(self, cli_fs_runner): assert not result.exception assert "file1\n" in result.output assert "file2\n" in result.output - assert "No secrets have been found" in result.output class TestScanDirectory: @@ -245,7 +242,6 @@ def test_directory_verbose(self, cli_fs_runner): assert "file1\n" in result.output assert self.path_line("dir/file2") in result.output assert self.path_line("dir/subdir/file3") in result.output - assert "No secrets have been found" in result.output def test_directory_verbose_abort(self, cli_fs_runner): self.create_files() @@ -291,7 +287,6 @@ def test_directory_verbose_yes(self, cli_fs_runner): assert "file1\n" in result.output assert self.path_line("dir/file2") in result.output assert self.path_line("dir/subdir/file3") in result.output - assert "No secrets have been found" in result.output def test_scan_path_should_detect_non_git_files(self, cli_fs_runner): """ @@ -358,5 +353,3 @@ def test_ignore_detectors( assert ( f": {nb_secret} incident{'s' if plural else ''} " ) in result.output - else: - assert "No secrets have been found" in result.output diff --git a/tests/unit/cmd/scan/test_prepush.py b/tests/unit/cmd/scan/test_prepush.py index fcb48503e5..44a0e6bf70 100644 --- a/tests/unit/cmd/scan/test_prepush.py +++ b/tests/unit/cmd/scan/test_prepush.py @@ -165,6 +165,7 @@ def test_prepush_pre_commit_framework( command_path="cli secret scan pre-push", ), ignored_detectors=set(), + ignore_known_secrets=False, ) assert_invoke_ok(result) assert "Commits to scan: 20" in result.output @@ -349,6 +350,7 @@ def test_prepush_new_branch( matches_ignore=ANY, scan_context=ANY, ignored_detectors=set(), + ignore_known_secrets=False, ) @patch("ggshield.cmd.secret.scan.prepush.scan_commit_range") @@ -389,6 +391,7 @@ def test_prepush_new_orphan_branch( matches_ignore=ANY, scan_context=ANY, ignored_detectors=set(), + ignore_known_secrets=False, ) @patch("ggshield.cmd.secret.scan.prepush.scan_commit_range") diff --git a/tests/unit/cmd/scan/test_prereceive.py b/tests/unit/cmd/scan/test_prereceive.py index 50de3f39f5..0b2334cff7 100644 --- a/tests/unit/cmd/scan/test_prereceive.py +++ b/tests/unit/cmd/scan/test_prereceive.py @@ -299,6 +299,7 @@ def test_new_branch( matches_ignore=ANY, scan_context=ANY, ignored_detectors=set(), + ignore_known_secrets=False, ) @patch("ggshield.cmd.secret.scan.prereceive.scan_commit_range") diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 127b025074..2cbaea96e8 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -531,6 +531,43 @@ def is_macos(): } """ +TWO_POLICY_BREAKS = ScanResult.SCHEMA.load( + { + "policy_breaks": [ + { + "type": "RSA Private Key", + "policy": "Secrets detection", + "matches": [ + { + "line_start": 2, + "match": _MULTILINE_SECRET, + "index_start": 86, + "index_end": 585, + "type": "apikey", + "line_end": 10, + } + ], + }, + { + "type": "SendGrid Key", + "policy": "Secrets detection", + "matches": [ + { + "line_start": 10, + "match": "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", # noqa + "index_start": 594, + "index_end": 662, + "type": "apikey", + "line_end": 10, + } + ], + }, + ], + "policies": ["Filenames", "File extensions", "Secrets detection"], + "policy_break_count": 2, + } +) + my_vcr = vcr.VCR( cassette_library_dir=join(dirname(realpath(__file__)), "cassettes"), path_transformer=vcr.VCR.ensure_suffix(".yaml"), diff --git a/tests/unit/output/snapshots/snap_test_message.py b/tests/unit/output/snapshots/snap_test_message.py deleted file mode 100644 index a3765ee39b..0000000000 --- a/tests/unit/output/snapshots/snap_test_message.py +++ /dev/null @@ -1,12 +0,0 @@ -# -*- coding: utf-8 -*- -# snapshottest: v1 - https://goo.gl/zC4yUc -from __future__ import unicode_literals - -from snapshottest import Snapshot - - -snapshots = Snapshot() - -snapshots['test_message_no_secret 1'] = '''\x1b[37m\x1b[22m\x1b[22m -No secrets have been found -\x1b[0m''' diff --git a/tests/unit/output/snapshots/snap_test_text_output.py b/tests/unit/output/snapshots/snap_test_text_output.py index 30a92ad89b..0183d66a07 100644 --- a/tests/unit/output/snapshots/snap_test_text_output.py +++ b/tests/unit/output/snapshots/snap_test_text_output.py @@ -16,6 +16,11 @@ Occurrences: 1 Ignore with SHA: 38d9d3464520ed68f18d16e640a4a8b37ef5b17608b455267d100aa487ead314 + | @@ -0,0 +1 @ + 1 | Facebook = 294*********575 | ce3f9f********************5e4372; + |___client_id__| + 1 | Facebook = 294*********575 | ce3f9f********************5e4372; + |_________client_secret_________| """ snapshots[ @@ -27,33 +32,47 @@ Occurrences: 1 Ignore with SHA: 38d9d3464520ed68f18d16e640a4a8b37ef5b17608b455267d100aa487ead314 + | @@ -0,0 +1 @ + 1 | Facebook = 294790898041575 | ce3f9f0362bbe5ab01dfc8ee565e4372; + |___client_id__| + 1 | Facebook = 294790898041575 | ce3f9f0362bbe5ab01dfc8ee565e4372; + |_________client_secret_________| """ - snapshots[ "test_leak_message[_MULTI_SECRET_ONE_LINE_PATCH_OVERLAY_SCAN_RESULT-verbose-hide_secrets] 1" -] = """> This is an example header +] = """ secrets-engine-version: 3.14.159 - +> This is an example header > leak.txt: 1 incident detected >> Secret detected: Facebook Access Tokens Occurrences: 1 Ignore with SHA: 38d9d3464520ed68f18d16e640a4a8b37ef5b17608b455267d100aa487ead314 + | @@ -0,0 +1 @ + 1 | Facebook = 294*********575 | ce3f9f********************5e4372; + |___client_id__| + 1 | Facebook = 294*********575 | ce3f9f********************5e4372; + |_________client_secret_________| """ snapshots[ "test_leak_message[_MULTI_SECRET_ONE_LINE_PATCH_OVERLAY_SCAN_RESULT-verbose-show_secrets] 1" -] = """> This is an example header +] = """ secrets-engine-version: 3.14.159 - +> This is an example header > leak.txt: 1 incident detected >> Secret detected: Facebook Access Tokens Occurrences: 1 Ignore with SHA: 38d9d3464520ed68f18d16e640a4a8b37ef5b17608b455267d100aa487ead314 + | @@ -0,0 +1 @ + 1 | Facebook = 294790898041575 | ce3f9f0362bbe5ab01dfc8ee565e4372; + |___client_id__| + 1 | Facebook = 294790898041575 | ce3f9f0362bbe5ab01dfc8ee565e4372; + |_________client_secret_________| """ snapshots[ @@ -65,6 +84,11 @@ Occurrences: 1 Ignore with SHA: 38d9d3464520ed68f18d16e640a4a8b37ef5b17608b455267d100aa487ead314 + | @@ -0,0 +1 @ + 1 | FacebookAppId = 294*********575; FacebookAppSecret = ce3f9f**************… + |___client_id__| + 1 | … = 294*********575; FacebookAppSecret = ce3f9f********************5e4372; + |_________client_secret_________| """ snapshots[ @@ -76,32 +100,47 @@ Occurrences: 1 Ignore with SHA: 38d9d3464520ed68f18d16e640a4a8b37ef5b17608b455267d100aa487ead314 + | @@ -0,0 +1 @ + 1 | FacebookAppId = 294790898041575; FacebookAppSecret = ce3f9f0362bbe5ab01df… + |___client_id__| + 1 | … = 294790898041575; FacebookAppSecret = ce3f9f0362bbe5ab01dfc8ee565e4372; + |_________client_secret_________| """ snapshots[ "test_leak_message[_MULTI_SECRET_ONE_LINE_PATCH_SCAN_RESULT-verbose-hide_secrets] 1" -] = """> This is an example header +] = """ secrets-engine-version: 3.14.159 - +> This is an example header > leak.txt: 1 incident detected >> Secret detected: Facebook Access Tokens Occurrences: 1 Ignore with SHA: 38d9d3464520ed68f18d16e640a4a8b37ef5b17608b455267d100aa487ead314 + | @@ -0,0 +1 @ + 1 | FacebookAppId = 294*********575; FacebookAppSecret = ce3f9f********************5e4372; + |___client_id__| + 1 | FacebookAppId = 294*********575; FacebookAppSecret = ce3f9f********************5e4372; + |_________client_secret_________| """ snapshots[ "test_leak_message[_MULTI_SECRET_ONE_LINE_PATCH_SCAN_RESULT-verbose-show_secrets] 1" -] = """> This is an example header +] = """ secrets-engine-version: 3.14.159 - +> This is an example header > leak.txt: 1 incident detected >> Secret detected: Facebook Access Tokens Occurrences: 1 Ignore with SHA: 38d9d3464520ed68f18d16e640a4a8b37ef5b17608b455267d100aa487ead314 + | @@ -0,0 +1 @ + 1 | FacebookAppId = 294790898041575; FacebookAppSecret = ce3f9f0362bbe5ab01dfc8ee565e4372; + |___client_id__| + 1 | FacebookAppId = 294790898041575; FacebookAppSecret = ce3f9f0362bbe5ab01dfc8ee565e4372; + |_________client_secret_________| """ snapshots[ @@ -113,6 +152,11 @@ Occurrences: 1 Ignore with SHA: 38d9d3464520ed68f18d16e640a4a8b37ef5b17608b455267d100aa487ead314 + | @@ -0,0 +2 @ + 1 | FacebookAppId = 294*********575; + |___client_id__| + 2 | FacebookAppSecret = ce3f9f********************5e4372; + |_________client_secret_________| """ snapshots[ @@ -124,32 +168,47 @@ Occurrences: 1 Ignore with SHA: 38d9d3464520ed68f18d16e640a4a8b37ef5b17608b455267d100aa487ead314 + | @@ -0,0 +2 @ + 1 | FacebookAppId = 294790898041575; + |___client_id__| + 2 | FacebookAppSecret = ce3f9f0362bbe5ab01dfc8ee565e4372; + |_________client_secret_________| """ snapshots[ "test_leak_message[_MULTI_SECRET_TWO_LINES_PATCH_SCAN_RESULT-verbose-hide_secrets] 1" -] = """> This is an example header +] = """ secrets-engine-version: 3.14.159 - +> This is an example header > leak.txt: 1 incident detected >> Secret detected: Facebook Access Tokens Occurrences: 1 Ignore with SHA: 38d9d3464520ed68f18d16e640a4a8b37ef5b17608b455267d100aa487ead314 + | @@ -0,0 +2 @ + 1 | FacebookAppId = 294*********575; + |___client_id__| + 2 | FacebookAppSecret = ce3f9f********************5e4372; + |_________client_secret_________| """ snapshots[ "test_leak_message[_MULTI_SECRET_TWO_LINES_PATCH_SCAN_RESULT-verbose-show_secrets] 1" -] = """> This is an example header +] = """ secrets-engine-version: 3.14.159 - +> This is an example header > leak.txt: 1 incident detected >> Secret detected: Facebook Access Tokens Occurrences: 1 Ignore with SHA: 38d9d3464520ed68f18d16e640a4a8b37ef5b17608b455267d100aa487ead314 + | @@ -0,0 +2 @ + 1 | FacebookAppId = 294790898041575; + |___client_id__| + 2 | FacebookAppSecret = ce3f9f0362bbe5ab01dfc8ee565e4372; + |_________client_secret_________| """ snapshots[ @@ -161,6 +220,13 @@ Occurrences: 1 Ignore with SHA: 1945f4a0c42abb19c1a420ddd09b4b4681249a3057c427b95f794b18595e7ffa + | @@ -0,0 +1,29 @ + 1 | FacebookAppKeys:294*********5733 /ce3f9f********************5e43711 -----… + |___client_id__| + 1 | …:294*********5733 /ce3f9f********************5e43711 -----BEGIN RSA PRI… + |_________client_secret_________| + 2 | MIIBOgIBAAJBAIIRkYjxjE3KIZi******************************+****** + 3 | **************************************************************** >> Secret detected: RSA Private Key Occurrences: 1 @@ -197,6 +263,13 @@ Occurrences: 1 Ignore with SHA: 1945f4a0c42abb19c1a420ddd09b4b4681249a3057c427b95f794b18595e7ffa + | @@ -0,0 +1,29 @ + 1 | FacebookAppKeys: 294790898041573 / ce3f9f0362bbe5ab01dfc8ee565e4371 -----… + |___client_id__| + 1 | …: 294790898041573 / ce3f9f0362bbe5ab01dfc8ee565e4371 -----BEGIN RSA PRI… + |_________client_secret_________| + 2 | MIIBOgIBAAJBAIIRkYjxjE3KIZiEc8k4sWWGNsPYRNE0u0bl5oFVApPLm+uXQ/4l + 3 | bKO9LFtMiVPy700oMWLScwAN5OAiqVLMvHUCAwEAAQJANLr8nmEWuV6t2hAwhK5I >> Secret detected: RSA Private Key Occurrences: 1 @@ -226,15 +299,22 @@ snapshots[ "test_leak_message[_ONE_LINE_AND_MULTILINE_PATCH_CONTENT-verbose-hide_secrets] 1" -] = """> This is an example header +] = """ secrets-engine-version: 3.14.159 - +> This is an example header > leak.txt: 3 incidents detected >> Secret detected: Facebook Access Tokens Occurrences: 1 Ignore with SHA: 1945f4a0c42abb19c1a420ddd09b4b4681249a3057c427b95f794b18595e7ffa + | @@ -0,0 +1,29 @ + 1 | FacebookAppKeys:294*********5733 /ce3f9f********************5e43711 -----BEGIN RSA PRIVATE KEY----- + |___client_id__| + 1 | FacebookAppKeys:294*********5733 /ce3f9f********************5e43711 -----BEGIN RSA PRIVATE KEY----- + |_________client_secret_________| + 2 | MIIBOgIBAAJBAIIRkYjxjE3KIZi******************************+****** + 3 | **************************************************************** >> Secret detected: RSA Private Key Occurrences: 1 @@ -264,15 +344,22 @@ snapshots[ "test_leak_message[_ONE_LINE_AND_MULTILINE_PATCH_CONTENT-verbose-show_secrets] 1" -] = """> This is an example header +] = """ secrets-engine-version: 3.14.159 - +> This is an example header > leak.txt: 3 incidents detected >> Secret detected: Facebook Access Tokens Occurrences: 1 Ignore with SHA: 1945f4a0c42abb19c1a420ddd09b4b4681249a3057c427b95f794b18595e7ffa + | @@ -0,0 +1,29 @ + 1 | FacebookAppKeys: 294790898041573 / ce3f9f0362bbe5ab01dfc8ee565e4371 -----BEGIN RSA PRIVATE KEY----- + |___client_id__| + 1 | FacebookAppKeys: 294790898041573 / ce3f9f0362bbe5ab01dfc8ee565e4371 -----BEGIN RSA PRIVATE KEY----- + |_________client_secret_________| + 2 | MIIBOgIBAAJBAIIRkYjxjE3KIZiEc8k4sWWGNsPYRNE0u0bl5oFVApPLm+uXQ/4l + 3 | bKO9LFtMiVPy700oMWLScwAN5OAiqVLMvHUCAwEAAQJANLr8nmEWuV6t2hAwhK5I >> Secret detected: RSA Private Key Occurrences: 1 @@ -309,9 +396,20 @@ Occurrences: 1 Ignore with SHA: 060bf63de122848f5efa122fe6cea504aae3b24cea393d887fdefa1529c6a02e + | @@ -0,0 +1,29 @ + 1 | PrivateKeyRsa: + 2 | - text: -----BEGIN RSA PRIVATE KEY----- + 3 | +MIIBOgIBAAJBAIIRkYjxjE3KIZi******************************+****** + 4 | +**************************************************************** + 5 | +**************************************************************** + 6 | +***********+**************************************************** + 7 | +****************+*********************************************** + 8 | +**********************+***************************************** + 9 | +****+******Xme/ovcDeM1+3W/UmSHYUW4b3WYq4 + 10 | -----END RSA PRIVATE KEY----- + |_____________________________apikey____________________________| """ - snapshots[ "test_leak_message[_SIMPLE_SECRET_MULTILINE_PATCH_SCAN_RESULT-clip_long_lines-show_secrets] 1" ] = """> This is an example header @@ -321,32 +419,68 @@ Occurrences: 1 Ignore with SHA: 060bf63de122848f5efa122fe6cea504aae3b24cea393d887fdefa1529c6a02e + | @@ -0,0 +1,29 @ + 1 | PrivateKeyRsa: + 2 | - text: -----BEGIN RSA PRIVATE KEY----- + 3 | +MIIBOgIBAAJBAIIRkYjxjE3KIZiEc8k4sWWGNsPYRNE0u0bl5oFVApPLm+uXQ/4l + 4 | +bKO9LFtMiVPy700oMWLScwAN5OAiqVLMvHUCAwEAAQJANLr8nmEWuV6t2hAwhK5I + 5 | +NNmBkEo4M/xFxEtl9J7LKbE2gtNrlCQiJlPP1EMhwAjDOzQcJ3lgFB28dkqH5rMW + 6 | +TQIhANrCE7O+wlCKe0WJqQ3lYlHG91XWyGVgfExJwBDsAD9LAiEAmDY5OSsH0n2A + 7 | +22tthkAvcN1s66lG+0DztOVJ4QLI2z8CIBPeDGwGpx8pdIicN/5LFuLWbyAcoZaT + 8 | +bLaA/DCNPniBAiA0l//bzg+M3srIhm04xzLdR9Vb9IjPRlkvN074zdKDVwIhAKJb + 9 | +RF3C+CMFb0wXme/ovcDeM1+3W/UmSHYUW4b3WYq4 + 10 | -----END RSA PRIVATE KEY----- + |_____________________________apikey____________________________| """ snapshots[ "test_leak_message[_SIMPLE_SECRET_MULTILINE_PATCH_SCAN_RESULT-verbose-hide_secrets] 1" -] = """> This is an example header +] = """ secrets-engine-version: 3.14.159 - +> This is an example header > leak.txt: 1 incident detected >> Secret detected: RSA Private Key Occurrences: 1 Ignore with SHA: 060bf63de122848f5efa122fe6cea504aae3b24cea393d887fdefa1529c6a02e + | @@ -0,0 +1,29 @ + 1 | PrivateKeyRsa: + 2 | - text: -----BEGIN RSA PRIVATE KEY----- + 3 | +MIIBOgIBAAJBAIIRkYjxjE3KIZi******************************+****** + 4 | +**************************************************************** + 5 | +**************************************************************** + 6 | +***********+**************************************************** + 7 | +****************+*********************************************** + 8 | +**********************+***************************************** + 9 | +****+******Xme/ovcDeM1+3W/UmSHYUW4b3WYq4 + 10 | -----END RSA PRIVATE KEY----- + |_____________________________apikey____________________________| """ snapshots[ "test_leak_message[_SIMPLE_SECRET_MULTILINE_PATCH_SCAN_RESULT-verbose-show_secrets] 1" -] = """> This is an example header +] = """ secrets-engine-version: 3.14.159 - +> This is an example header > leak.txt: 1 incident detected >> Secret detected: RSA Private Key Occurrences: 1 Ignore with SHA: 060bf63de122848f5efa122fe6cea504aae3b24cea393d887fdefa1529c6a02e + | @@ -0,0 +1,29 @ + 1 | PrivateKeyRsa: + 2 | - text: -----BEGIN RSA PRIVATE KEY----- + 3 | +MIIBOgIBAAJBAIIRkYjxjE3KIZiEc8k4sWWGNsPYRNE0u0bl5oFVApPLm+uXQ/4l + 4 | +bKO9LFtMiVPy700oMWLScwAN5OAiqVLMvHUCAwEAAQJANLr8nmEWuV6t2hAwhK5I + 5 | +NNmBkEo4M/xFxEtl9J7LKbE2gtNrlCQiJlPP1EMhwAjDOzQcJ3lgFB28dkqH5rMW + 6 | +TQIhANrCE7O+wlCKe0WJqQ3lYlHG91XWyGVgfExJwBDsAD9LAiEAmDY5OSsH0n2A + 7 | +22tthkAvcN1s66lG+0DztOVJ4QLI2z8CIBPeDGwGpx8pdIicN/5LFuLWbyAcoZaT + 8 | +bLaA/DCNPniBAiA0l//bzg+M3srIhm04xzLdR9Vb9IjPRlkvN074zdKDVwIhAKJb + 9 | +RF3C+CMFb0wXme/ovcDeM1+3W/UmSHYUW4b3WYq4 + 10 | -----END RSA PRIVATE KEY----- + |_____________________________apikey____________________________| """ snapshots[ @@ -358,6 +492,9 @@ Occurrences: 1 Ignore with SHA: 2b5840babacb6f089ddcce1fe5a56b803f8b1f636c6f44cdbf14b0c77a194c93 + | @@ -0,0 +1 @ + 1 | github_token: 368ac3e**************************37ddf91 + |________________apikey________________| """ snapshots[ @@ -369,30 +506,39 @@ Occurrences: 1 Ignore with SHA: 2b5840babacb6f089ddcce1fe5a56b803f8b1f636c6f44cdbf14b0c77a194c93 + | @@ -0,0 +1 @ + 1 | github_token: 368ac3edf9e850d1c0ff9d6c526496f8237ddf91 + |________________apikey________________| """ snapshots[ "test_leak_message[_SIMPLE_SECRET_PATCH_SCAN_RESULT-verbose-hide_secrets] 1" -] = """> This is an example header +] = """ secrets-engine-version: 3.14.159 - +> This is an example header > leak.txt: 1 incident detected >> Secret detected: GitHub Token Occurrences: 1 Ignore with SHA: 2b5840babacb6f089ddcce1fe5a56b803f8b1f636c6f44cdbf14b0c77a194c93 + | @@ -0,0 +1 @ + 1 | github_token: 368ac3e**************************37ddf91 + |________________apikey________________| """ snapshots[ "test_leak_message[_SIMPLE_SECRET_PATCH_SCAN_RESULT-verbose-show_secrets] 1" -] = """> This is an example header +] = """ secrets-engine-version: 3.14.159 - +> This is an example header > leak.txt: 1 incident detected >> Secret detected: GitHub Token Occurrences: 1 Ignore with SHA: 2b5840babacb6f089ddcce1fe5a56b803f8b1f636c6f44cdbf14b0c77a194c93 + | @@ -0,0 +1 @ + 1 | github_token: 368ac3edf9e850d1c0ff9d6c526496f8237ddf91 + |________________apikey________________| """ diff --git a/tests/unit/output/test_json_output.py b/tests/unit/output/test_json_output.py index ef38a8b9b1..9b04e43a0d 100644 --- a/tests/unit/output/test_json_output.py +++ b/tests/unit/output/test_json_output.py @@ -1,4 +1,6 @@ +import json from collections import namedtuple +from copy import deepcopy import pytest from pytest_voluptuous import Partial, S @@ -7,14 +9,24 @@ from ggshield.core.utils import Filemode from ggshield.output import JSONOutputHandler, OutputHandler from ggshield.output.json.schemas import JSONScanCollectionSchema -from ggshield.scan import Commit, ScanCollection, ScanContext, ScanMode, SecretScanner +from ggshield.scan import ( + Commit, + Result, + Results, + ScanCollection, + ScanContext, + ScanMode, + SecretScanner, +) from tests.unit.conftest import ( _MULTIPLE_SECRETS_PATCH, _NO_SECRET_PATCH, _ONE_LINE_AND_MULTILINE_PATCH, + _ONE_LINE_AND_MULTILINE_PATCH_CONTENT, _SINGLE_ADD_PATCH, _SINGLE_DELETE_PATCH, _SINGLE_MOVE_PATCH, + TWO_POLICY_BREAKS, UNCHECKED_SECRET_PATCH, VALID_SECRET_PATCH, my_vcr, @@ -135,3 +147,77 @@ def test_json_output(client, cache, name, input_patch, expected_exit_code): assert SCHEMA_WITH_INCIDENTS == JSONScanCollectionSchema().loads( json_flat_results ) + + +@pytest.mark.parametrize("verbose", [True, False]) +@pytest.mark.parametrize("ignore_known_secrets", [True, False]) +@pytest.mark.parametrize( + "secrets_types", ["only_new_secrets", "only_known_secrets", "mixed_secrets"] +) +def test_ignore_known_secrets(verbose, ignore_known_secrets, secrets_types): + """ + GIVEN policy breaks + WHEN generating json output + THEN if ignore_known_secrets is used, include "known_secret" field for the known policy breaks in the json output + """ + output_handler = JSONOutputHandler(show_secrets=True, verbose=verbose) + + result: Result = Result( + content=_ONE_LINE_AND_MULTILINE_PATCH_CONTENT, + filename="leak.txt", + filemode=Filemode.NEW, + scan=deepcopy(TWO_POLICY_BREAKS), # 2 policy breaks + ) + + all_policy_breaks = result.scan.policy_breaks + + known_policy_breaks = [] + new_policy_breaks = all_policy_breaks + + # add known_secret for the secrets that are known, when the option is, the known_secret field is not returned + if ignore_known_secrets: + if secrets_types == "only_known_secrets": + known_policy_breaks = all_policy_breaks + new_policy_breaks = [] + elif secrets_types == "mixed_secrets": + # set only first policy break as known + known_policy_breaks = all_policy_breaks[:1] + new_policy_breaks = all_policy_breaks[1:] + + for policy_break in known_policy_breaks: + policy_break.known_secret = True + + # call output handler + output = output_handler._process_scan_impl( + ScanCollection( + id="outer_scan", + type="outer_scan", + results=Results(results=[], errors=[]), + scans=[ + ScanCollection( + id="scan", + type="test", + results=Results(results=[result], errors=[]), + optional_header="> This is an example header", + ) + ], + ) + ) + + incidents = json.loads(output)["scans"][0]["entities_with_incidents"][0][ + "incidents" + ] + # We can rely on the policy break type, since in this test there are 2 policy breaks, + # and they are of different types + incident_for_policy_break_type = { + incident["type"]: incident for incident in incidents + } + + for policy_break in known_policy_breaks: + assert incident_for_policy_break_type[policy_break.break_type]["known_secret"] + + for policy_break in new_policy_breaks: + assert ( + "known_secret" + not in incident_for_policy_break_type[policy_break.break_type] + ) diff --git a/tests/unit/output/test_message.py b/tests/unit/output/test_message.py index a2b2354406..f9c5bc79e1 100644 --- a/tests/unit/output/test_message.py +++ b/tests/unit/output/test_message.py @@ -3,19 +3,10 @@ import pytest from ggshield.core.text_utils import Line -from ggshield.output.text.message import ( - clip_long_line, - format_line_count_break, - no_leak_message, -) +from ggshield.output.text.message import clip_long_line, format_line_count_break from ggshield.output.text.utils import get_offset, get_padding -def test_message_no_secret(snapshot): - msg = no_leak_message() - snapshot.assert_match(msg) - - @pytest.mark.parametrize( "lines, want", [ diff --git a/tests/unit/output/test_text_output.py b/tests/unit/output/test_text_output.py index 88da25b018..2fbb62a627 100644 --- a/tests/unit/output/test_text_output.py +++ b/tests/unit/output/test_text_output.py @@ -24,6 +24,7 @@ _SIMPLE_SECRET_MULTILINE_PATCH_SCAN_RESULT, _SIMPLE_SECRET_PATCH, _SIMPLE_SECRET_PATCH_SCAN_RESULT, + TWO_POLICY_BREAKS, ) @@ -123,3 +124,174 @@ def test_leak_message(result_input, snapshot, show_secrets, verbose): ) snapshot.assert_match(output) + + +def assert_policies_displayed(output, policy_breaks): + for policy_break in policy_breaks: + if policy_break.known_secret: + assert f"Known secret: {policy_break.break_type}" in output + else: + assert f"Secret detected: {policy_break.break_type}" in output + + +def assert_warning_is_displayed(output, warning_is_displayed, known_secrets_number): + if warning_is_displayed: + plural = ( + "s ignored because they are" + if known_secrets_number > 1 + else " ignored because it is" + ) + assert ( + f"Warning: {known_secrets_number} secret{plural} already known by your GitGuardian" + f" dashboard and you used the `--ignore-known-secrets` option." in output + ) + + +def assert_number_of_secrets_is_displayed(output, nb_new_secrets): + if nb_new_secrets: + assert ( + f"{nb_new_secrets} incident{'s' if nb_new_secrets > 1 else ''} detected" + in output + ) + + +@pytest.mark.parametrize("verbose", [True, False]) +@pytest.mark.parametrize("ignore_known_secrets", [True, False]) +@pytest.mark.parametrize( + "secrets_types", ["only_new_secrets", "only_known_secrets", "mixed_secrets"] +) +def test_ignore_known_secrets(verbose, ignore_known_secrets, secrets_types): + """ + GIVEN policy breaks + WHEN generating text output + THEN if ignore_known_secrets is used, do not show known secret (unless the verbose mode) + """ + output_handler = TextOutputHandler(show_secrets=True, verbose=verbose) + + result: Result = Result( + content=_ONE_LINE_AND_MULTILINE_PATCH_CONTENT, + filename="leak.txt", + filemode=Filemode.NEW, + scan=deepcopy(TWO_POLICY_BREAKS), # 2 policy breaks + ) + + all_policy_breaks = result.scan.policy_breaks + + known_policy_breaks = [] + new_policy_breaks = all_policy_breaks + + # add known_secret for the secrets that are known, when the option is, the known_secret field is not returned + if ignore_known_secrets: + if secrets_types == "only_known_secrets": + known_policy_breaks = all_policy_breaks + new_policy_breaks = [] + elif secrets_types == "mixed_secrets": + # set only first policy break as known + known_policy_breaks = all_policy_breaks[:1] + new_policy_breaks = all_policy_breaks[1:] + + for policy_break in known_policy_breaks: + policy_break.known_secret = True + + # call output handler + output = output_handler._process_scan_impl( + ScanCollection( + id="outer_scan", + type="outer_scan", + results=Results(results=[], errors=[]), + scans=[ + ScanCollection( + id="scan", + type="test", + results=Results(results=[result], errors=[]), + optional_header="> This is an example header", + ) + ], + ) + ) + + if secrets_types == "only_new_secrets" or not ignore_known_secrets: + expected_policies_break_displayed = all_policy_breaks + expected_warning_is_displayed = False + else: + expected_warning_is_displayed = True + + if verbose: + expected_policies_break_displayed = known_policy_breaks + else: + if secrets_types == "only_known_secrets": + # Do not show secrets if there are only known secrets and not in verbose mode + expected_policies_break_displayed = [] + else: + expected_policies_break_displayed = new_policy_breaks + + output = click.unstyle(output).replace( + _file_info_decoration(), _file_info_default_decoration() + ) + + assert_policies_displayed(output, expected_policies_break_displayed) + assert_warning_is_displayed( + output, expected_warning_is_displayed, len(known_policy_breaks) + ) + assert_number_of_secrets_is_displayed( + output, len(expected_policies_break_displayed) + ) + + +@pytest.mark.parametrize("ignore_known_secrets", [True, False]) +@pytest.mark.parametrize( + "secrets_types", ["only_new_secrets", "only_known_secrets", "mixed_secrets"] +) +def test_ignore_known_secrets_exit_code(ignore_known_secrets, secrets_types): + """ + GIVEN policy breaks + WHEN checking for the exit code + THEN the exit code is 1 when the new secrets are present, and 0 otherwise + """ + output_handler = TextOutputHandler(show_secrets=True, verbose=False) + + result: Result = Result( + content=_ONE_LINE_AND_MULTILINE_PATCH_CONTENT, + filename="leak.txt", + filemode=Filemode.NEW, + scan=deepcopy(TWO_POLICY_BREAKS), # 2 policy breaks + ) + + all_policy_breaks = result.scan.policy_breaks + + known_policy_breaks = [] + new_policy_breaks = all_policy_breaks + + # add known_secret for the secrets that are known, when the option is, the known_secret field is not returned + if ignore_known_secrets: + if secrets_types == "only_known_secrets": + known_policy_breaks = all_policy_breaks + new_policy_breaks = [] + elif secrets_types == "mixed_secrets": + # set only first policy break as known + known_policy_breaks = all_policy_breaks[:1] + new_policy_breaks = all_policy_breaks[1:] + + for policy_break in known_policy_breaks: + policy_break.known_secret = True + + # call output handler + exit_code = output_handler._get_exit_code( + ScanCollection( + id="outer_scan", + type="outer_scan", + results=Results(results=[], errors=[]), + scans=[ + ScanCollection( + id="scan", + type="test", + results=Results(results=[result], errors=[]), + optional_header="> This is an example header", + ) + ], + ) + ) + + expected_exit_code = len(new_policy_breaks) > 0 + + assert exit_code == expected_exit_code diff --git a/tests/unit/scan/test_scan.py b/tests/unit/scan/test_scan.py index f14a2c1ac3..b88484c9f6 100644 --- a/tests/unit/scan/test_scan.py +++ b/tests/unit/scan/test_scan.py @@ -42,4 +42,5 @@ def test_request_headers(scan_mock: Mock, client): "GGShield-Command-Id": ANY, "mode": "path", }, + ignore_known_secrets=None, )