From 5cad0be49abd64cf2825853d5051454ea58f4dc9 Mon Sep 17 00:00:00 2001 From: Paul Beslin Date: Thu, 21 Sep 2023 15:34:43 +0200 Subject: [PATCH 1/4] feat(iac): send the scanned repository's origin url as an extra header --- ggshield/utils/git_shell.py | 44 +++++++++ tests/unit/utils/test_git_shell.py | 146 +++++++++++++++++++++++++++++ 2 files changed, 190 insertions(+) diff --git a/ggshield/utils/git_shell.py b/ggshield/utils/git_shell.py index f1dc21003d..1a5c6863e1 100644 --- a/ggshield/utils/git_shell.py +++ b/ggshield/utils/git_shell.py @@ -1,5 +1,6 @@ import logging import os +import re import subprocess from enum import Enum from functools import lru_cache @@ -95,6 +96,27 @@ def _git_rev_parse(option: str, wd: Path) -> Optional[str]: return _git_rev_parse_absolute(option=option, wd_absolute=wd.resolve()) +def simplify_git_url(url: str) -> str: + """ + Removes elements from the git remote url. + - scheme + - credentials + - port + - extension + https://user:pass@mygitlab.corp.com:84/path/to/repo.git -> mygitlab.corp.com/path/to/repo + """ + for (pattern, replace) in ( + (r"https?://", ""), # Scheme + (r".+@", ""), # Credentials + (r":\d*/", "/"), # Port + (r"\.git$", ""), # Github/Gitlab/BitBucket extension (**.git) + (r"/_git/", "/"), # Azure Devops extension (**/_git/**) + (":", "/"), # Normalize ssh url to https format + ): + url = re.sub(pattern, replace, url) + return url + + def is_git_dir(wd: Union[str, Path]) -> bool: return _git_rev_parse("--git-dir", Path(wd)) is not None @@ -265,6 +287,28 @@ def get_last_commit_sha_of_branch(branch_name: str) -> Optional[str]: return last_target_commit[0] +def get_repository_url_from_path(wd: Path) -> Optional[str]: + """ + Returns one of the repository remote urls. Returns None if no remote is found, + or the directory is not a repository. 
+ """ + remotes_raw: List[str] = [] + try: + if not is_git_dir(wd): + return None + remotes_raw = git(["remote", "-v"], cwd=wd).splitlines() + except (subprocess.CalledProcessError, OSError): + return None + + url: Optional[str] = None + for line in remotes_raw: + if match := re.search(r"^(.*)\t(.*) \(fetch\)$", line): + name, url = match.groups() + if name == "origin": + break + return simplify_git_url(url) if url else None + + def get_filepaths_from_ref( ref: str, wd: Optional[Union[str, Path]] = None ) -> List[Path]: diff --git a/tests/unit/utils/test_git_shell.py b/tests/unit/utils/test_git_shell.py index defc78d2ef..56ea3b8c4d 100644 --- a/tests/unit/utils/test_git_shell.py +++ b/tests/unit/utils/test_git_shell.py @@ -2,6 +2,7 @@ import tarfile from io import BytesIO from pathlib import Path +from typing import Optional import pytest @@ -12,15 +13,34 @@ check_git_dir, check_git_ref, get_filepaths_from_ref, + get_repository_url_from_path, get_staged_filepaths, git, is_git_dir, is_valid_git_commit_ref, + simplify_git_url, ) from ggshield.utils.os import cd from tests.repository import Repository +def _add_remote( + repository: Repository, repository_name: str, remote_name: Optional[str] = "origin" +): + remote_url = f"https://github.com/owner/{repository_name}.git" + repository.git("remote", "add", remote_name, remote_url) + + +def _create_repository_with_remote( + repository_path: Path, + repository_name: str, + remote_name: Optional[str] = "origin", +): + local_repo = Repository.create(repository_path, bare=True) + _add_remote(local_repo, repository_name, remote_name) + return local_repo + + def test_git_shell(): assert "usage: git" in git(["help"]) @@ -80,6 +100,132 @@ def test_check_git_ref_valid_git_path(tmp_path): check_git_ref("invalid_ref", local_repo_path) +@pytest.mark.parametrize( + ("url", "expected"), + [ + ( + "https://user:password@github.com:84/GitGuardian/ggshield.git", + "github.com/GitGuardian/ggshield", + ), + ( + 
"https://github.com/GitGuardian/ggshield.git", + "github.com/GitGuardian/ggshield", + ), + ( + "git@github.com:GitGuardian/ggshield.git", + "github.com/GitGuardian/ggshield", + ), + ( + "https://github.com/Git.Guar-di_an/gg.sh-ie_ld.git", + "github.com/Git.Guar-di_an/gg.sh-ie_ld", + ), + ( + "https://gitlab.instance.ovh/owner/project/repository.git", + "gitlab.instance.ovh/owner/project/repository", + ), + ( + "https://username@dev.azure.com/username/project/_git/repository", + "dev.azure.com/username/project/repository", + ), + ( + "https://username@bitbucket.org/owner/repository.git", + "bitbucket.org/owner/repository", + ), + ], + ids=[ + "Full Github https", + "Github https", + "Github ssh", + "Github special characters", + "Gitlab https", + "Azure Devops https", + "BitBucket https", + ], +) +def test_simplify_git_url(url, expected): + assert expected == simplify_git_url(url) + + +def test_get_repository_url_from_path(tmp_path: Path): + # GIVEN a local repository with remote url + local_repo = _create_repository_with_remote(tmp_path, "repository") + + # THEN the remote url is returned in the root clone directory + assert "repository" in get_repository_url_from_path(local_repo.path) + # AND in a subdirectory + subdirectory_path = local_repo.path / "subdirectory" + subdirectory_path.mkdir() + assert "repository" in get_repository_url_from_path(subdirectory_path) + + +def test_get_repository_url_from_path_no_repo(tmp_path: Path): + # GIVEN a local directory with no remote git directory + local_directory_path = tmp_path / "local" + local_directory_path.mkdir() + # AND a local repository with no remote git directory + local_repository_path = tmp_path / "repo" + repo = Repository.create(local_repository_path) + repo.create_commit() + + # THEN no url is returned + assert get_repository_url_from_path(local_directory_path) is None + assert get_repository_url_from_path(local_repository_path) is None + + +def test_get_repository_url_from_path_two_remotes(tmp_path: Path): 
+ # GIVEN a local repository with two remotes + local_repo = _create_repository_with_remote( + repository_path=tmp_path, + repository_name="repository1", + remote_name="other_remote", + ) + _add_remote( + repository=local_repo, + repository_name="repository2", + remote_name="origin", + ) + + # THEN only one remote is returned, with priority to origin + assert "repository2" in get_repository_url_from_path(local_repo.path) + # AND in a subdirectory + subdirectory_path = local_repo.path / "subdirectory" + subdirectory_path.mkdir() + assert "repository2" in get_repository_url_from_path(subdirectory_path) + + +def test_get_repository_url_from_path_different_repo(tmp_path: Path): + # GIVEN two repositories with one remote each + local_repo1 = _create_repository_with_remote( + repository_path=tmp_path / "local1", + repository_name="repository1", + ) + local_repo2 = _create_repository_with_remote( + repository_path=tmp_path / "local2", + repository_name="repository2", + ) + + # THEN scanning repo 2 from repo 1 yields repo 2's remote url + with cd(str(local_repo1.path)): + assert "repository2" in get_repository_url_from_path(local_repo2.path) + + +def test_get_repository_url_from_path_subrepo(tmp_path: Path): + # GIVEN two repositories, each with its remote, with repo2 nested inside repo1 + local_repo1 = _create_repository_with_remote( + repository_path=tmp_path, + repository_name="repository1", + ) + local_repo2 = _create_repository_with_remote( + repository_path=local_repo1.path / "nested", + repository_name="repository2", + ) + + # THEN scanning local repo 1 returns remote repo 1 url + assert "repository1" in get_repository_url_from_path(local_repo1.path) + # AND scanning local repo 2 returns remote repo 2 url + assert "repository2" in get_repository_url_from_path(local_repo2.path) + + def test_get_filepaths_from_ref(tmp_path): # GIVEN a repository repo = Repository.create(tmp_path) From 34e1d4261c752fdf7b8f5e3a509b898721cc991e Mon Sep 17 00:00:00 2001 From: Paul Beslin 
Date: Mon, 23 Oct 2023 15:01:57 +0200 Subject: [PATCH 2/4] feat(core): add optional target_path to ScanContext, and send repository url as header --- ggshield/core/scan/scan_context.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ggshield/core/scan/scan_context.py b/ggshield/core/scan/scan_context.py index c9e0d36adc..f5edea3adc 100644 --- a/ggshield/core/scan/scan_context.py +++ b/ggshield/core/scan/scan_context.py @@ -1,9 +1,11 @@ import platform import uuid from dataclasses import dataclass +from pathlib import Path from typing import Dict, Optional, Union from ggshield import __version__ +from ggshield.utils.git_shell import get_repository_url_from_path from ggshield.utils.os import get_os_info from .scan_mode import ScanMode @@ -14,6 +16,7 @@ class ScanContext: scan_mode: Union[ScanMode, str] command_path: str extra_headers: Optional[Dict[str, str]] = None + target_path: Optional[Path] = None def __post_init__(self) -> None: self.command_id = str(uuid.uuid4()) @@ -34,6 +37,10 @@ def get_http_headers(self) -> Dict[str, str]: "OS-Version": self.os_version, "Python-Version": self.python_version, } + if self.target_path is not None: + repo_url = get_repository_url_from_path(self.target_path) + if repo_url is not None: + headers["Repository-URL"] = repo_url if self.extra_headers: headers = {**headers, **self.extra_headers} From 5875d2ea626c8b9e6086ef87e3775941b780cf7a Mon Sep 17 00:00:00 2001 From: Paul Beslin Date: Mon, 23 Oct 2023 15:19:15 +0200 Subject: [PATCH 3/4] feat(scan): add the target path in iac/sca/secrets scans --- ggshield/cmd/iac/scan/all.py | 1 + ggshield/cmd/iac/scan/diff.py | 1 + ggshield/cmd/sca/scan/sca_scan_utils.py | 2 ++ ggshield/cmd/secret/scan/ci.py | 2 ++ ggshield/cmd/secret/scan/path.py | 3 +++ ggshield/cmd/secret/scan/precommit.py | 2 ++ ggshield/cmd/secret/scan/prepush.py | 2 ++ ggshield/cmd/secret/scan/prereceive.py | 2 ++ ggshield/cmd/secret/scan/range.py | 2 ++ ggshield/cmd/secret/scan/repo.py | 2 ++ 
tests/unit/cmd/scan/test_prepush.py | 1 + 11 files changed, 20 insertions(+) diff --git a/ggshield/cmd/iac/scan/all.py b/ggshield/cmd/iac/scan/all.py index a54081497c..0b2121245c 100644 --- a/ggshield/cmd/iac/scan/all.py +++ b/ggshield/cmd/iac/scan/all.py @@ -99,6 +99,7 @@ def iac_scan_all( command_path=ctx.command_path, scan_mode=scan_mode if ci_mode is None else f"{scan_mode}/{ci_mode.value}", extra_headers={"Ci-Mode": str(ci_mode)} if ci_mode else None, + target_path=directory, ).get_http_headers(), ) diff --git a/ggshield/cmd/iac/scan/diff.py b/ggshield/cmd/iac/scan/diff.py index f5845dd023..087d295225 100644 --- a/ggshield/cmd/iac/scan/diff.py +++ b/ggshield/cmd/iac/scan/diff.py @@ -186,6 +186,7 @@ def iac_scan_diff( command_path=ctx.command_path, scan_mode=scan_mode if ci_mode is None else f"{scan_mode}/{ci_mode.value}", extra_headers={"Ci-Mode": str(ci_mode)} if ci_mode else None, + target_path=directory, ).get_http_headers(), ) diff --git a/ggshield/cmd/sca/scan/sca_scan_utils.py b/ggshield/cmd/sca/scan/sca_scan_utils.py index deae3080c5..850058a186 100644 --- a/ggshield/cmd/sca/scan/sca_scan_utils.py +++ b/ggshield/cmd/sca/scan/sca_scan_utils.py @@ -86,6 +86,7 @@ def sca_scan_all( ScanContext( command_path=ctx.command_path, scan_mode=scan_mode, + target_path=directory, ).get_http_headers(), ) @@ -233,6 +234,7 @@ def sca_scan_diff( extra_headers=ScanContext( command_path=ctx.command_path, scan_mode=scan_mode, + target_path=directory, extra_headers={"Ci-Mode": ci_mode} if ci_mode else None, ).get_http_headers(), ) diff --git a/ggshield/cmd/secret/scan/ci.py b/ggshield/cmd/secret/scan/ci.py index 9f97b0f96a..e7108152aa 100644 --- a/ggshield/cmd/secret/scan/ci.py +++ b/ggshield/cmd/secret/scan/ci.py @@ -1,4 +1,5 @@ import os +from pathlib import Path from typing import Any import click @@ -39,6 +40,7 @@ def ci_cmd(ctx: click.Context, **kwargs: Any) -> int: scan_context = ScanContext( scan_mode=mode_header, command_path=ctx.command_path, + 
target_path=Path.cwd(), extra_headers={"Ci-Mode": ci_mode.name}, ) diff --git a/ggshield/cmd/secret/scan/path.py b/ggshield/cmd/secret/scan/path.py index c054a77b39..1228ca0387 100644 --- a/ggshield/cmd/secret/scan/path.py +++ b/ggshield/cmd/secret/scan/path.py @@ -57,10 +57,13 @@ def path_cmd( ignore_git=True, ) + target = paths[0] if len(paths) == 1 else Path.cwd() + target_path = target if target.is_dir() else target.parent with RichSecretScannerUI(len(files), dataset_type="Path") as ui: scan_context = ScanContext( scan_mode=ScanMode.PATH, command_path=ctx.command_path, + target_path=target_path, ) scanner = SecretScanner( diff --git a/ggshield/cmd/secret/scan/precommit.py b/ggshield/cmd/secret/scan/precommit.py index 8cb2654403..e83335a0dd 100644 --- a/ggshield/cmd/secret/scan/precommit.py +++ b/ggshield/cmd/secret/scan/precommit.py @@ -1,3 +1,4 @@ +from pathlib import Path from typing import Any, List import click @@ -50,6 +51,7 @@ def precommit_cmd( scan_context = ScanContext( scan_mode=ScanMode.PRE_COMMIT, command_path=ctx.command_path, + target_path=Path.cwd(), ) commit = Commit(exclusion_regexes=ctx.obj["exclusion_regexes"]) diff --git a/ggshield/cmd/secret/scan/prepush.py b/ggshield/cmd/secret/scan/prepush.py index ff2df40e09..761fa9da6e 100644 --- a/ggshield/cmd/secret/scan/prepush.py +++ b/ggshield/cmd/secret/scan/prepush.py @@ -1,4 +1,5 @@ import logging +from pathlib import Path from typing import Any, List import click @@ -89,6 +90,7 @@ def prepush_cmd(ctx: click.Context, prepush_args: List[str], **kwargs: Any) -> i scan_context = ScanContext( scan_mode=ScanMode.PRE_PUSH, command_path=ctx.command_path, + target_path=Path.cwd(), ) return_code = scan_commit_range( diff --git a/ggshield/cmd/secret/scan/prereceive.py b/ggshield/cmd/secret/scan/prereceive.py index ffbbda31bf..3c1ad6f208 100644 --- a/ggshield/cmd/secret/scan/prereceive.py +++ b/ggshield/cmd/secret/scan/prereceive.py @@ -3,6 +3,7 @@ import os import re import sys +from pathlib import Path 
from typing import Any, List, Set import click @@ -53,6 +54,7 @@ def _execute_prereceive( scan_context = ScanContext( scan_mode=ScanMode.PRE_RECEIVE, command_path=command_path, + target_path=Path.cwd(), ) return_code = scan_commit_range( diff --git a/ggshield/cmd/secret/scan/range.py b/ggshield/cmd/secret/scan/range.py index 212d79ac2f..ebad9db7f8 100644 --- a/ggshield/cmd/secret/scan/range.py +++ b/ggshield/cmd/secret/scan/range.py @@ -1,3 +1,4 @@ +from pathlib import Path from typing import Any import click @@ -41,6 +42,7 @@ def range_cmd( scan_context = ScanContext( scan_mode=ScanMode.COMMIT_RANGE, command_path=ctx.command_path, + target_path=Path.cwd(), ) return scan_commit_range( diff --git a/ggshield/cmd/secret/scan/repo.py b/ggshield/cmd/secret/scan/repo.py index 0842d7799e..614e13a23b 100644 --- a/ggshield/cmd/secret/scan/repo.py +++ b/ggshield/cmd/secret/scan/repo.py @@ -48,6 +48,7 @@ def repo_cmd( path = Path(repository) if path.is_dir(): + scan_context.target_path = path return scan_repo_path( client=client, cache=cache, @@ -60,6 +61,7 @@ def repo_cmd( if REGEX_GIT_URL.match(repository): with tempfile.TemporaryDirectory() as tmpdirname: git(["clone", repository, tmpdirname]) + scan_context.target_path = Path(tmpdirname) return scan_repo_path( client=client, cache=cache, diff --git a/tests/unit/cmd/scan/test_prepush.py b/tests/unit/cmd/scan/test_prepush.py index 7df0da7185..5335f73f4c 100644 --- a/tests/unit/cmd/scan/test_prepush.py +++ b/tests/unit/cmd/scan/test_prepush.py @@ -167,6 +167,7 @@ def test_prepush_pre_commit_framework( scan_context=ScanContext( scan_mode=ScanMode.PRE_PUSH, command_path="cli secret scan pre-push", + target_path=local_repo.path, ), ignored_detectors=set(), ) From f2ad1be671a7046ed15b328a955121b97783d859 Mon Sep 17 00:00:00 2001 From: Paul Beslin Date: Mon, 23 Oct 2023 17:40:50 +0200 Subject: [PATCH 4/4] test: add tests for repository url in headers via ScanContext --- .../test_sca_scan_context_repository.yaml | 64 
+++++++++++++++++++ ...test_sca_scan_diff_context_repository.yaml | 64 +++++++++++++++++++ .../test_scan_context_repository.yaml | 60 +++++++++++++++++ tests/unit/cmd/iac/test_scan_all.py | 42 +++++++++++- tests/unit/cmd/iac/test_scan_diff.py | 33 ++++++++++ tests/unit/cmd/sca/test_diff.py | 44 +++++++++++++ tests/unit/cmd/sca/test_scan.py | 37 +++++++++++ tests/unit/cmd/scan/test_path.py | 43 ++++++++++++- tests/unit/core/scan/test_scan_context.py | 52 +++++++++++++++ 9 files changed, 436 insertions(+), 3 deletions(-) create mode 100644 tests/unit/cassettes/test_sca_scan_context_repository.yaml create mode 100644 tests/unit/cassettes/test_sca_scan_diff_context_repository.yaml create mode 100644 tests/unit/cassettes/test_scan_context_repository.yaml create mode 100644 tests/unit/core/scan/test_scan_context.py diff --git a/tests/unit/cassettes/test_sca_scan_context_repository.yaml b/tests/unit/cassettes/test_sca_scan_context_repository.yaml new file mode 100644 index 0000000000..b9292fc3c1 --- /dev/null +++ b/tests/unit/cassettes/test_sca_scan_context_repository.yaml @@ -0,0 +1,64 @@ +interactions: + - request: + body: '{"files": ["Pipfile.lock"]}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '27' + Content-Type: + - application/json + User-Agent: + - pygitguardian/1.10.0 (Linux;py3.10.4) ggshield + method: POST + uri: https://api.gitguardian.com/v1/sca/compute_sca_files/ + response: + body: + string: '{"sca_files":["Pipfile.lock"],"potential_siblings":[]}' + headers: + access-control-expose-headers: + - X-App-Version + allow: + - POST, OPTIONS + content-length: + - '54' + content-type: + - application/json + cross-origin-opener-policy: + - same-origin + date: + - Wed, 25 Oct 2023 13:35:42 GMT + referrer-policy: + - strict-origin-when-cross-origin + server: + - istio-envoy + strict-transport-security: + - max-age=31536000; includeSubDomains + vary: + - Cookie + x-app-version: + - v2.41.5 + 
x-content-type-options: + - nosniff + - nosniff + x-envoy-upstream-service-time: + - '13' + x-frame-options: + - DENY + - SAMEORIGIN + x-sca-engine-version: + - 1.18.1 + x-sca-last-vuln-fetch: + - '2023-10-17T16:26:51.058422+00:00' + x-secrets-engine-version: + - 2.98.0 + x-xss-protection: + - 1; mode=block + status: + code: 200 + message: OK +version: 1 diff --git a/tests/unit/cassettes/test_sca_scan_diff_context_repository.yaml b/tests/unit/cassettes/test_sca_scan_diff_context_repository.yaml new file mode 100644 index 0000000000..412b9f5f00 --- /dev/null +++ b/tests/unit/cassettes/test_sca_scan_diff_context_repository.yaml @@ -0,0 +1,64 @@ +interactions: + - request: + body: '{"files": ["Pipfile.lock"]}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '27' + Content-Type: + - application/json + User-Agent: + - pygitguardian/1.10.0 (Linux;py3.10.4) ggshield + method: POST + uri: https://api.gitguardian.com/v1/sca/compute_sca_files/ + response: + body: + string: '{"sca_files":["Pipfile.lock"],"potential_siblings":[]}' + headers: + access-control-expose-headers: + - X-App-Version + allow: + - POST, OPTIONS + content-length: + - '54' + content-type: + - application/json + cross-origin-opener-policy: + - same-origin + date: + - Wed, 25 Oct 2023 13:35:16 GMT + referrer-policy: + - strict-origin-when-cross-origin + server: + - istio-envoy + strict-transport-security: + - max-age=31536000; includeSubDomains + vary: + - Cookie + x-app-version: + - v2.41.5 + x-content-type-options: + - nosniff + - nosniff + x-envoy-upstream-service-time: + - '15' + x-frame-options: + - DENY + - SAMEORIGIN + x-sca-engine-version: + - 1.18.1 + x-sca-last-vuln-fetch: + - '2023-10-17T16:26:51.058422+00:00' + x-secrets-engine-version: + - 2.98.0 + x-xss-protection: + - 1; mode=block + status: + code: 200 + message: OK +version: 1 diff --git a/tests/unit/cassettes/test_scan_context_repository.yaml 
b/tests/unit/cassettes/test_scan_context_repository.yaml new file mode 100644 index 0000000000..6277215a56 --- /dev/null +++ b/tests/unit/cassettes/test_scan_context_repository.yaml @@ -0,0 +1,60 @@ +interactions: + - request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - pygitguardian/1.10.0 (Linux;py3.10.4) ggshield + method: GET + uri: https://api.gitguardian.com/v1/metadata + response: + body: + string: '{"version":"v2.41.5","preferences":{"marketplaces__aws_product_url":"http://aws.amazon.com/marketplace/pp/prodview-mrmulzykamba6","on_premise__restrict_signup":true,"on_premise__is_email_server_configured":true,"on_premise__default_sso_config_api_id":null,"onboarding__segmentation_v1_enabled":true},"secret_scan_preferences":{"maximum_documents_per_scan":20,"maximum_document_size":1048576}}' + headers: + access-control-expose-headers: + - X-App-Version + allow: + - GET, HEAD, OPTIONS + content-length: + - '392' + content-type: + - application/json + cross-origin-opener-policy: + - same-origin + date: + - Wed, 25 Oct 2023 13:36:10 GMT + referrer-policy: + - strict-origin-when-cross-origin + server: + - istio-envoy + strict-transport-security: + - max-age=31536000; includeSubDomains + vary: + - Cookie + x-app-version: + - v2.41.5 + x-content-type-options: + - nosniff + - nosniff + x-envoy-upstream-service-time: + - '21' + x-frame-options: + - DENY + - SAMEORIGIN + x-sca-engine-version: + - 1.18.1 + x-sca-last-vuln-fetch: + - '2023-10-17T16:26:51.058422+00:00' + x-secrets-engine-version: + - 2.98.0 + x-xss-protection: + - 1; mode=block + status: + code: 200 + message: OK +version: 1 diff --git a/tests/unit/cmd/iac/test_scan_all.py b/tests/unit/cmd/iac/test_scan_all.py index 3ecf5fd741..222638f46e 100644 --- a/tests/unit/cmd/iac/test_scan_all.py +++ b/tests/unit/cmd/iac/test_scan_all.py @@ -1,6 +1,6 @@ import json from pathlib import Path -from unittest.mock import Mock, patch +from 
unittest.mock import ANY, Mock, patch import pytest import requests @@ -26,7 +26,6 @@ def setup_single_iac_vuln_repo(tmp_path: Path) -> str: Sets up a local repo with a single vulnerable IaC file from a given tmp_path. :returns: a string representing the path to the file """ - repo = Repository.create(tmp_path) iac_file_name = "iac_file_single_vulnerability.tf" @@ -272,3 +271,42 @@ def test_iac_scan_all_ignored_directory( assert_invoke_exited_with(result, ExitCode.USAGE_ERROR) assert "An ignored file or directory cannot be scanned." in result.stdout iac_directory_scan_mock.assert_not_called() + + +@patch("pygitguardian.GGClient.iac_directory_scan") +def test_iac_scan_all_context_repository( + scan_mock: Mock, tmp_path: Path, cli_fs_runner: CliRunner, cli_command +) -> None: + """ + GIVEN a repository with a remote url + WHEN executing a scan all + THEN repository url is sent + """ + local_repo = Repository.create(tmp_path) + remote_url = "https://github.com/owner/repository.git" + local_repo.git("remote", "add", "origin", remote_url) + + tracked_file = local_repo.path / "iac_file_single_vulnerability.tf" + tracked_file.write_text(_IAC_SINGLE_VULNERABILITY) + local_repo.add(tracked_file) + local_repo.create_commit() + + cli_fs_runner.invoke( + cli, + cli_command + + [ + str(local_repo.path), + ], + ) + + scan_mock.assert_called_once_with( + local_repo.path, + ["iac_file_single_vulnerability.tf"], + ANY, + ANY, + ) + assert any( + isinstance(arg, dict) + and arg.get("GGShield-Repository-URL") == "github.com/owner/repository" + for arg in scan_mock.call_args[0] + ) diff --git a/tests/unit/cmd/iac/test_scan_diff.py b/tests/unit/cmd/iac/test_scan_diff.py index 5c5222e8fd..5cd7e9a1f9 100644 --- a/tests/unit/cmd/iac/test_scan_diff.py +++ b/tests/unit/cmd/iac/test_scan_diff.py @@ -191,3 +191,36 @@ def test_iac_scan_diff_ignored_directory( assert_invoke_exited_with(result, ExitCode.USAGE_ERROR) assert "An ignored file or directory cannot be scanned." 
in result.stdout iac_diff_scan_mock.assert_not_called() + + +@patch("pygitguardian.GGClient.iac_diff_scan") +def test_iac_scan_diff_context_repository( + scan_mock: Mock, + tmp_path: Path, + cli_fs_runner: CliRunner, +) -> None: + """ + GIVEN a repository with a remote url + WHEN executing a scan diff + THEN repository url is sent + """ + local_repo = Repository.create(tmp_path) + remote_url = "https://github.com/owner/repository.git" + local_repo.git("remote", "add", "origin", remote_url) + local_repo.create_commit() + + tracked_file = local_repo.path / "iac_file_single_vulnerability.tf" + tracked_file.write_text(_IAC_SINGLE_VULNERABILITY) + local_repo.add(tracked_file) + + cli_fs_runner.invoke( + cli, + ["iac", "scan", "diff", "--ref", "HEAD", "--staged", str(local_repo.path)], + ) + + scan_mock.assert_called_once() + assert any( + isinstance(arg, dict) + and arg.get("GGShield-Repository-URL") == "github.com/owner/repository" + for arg in scan_mock.call_args[0] + ) diff --git a/tests/unit/cmd/sca/test_diff.py b/tests/unit/cmd/sca/test_diff.py index a045ee64f6..27026a73b7 100644 --- a/tests/unit/cmd/sca/test_diff.py +++ b/tests/unit/cmd/sca/test_diff.py @@ -1,8 +1,13 @@ +from pathlib import Path +from unittest.mock import Mock, patch + import pytest +from click.testing import CliRunner from ggshield.__main__ import cli from ggshield.core.errors import ExitCode from ggshield.utils.os import cd +from tests.repository import Repository from tests.unit.conftest import my_vcr @@ -50,3 +55,42 @@ def test_scan_diff( # THEN we get a vulnerability when a commit contains any assert result.exit_code == exit_code, result assert output_message in result.stdout + + +@patch("pygitguardian.GGClient.scan_diff") +@my_vcr.use_cassette("test_sca_scan_diff_context_repository.yaml") +def test_sca_scan_diff_context_repository( + scan_mock: Mock, tmp_path: Path, cli_fs_runner: CliRunner, pipfile_lock_with_vuln +) -> None: + """ + GIVEN a repository with a remote url + WHEN executing a 
scan diff + THEN repository url is sent + """ + local_repo = Repository.create(tmp_path) + remote_url = "https://github.com/owner/repository.git" + local_repo.git("remote", "add", "origin", remote_url) + local_repo.create_commit() + + file = local_repo.path / "Pipfile.lock" + file.write_text(pipfile_lock_with_vuln) + local_repo.add(file) + + cli_fs_runner.invoke( + cli, + [ + "sca", + "scan", + "diff", + "--ref", + "HEAD", + "--staged", + str(local_repo.path), + ], + ) + + scan_mock.assert_called_once() + assert ( + scan_mock.call_args[1].get("extra_headers").get("GGShield-Repository-URL") + == "github.com/owner/repository" + ) diff --git a/tests/unit/cmd/sca/test_scan.py b/tests/unit/cmd/sca/test_scan.py index 2dbc563b2e..4b42958b3d 100644 --- a/tests/unit/cmd/sca/test_scan.py +++ b/tests/unit/cmd/sca/test_scan.py @@ -433,3 +433,40 @@ def test_sca_scan_diff_ignored_directory( assert_invoke_exited_with(result, ExitCode.USAGE_ERROR) assert "An ignored file or directory cannot be scanned." 
in result.stdout compute_sca_files_mock.assert_not_called() + + +@patch("pygitguardian.GGClient.sca_scan_directory") +@my_vcr.use_cassette("test_sca_scan_context_repository.yaml") +def test_sca_scan_context_repository( + scan_mock: Mock, tmp_path: Path, cli_fs_runner: CliRunner, pipfile_lock_with_vuln +) -> None: + """ + GIVEN a repository with a remote url + WHEN executing a scan + THEN repository url is sent + """ + local_repo = Repository.create(tmp_path) + remote_url = "https://github.com/owner/repository.git" + local_repo.git("remote", "add", "origin", remote_url) + + file = local_repo.path / "Pipfile.lock" + file.write_text(pipfile_lock_with_vuln) + local_repo.add(file) + local_repo.create_commit() + + cli_fs_runner.invoke( + cli, + [ + "sca", + "scan", + "all", + str(local_repo.path), + ], + ) + + scan_mock.assert_called_once() + assert any( + isinstance(arg, dict) + and arg.get("GGShield-Repository-URL") == "github.com/owner/repository" + for arg in scan_mock.call_args[0] + ) diff --git a/tests/unit/cmd/scan/test_path.py b/tests/unit/cmd/scan/test_path.py index 8e92eca683..bc7d30dd2e 100644 --- a/tests/unit/cmd/scan/test_path.py +++ b/tests/unit/cmd/scan/test_path.py @@ -1,13 +1,14 @@ import json import os from pathlib import Path -from unittest.mock import patch +from unittest.mock import Mock, patch import pytest from click.testing import CliRunner from ggshield.__main__ import cli from ggshield.core.errors import ExitCode +from tests.repository import Repository from tests.unit.conftest import ( _ONE_LINE_AND_MULTILINE_PATCH, UNCHECKED_SECRET_PATCH, @@ -406,3 +407,43 @@ def test_ignore_detectors( ) in result.output else: assert "No secrets have been found" in result.output + + @patch("pygitguardian.GGClient.multi_content_scan") + @my_vcr.use_cassette("test_scan_context_repository.yaml") + def test_scan_context_repository( + self, + scan_mock: Mock, + tmp_path: Path, + cli_fs_runner: CliRunner, + ) -> None: + """ + GIVEN a repository with a remote url + 
WHEN executing a scan + THEN repository url is sent + """ + local_repo = Repository.create(tmp_path) + remote_url = "https://github.com/owner/repository.git" + local_repo.git("remote", "add", "origin", remote_url) + + file = local_repo.path / "file_secret" + file.write_text(_ONE_LINE_AND_MULTILINE_PATCH) + local_repo.add(file) + local_repo.create_commit() + + cli_fs_runner.invoke( + cli, + [ + "secret", + "scan", + "path", + "-r", + str(local_repo.path), + ], + ) + + scan_mock.assert_called_once() + assert any( + isinstance(arg, dict) + and arg.get("GGShield-Repository-URL") == "github.com/owner/repository" + for arg in scan_mock.call_args[0] + ) diff --git a/tests/unit/core/scan/test_scan_context.py b/tests/unit/core/scan/test_scan_context.py new file mode 100644 index 0000000000..f29905aa7b --- /dev/null +++ b/tests/unit/core/scan/test_scan_context.py @@ -0,0 +1,52 @@ +from pathlib import Path +from typing import Union + +from ggshield.core.scan.scan_context import ScanContext +from ggshield.core.scan.scan_mode import ScanMode +from tests.repository import Repository + + +class TestScanContextRepositoryURL: + def _assert_repo_url_in_headers( + self, context: ScanContext, expected_url: Union[Path, str] + ): + assert context.get_http_headers().get("GGShield-Repository-URL") == str( + expected_url + ) + + def _assert_no_repo_url_in_headers(self, context: ScanContext): + assert context.get_http_headers().get("GGShield-Repository-URL") is None + + def test_scan_context_no_repo( + self, + tmp_path: Path, + ): + """ + GIVEN a directory which is not a git repo + WHEN passing the local path to the scan context + THEN there is no GGShield-Repository-URL in the headers + """ + context = ScanContext( + scan_mode=ScanMode.PATH, + command_path="ggshield secret scan path", + target_path=tmp_path, + ) + self._assert_no_repo_url_in_headers(context) + + def test_scan_context_repository_url_parsed(self, tmp_path: Path): + """ + GIVEN a repository with a remote (url) + WHEN passing 
the local path to the scan context + THEN the remote url is found and simplified + """ + local_repo = Repository.create(tmp_path) + remote_url = "https://user:password@github.com:84/owner/repository.git" + expected_url = "github.com/owner/repository" + local_repo.git("remote", "add", "origin", remote_url) + + context = ScanContext( + scan_mode=ScanMode.PATH, + command_path="ggshield secret scan path", + target_path=local_repo.path, + ) + self._assert_repo_url_in_headers(context, expected_url)