Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Send the scanned repository's origin url as an extra header #747

Merged
merged 4 commits into from
Oct 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ggshield/cmd/iac/scan/all.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ def iac_scan_all(
command_path=ctx.command_path,
scan_mode=scan_mode if ci_mode is None else f"{scan_mode}/{ci_mode.value}",
extra_headers={"Ci-Mode": str(ci_mode)} if ci_mode else None,
target_path=directory,
).get_http_headers(),
)

Expand Down
1 change: 1 addition & 0 deletions ggshield/cmd/iac/scan/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ def iac_scan_diff(
command_path=ctx.command_path,
scan_mode=scan_mode if ci_mode is None else f"{scan_mode}/{ci_mode.value}",
extra_headers={"Ci-Mode": str(ci_mode)} if ci_mode else None,
target_path=directory,
).get_http_headers(),
)

Expand Down
2 changes: 2 additions & 0 deletions ggshield/cmd/sca/scan/sca_scan_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ def sca_scan_all(
ScanContext(
command_path=ctx.command_path,
scan_mode=scan_mode,
target_path=directory,
).get_http_headers(),
)

Expand Down Expand Up @@ -233,6 +234,7 @@ def sca_scan_diff(
extra_headers=ScanContext(
command_path=ctx.command_path,
scan_mode=scan_mode,
target_path=directory,
extra_headers={"Ci-Mode": ci_mode} if ci_mode else None,
).get_http_headers(),
)
Expand Down
2 changes: 2 additions & 0 deletions ggshield/cmd/secret/scan/ci.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
from pathlib import Path
from typing import Any

import click
Expand Down Expand Up @@ -39,6 +40,7 @@ def ci_cmd(ctx: click.Context, **kwargs: Any) -> int:
scan_context = ScanContext(
scan_mode=mode_header,
command_path=ctx.command_path,
target_path=Path.cwd(),
extra_headers={"Ci-Mode": ci_mode.name},
)

Expand Down
3 changes: 3 additions & 0 deletions ggshield/cmd/secret/scan/path.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,13 @@ def path_cmd(
ignore_git=True,
)

target = paths[0] if len(paths) == 1 else Path.cwd()
target_path = target if target.is_dir() else target.parent
with RichSecretScannerUI(len(files), dataset_type="Path") as ui:
scan_context = ScanContext(
scan_mode=ScanMode.PATH,
command_path=ctx.command_path,
target_path=target_path,
)

scanner = SecretScanner(
Expand Down
2 changes: 2 additions & 0 deletions ggshield/cmd/secret/scan/precommit.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from pathlib import Path
from typing import Any, List

import click
Expand Down Expand Up @@ -50,6 +51,7 @@ def precommit_cmd(
scan_context = ScanContext(
scan_mode=ScanMode.PRE_COMMIT,
command_path=ctx.command_path,
target_path=Path.cwd(),
)

commit = Commit(exclusion_regexes=ctx.obj["exclusion_regexes"])
Expand Down
2 changes: 2 additions & 0 deletions ggshield/cmd/secret/scan/prepush.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
from pathlib import Path
from typing import Any, List

import click
Expand Down Expand Up @@ -89,6 +90,7 @@ def prepush_cmd(ctx: click.Context, prepush_args: List[str], **kwargs: Any) -> i
scan_context = ScanContext(
scan_mode=ScanMode.PRE_PUSH,
command_path=ctx.command_path,
target_path=Path.cwd(),
)

return_code = scan_commit_range(
Expand Down
2 changes: 2 additions & 0 deletions ggshield/cmd/secret/scan/prereceive.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import re
import sys
from pathlib import Path
from typing import Any, List, Set

import click
Expand Down Expand Up @@ -53,6 +54,7 @@ def _execute_prereceive(
scan_context = ScanContext(
scan_mode=ScanMode.PRE_RECEIVE,
command_path=command_path,
target_path=Path.cwd(),
)

return_code = scan_commit_range(
Expand Down
2 changes: 2 additions & 0 deletions ggshield/cmd/secret/scan/range.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from pathlib import Path
from typing import Any

import click
Expand Down Expand Up @@ -41,6 +42,7 @@ def range_cmd(
scan_context = ScanContext(
scan_mode=ScanMode.COMMIT_RANGE,
command_path=ctx.command_path,
target_path=Path.cwd(),
)

return scan_commit_range(
Expand Down
2 changes: 2 additions & 0 deletions ggshield/cmd/secret/scan/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def repo_cmd(

path = Path(repository)
if path.is_dir():
scan_context.target_path = path
return scan_repo_path(
client=client,
cache=cache,
Expand All @@ -60,6 +61,7 @@ def repo_cmd(
if REGEX_GIT_URL.match(repository):
with tempfile.TemporaryDirectory() as tmpdirname:
git(["clone", repository, tmpdirname])
scan_context.target_path = Path(tmpdirname)
return scan_repo_path(
client=client,
cache=cache,
Expand Down
7 changes: 7 additions & 0 deletions ggshield/core/scan/scan_context.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import platform
import uuid
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Optional, Union

from ggshield import __version__
from ggshield.utils.git_shell import get_repository_url_from_path
from ggshield.utils.os import get_os_info

from .scan_mode import ScanMode
Expand All @@ -14,6 +16,7 @@ class ScanContext:
scan_mode: Union[ScanMode, str]
command_path: str
extra_headers: Optional[Dict[str, str]] = None
target_path: Optional[Path] = None

def __post_init__(self) -> None:
self.command_id = str(uuid.uuid4())
Expand All @@ -34,6 +37,10 @@ def get_http_headers(self) -> Dict[str, str]:
"OS-Version": self.os_version,
"Python-Version": self.python_version,
}
if self.target_path is not None:
repo_url = get_repository_url_from_path(self.target_path)
if repo_url is not None:
headers["Repository-URL"] = repo_url
agateau-gg marked this conversation as resolved.
Show resolved Hide resolved
if self.extra_headers:
headers = {**headers, **self.extra_headers}

Expand Down
44 changes: 44 additions & 0 deletions ggshield/utils/git_shell.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
import os
import re
import subprocess
from enum import Enum
from functools import lru_cache
Expand Down Expand Up @@ -95,6 +96,27 @@ def _git_rev_parse(option: str, wd: Path) -> Optional[str]:
return _git_rev_parse_absolute(option=option, wd_absolute=wd.resolve())


def simplify_git_url(url: str) -> str:
    """
    Reduce a git remote url to a canonical `host/path` form.

    The following elements are removed from the url:
    - scheme (`https://`, `http://`, `ssh://`, `git://`, ...)
    - credentials
    - port
    - extension (`.git` suffix, Azure DevOps `/_git/` segment)

    scp-like ssh urls (`git@host:owner/repo.git`) are normalized to the same
    `host/owner/repo` form as their https counterpart.

    Examples:
        https://user:pass@mygitlab.corp.com:84/path/to/repo.git
            -> mygitlab.corp.com/path/to/repo
        git@github.com:owner/repo.git
            -> github.com/owner/repo
        git://github.com/owner/repo.git
            -> github.com/owner/repo

    :param url: remote url, as reported by `git remote -v`
    :return: the simplified url
    """
    # Substitutions are applied in order: the port must be removed before the
    # remaining ":" (scp-like syntax) is turned into "/".
    for pattern, replace in (
        # Any leading scheme. Only stripping http(s) left "git://" and
        # "ssh://" urls without credentials mangled by the port rule below
        # (":/" was matched as an empty port).
        (r"^[a-zA-Z][a-zA-Z0-9+.-]*://", ""),
        # Credentials. Deliberately greedy: everything up to the last "@" is
        # dropped so that no part of a password can leak.
        (r".+@", ""),
        (r":\d*/", "/"),  # Port
        (r"\.git$", ""),  # Github/Gitlab/BitBucket extension (**.git)
        (r"/_git/", "/"),  # Azure Devops extension (**/_git/**)
        (":", "/"),  # Normalize ssh url to https format
    ):
        url = re.sub(pattern, replace, url)
    return url


def is_git_dir(wd: Union[str, Path]) -> bool:
    """
    Tell whether `git rev-parse --git-dir` yields a result for `wd`.

    :param wd: directory to check, as a string or a Path
    :return: True if `_git_rev_parse` returned a value, False if it returned None
    """
    git_dir = _git_rev_parse("--git-dir", Path(wd))
    if git_dir is None:
        return False
    return True

Expand Down Expand Up @@ -265,6 +287,28 @@ def get_last_commit_sha_of_branch(branch_name: str) -> Optional[str]:
return last_target_commit[0]


def get_repository_url_from_path(wd: Path) -> Optional[str]:
    """
    Look up a remote url for the repository located at `wd`.

    The fetch url of the `origin` remote is preferred. If no `origin` remote is
    listed, the fetch url of the last remote printed by `git remote -v` is used.
    The selected url is passed through `simplify_git_url` before being returned.

    :param wd: directory in which the git commands are run
    :return: the simplified remote url, or None if `is_git_dir(wd)` is false,
        if the git call raises `CalledProcessError`/`OSError`, or if no fetch
        url is found
    """
    try:
        if not is_git_dir(wd):
            return None
        remote_lines = git(["remote", "-v"], cwd=wd).splitlines()
    except (subprocess.CalledProcessError, OSError):
        return None

    # Lines look like "<name>\t<url> (fetch)"; "(push)" lines are ignored.
    fetch_line = re.compile(r"^(.*)\t(.*) \(fetch\)$")
    selected_url: Optional[str] = None
    for remote_line in remote_lines:
        found = fetch_line.search(remote_line)
        if found is None:
            continue
        remote_name, selected_url = found.groups()
        if remote_name == "origin":
            # origin wins over any other remote: stop looking
            break

    if not selected_url:
        return None
    return simplify_git_url(selected_url)


def get_filepaths_from_ref(
ref: str, wd: Optional[Union[str, Path]] = None
) -> List[Path]:
Expand Down
64 changes: 64 additions & 0 deletions tests/unit/cassettes/test_sca_scan_context_repository.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
interactions:
- request:
body: '{"files": ["Pipfile.lock"]}'
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Length:
- '27'
Content-Type:
- application/json
User-Agent:
- pygitguardian/1.10.0 (Linux;py3.10.4) ggshield
method: POST
uri: https://api.gitguardian.com/v1/sca/compute_sca_files/
response:
body:
string: '{"sca_files":["Pipfile.lock"],"potential_siblings":[]}'
headers:
access-control-expose-headers:
- X-App-Version
allow:
- POST, OPTIONS
content-length:
- '54'
content-type:
- application/json
cross-origin-opener-policy:
- same-origin
date:
- Wed, 25 Oct 2023 13:35:42 GMT
referrer-policy:
- strict-origin-when-cross-origin
server:
- istio-envoy
strict-transport-security:
- max-age=31536000; includeSubDomains
vary:
- Cookie
x-app-version:
- v2.41.5
x-content-type-options:
- nosniff
- nosniff
x-envoy-upstream-service-time:
- '13'
x-frame-options:
- DENY
- SAMEORIGIN
x-sca-engine-version:
- 1.18.1
x-sca-last-vuln-fetch:
- '2023-10-17T16:26:51.058422+00:00'
x-secrets-engine-version:
- 2.98.0
x-xss-protection:
- 1; mode=block
status:
code: 200
message: OK
version: 1
64 changes: 64 additions & 0 deletions tests/unit/cassettes/test_sca_scan_diff_context_repository.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
interactions:
- request:
body: '{"files": ["Pipfile.lock"]}'
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Length:
- '27'
Content-Type:
- application/json
User-Agent:
- pygitguardian/1.10.0 (Linux;py3.10.4) ggshield
method: POST
uri: https://api.gitguardian.com/v1/sca/compute_sca_files/
response:
body:
string: '{"sca_files":["Pipfile.lock"],"potential_siblings":[]}'
headers:
access-control-expose-headers:
- X-App-Version
allow:
- POST, OPTIONS
content-length:
- '54'
content-type:
- application/json
cross-origin-opener-policy:
- same-origin
date:
- Wed, 25 Oct 2023 13:35:16 GMT
referrer-policy:
- strict-origin-when-cross-origin
server:
- istio-envoy
strict-transport-security:
- max-age=31536000; includeSubDomains
vary:
- Cookie
x-app-version:
- v2.41.5
x-content-type-options:
- nosniff
- nosniff
x-envoy-upstream-service-time:
- '15'
x-frame-options:
- DENY
- SAMEORIGIN
x-sca-engine-version:
- 1.18.1
x-sca-last-vuln-fetch:
- '2023-10-17T16:26:51.058422+00:00'
x-secrets-engine-version:
- 2.98.0
x-xss-protection:
- 1; mode=block
status:
code: 200
message: OK
version: 1
Loading
Loading