Skip to content

Commit

Permalink
fix: replace 0 bytes instead of raising an exception
Browse files Browse the repository at this point in the history
This factors out the handling of 0 bytes instead of leaving it up to all
callers.
  • Loading branch information
agateau-gg committed Jun 27, 2022
1 parent 59f2926 commit ec02027
Show file tree
Hide file tree
Showing 5 changed files with 94 additions and 13 deletions.
10 changes: 8 additions & 2 deletions pygitguardian/client.py
Expand Up @@ -236,7 +236,10 @@ def content_scan(
extra_headers: Optional[Dict[str, str]] = None,
) -> Union[Detail, ScanResult]:
"""
content_scan handles the /scan endpoint of the API
content_scan handles the /scan endpoint of the API.
If document contains `0` bytes, they will be replaced with the Unicode
replacement character.
:param filename: name of file, example: "intro.py"
:param document: content of file
Expand Down Expand Up @@ -272,7 +275,10 @@ def multi_content_scan(
extra_headers: Optional[Dict[str, str]] = None,
) -> Union[Detail, MultiScanResult]:
"""
multi_content_scan handles the /multiscan endpoint of the API
multi_content_scan handles the /multiscan endpoint of the API.
If documents contain `0` bytes, they will be replaced with the Unicode
replacement character.
:param documents: List of dictionaries containing the keys document
and, optionally, filename.
Expand Down
12 changes: 7 additions & 5 deletions pygitguardian/models.py
Expand Up @@ -52,7 +52,7 @@ class DocumentSchema(BaseSchema):
document = fields.String(required=True)

@validates("document")
def validate_document(self, document: str) -> str:
def validate_document(self, document: str) -> None:
"""
validate that document is smaller than scan limit
"""
Expand All @@ -64,10 +64,12 @@ def validate_document(self, document: str) -> str:
)
)

if "\x00" in document:
raise ValidationError("document has null characters")

return document
@post_load
def replace_0_bytes(
    self, in_data: Dict[str, Any], **kwargs: Any
) -> Dict[str, Any]:
    """
    Substitute the Unicode replacement character for every 0 byte in
    the loaded document.

    The API does not accept 0 bytes in documents, so they are swapped
    for U+FFFD before the data is handed back.

    :param in_data: loaded data; its "document" entry is rewritten in place
    :param kwargs: extra keyword arguments, unused here
    :return: the same dictionary, holding the sanitized document
    """
    sanitized = in_data["document"].replace("\0", "\uFFFD")
    in_data["document"] = sanitized
    return in_data


class Document(Base):
Expand Down
65 changes: 65 additions & 0 deletions tests/cassettes/document_with_0_bytes.yaml
@@ -0,0 +1,65 @@
interactions:
- request:
body: '{"document": "Hello World"}'
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Length:
- '27'
Content-Type:
- application/json
User-Agent:
- pygitguardian/1.3.4 (Linux;py3.8.10)
method: POST
uri: https://api.gitguardian.com/v1/scan
response:
body:
string:
'{"policy_break_count":0,"policies":["File extensions","Filenames","Secrets
detection"],"policy_breaks":[]}'
headers:
Access-Control-Expose-Headers:
- X-App-Version
Allow:
- POST, OPTIONS
Connection:
- keep-alive
Content-Length:
- '106'
Content-Type:
- application/json
Date:
- Fri, 24 Jun 2022 16:08:40 GMT
Referrer-Policy:
- strict-origin-when-cross-origin
Server:
- nginx
Set-Cookie:
- AWSALB=jzG+lNYQFwVa/HLEk17W6yiGRSKg6NTA2/1+uOmn+n5jG7J03MudYdFdbtJdN7+y9jwsoul66j7dHclQD7B8ZRa4FWTZJO3AeCHhfcZQxhwEb5uko4OvEhi9jD2o;
Expires=Fri, 01 Jul 2022 16:08:40 GMT; Path=/
- AWSALBCORS=jzG+lNYQFwVa/HLEk17W6yiGRSKg6NTA2/1+uOmn+n5jG7J03MudYdFdbtJdN7+y9jwsoul66j7dHclQD7B8ZRa4FWTZJO3AeCHhfcZQxhwEb5uko4OvEhi9jD2o;
Expires=Fri, 01 Jul 2022 16:08:40 GMT; Path=/; SameSite=None; Secure
Strict-Transport-Security:
- max-age=31536000; includeSubDomains
Vary:
- Cookie
X-App-Version:
- v2.7.5
X-Content-Type-Options:
- nosniff
- nosniff
X-Frame-Options:
- DENY
- SAMEORIGIN
X-Secrets-Engine-Version:
- 2.69.0
X-XSS-Protection:
- 1; mode=block
status:
code: 200
message: OK
version: 1
14 changes: 8 additions & 6 deletions tests/test_client.py
Expand Up @@ -362,12 +362,6 @@ def test_multi_content_scan(
r"file exceeds the maximum allowed size",
id="too large file",
),
pytest.param(
"dwhewe\x00ddw",
ValidationError,
r"document has null characters",
id="invalid type",
),
],
)
def test_content_scan_exceptions(
Expand Down Expand Up @@ -437,6 +431,14 @@ def test_content_not_ok():
True,
id="secret with validity",
),
pytest.param(
"document_with_0_bytes",
{"document": "Hello\0World"},
0,
False,
False,
id="Document containing a 0 byte",
),
pytest.param(
"filename",
{"filename": FILENAME, "document": "normal"},
Expand Down
6 changes: 6 additions & 0 deletions tests/test_models.py
Expand Up @@ -32,6 +32,12 @@ def test_document_model(self):
assert isinstance(document.to_dict(), dict)
assert isinstance(str(document), str)

def test_document_handle_0_bytes(self):
    """A 0 byte in the input document is loaded as U+FFFD."""
    payload = {"filename": "name", "document": "hello\0world"}
    loaded = Document.SCHEMA.load(payload)
    assert loaded["document"] == "hello\uFFFDworld"

@pytest.mark.parametrize(
"schema_klass, expected_klass, instance_data",
[
Expand Down

0 comments on commit ec02027

Please sign in to comment.