From d4d8097455aa4e4e705f06f21419b461d0315be5 Mon Sep 17 00:00:00 2001 From: Aurelien Gateau Date: Fri, 24 Jun 2022 18:48:20 +0200 Subject: [PATCH] fix: replace 0 bytes instead of raising an exception --- pygitguardian/client.py | 8 ++- pygitguardian/models.py | 11 ++-- tests/cassettes/document_with_0_bytes.yaml | 65 ++++++++++++++++++++++ tests/test_client.py | 14 +++-- 4 files changed, 85 insertions(+), 13 deletions(-) create mode 100644 tests/cassettes/document_with_0_bytes.yaml diff --git a/pygitguardian/client.py b/pygitguardian/client.py index 1b2d5817..4f76eacf 100644 --- a/pygitguardian/client.py +++ b/pygitguardian/client.py @@ -236,7 +236,9 @@ def content_scan( extra_headers: Optional[Dict[str, str]] = None, ) -> Union[Detail, ScanResult]: """ - content_scan handles the /scan endpoint of the API + content_scan handles the /scan endpoint of the API. + + If document contains `0` bytes, they will be replaced with a space. :param filename: name of file, example: "intro.py" :param document: content of file @@ -272,7 +274,9 @@ def multi_content_scan( extra_headers: Optional[Dict[str, str]] = None, ) -> Union[Detail, MultiScanResult]: """ - multi_content_scan handles the /multiscan endpoint of the API + multi_content_scan handles the /multiscan endpoint of the API. + + If documents contain `0` bytes, they will be replaced with a space. :param documents: List of dictionaries containing the keys document and, optionally, filename. diff --git a/pygitguardian/models.py b/pygitguardian/models.py index 3d47edc3..da2aa6e1 100644 --- a/pygitguardian/models.py +++ b/pygitguardian/models.py @@ -52,7 +52,7 @@ class DocumentSchema(BaseSchema): document = fields.String(required=True) @validates("document") - def validate_document(self, document: str) -> str: + def validate_document(self, document: str) -> None: """ validate that document is smaller than scan limit """ @@ -64,10 +64,11 @@ def validate_document(self, document: str) -> str: ) ) - if "\x00" in document: - raise ValidationError("document has null characters") - - return document + @post_load + def replace_zero(self, in_data: Dict[str, Any], **kwargs: Any) -> Dict[str, Any]: + # Our API does not accept 0 bytes in documents, so replace them with a space + in_data["document"] = in_data["document"].replace("\0", " ") + return in_data class Document(Base): diff --git a/tests/cassettes/document_with_0_bytes.yaml b/tests/cassettes/document_with_0_bytes.yaml new file mode 100644 index 00000000..51e0e24c --- /dev/null +++ b/tests/cassettes/document_with_0_bytes.yaml @@ -0,0 +1,65 @@ +interactions: + - request: + body: '{"document": "Hello World"}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '27' + Content-Type: + - application/json + User-Agent: + - pygitguardian/1.3.4 (Linux;py3.8.10) + method: POST + uri: https://api.gitguardian.com/v1/scan + response: + body: + string: + '{"policy_break_count":0,"policies":["File extensions","Filenames","Secrets + detection"],"policy_breaks":[]}' + headers: + Access-Control-Expose-Headers: + - X-App-Version + Allow: + - POST, OPTIONS + Connection: + - keep-alive + Content-Length: + - '106' + Content-Type: + - application/json + Date: + - Fri, 24 Jun 2022 16:08:40 GMT + Referrer-Policy: + - strict-origin-when-cross-origin + Server: + - nginx + Set-Cookie: + - AWSALB=jzG+lNYQFwVa/HLEk17W6yiGRSKg6NTA2/1+uOmn+n5jG7J03MudYdFdbtJdN7+y9jwsoul66j7dHclQD7B8ZRa4FWTZJO3AeCHhfcZQxhwEb5uko4OvEhi9jD2o; + Expires=Fri, 01 Jul 2022 16:08:40 GMT; Path=/ + - AWSALBCORS=jzG+lNYQFwVa/HLEk17W6yiGRSKg6NTA2/1+uOmn+n5jG7J03MudYdFdbtJdN7+y9jwsoul66j7dHclQD7B8ZRa4FWTZJO3AeCHhfcZQxhwEb5uko4OvEhi9jD2o; + Expires=Fri, 01 Jul 2022 16:08:40 GMT; Path=/; SameSite=None; Secure + Strict-Transport-Security: + - max-age=31536000; includeSubDomains + Vary: + - Cookie + X-App-Version: + - v2.7.5 + X-Content-Type-Options: + - nosniff + - nosniff + X-Frame-Options: + - DENY + - SAMEORIGIN + X-Secrets-Engine-Version: + - 2.69.0 + X-XSS-Protection: + - 1; mode=block + status: + code: 200 + message: OK +version: 1 diff --git a/tests/test_client.py b/tests/test_client.py index 1348d7db..288ca3f2 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -362,12 +362,6 @@ def test_multi_content_scan( r"file exceeds the maximum allowed size", id="too large file", ), - pytest.param( - "dwhewe\x00ddw", - ValidationError, - r"document has null characters", - id="invalid type", - ), ], ) def test_content_scan_exceptions( @@ -437,6 +431,14 @@ def test_content_not_ok(): True, id="secret with validity", ), + pytest.param( + "document_with_0_bytes", + {"document": "Hello\0World"}, + 0, + False, + False, + id="Document containing a 0 byte", + ), pytest.param( "filename", {"filename": FILENAME, "document": "normal"},