From b912ccb79a544e3101fc5f52d47e5d940419c9f6 Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Wed, 16 Nov 2022 16:32:26 +0200 Subject: [PATCH] YAML and JSON are scanned like structures (#236) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Squashed commit of the following: commit 7b2feeaf86d16497f4202bee91bb2e30592bb153 Author: Roman Babenko Date: Wed Nov 16 01:23:13 2022 +0200 Squashed commit of the following: commit 02768be368eae25b5a2cf13fc38cefcf6b629f86 Merge: 6faeb2c d7e44c7 Author: Roman Babenko Date: Wed Nov 16 00:38:37 2022 +0200 Merge branch 'main' into structures commit d7e44c76e10a71f92046cb08eeb1aa5433adec00 Author: Roman Babenko Date: Tue Nov 15 23:32:10 2022 +0200 Version up to 1.4.5 (#243) * Version up to 1.4.5 and template fix * Update whatthepath requirement version to avoid huge diffs check. * Added test for huge patches parsing commit 6faeb2cc90c4f2c20c45de030b3bed3298b4f570 Author: Roman Babenko Date: Mon Nov 14 09:16:09 2022 +0200 Removed extra debug comments commit 722c068124324112bb26a5ce7a017f27bf8c8f0c Merge: 1555e12 d2e07b8 Author: Roman Babenko Date: Wed Nov 9 01:47:10 2022 +0200 Merge remote-tracking branch 'upstream/main' into structures commit 1555e127c7fb70d55115f0ee8dd911ba3765f5b1 Author: Roman Babenko Date: Wed Nov 9 01:47:03 2022 +0200 rename methods commit 0e36719319a2a9a14bbdc40f1b801eadf7c4f5be Merge: ab563ba 4d59bda Author: Roman Babenko Date: Tue Nov 8 01:19:11 2022 +0200 Merge remote-tracking branch 'upstream/main' into structures commit ab563baf95bb448c57f99143d1dbc6107f4c1a61 Merge: 37287ae 97bdaa0 Author: Roman Babenko Date: Mon Nov 7 09:23:10 2022 +0200 Merge branch 'auxiliary' into structures commit 97bdaa0661ce9c9b46e77c3b9ccbcd23d2825f73 Merge: 6989e46 27b7110 Author: Roman Babenko Date: Mon Nov 7 09:20:05 2022 +0200 Merge branch 'main' into auxiliary commit 6989e46b9d6006316a9ba33f9658fae7d72da38e Author: Roman Babenko Date: Sat Nov 5 10:38:19 2022 +0200 rename methods commit 99f340815986b683bb9e258ab9d748656f1df394 Author: Roman Babenko Date: Sat Nov 5 10:32:13 2022 +0200 make the method private again commit 2108748423f5792dd3e5f273c1fce9dc33705e4e Author: Roman Babenko Date: Wed Nov 2 10:21:50 2022 +0200 fix commit 4abdb33246d10243657553a2d3fee545464e5c98 Author: Roman Babenko Date: Wed Nov 2 10:20:46 2022 +0200 Line umeration commit da431c145d73b6f065d7474094b3c255e6caaa60 Author: Roman Babenko Date: Wed Nov 2 09:50:44 2022 +0200 Use common code for reduce duplicate code commit d3861df60cb22a9a0d7ad2145c692cc644d4e81f Author: Roman Babenko Date: Wed Nov 2 09:31:23 2022 +0200 Apply test assertion commit f268401315b2da24fc514fc87ee2898532749f7e Author: Roman Babenko Date: Wed Nov 2 09:30:36 2022 +0200 Update credsweeper/app.py Co-authored-by: ShinHyung Choi commit a956018e1dc12e3422021c7547b2accb36d7005e Author: Roman Babenko Date: Wed Nov 2 09:30:25 2022 +0200 Update credsweeper/file_handler/data_content_provider.py Co-authored-by: ShinHyung Choi commit 37287ae0312552af97dcab53fbfa722bb667f791 Author: Roman Babenko Date: Thu Oct 27 10:59:27 2022 +0300 restored 75% fuzz coverage commit 343ba589db3174dbf17c33cf662eb13efb4f3151 Author: Roman Babenko Date: Thu Oct 27 10:58:09 2022 +0300 minimized commit 6b9a5d5a658c2a8688df123895700f1d7a7260ed Author: Roman Babenko Date: Thu Oct 27 10:30:13 2022 +0300 reduced commit 7a8c2fd0c128e2f26dfc6aeec566a3d647573479 Author: Roman Babenko Date: Thu Oct 27 08:03:52 2022 +0300 четвер, 27 жовтня 2022 08:03:52 +0300 commit 307762f8ecc4fbeebf02f5e8f88239173b33655d Author: Roman Babenko Date: Wed Oct 26 12:45:54 2022 +0300 flake8fix commit 7c68080a06a3cc4e70b53ce2d03bc089c84e8d96 Author: Roman Babenko Date: Wed Oct 26 12:04:06 2022 +0300 Any struct commit 1e2c317f58e7d096b93b2b0d91a20fdbbfe040cd Author: Roman Babenko Date: Wed Oct 26 11:30:44 2022 +0300 Fix MyPY. Reduce fuzz coverage commit 74fa40e10e043d98a04cbd00edeb1720599faf3b Author: Roman Babenko Date: Wed Oct 26 11:18:53 2022 +0300 fix commit e816a6827e157d62e819be5085dc5889cdb33c24 Author: Roman Babenko Date: Wed Oct 26 10:53:29 2022 +0300 fix tests credentials commit 9987e12ad7f34ac0160dc3a719929e049177f5af Author: Roman Babenko Date: Wed Oct 26 10:40:17 2022 +0300 Structure scan JSON and YAML commit 2ddfee320aa90b4421978be9c3fc09a4bbeb0a1f Author: Roman Babenko Date: Wed Oct 26 09:22:08 2022 +0300 scan struct commit 2024255d0e37813015c6cc3d56d7b5145ab594db Author: Roman Babenko Date: Wed Oct 26 08:32:21 2022 +0300 Commited forgotten sample commit c894f3180f4446cc97de0f48efb6ff054856f6dd Author: Roman Babenko Date: Tue Oct 25 23:58:12 2022 +0300 refuzzed commit f17cbbde8096c0f9d3e630ab1a1ec8cfd3606bc7 Author: Roman Babenko Date: Tue Oct 25 18:35:55 2022 +0300 fix commit 4e77dc971f404e2a7fa73fded013750873cf06fa Author: Roman Babenko Date: Tue Oct 25 18:25:15 2022 +0300 Improved encode test research commit 6594b60e12eae3cd6b53dfb06c9e4b9ad0f70b46 Author: Roman Babenko Date: Tue Oct 25 17:56:11 2022 +0300 Encoded test commit 5e2775dd12a6d1c2c2fb454ffe5e34b3d138876f Author: Roman Babenko Date: Tue Oct 25 17:52:40 2022 +0300 Separated test for docx commit 26d727b2423f397b948cbdae606299f2c526a755 Author: Roman Babenko Date: Tue Oct 25 17:44:43 2022 +0300 fix commit 4ea1137cdc89807ebf57048ba3074682741f01e3 Author: Roman Babenko Date: Tue Oct 25 14:58:27 2022 +0300 apply file type when not None commit cc2f6d7cb9ae3255826ec77d9fa88b2245bfce09 Author: Roman Babenko Date: Tue Oct 25 13:40:40 2022 +0300 Encoded data might be decoded * Update credsweeper/file_handler/struct_content_provider.py Co-authored-by: Kostiantyn Melnik --- .github/workflows/fuzz.yml | 5 +- cicd/mypy_warnings.txt | 2 +- credsweeper/app.py | 94 +++++++++++++++++- .../file_handler/data_content_provider.py | 40 ++++++++ .../file_handler/struct_content_provider.py | 45 +++++++++ .../096ec2ed3a11a2c4422fe445f86fc03963adf350 | Bin 492 -> 0 bytes .../0d36f4956137486243c60fadb4248f5b2f913d4c | Bin 0 -> 269 bytes .../19521a7555bd197646dd224e3890064c2f4cf9bf | 1 + .../1df555ea6ab8f834626d3002c2d3eaf7746450ae | 1 - .../2023cf6be65f362b3892de9f6f1f8b7eec51d3ef | Bin 167 -> 0 bytes .../248b61d9c284b005868a4c0a80854281f99a7bdb | 1 + .../25d7d0a438a8029f3d3a3d15163310a094573797 | Bin 0 -> 392 bytes .../276d98250ec4b5946949b9ac49c4c8649c92b422 | Bin 0 -> 1538 bytes .../2bc2e50780867fcce228c61c332e93862a7b8716 | Bin 323 -> 0 bytes .../2e0ed4d19120c0095af1391325e73d763e97268d | 1 + .../4c3dbea65792c1641b2c985d52aa0d2631a2c9c7 | Bin 0 -> 321 bytes .../4f48be8a6966a19677115dfe5119ac7aa2ae667e | Bin 1564 -> 0 bytes .../50127775b46b8b432f85928ab5d21cc9a5b9916a | 5 - ... 610152b2a025155f15cec304e817b6de274301ed} | Bin 1596 -> 1537 bytes .../67ddf5d2d4ee8b1862715ef44aeecd9f426dce8c | Bin 0 -> 45 bytes .../76dadf69d4d7273035b509c4fb172df1a1e3aa42 | 2 + .../77a11b466eeb44409a77ffbd135e944a51a0c967 | Bin 0 -> 1537 bytes .../7b328d9887fccefef2d299580e2d8d4b9ba3d901 | Bin 20 -> 0 bytes ... 7c6eecb1b52a22e187271bcfc4fab3f52c7bf8cd} | Bin 1546 -> 1546 bytes .../826bcabe227bcaf79bf5238c84be3fcf93827222 | Bin 1564 -> 0 bytes .../898ebe807cbb9dac8f0223f66cd82b736d79f2bc | Bin 0 -> 1557 bytes .../8e97bf37cf7ab00ab5b748b108e331a60394d956 | 1 + .../8eae777ab67ed2d99960854e6cd0c5a737ad4208 | Bin 0 -> 90 bytes .../90ad557dfb83213e837631d70ed888c7cd8bd2e0 | 1 + ... 91872b929eee06159ae9f2704dd7ce10c46613d0} | Bin 381 -> 320 bytes .../9252fd7a390bc4e1eaf6afd6eab7abedda07953e | Bin 0 -> 763 bytes ... 97fbb25a4fc896e423c47f54e3b1b8b4c79560e6} | Bin 1659 -> 1659 bytes .../9e17092d3faa95169b6eb6da9ac36d4650175281 | 4 + .../9e5b0e206c5e57b929e482e6ae5abf8b036367ef | 2 + ... a3bbfcce88dcd5e14053bcb5b772e0df581c4baf} | Bin 1781 -> 1781 bytes ... b4f57c1be7888ff72e68d0858ba92f9c68066b25} | Bin 1615 -> 1615 bytes .../bea99aa4703268b72289e7b88738c31160417b14 | Bin 0 -> 1821 bytes .../c35f59c3f064a5c96da46a4f88887d41a85dd669 | Bin 0 -> 161 bytes .../c3dcde8d12dcc6579f2c38bbd54e28a0200babce | Bin 0 -> 1788 bytes ... c7b7936a53fa182a83740574917b635b8647a4fa} | Bin 1666 -> 1612 bytes .../ccf24291cf3b79fdb76f5167ce906d88fc4f8f6d | Bin 22 -> 0 bytes .../cd4f0538db030c9794809b18f32321b125d67b28 | 1 - ... cd625ab558a1b28ad90d1083d64b2096a52f401d} | Bin 1604 -> 1604 bytes ... d2ec3276043232db9afb98075728e963a5b8e369} | Bin 1547 -> 1547 bytes ... dfe1dd788a4b4cad55dd9803c46d0aa272d665d7} | Bin 1549 -> 1606 bytes .../e10c04a62961bd591639c79e484993930e205132 | Bin 1615 -> 0 bytes .../e245e2f884af2d01bea253673200a29f51363fa2 | Bin 628 -> 0 bytes .../e5a8d3e8ac6b758fde8c6315baf28e825d029ea8 | Bin 169 -> 0 bytes .../e7f52f8df718abe22a791204ce57b71c6f2638e5 | Bin 1567 -> 0 bytes .../e82b7fbdf33f60dbc111d5b05b1f5f9b0e93c1c0 | Bin 1549 -> 0 bytes .../f2cb21a8411c6b0dfb2ce1fce303b61126dea477 | Bin 0 -> 1549 bytes ... f7cf03173db4f86b52c8d5bd6369387535d4bb60} | Bin 1540 -> 1540 bytes .../fd8463c8426b79d944d2ff54554982c98617c3c8 | Bin 981 -> 0 bytes tests/__init__.py | 6 +- tests/samples/binary.yaml | 17 ++++ tests/samples/struct.json | 1 + tests/test_main.py | 51 +++++++++- 57 files changed, 263 insertions(+), 18 deletions(-) create mode 100644 credsweeper/file_handler/struct_content_provider.py delete mode 100644 fuzz/corpus/096ec2ed3a11a2c4422fe445f86fc03963adf350 create mode 100644 fuzz/corpus/0d36f4956137486243c60fadb4248f5b2f913d4c create mode 100644 fuzz/corpus/19521a7555bd197646dd224e3890064c2f4cf9bf delete mode 100644 fuzz/corpus/1df555ea6ab8f834626d3002c2d3eaf7746450ae delete mode 100644 fuzz/corpus/2023cf6be65f362b3892de9f6f1f8b7eec51d3ef create mode 100644 fuzz/corpus/248b61d9c284b005868a4c0a80854281f99a7bdb create mode 100644 fuzz/corpus/25d7d0a438a8029f3d3a3d15163310a094573797 create mode 100644 fuzz/corpus/276d98250ec4b5946949b9ac49c4c8649c92b422 delete mode 100644 fuzz/corpus/2bc2e50780867fcce228c61c332e93862a7b8716 create mode 100644 fuzz/corpus/2e0ed4d19120c0095af1391325e73d763e97268d create mode 100644 fuzz/corpus/4c3dbea65792c1641b2c985d52aa0d2631a2c9c7 delete mode 100644 fuzz/corpus/4f48be8a6966a19677115dfe5119ac7aa2ae667e delete mode 100644 fuzz/corpus/50127775b46b8b432f85928ab5d21cc9a5b9916a rename fuzz/corpus/{6b78a49f2f81c15b54c1bc271903f8c721e7667e => 610152b2a025155f15cec304e817b6de274301ed} (65%) create mode 100644 fuzz/corpus/67ddf5d2d4ee8b1862715ef44aeecd9f426dce8c create mode 100644 fuzz/corpus/76dadf69d4d7273035b509c4fb172df1a1e3aa42 create mode 100644 fuzz/corpus/77a11b466eeb44409a77ffbd135e944a51a0c967 delete mode 100644 fuzz/corpus/7b328d9887fccefef2d299580e2d8d4b9ba3d901 rename fuzz/corpus/{c7832116c548a936d02dba5ba9ea32ca2a61855b => 7c6eecb1b52a22e187271bcfc4fab3f52c7bf8cd} (57%) delete mode 100644 fuzz/corpus/826bcabe227bcaf79bf5238c84be3fcf93827222 create mode 100644 fuzz/corpus/898ebe807cbb9dac8f0223f66cd82b736d79f2bc create mode 100644 fuzz/corpus/8e97bf37cf7ab00ab5b748b108e331a60394d956 create mode 100644 fuzz/corpus/8eae777ab67ed2d99960854e6cd0c5a737ad4208 create mode 100644 fuzz/corpus/90ad557dfb83213e837631d70ed888c7cd8bd2e0 rename fuzz/corpus/{05d0262facb829f463770b7b5bebfa0ae2b91cd1 => 91872b929eee06159ae9f2704dd7ce10c46613d0} (56%) create mode 100644 fuzz/corpus/9252fd7a390bc4e1eaf6afd6eab7abedda07953e rename fuzz/corpus/{b272cbeed5b4fd5774ab0c108e3d05bc67ae734f => 97fbb25a4fc896e423c47f54e3b1b8b4c79560e6} (91%) create mode 100644 fuzz/corpus/9e17092d3faa95169b6eb6da9ac36d4650175281 create mode 100644 fuzz/corpus/9e5b0e206c5e57b929e482e6ae5abf8b036367ef rename fuzz/corpus/{6b8737b1e00c72a699151913c883a2a5df41dd76 => a3bbfcce88dcd5e14053bcb5b772e0df581c4baf} (70%) rename fuzz/corpus/{b9c8aa807a927cee3b6f4ec1475679385db74f9a => b4f57c1be7888ff72e68d0858ba92f9c68066b25} (84%) create mode 100644 fuzz/corpus/bea99aa4703268b72289e7b88738c31160417b14 create mode 100644 fuzz/corpus/c35f59c3f064a5c96da46a4f88887d41a85dd669 create mode 100644 fuzz/corpus/c3dcde8d12dcc6579f2c38bbd54e28a0200babce rename fuzz/corpus/{e1182b80ce7fddf42850af97edf022a5fd61e421 => c7b7936a53fa182a83740574917b635b8647a4fa} (78%) delete mode 100644 fuzz/corpus/ccf24291cf3b79fdb76f5167ce906d88fc4f8f6d delete mode 100644 fuzz/corpus/cd4f0538db030c9794809b18f32321b125d67b28 rename fuzz/corpus/{c5f4facd0a5b1a657433ca68154e33a7e48bb663 => cd625ab558a1b28ad90d1083d64b2096a52f401d} (92%) rename fuzz/corpus/{eeaaa652952fef8117a481d1d7d2438fc815f646 => d2ec3276043232db9afb98075728e963a5b8e369} (65%) rename fuzz/corpus/{1188838daba5b306b8c7bf4142b0e33e19567be1 => dfe1dd788a4b4cad55dd9803c46d0aa272d665d7} (69%) delete mode 100644 fuzz/corpus/e10c04a62961bd591639c79e484993930e205132 delete mode 100644 fuzz/corpus/e245e2f884af2d01bea253673200a29f51363fa2 delete mode 100644 fuzz/corpus/e5a8d3e8ac6b758fde8c6315baf28e825d029ea8 delete mode 100644 fuzz/corpus/e7f52f8df718abe22a791204ce57b71c6f2638e5 delete mode 100644 fuzz/corpus/e82b7fbdf33f60dbc111d5b05b1f5f9b0e93c1c0 create mode 100644 fuzz/corpus/f2cb21a8411c6b0dfb2ce1fce303b61126dea477 rename fuzz/corpus/{eaacd823db36dbd3ca4241c26eaf311de7528af4 => f7cf03173db4f86b52c8d5bd6369387535d4bb60} (93%) delete mode 100644 fuzz/corpus/fd8463c8426b79d944d2ff54554982c98617c3c8 create mode 100644 tests/samples/binary.yaml create mode 100644 tests/samples/struct.json diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml index 48affa03f..e5961c2d0 100644 --- a/.github/workflows/fuzz.yml +++ b/.github/workflows/fuzz.yml @@ -78,8 +78,9 @@ jobs: run: | COVERAGE=$(tail -1 report.txt | awk '{print $6}' | tr --delete '%') # additionally check correctness of the value - should be an integer - if ! [ 75 -le ${COVERAGE} ]; then - echo "Fuzzing coverage '${COVERAGE}' does not satisfy the limit 74%" + FUZZ_COVERAGE_LIMIT=75 + if ! [ ${FUZZ_COVERAGE_LIMIT} -le ${COVERAGE} ]; then + echo "Fuzzing coverage '${COVERAGE}' does not satisfy the limit ${FUZZ_COVERAGE_LIMIT}%" exit 1 fi diff --git a/cicd/mypy_warnings.txt b/cicd/mypy_warnings.txt index 503915c85..ce61fd05b 100644 --- a/cicd/mypy_warnings.txt +++ b/cicd/mypy_warnings.txt @@ -1 +1 @@ -Success: no issues found in 83 source files +Success: no issues found in 84 source files diff --git a/credsweeper/app.py b/credsweeper/app.py index e0699044c..58fe45440 100644 --- a/credsweeper/app.py +++ b/credsweeper/app.py @@ -7,11 +7,12 @@ import signal import sys import zipfile -from typing import List, Optional, Union +from typing import List, Optional, Union, Tuple, Any import pandas as pd -from credsweeper.common.constants import KeyValidationOption, ThresholdPreset, RECURSIVE_SCAN_LIMITATION +from credsweeper.common.constants import KeyValidationOption, ThresholdPreset, RECURSIVE_SCAN_LIMITATION, \ + DEFAULT_ENCODING from credsweeper.config import Config from credsweeper.credentials import Candidate, CredentialManager from credsweeper.file_handler.byte_content_provider import ByteContentProvider @@ -21,6 +22,7 @@ from credsweeper.file_handler.file_path_extractor import FilePathExtractor from credsweeper.file_handler.files_provider import FilesProvider from credsweeper.file_handler.string_content_provider import StringContentProvider +from credsweeper.file_handler.struct_content_provider import StructContentProvider from credsweeper.file_handler.text_content_provider import TextContentProvider from credsweeper.scanner import Scanner from credsweeper.utils import Util @@ -356,13 +358,20 @@ def data_scan(self, data_provider: DataContentProvider, depth: int, recursive_li new_limit = recursive_limit_size - len(decoded_data_provider.data) candidates.extend(self.data_scan(decoded_data_provider, depth, new_limit)) + elif data_provider.represent_as_structure(): + struct_data_provider = StructContentProvider(struct=data_provider.structure, + file_path=data_provider.file_path, + file_type=data_provider.file_type, + info=f"{data_provider.info}|STRUCT") + candidates.extend(self.struct_scan(struct_data_provider, depth, recursive_limit_size)) + elif data_provider.represent_as_xml(): - struct_data_provider = StringContentProvider(lines=data_provider.lines, + string_data_provider = StringContentProvider(lines=data_provider.lines, line_numbers=data_provider.line_numbers, file_path=data_provider.file_path, file_type=".xml", info=f"{data_provider.info}|XML") - candidates.extend(self.file_scan(struct_data_provider)) + candidates.extend(self.file_scan(string_data_provider)) else: # finally try scan the data via byte content provider @@ -378,6 +387,83 @@ def data_scan(self, data_provider: DataContentProvider, depth: int, recursive_li # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # + def struct_scan(self, struct_provider: StructContentProvider, depth: int, recursive_limit_size: int) -> \ + List[Candidate]: + """Recursive function to scan structured data + + Args: + struct_provider: DataContentProvider object may be a container + depth: maximal level of recursion + recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack + """ + candidates: List[Candidate] = [] + logger.debug("Start struct_scan: depth=%d, limit=%d, path=%s, info=%s", depth, recursive_limit_size, + struct_provider.file_path, struct_provider.info) + + if 0 > depth: + # break recursion if maximal depth is reached + logger.debug("bottom reached %s recursive_limit_size:%d", struct_provider.file_path, recursive_limit_size) + return candidates + + depth -= 1 + + items: List[Tuple[Union[int, str], Any]] = [] + if isinstance(struct_provider.struct, dict): + items = list(struct_provider.struct.items()) + elif isinstance(struct_provider.struct, list): + items = list(enumerate(struct_provider.struct)) + else: + logger.error("Not supported type:%s val:%s", str(type(struct_provider.struct)), str(struct_provider.struct)) + + for key, value in items: + if isinstance(value, dict) or isinstance(value, list): + val_struct_provider = StructContentProvider(struct=value, + file_path=struct_provider.file_path, + file_type=struct_provider.file_type, + info=f"{struct_provider.info}|STRUCT:{key}") + candidates.extend(self.struct_scan(val_struct_provider, depth, recursive_limit_size)) + + elif isinstance(value, bytes): + bytes_struct_provider = DataContentProvider(data=value, + file_path=struct_provider.file_path, + file_type=struct_provider.file_type, + info=f"{struct_provider.info}|BYTES:{key}") + new_limit = recursive_limit_size - len(value) + new_candidates = self.data_scan(bytes_struct_provider, depth, new_limit) + candidates.extend(new_candidates) + + elif isinstance(value, str): + str_struct_provider = DataContentProvider(data=value.encode(encoding=DEFAULT_ENCODING), + file_path=struct_provider.file_path, + file_type=struct_provider.file_type, + info=f"{struct_provider.info}|STRING:{key}") + new_limit = recursive_limit_size - len(str_struct_provider.data) + new_candidates = self.data_scan(str_struct_provider, depth, new_limit) + candidates.extend(new_candidates) + + # use key = "value" scan for common cases like in Python code + if isinstance(struct_provider.struct, dict): + str_provider = StringContentProvider([f"{key} = \"{value}\""], + file_path=struct_provider.file_path, + file_type=".py", + info=f"{struct_provider.info}|STRING:`{key} = \"{value}\"`") + extra_candidates = self.file_scan(str_provider) + if extra_candidates: + found_values = set(line_data.value for candidate in candidates + for line_data in candidate.line_data_list) + for extra_candidate in extra_candidates: + for line_data in extra_candidate.line_data_list: + if line_data.value not in found_values: + candidates.append(extra_candidate) + break + + else: + logger.debug("Not supported type:%s value(%s)", str(type(value)), str(value)) + + return candidates + + # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # + def post_processing(self) -> None: """Machine learning validation for received credential candidates.""" if self._use_ml_validation(): diff --git a/credsweeper/file_handler/data_content_provider.py b/credsweeper/file_handler/data_content_provider.py index efa21263a..5997d914d 100644 --- a/credsweeper/file_handler/data_content_provider.py +++ b/credsweeper/file_handler/data_content_provider.py @@ -1,8 +1,10 @@ import base64 +import json import logging import string from typing import List, Optional +import yaml from credsweeper.common.constants import DEFAULT_ENCODING from credsweeper.file_handler.analysis_target import AnalysisTarget from credsweeper.file_handler.content_provider import ContentProvider @@ -28,6 +30,7 @@ def __init__( info: Optional[str] = None) -> None: super().__init__(file_path=file_path, file_type=file_type, info=info) self.data = data + self.structure = None self.decoded: Optional[bytes] = None self.lines: List[str] = [] self.line_numbers: List[int] = [] @@ -42,6 +45,43 @@ def data(self, data: bytes) -> None: """data setter""" self.__data = data + def represent_as_structure(self) -> bool: + """Tries to convert data with many parsers. Stores result to internal structure + Return True if some structure found + """ + try: + text = self.data.decode(encoding='utf-8', errors='strict') + except Exception: + return False + # JSON + try: + if "{" in text: + self.structure = json.loads(text) + logger.debug("CONVERTED from json") + else: + logger.debug("Data do not contain { - weak JSON") + except Exception as exc: + logger.debug("Cannot parse as json:%s %s", exc, self.data) + self.structure = None + if self.structure is not None and (isinstance(self.structure, dict) and 0 < len(self.structure.keys()) + or isinstance(self.structure, list) and 0 < len(self.structure)): + return True + # # # YAML - almost always recognized + try: + if ":" in text: + self.structure = yaml.load(text, Loader=yaml.FullLoader) + logger.debug("CONVERTED from yaml") + else: + logger.debug("Data do not contain colon mark - weak YAML") + except Exception as exc: + logger.debug("Cannot parse as yaml:%s %s", exc, self.data) + self.structure = None + if self.structure is not None and (isinstance(self.structure, dict) and 0 < len(self.structure.keys()) + or isinstance(self.structure, list) and 0 < len(self.structure)): + return True + # # # None of above + return False + def represent_as_xml(self) -> bool: """Tries to read data as xml diff --git a/credsweeper/file_handler/struct_content_provider.py b/credsweeper/file_handler/struct_content_provider.py new file mode 100644 index 000000000..ad3f93f05 --- /dev/null +++ b/credsweeper/file_handler/struct_content_provider.py @@ -0,0 +1,45 @@ +import logging +from typing import List, Optional, Any + +from credsweeper.file_handler.analysis_target import AnalysisTarget +from credsweeper.file_handler.content_provider import ContentProvider + +logger = logging.getLogger(__name__) + + +class StructContentProvider(ContentProvider): + """Dummy raw provider to keep structured data + + Parameters: + struct: byte sequence to be stored. + file_path: optional string. Might be specified if you know true file name where lines were taken from. + + """ + + def __init__( + self, # + struct: Any, # + file_path: Optional[str] = None, # + file_type: Optional[str] = None, # + info: Optional[str] = None) -> None: + super().__init__(file_path=file_path, file_type=file_type, info=info) + self.struct = struct + + @property + def struct(self) -> Any: + """obj getter""" + return self.__struct + + @struct.setter + def struct(self, struct: Any) -> None: + """obj setter""" + self.__struct = struct + + def get_analysis_target(self) -> List[AnalysisTarget]: + """Return nothing. The class provides only data storage. + + Raise: + NotImplementedError + + """ + raise NotImplementedError() diff --git a/fuzz/corpus/096ec2ed3a11a2c4422fe445f86fc03963adf350 b/fuzz/corpus/096ec2ed3a11a2c4422fe445f86fc03963adf350 deleted file mode 100644 index 786b2d26d2393ba06d64cd8d3a15d254ed5038ed..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 492 zcmbu5NovC|6o%ccPhqmiTJoTwkcC4VNTJj;w4s4wBuAEo2f5BroI~_lU1r-`Qw>K7y{x5vmRawBIpK`6m{KWu-b{F$7 zHVJ&R%J(^aQ2{H}WwU1HmD-S?WRJJ?w2Tg~yXlid2|?J487Fkjy~t+Nb{QaR0^`ZU Y4zKDQ73!qbsfDy0re;N3hX<*OPX}JWV*mgE diff --git a/fuzz/corpus/0d36f4956137486243c60fadb4248f5b2f913d4c b/fuzz/corpus/0d36f4956137486243c60fadb4248f5b2f913d4c new file mode 100644 index 0000000000000000000000000000000000000000..218b14a3584d40f6103bfe3aec907907cf189360 GIT binary patch literal 269 zcmY#mWME)W0}%`gV33$>#8sRcpPpHiY|K@h9iNj~2Ii%@RwU-98!B;SSU5TcXB#G( zC7nN?W^R_=mtm1@mIPFvnxCGTT$Grcos$ZpECL+OqH@b}EekUP5(_e|fY^=8C9~Kf zpdc~Kx6-K~xvIR>Dbm=|!otklG%Yd7!ZIbvDB0+_#d-sa&cqZG!!+B_So5gLT(i8) z_xaD}t~1F=g;-;1R{0+c%#sWZ(lblqOY*Z*^Av0ql+rV+h;jx41JKJiz#ewX1Gz4~ IDm^m~0Qa(8oB#j- literal 0 HcmV?d00001 diff --git a/fuzz/corpus/19521a7555bd197646dd224e3890064c2f4cf9bf b/fuzz/corpus/19521a7555bd197646dd224e3890064c2f4cf9bf new file mode 100644 index 000000000..b3edf8438 --- /dev/null +++ b/fuzz/corpus/19521a7555bd197646dd224e3890064c2f4cf9bf @@ -0,0 +1 @@ +{"t:ire\"l#b.ls\":\"ap\n"} \ No newline at end of file diff --git a/fuzz/corpus/1df555ea6ab8f834626d3002c2d3eaf7746450ae b/fuzz/corpus/1df555ea6ab8f834626d3002c2d3eaf7746450ae deleted file mode 100644 index 6ac557290..000000000 --- a/fuzz/corpus/1df555ea6ab8f834626d3002c2d3eaf7746450ae +++ /dev/null @@ -1 +0,0 @@ - diff --git a/fuzz/corpus/2023cf6be65f362b3892de9f6f1f8b7eec51d3ef b/fuzz/corpus/2023cf6be65f362b3892de9f6f1f8b7eec51d3ef deleted file mode 100644 index 492ed74aee7a9faaa46f2684a0a2ed3517db9b14..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 167 zcmWIWW@hnX0D+tUm(V3{PordjY!K!IlEo#JIjMTd#l@i^oD9r+ZdoU9%}y`MFU?D_ zQY+8MEJ?NIstxdFWRhd%0vUm9hNGjmQ)q@^m|3P_va5SRl7)GOsb5uiL`GGpd1{D( lEtf73fJ`i{;AUWCdBM!U044&wS=m5FF#@4Kkd6m&7yt-tC&&N* diff --git a/fuzz/corpus/248b61d9c284b005868a4c0a80854281f99a7bdb b/fuzz/corpus/248b61d9c284b005868a4c0a80854281f99a7bdb new file mode 100644 index 000000000..1de8b369d --- /dev/null +++ b/fuzz/corpus/248b61d9c284b005868a4c0a80854281f99a7bdb @@ -0,0 +1 @@ +PK4 \ No newline at end of file diff --git a/fuzz/corpus/25d7d0a438a8029f3d3a3d15163310a094573797 b/fuzz/corpus/25d7d0a438a8029f3d3a3d15163310a094573797 new file mode 100644 index 0000000000000000000000000000000000000000..a56479bd8eedd273ebbdd6e66a96b2bbb82f5dd3 GIT binary patch literal 392 zcma*fJyU{U00(eqzJM!poCll`Jl=U70^*~OfFPzLh!CPefPlUlL2qD)f+LsOtE;-{Ba{6FftZj@1JxB6&-;7Asf zkEaLCs{v+lt0~pRT0dk}!VOcvb)yrUF1H!(9PsSU&MtXRR-pRZI3ADM)wjp@W*k&@ z9CWdreLj;U`S#pt#EoVEJ~8b4cV_9;b~{pRr-VIqPipLX4neZheV;Y#WG9L0B8Zk2 zLnTg5`$P9;zw$SH5vpxcwOl5XLeDW74uBvc51vF=0P)EhEvGclhY|-?aXBEgQiA>a Ll)av~nL%S;wD@?l literal 0 HcmV?d00001 diff --git a/fuzz/corpus/276d98250ec4b5946949b9ac49c4c8649c92b422 b/fuzz/corpus/276d98250ec4b5946949b9ac49c4c8649c92b422 new file mode 100644 index 0000000000000000000000000000000000000000..5458cdb727686d95b2e8d4035496d37082ef7fa3 GIT binary patch literal 1538 zcmbtUOHbQC5Dupz|Ad~bqF#z(@5AvU5>lPS4kiJcgp~HUws&KbhfTa`lAhaBPd)U? zwLhg&e+T!T;7T1J4@4wrJCe1ZzMY-Ja0SqOl;Px}jlgAVpQ=lmtD}rI~CEdCX__hz#f?8TF)+1SF7$P5O`byf&87EQC8v zs3BoT>@={8TakNU%40xS>!bmU!>ZX*C+uBlB^d=l#WYF9uLdvHtQJ~f^+o^?73*<} zCiQr+2+y&Xb0;bPuCK4Iu9hdZmS_!p`;i5vW7eG(VaNc6$`ps*|;dYj7nb*O8~W*l3quzHa27Bn#7IN{V*#L(^$k z^D9^`E9q4@ z<~jRI{&`Dt4$IPZ0qBYv7`Iaa)Yb77dTu4n#=hxTr!{1Cbk{pRZ10!e3n|I6e4awl z!5&-tK*QcjAUZ+)&2rQj)V|**=iPs zna7lm-DT}WbX}hwilW%$BeCT=_EY-MKBh(Im}DUtC(U2f`Ry~#?{SLh+@I%Bj~k6u ze_F5#(t@@ego5z{TTn1+m;zCa&Cp;y09h8STJ(m46RBPB diff --git a/fuzz/corpus/4c3dbea65792c1641b2c985d52aa0d2631a2c9c7 b/fuzz/corpus/4c3dbea65792c1641b2c985d52aa0d2631a2c9c7 new file mode 100644 index 0000000000000000000000000000000000000000..97500787da09d5feb67058732b0eba525151f875 GIT binary patch literal 321 zcmeZ#ws)y(kb+GjmtE29`I$wj$t9^NT!vg+86_nJ#a2O%|NlEDIy?Eey1P0DIRY7n zdtKc{S27};tWO%dM{QaukiMCOm_5!*jtdPudi>Fomy!HRScv4 zLx3Qd{}m>B1q2*}vkeWBiy0V-@+&HVS`};+96cc_Y?AU*D(xckOBE7}QWcU?GxO5X TJf4s6GfulwGsEW1svPS;M+%21=RdLdAf$agvBk!`Oc2m@-gi|5m zpwnE;^2*V^NjqJeuW@ZhsBHj%fQVu>`lQCoYP-yjB3^cp#c#5j%O7I>!~3(7SpVmj zj`WZ%1LX{%gkYh*AB>J%hUYB&ha`ri_V`zuaA|rkVt)vt7ILRm^=<#rvFihdg%65> z=d+Clw}Zez2)`3$1m+98ClgNijAQzjZS&K)Ps|{uD|T7z)IYIhVr_vPVCa&p3Oxvy z{-3^F;Fp3%WRWvoks=dIA)3d<$(sQ!9aKw|>fv}ip1X1HPNiChFw4J9nV7GfWFntk z14xw#)u?5w8X_$u5y`Uhs@!rE$5AMvT0x^oa}-I@C{>Z - cackl/City> - peace_for_ukraine - - \ No newline at end of file diff --git a/fuzz/corpus/6b78a49f2f81c15b54c1bc271903f8c721e7667e b/fuzz/corpus/610152b2a025155f15cec304e817b6de274301ed similarity index 65% rename from fuzz/corpus/6b78a49f2f81c15b54c1bc271903f8c721e7667e rename to fuzz/corpus/610152b2a025155f15cec304e817b6de274301ed index 620e4ee97f2386860f3b1eec7501faf099df6a48..dcf4ff16e7d480ce357363d98d067816e480428e 100644 GIT binary patch delta 419 zcmZXQy-wUf0E9!3Qf#PGZ$ydk+Fsw;Mxt;yJMr1zJsWdS`e%)g`&plNFUDyiBuWaT z6D@B*LC@n*LBT6vq;#2fzL}BcY3pk1@4<=-Hiz^wnO8H)L7S+i1Ze9{f{Py}5Nt{p z`^yFZ`yT;|y5mbBD!zx%`9zefRLa;K3C6PoH8e%(jHRehb4|_F6k|m(O^xPx-%v2_ zX?l_6)G&37O;yYL8mf|Pnw(RP?1h+!)~S;@ZCE{(dXg;3T`!yc?~w>g70;DzU(r>t z4Py@nI4pyqg;sJ>gvXw29v)a>zbQ^0zrTDYmgicoJsxOwYy@O<82##o64b! ze|wHhioytX{_M2;%y&K8`uTi!9TfK_h;@1_?}ObB*+&ByM%Mc4=FJF_aBuPkVRw#z zX$wW^PA`ZvR-L-k7mOOL9G8U1!R5epmfUZK17Qy%UMuJbM}gOj#KtGTRpV?K8z1!g QcH`UnVe|D%7xRhy4tIZt*#H0l delta 163 zcmZqV*~6pg?inBC>K_lJ+#H=%e3RX(4e$E;UKu{%=qU}}bZ>y literal 0 HcmV?d00001 diff --git a/fuzz/corpus/76dadf69d4d7273035b509c4fb172df1a1e3aa42 b/fuzz/corpus/76dadf69d4d7273035b509c4fb172df1a1e3aa42 new file mode 100644 index 000000000..a5bda3b1c --- /dev/null +++ b/fuzz/corpus/76dadf69d4d7273035b509c4fb172df1a1e3aa42 @@ -0,0 +1,2 @@ +="cackl/Cit䝉 ace_for_ukraine + \ No newline at end of file diff --git a/fuzz/corpus/77a11b466eeb44409a77ffbd135e944a51a0c967 b/fuzz/corpus/77a11b466eeb44409a77ffbd135e944a51a0c967 new file mode 100644 index 0000000000000000000000000000000000000000..ae54bdd80b02cd103c2f80bf3a74e4aa0bc1f7f1 GIT binary patch literal 1537 zcmc&zy>HV%6hAiPf1nami6}!w<2%=OY>R-6l%!EZD+1w5NS5QTTs~~ebw$d;#KhEr zl@&2DGouO#RG|L=6M{}K8FeDu#X)vcw~;DUJjwREchB#B_j~ULZr6$~4sGAXZ=v4R&Q3;9YJDd*af;@ zB+u_Sj!(|rezieRYqgn3l7u`woSZ!(%gmqbV%mpjnMb&9h8?`T*=XDdup4w&&~UB9 zmj(j^_mvJF07AYQ8fLF=6N+8iIS&9(0f2y(^UEy{|MD(*Mu3wxr44hjKN^wFu`H_q zFc)AB6!%qTm3=a0lZ4AEWYxQT0%dI_79l=+W4Ii0&o*ze7UcE4PJm8Pp{w=^hPmTgIV!u$Vw zTcRP$j)*u>7A1#6&=5G$lG{1A7>BZwJ0dMI&5O}>(3UDv+c6*B@3vT0?v)GdH3oQC zxHFd5qoYbE@)|rH%ZC<-kn-;B&NkysRPWpVy)8F0FSDj)(pAhI?2R>XFS1ZQ*UDz<-PU;qF!dIYin diff --git a/fuzz/corpus/c7832116c548a936d02dba5ba9ea32ca2a61855b b/fuzz/corpus/7c6eecb1b52a22e187271bcfc4fab3f52c7bf8cd similarity index 57% rename from fuzz/corpus/c7832116c548a936d02dba5ba9ea32ca2a61855b rename to fuzz/corpus/7c6eecb1b52a22e187271bcfc4fab3f52c7bf8cd index f52f859e55d319838c13dc9c754cd0eafffcdb9e..8e3704d47e1b8b88a0a66c641299ebeb772a5637 100644 GIT binary patch delta 420 zcmeC;>EfBNov~`-j?j9GY_lXogY?Xj_>%nW)I5db3QFmjMXCAeTu8vxzgj6D$bUR% zCs$%Yrfo@4X=<$&k^(L+m=c6MSXxvRL?sry1nK$DRg_;*nVnjxV5{KhS(O-EX~3Lp zY<2IVBGwT(U5VnBkM^3Yh!mt delta 113 zcmeC;>EfBNosnVUj?l>;n2Oj+bU|REzBmt%;_~(MbdGTJbeinLY$2uq1)=(RiAg!B z3MKgpX_vzed=%Mtw?aJ^#1HuX}^B-F@7rYVd|SdetY1C-kJ*oB%XbC_Vf%A6rIf|sAl;JQeI=kCrOeN z0k|C?;jf>cr&lpj{|y|=u%_<;VsJv4gCL_wHBBkY#hO$ssIrnQ9(^e*<(jO;fxdXQ z1RS&=NtHsgL+x>hHf1F+1fj!L!~KI!*yODSwN7MW$kokMtuCeXnuuO3=Xzt%rI1my zoZy2u0wlQUU}~O7c~5G{sfvvBO*9bn0PgMFF9c>d85Is0ZMS<|52`90@vZwV7kX?a z&E=a;IG)&AwXr{x+q-R{*h#B2BntDieG0&pAU z^Qs!n(JeA{HAmO@aYG-6dy+rYH0E=I_ce9PVEdDnF6a{1a>s7|Y?ja6lhG#h3bgZ*yDq8zVGRd!`9%t$YZkt zooDPE4-K9(1tv=~JeAFI7Bo4-Xmj~op0jz*vTep>VcX{O0?+UalgnkH%?a>I-uX?V z>Eu4#dgYqmO7`2(#yv-KEJq7@8n1Hf%LOmcnbCmCen4C!=|ECk9S*6DlIU%CrJ?0J z$U)fjnEaL$=Qdh=^!4l%*YUbzxm>jJJ4FYckM1;~M*y$_;%1Ir|9Se=z?3Rw3HRFh z`T3&M6wx-?Q-2CGCCC|yoz(BW1}jjbaG^)2a&D>3LEUGL)5%sw=`Jqw?4 zEITH?0RRQ!ApUyOLod7!m(m=mY9T;rCpg~vl%7`k9Phdi^~?@(6RmRp(h9qbnWSTa zKLo_xIqC0b7ZfgM7|ytn`^1HV7YPv6*2MityGoT7wkV2(7_^&qLXn#q*Gd!@8X zRs6CV2j0Ct9d6>(ohBeZ=wo!BJFYCkZl=I6Ytl&^KXMUelX_8}t)>yd>}W*vr=(A4 z;w5S?NqF4Qb)Qy?gfY^`gBV9X3lkh7PMZ~I)-<(Ox2$#mk@>ST7N}-jCs6;(&7JZ1 zSO1y)6yTA##&SkHG6{@dB8#q&-Cl(( z)k{Ix`KMc)p2`&`77^$1f(nzO&yE|7g6>X4UpO~*)GxZ@F7{ez zQW9OSg{I7rKT}9X95>~g2+N8{h$M%Kl_L2QD%AMI@ME5*vz0Ag&e&Yam!K@i?&SCv z+-9|GiYcL7hjh8#KE9q;+!W)&CDNe)z^2pcV*0<|g`SuAQpJjr4zyW{7zuACbY=88 z-*}Rs+?er6~liZ@c(aX mV&IjMlv0sjk*MpIXpm&c00M_sF)(C<6o5F7w&vL?C;$NB?jcA3 literal 0 HcmV?d00001 diff --git a/fuzz/corpus/90ad557dfb83213e837631d70ed888c7cd8bd2e0 b/fuzz/corpus/90ad557dfb83213e837631d70ed888c7cd8bd2e0 new file mode 100644 index 000000000..eeab189f6 --- /dev/null +++ b/fuzz/corpus/90ad557dfb83213e837631d70ed888c7cd8bd2e0 @@ -0,0 +1 @@ +{"test.domain.io/actual-configuration": "{\"apiVersion\":\"v1\",\"data\":{\"smtp-password\":\"\",\"wordpresdpress-wordpress\",\"chart\":\"wordp:\"wordpress\"},\"name\":\"wordpress-wopaque\"}\n"} \ No newline at end of file diff --git a/fuzz/corpus/05d0262facb829f463770b7b5bebfa0ae2b91cd1 b/fuzz/corpus/91872b929eee06159ae9f2704dd7ce10c46613d0 similarity index 56% rename from fuzz/corpus/05d0262facb829f463770b7b5bebfa0ae2b91cd1 rename to fuzz/corpus/91872b929eee06159ae9f2704dd7ce10c46613d0 index c638cbcc45b17aa0edbb05b73cafb95e4f20b955..b241bef4f264cf2cea823c0272a30f94b7de13f3 100644 GIT binary patch delta 31 ecmey%bbv|NCMWYtZ8A1AQy4H>nDl#R_@(?w;{M UuKw}P{{GwIL;U}56xL+~0E~4WKmY&$ diff --git a/fuzz/corpus/9252fd7a390bc4e1eaf6afd6eab7abedda07953e b/fuzz/corpus/9252fd7a390bc4e1eaf6afd6eab7abedda07953e new file mode 100644 index 0000000000000000000000000000000000000000..0e25d6a842507de59930f2a2c5a755c27b21f702 GIT binary patch literal 763 zcmZ>(($~LjRiy_cZZQ0pI(pMFJUCt*h`n7S6%O3LJ1@5?PcJvIAk%6=!h+ii61fZv zjda~JgHrR|Gu^RB0#)kk>svACh!nc??{|g$BvR1-kBz zLH_QZuFgS@&Q3nA?oh_zURQU|_?*nL)cEwwqSXBK%;ciPNje~)K!GVFn zfZ;z7 zoO_kO&0EWAXn2z4ZM2^qkbv;#3CJL@sZT*R=c&9RiuI@&9SbRVy_H zjjNgyL-Ai2RS-B`MA1+8Jk5}_cF3t8mF0< JPrlA32LLOK5x@Wd delta 312 zcmey$`;~XYW5&s~%qjH+|NlD%%_9gHpa7Rbg#}HQqZ`aHKm&{^|fP%y@ z-%6)~l^r#D+IiH^~zsMOKbB(W*J5&BO|la#ALIiq!cqt6HBu+bFj=mI diff --git a/fuzz/corpus/bea99aa4703268b72289e7b88738c31160417b14 b/fuzz/corpus/bea99aa4703268b72289e7b88738c31160417b14 new file mode 100644 index 0000000000000000000000000000000000000000..7eddd1eb46ddc79640f2d6b364e19b627aec9f10 GIT binary patch literal 1821 zcmZ`(3piA17(VVoB1Enc#pD*DW)aGL6lP{gB*TzfJPf&HTyrZ#N=?Sd6g4F=E*UcG z7K(ailp!?iwAu_?Zn^L78I{V~bDsA*|MQ&lzW?|C=YL;EdmdgPfOD||hY@RouZ0hg z2fV}3J~~Lm7Jz45qY=eHM3gYV#rr1@0IU_qbviDYjd*mwUt|&yBK1nEdvpyor07*e%u`?Iqi-j?G`mzwYt*+H zSYW9`a#YjUpg;rlAk)lQNV2WkVUfQEV9)i2O*1h3fQS5W3}xVzN(aVkvO}^Nv4F5% z2^S4Lt6b4DjDujUu+Tb1;Mqx1=8Hinj^?mSTNdM~R5oemqnC9f zHl~(T<62r*~toH&R=a4@nrA>L4Xiq4yG>`T9!sO!(P}Qix|(qvEP^ zr!=Mv>Z}PZXjOIrgbI} zqGHr@m|?-_EMkU*)XoXpR4RtWHe0r+jodDGT2l83CqNKFMj_neRkh4 z;>l=fMjhpld_YEePai3>?5-+b*CC@wC}#P9@`47hXrZ&ECQLWs)GbDUN%ov$y{zye zx#_VF{KE{)g^>KzmQFA;RN0y?o3B38W0GCXdf+f-{=wKPHcdfc{#2t_^!^x&9n8Rw zx5-V-MJ`Ip(s&)xMc83UJJKIXWq$p8<799`mkFsyyzB&jd+0v&q8vNzH|l8zaXPIe zY|(sTL<(;%xcgE)Og60)AXF2{^h*ODTX3oEi;X@-X3x*tz`7zMN+k9!IMMryRxYfz z5?!a`omTmOHjfB4zMmH~00&#ZBkvOt92~9f9fnzV4aYef4m}1Pj&Zc-;#TDOKkvw_ z*_jKh35lHHx|8?-o=sfzMeIu=a4h|ES3io)fcE^MM^J8){v7T3_&Kw3gLfhLgo1ts zfbR<*JN5Rb(Alu?AHHl(Wg`OPG!WnNg%eNu>!A|8uN%d|hVA)2(MSp#q2TZ80RtQ0 zfJ4_?r_b)~MsctS{mZ_-1nF9oBERadnZ(7Rul<-lkT&#RYVei%Ya~0sGQ6oI{8t^{ zwh{gW*Q5l=UAu0;vXC%mJ?tv`9IJs$KHf;B#?wSjQ#`T7*yyI6_ijx^7%2Xu#LV}` zK_NB;!YHIzN<4|XN5LxEs9^YRclyfNr16rL+fUIZLMRi(t9KsmNs8AZF&%o1FXD=m z+}woK?Rk6mLCmv{8J(mU`QSS1pU;`}jWkm!^Tn!Kvv^=coh8FX<1L$BmFT6nDc-zj ze|aTwd%L*yCo7LubZ)(roTz4}JiB@k6)kdNzk~G5Z3`qkwz$Ysa6S)4Z61S&vaJVu zU9R9u8K2^46S(cB_hnWO62$41Q$!+VTS1ZZ!|c*yI|LQ+%+GG)f?4njj4;E>C5W*% zUMYrV*Ur*X^$PJ0dYRV)PxiN}BZqU(z7~Ye$t%d3md%`<$6vprU+?Tj=q)$F_jT5y zJEvakDp6}oxeobGr5FpdRtyu-q_AtuGuJS*Pcoluf-(pBs4p{s>OEWAEN_WS$NER W2+h3zg^EjT8zZM&)3bxpsPHd?OV=L& literal 0 HcmV?d00001 diff --git a/fuzz/corpus/c35f59c3f064a5c96da46a4f88887d41a85dd669 b/fuzz/corpus/c35f59c3f064a5c96da46a4f88887d41a85dd669 new file mode 100644 index 0000000000000000000000000000000000000000..a0216d14bdaf2646edc98f46ec116331ec7cbeb4 GIT binary patch literal 161 zcmWIWW@hnX0D+tUm(V3{PordjY!K!IlEuZvp&^_M%zJKGCvVM8FUl{?OR-Wb&&VuE zwdSe~@MdI^W99;BM%L}<=6RuVqnXq3j`n& fODnh;7+GF0GcbUO0B=?{kWq|4=ntghK^z7Ev>PRP literal 0 HcmV?d00001 diff --git a/fuzz/corpus/c3dcde8d12dcc6579f2c38bbd54e28a0200babce b/fuzz/corpus/c3dcde8d12dcc6579f2c38bbd54e28a0200babce new file mode 100644 index 0000000000000000000000000000000000000000..3375270b59088a52f0ba60d2737a51e98799d35e GIT binary patch literal 1788 zcmd5-&2G~`5H?qHAjWVR`d-Y$Alm;|`Cf2FIJ7wrRDytv#!4+PzNenItRxBQ|sF;Vup>LM`-Yh^IWu zrBdcTXPWz%D~>}kPCE45;G6W%!x6Sfag!R%%a)B;mqSSwlZ#e`vZ{b;)sp~{mPQ82 zqKAr)8`rN>R)zft#AsIsmHK575F1wnDkUjta163cmGS(2#HH;xvQ~(tW zBm20hV z{ack+=CPnc0=u_$?^D3CjMfT@h^VZ9&ZZt1xY8>ByE_M#GvTRZ)!u-{YqG0GuK=V+j4!Koi|(1 zt&+)FAJPAnCMo}^+@sISbw@ORlrCs>AHIcx%#_%ew&=gqB?mdb3LQzW(gKn MCKizJiIHJ)4|Ch(X4a{bwU{M0 oA7lw<1TkcucDCy4>s#qtX)!W@0k^B8qob>9KuKx%WEnOC09rC4>Hq)$ diff --git a/fuzz/corpus/ccf24291cf3b79fdb76f5167ce906d88fc4f8f6d b/fuzz/corpus/ccf24291cf3b79fdb76f5167ce906d88fc4f8f6d deleted file mode 100644 index 62a03cb3a1826a4e9214c962e4074d5b6840ccb3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 22 ecmWIWW@U5te7cB{QAzsg#-~pim^m+5vjG57F9&=8 diff --git a/fuzz/corpus/cd4f0538db030c9794809b18f32321b125d67b28 b/fuzz/corpus/cd4f0538db030c9794809b18f32321b125d67b28 deleted file mode 100644 index 4a8226f6a..000000000 --- a/fuzz/corpus/cd4f0538db030c9794809b18f32321b125d67b28 +++ /dev/null @@ -1 +0,0 @@ -PK \ No newline at end of file diff --git a/fuzz/corpus/c5f4facd0a5b1a657433ca68154e33a7e48bb663 b/fuzz/corpus/cd625ab558a1b28ad90d1083d64b2096a52f401d similarity index 92% rename from fuzz/corpus/c5f4facd0a5b1a657433ca68154e33a7e48bb663 rename to fuzz/corpus/cd625ab558a1b28ad90d1083d64b2096a52f401d index a98b5606e60c94129b5ca1f970cfbf0b3b1ae796..5dd16dd20fea76d9d0955365da483790dacac20d 100644 GIT binary patch delta 24 bcmX@YbA)HZRaTDw5FkAHBa`uD0X7o=t``i| delta 19 bcmX@YbA)HZRo2P%%)FBenE57uU=0BPP1pxL diff --git a/fuzz/corpus/eeaaa652952fef8117a481d1d7d2438fc815f646 b/fuzz/corpus/d2ec3276043232db9afb98075728e963a5b8e369 similarity index 65% rename from fuzz/corpus/eeaaa652952fef8117a481d1d7d2438fc815f646 rename to fuzz/corpus/d2ec3276043232db9afb98075728e963a5b8e369 index 043d2955bd5acdfa75401e8a6d7c43ffd9704a5e..9f592e64ce766e5b665663c91bfe3d58d0789b94 100644 GIT binary patch delta 19 acmeC?>E_wc%CzxcFwE_wc$~2jW<;-LWmIadySiC0dF|$qPWVN68(T*=5!q>y!G?1$xvAFpE|H%c6 KwwpOwnHT}%h7p4R diff --git a/fuzz/corpus/1188838daba5b306b8c7bf4142b0e33e19567be1 b/fuzz/corpus/dfe1dd788a4b4cad55dd9803c46d0aa272d665d7 similarity index 69% rename from fuzz/corpus/1188838daba5b306b8c7bf4142b0e33e19567be1 rename to fuzz/corpus/dfe1dd788a4b4cad55dd9803c46d0aa272d665d7 index 0b5d5d90ff47cff671bb57b7a1ba2f5abb78fd2a..049a9bcb07251ffb0ced5acfef195661edf8e04f 100644 GIT binary patch delta 59 zcmeC>ImWZ$6eHuK$#+;P*piD9le69nfe$78ky+2XQt+R26=*wbO%#f-3AU^*Y&}i((wP>21X_j>HPG} zOn0CLu$@>bm_tC8LLGv`Qru=3m>HVtB^ngqFvH49OWzaCp`b88bq~al0^EkE!VKxY z&ZTAG0Q4<}A&78AbBZFCK*Z^vXRV7t{=wrEoc_TK>JT)58pi+smkjDS#S#XfGzGg5$A8bxi8Rlc_K;NqC#OQ0HR-gf`;K zc)KoKi`f|?cD4}PPynD1hc00I4tMh4>9Uw0Y^xK;R&`k<-@be$|Ht)u?RBUN9u=?# zx+zc8AD>?BP7NNlk$j^<&d&FUAq(u*7_%Yjm-+#ap49;b%70p7ZUHqC(6kPaA_jo~!W zl#j$rt8%{|SGBPVQip=dY>AS5u$Sn=It5C^t`;C#?i|*kG_>)?BFCI&&Jbya^a7f) zE1N2au%`28No`P3I=I3~OxXg%hnf|`@M6xFU0KnEo1eJEQ#3sFwo%S9b$IG;jkwut zOTI0vA#M3Fi=`q7U6vDs4AJLAqI?1Hbq*r0FF-@$giY0HR1W7r97k<9EVIixDG`Td q!#&o0+8VmH+0L=Xfb+nQ4nDu(QMybaWb97TmBVo~p2yhf6Tbk;a>+IT diff --git a/fuzz/corpus/e5a8d3e8ac6b758fde8c6315baf28e825d029ea8 b/fuzz/corpus/e5a8d3e8ac6b758fde8c6315baf28e825d029ea8 deleted file mode 100644 index 3cfa55284341732f2af50c1e21c1e4a89d993ac9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 169 zcmWIWW@h1H0D&bzE}@gzt{X7|*&xixAj6PSnwwjxmy%i(8p6rI{N$c>G6Hch{FH?f)O9b diff --git a/fuzz/corpus/e7f52f8df718abe22a791204ce57b71c6f2638e5 b/fuzz/corpus/e7f52f8df718abe22a791204ce57b71c6f2638e5 deleted file mode 100644 index 4201ea197a0c2fc572b976fa899fb9ee4eb47ac8..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1567 zcmc&!&2G~`5H=?y^8mRnmgpfO_}@;9BGrmg%gUiuq7ZSYEPJzd7aQAkHne#F4ji~5 zA#vahc#57-A-n)Lq&z}zVQt4r(@GVo5bR~Nvoo_Z-}mh%0fw0iWp)xe5q^W50B0yE zoD9pEXA=`bG@0}`G&2Ba_H5I#``ts!Hb;YA_oXPw+*7s)9H2f*EKEK0C`2=k1^KFj zFNFo4O9KGR5b4o#gX_?n!w6Zp%A^i++jFDVb}7+f%B8EWq{)h^`l15Gm6L&z;4`q= z{afp`?^vjd!potiYh6Z;09(6dN?`dW7mri%wEwQvI`E#bq;bc3PzAx zCA~H+hNZ(4Y<>Lj{!@K4XiAEp>692zloe6an5YNyftnz*m0;p;BOcyog@GN diff --git a/fuzz/corpus/e82b7fbdf33f60dbc111d5b05b1f5f9b0e93c1c0 b/fuzz/corpus/e82b7fbdf33f60dbc111d5b05b1f5f9b0e93c1c0 deleted file mode 100644 index ef458cdf8a368aedb5d601c60ad4f0397e0bd56a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1549 zcmd^9&1&2*5cVFcd+1g1E&a2#-nG383w47rCMkhkO6Z|1icqz}i{;wh(oG0`fI^-i zFOp+Vg$<~c3os7L(3Mzb)ACK;jr-s$P%i^5_TfzDK_yp->b-Cb&EgZI&E?4liHByEt0ug{=7@LARmQ`#F*OL@PyA{7AT|sy8mH zit9N3CzF2b7VdW=t=)EC{s-M+49~WCmM<34^+?|j$b|}hkYUp_nu)L&3lW7=2hKm5 R<~?i=?5d{qqOynN@FzQb#>oHx diff --git a/fuzz/corpus/f2cb21a8411c6b0dfb2ce1fce303b61126dea477 b/fuzz/corpus/f2cb21a8411c6b0dfb2ce1fce303b61126dea477 new file mode 100644 index 0000000000000000000000000000000000000000..63baff04ff20eaa5e0787c2d77b908e1002b05ac GIT binary patch literal 1549 zcmcIk&x_MQ6wV?D&VAi$+QS~|uE{iQk_O5eml%pGg_fl!DQTLdLrNy3S+`yBnv3Ak zlV=a!1w~Lq*n^kFe?ei^Z{GXf_r4kIATuyb0Sw%BA3d_# z4hk$h*>w9BSveMNjU4>Y={qQFhuF0|w#w$6?W~^}m=kYuw`>PIE{UU1M-2<>5P)77 z`U8dIOvK-IFn0RZ(ZC8!?1jkN^6UDW_pRoANyzW`$6XRTjtc;>O_kF)g)@AlCC4a|d{tg1~H8e-?{ z%$5}ASxh8b;+J<5Axs^ z%Z7OJ%S|6q92i1TBn4gq*s5nZOGzea*2@9AL0dsATfmj2GZ(d4*=Hy(-K8 E0yL#L2LJ#7 literal 0 HcmV?d00001 diff --git a/fuzz/corpus/eaacd823db36dbd3ca4241c26eaf311de7528af4 b/fuzz/corpus/f7cf03173db4f86b52c8d5bd6369387535d4bb60 similarity index 93% rename from fuzz/corpus/eaacd823db36dbd3ca4241c26eaf311de7528af4 rename to fuzz/corpus/f7cf03173db4f86b52c8d5bd6369387535d4bb60 index 2782d6b054fdcd3a02a78b828a9da060d06a8e2a..165fe32d9cbd54af1354d576d4d5807e0cd59fb8 100644 GIT binary patch delta 14 VcmZqSY2n$R$-=m4vlh!OMgSoq1Z@BS delta 19 acmZqSY2n$R$+DS+aRKAx1FYtg*jNBMe+7a7 diff --git a/fuzz/corpus/fd8463c8426b79d944d2ff54554982c98617c3c8 b/fuzz/corpus/fd8463c8426b79d944d2ff54554982c98617c3c8 deleted file mode 100644 index e06a86e492ea71de21c123e84cc8722d3fc23236..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 981 zcmeZ#ws)y(kb+GjmtE29`I$wj$t9^NT!vg+86_nJ#a2O%|NlEDIy?Eey1P0DIRY7n zdtKc None: # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # + def test_json_p(self) -> None: + # test for finding credentials in JSON + content_provider: FilesProvider = TextProvider([SAMPLES_DIR / "struct.json"]) + # depth must be set in constructor to remove .zip as ignored extension + cred_sweeper = CredSweeper(depth=5) + cred_sweeper.run(content_provider=content_provider) + found_credentials = cred_sweeper.credential_manager.get_credentials() + assert len(found_credentials) == 1 + assert {"Password"} == set(i.rule_name for i in found_credentials) + assert {"Axt4T0eO0lm9sS=="} == set(i.line_data_list[0].value for i in found_credentials) + + # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # + + def test_json_n(self) -> None: + # test to prove that no credentials are found without depth + content_provider: FilesProvider = TextProvider([SAMPLES_DIR / "struct.json"]) + # depth must be set in constructor to remove .zip as ignored extension + cred_sweeper = CredSweeper() + cred_sweeper.run(content_provider=content_provider) + found_credentials = cred_sweeper.credential_manager.get_credentials() + assert len(found_credentials) == 0 + + # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # + + def test_yaml_p(self) -> None: + # test for finding credentials in YAML + content_provider: FilesProvider = TextProvider([SAMPLES_DIR / "binary.yaml"]) + # depth must be set in constructor to remove .zip as ignored extension + cred_sweeper = CredSweeper(depth=5) + cred_sweeper.run(content_provider=content_provider) + found_credentials = cred_sweeper.credential_manager.get_credentials() + assert len(found_credentials) == 2 + assert {"Secret", "PEM Certificate"} == set(i.rule_name for i in found_credentials) + assert {"we5345d0f3da48544z1t1e275y05i161x995q485\n", "-----BEGIN RSA PRIVATE"} == \ + set(i.line_data_list[0].value for i in found_credentials) + + # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # + + def test_yaml_n(self) -> None: + # test to prove that no credentials are found without depth + content_provider: FilesProvider = TextProvider([SAMPLES_DIR / "binary.yaml"]) + # depth must be set in constructor to remove .zip as ignored extension + cred_sweeper = CredSweeper() + cred_sweeper.run(content_provider=content_provider) + found_credentials = cred_sweeper.credential_manager.get_credentials() + assert len(found_credentials) == 0 + + # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # + def test_encoded_p(self) -> None: - # test for finding credentials in docx + # test for finding credentials in ENCODED data content_provider: FilesProvider = TextProvider([SAMPLES_DIR / "encoded"]) # depth must be set in constructor to remove .zip as ignored extension cred_sweeper = CredSweeper(depth=5)