diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml index 48affa03f..e5961c2d0 100644 --- a/.github/workflows/fuzz.yml +++ b/.github/workflows/fuzz.yml @@ -78,8 +78,9 @@ jobs: run: | COVERAGE=$(tail -1 report.txt | awk '{print $6}' | tr --delete '%') # additionally check correctness of the value - should be an integer - if ! [ 75 -le ${COVERAGE} ]; then - echo "Fuzzing coverage '${COVERAGE}' does not satisfy the limit 74%" + FUZZ_COVERAGE_LIMIT=75 + if ! [ ${FUZZ_COVERAGE_LIMIT} -le ${COVERAGE} ]; then + echo "Fuzzing coverage '${COVERAGE}' does not satisfy the limit ${FUZZ_COVERAGE_LIMIT}%" exit 1 fi diff --git a/cicd/mypy_warnings.txt b/cicd/mypy_warnings.txt index 503915c85..ce61fd05b 100644 --- a/cicd/mypy_warnings.txt +++ b/cicd/mypy_warnings.txt @@ -1 +1 @@ -Success: no issues found in 83 source files +Success: no issues found in 84 source files diff --git a/credsweeper/app.py b/credsweeper/app.py index e0699044c..58fe45440 100644 --- a/credsweeper/app.py +++ b/credsweeper/app.py @@ -7,11 +7,12 @@ import signal import sys import zipfile -from typing import List, Optional, Union +from typing import List, Optional, Union, Tuple, Any import pandas as pd -from credsweeper.common.constants import KeyValidationOption, ThresholdPreset, RECURSIVE_SCAN_LIMITATION +from credsweeper.common.constants import KeyValidationOption, ThresholdPreset, RECURSIVE_SCAN_LIMITATION, \ + DEFAULT_ENCODING from credsweeper.config import Config from credsweeper.credentials import Candidate, CredentialManager from credsweeper.file_handler.byte_content_provider import ByteContentProvider @@ -21,6 +22,7 @@ from credsweeper.file_handler.file_path_extractor import FilePathExtractor from credsweeper.file_handler.files_provider import FilesProvider from credsweeper.file_handler.string_content_provider import StringContentProvider +from credsweeper.file_handler.struct_content_provider import StructContentProvider from credsweeper.file_handler.text_content_provider import TextContentProvider from credsweeper.scanner import Scanner from credsweeper.utils import Util @@ -356,13 +358,20 @@ def data_scan(self, data_provider: DataContentProvider, depth: int, recursive_li new_limit = recursive_limit_size - len(decoded_data_provider.data) candidates.extend(self.data_scan(decoded_data_provider, depth, new_limit)) + elif data_provider.represent_as_structure(): + struct_data_provider = StructContentProvider(struct=data_provider.structure, + file_path=data_provider.file_path, + file_type=data_provider.file_type, + info=f"{data_provider.info}|STRUCT") + candidates.extend(self.struct_scan(struct_data_provider, depth, recursive_limit_size)) + elif data_provider.represent_as_xml(): - struct_data_provider = StringContentProvider(lines=data_provider.lines, + string_data_provider = StringContentProvider(lines=data_provider.lines, line_numbers=data_provider.line_numbers, file_path=data_provider.file_path, file_type=".xml", info=f"{data_provider.info}|XML") - candidates.extend(self.file_scan(struct_data_provider)) + candidates.extend(self.file_scan(string_data_provider)) else: # finally try scan the data via byte content provider @@ -378,6 +387,83 @@ def data_scan(self, data_provider: DataContentProvider, depth: int, recursive_li # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # + def struct_scan(self, struct_provider: StructContentProvider, depth: int, recursive_limit_size: int) -> \ + List[Candidate]: + """Recursive function to scan structured data + + Args: + struct_provider: DataContentProvider object may be a container + depth: maximal level of recursion + recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack + """ + candidates: List[Candidate] = [] + logger.debug("Start struct_scan: depth=%d, limit=%d, path=%s, info=%s", depth, recursive_limit_size, + struct_provider.file_path, struct_provider.info) + + if 0 > depth: + # break recursion if maximal depth is reached + logger.debug("bottom reached %s recursive_limit_size:%d", struct_provider.file_path, recursive_limit_size) + return candidates + + depth -= 1 + + items: List[Tuple[Union[int, str], Any]] = [] + if isinstance(struct_provider.struct, dict): + items = list(struct_provider.struct.items()) + elif isinstance(struct_provider.struct, list): + items = list(enumerate(struct_provider.struct)) + else: + logger.error("Not supported type:%s val:%s", str(type(struct_provider.struct)), str(struct_provider.struct)) + + for key, value in items: + if isinstance(value, dict) or isinstance(value, list): + val_struct_provider = StructContentProvider(struct=value, + file_path=struct_provider.file_path, + file_type=struct_provider.file_type, + info=f"{struct_provider.info}|STRUCT:{key}") + candidates.extend(self.struct_scan(val_struct_provider, depth, recursive_limit_size)) + + elif isinstance(value, bytes): + bytes_struct_provider = DataContentProvider(data=value, + file_path=struct_provider.file_path, + file_type=struct_provider.file_type, + info=f"{struct_provider.info}|BYTES:{key}") + new_limit = recursive_limit_size - len(value) + new_candidates = self.data_scan(bytes_struct_provider, depth, new_limit) + candidates.extend(new_candidates) + + elif isinstance(value, str): + str_struct_provider = DataContentProvider(data=value.encode(encoding=DEFAULT_ENCODING), + file_path=struct_provider.file_path, + file_type=struct_provider.file_type, + info=f"{struct_provider.info}|STRING:{key}") + new_limit = recursive_limit_size - len(str_struct_provider.data) + new_candidates = self.data_scan(str_struct_provider, depth, new_limit) + candidates.extend(new_candidates) + + # use key = "value" scan for common cases like in Python code + if isinstance(struct_provider.struct, dict): + str_provider = StringContentProvider([f"{key} = \"{value}\""], + file_path=struct_provider.file_path, + file_type=".py", + info=f"{struct_provider.info}|STRING:`{key} = \"{value}\"`") + extra_candidates = self.file_scan(str_provider) + if extra_candidates: + found_values = set(line_data.value for candidate in candidates + for line_data in candidate.line_data_list) + for extra_candidate in extra_candidates: + for line_data in extra_candidate.line_data_list: + if line_data.value not in found_values: + candidates.append(extra_candidate) + break + + else: + logger.debug("Not supported type:%s value(%s)", str(type(value)), str(value)) + + return candidates + + # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # + def post_processing(self) -> None: """Machine learning validation for received credential candidates.""" if self._use_ml_validation(): diff --git a/credsweeper/file_handler/data_content_provider.py b/credsweeper/file_handler/data_content_provider.py index efa21263a..5997d914d 100644 --- a/credsweeper/file_handler/data_content_provider.py +++ b/credsweeper/file_handler/data_content_provider.py @@ -1,8 +1,10 @@ import base64 +import json import logging import string from typing import List, Optional +import yaml from credsweeper.common.constants import DEFAULT_ENCODING from credsweeper.file_handler.analysis_target import AnalysisTarget from credsweeper.file_handler.content_provider import ContentProvider @@ -28,6 +30,7 @@ def __init__( info: Optional[str] = None) -> None: super().__init__(file_path=file_path, file_type=file_type, info=info) self.data = data + self.structure = None self.decoded: Optional[bytes] = None self.lines: List[str] = [] self.line_numbers: List[int] = [] @@ -42,6 +45,43 @@ def data(self, data: bytes) -> None: """data setter""" self.__data = data + def represent_as_structure(self) -> bool: + """Tries to convert data with many parsers. Stores result to internal structure + Return True if some structure found + """ + try: + text = self.data.decode(encoding='utf-8', errors='strict') + except Exception: + return False + # JSON + try: + if "{" in text: + self.structure = json.loads(text) + logger.debug("CONVERTED from json") + else: + logger.debug("Data do not contain { - weak JSON") + except Exception as exc: + logger.debug("Cannot parse as json:%s %s", exc, self.data) + self.structure = None + if self.structure is not None and (isinstance(self.structure, dict) and 0 < len(self.structure.keys()) + or isinstance(self.structure, list) and 0 < len(self.structure)): + return True + # # # YAML - almost always recognized + try: + if ":" in text: + self.structure = yaml.load(text, Loader=yaml.FullLoader) + logger.debug("CONVERTED from yaml") + else: + logger.debug("Data do not contain colon mark - weak YAML") + except Exception as exc: + logger.debug("Cannot parse as yaml:%s %s", exc, self.data) + self.structure = None + if self.structure is not None and (isinstance(self.structure, dict) and 0 < len(self.structure.keys()) + or isinstance(self.structure, list) and 0 < len(self.structure)): + return True + # # # None of above + return False + def represent_as_xml(self) -> bool: """Tries to read data as xml diff --git a/credsweeper/file_handler/struct_content_provider.py b/credsweeper/file_handler/struct_content_provider.py new file mode 100644 index 000000000..ad3f93f05 --- /dev/null +++ b/credsweeper/file_handler/struct_content_provider.py @@ -0,0 +1,45 @@ +import logging +from typing import List, Optional, Any + +from credsweeper.file_handler.analysis_target import AnalysisTarget +from credsweeper.file_handler.content_provider import ContentProvider + +logger = logging.getLogger(__name__) + + +class StructContentProvider(ContentProvider): + """Dummy raw provider to keep structured data + + Parameters: + struct: byte sequence to be stored. + file_path: optional string. Might be specified if you know true file name where lines were taken from. + + """ + + def __init__( + self, # + struct: Any, # + file_path: Optional[str] = None, # + file_type: Optional[str] = None, # + info: Optional[str] = None) -> None: + super().__init__(file_path=file_path, file_type=file_type, info=info) + self.struct = struct + + @property + def struct(self) -> Any: + """obj getter""" + return self.__struct + + @struct.setter + def struct(self, struct: Any) -> None: + """obj setter""" + self.__struct = struct + + def get_analysis_target(self) -> List[AnalysisTarget]: + """Return nothing. The class provides only data storage. + + Raise: + NotImplementedError + + """ + raise NotImplementedError() diff --git a/fuzz/corpus/096ec2ed3a11a2c4422fe445f86fc03963adf350 b/fuzz/corpus/096ec2ed3a11a2c4422fe445f86fc03963adf350 deleted file mode 100644 index 786b2d26d..000000000 Binary files a/fuzz/corpus/096ec2ed3a11a2c4422fe445f86fc03963adf350 and /dev/null differ diff --git a/fuzz/corpus/0d36f4956137486243c60fadb4248f5b2f913d4c b/fuzz/corpus/0d36f4956137486243c60fadb4248f5b2f913d4c new file mode 100644 index 000000000..218b14a35 Binary files /dev/null and b/fuzz/corpus/0d36f4956137486243c60fadb4248f5b2f913d4c differ diff --git a/fuzz/corpus/19521a7555bd197646dd224e3890064c2f4cf9bf b/fuzz/corpus/19521a7555bd197646dd224e3890064c2f4cf9bf new file mode 100644 index 000000000..b3edf8438 --- /dev/null +++ b/fuzz/corpus/19521a7555bd197646dd224e3890064c2f4cf9bf @@ -0,0 +1 @@ +{"t:ire\"l#b.ls\":\"ap\n"} \ No newline at end of file diff --git a/fuzz/corpus/1df555ea6ab8f834626d3002c2d3eaf7746450ae b/fuzz/corpus/1df555ea6ab8f834626d3002c2d3eaf7746450ae deleted file mode 100644 index 6ac557290..000000000 --- a/fuzz/corpus/1df555ea6ab8f834626d3002c2d3eaf7746450ae +++ /dev/null @@ -1 +0,0 @@ - diff --git a/fuzz/corpus/2023cf6be65f362b3892de9f6f1f8b7eec51d3ef b/fuzz/corpus/2023cf6be65f362b3892de9f6f1f8b7eec51d3ef deleted file mode 100644 index 492ed74ae..000000000 Binary files a/fuzz/corpus/2023cf6be65f362b3892de9f6f1f8b7eec51d3ef and /dev/null differ diff --git a/fuzz/corpus/248b61d9c284b005868a4c0a80854281f99a7bdb b/fuzz/corpus/248b61d9c284b005868a4c0a80854281f99a7bdb new file mode 100644 index 000000000..1de8b369d --- /dev/null +++ b/fuzz/corpus/248b61d9c284b005868a4c0a80854281f99a7bdb @@ -0,0 +1 @@ +PKå4 \ No newline at end of file diff --git a/fuzz/corpus/25d7d0a438a8029f3d3a3d15163310a094573797 b/fuzz/corpus/25d7d0a438a8029f3d3a3d15163310a094573797 new file mode 100644 index 000000000..a56479bd8 Binary files /dev/null and b/fuzz/corpus/25d7d0a438a8029f3d3a3d15163310a094573797 differ diff --git a/fuzz/corpus/276d98250ec4b5946949b9ac49c4c8649c92b422 b/fuzz/corpus/276d98250ec4b5946949b9ac49c4c8649c92b422 new file mode 100644 index 000000000..5458cdb72 Binary files /dev/null and b/fuzz/corpus/276d98250ec4b5946949b9ac49c4c8649c92b422 differ diff --git a/fuzz/corpus/2bc2e50780867fcce228c61c332e93862a7b8716 b/fuzz/corpus/2bc2e50780867fcce228c61c332e93862a7b8716 deleted file mode 100644 index 976ae7a0a..000000000 Binary files a/fuzz/corpus/2bc2e50780867fcce228c61c332e93862a7b8716 and /dev/null differ diff --git a/fuzz/corpus/2e0ed4d19120c0095af1391325e73d763e97268d b/fuzz/corpus/2e0ed4d19120c0095af1391325e73d763e97268d new file mode 100644 index 000000000..216e4f3a7 --- /dev/null +++ b/fuzz/corpus/2e0ed4d19120c0095af1391325e73d763e97268d @@ -0,0 +1 @@ + diff --git a/fuzz/corpus/4c3dbea65792c1641b2c985d52aa0d2631a2c9c7 b/fuzz/corpus/4c3dbea65792c1641b2c985d52aa0d2631a2c9c7 new file mode 100644 index 000000000..97500787d Binary files /dev/null and b/fuzz/corpus/4c3dbea65792c1641b2c985d52aa0d2631a2c9c7 differ diff --git a/fuzz/corpus/4f48be8a6966a19677115dfe5119ac7aa2ae667e b/fuzz/corpus/4f48be8a6966a19677115dfe5119ac7aa2ae667e deleted file mode 100644 index c73b80a1d..000000000 Binary files a/fuzz/corpus/4f48be8a6966a19677115dfe5119ac7aa2ae667e and /dev/null differ diff --git a/fuzz/corpus/50127775b46b8b432f85928ab5d21cc9a5b9916a b/fuzz/corpus/50127775b46b8b432f85928ab5d21cc9a5b9916a deleted file mode 100644 index 42936cac1..000000000 --- a/fuzz/corpus/50127775b46b8b432f85928ab5d21cc9a5b9916a +++ /dev/null @@ -1,5 +0,0 @@ - - cackl/City> - peace_for_ukraine - - \ No newline at end of file diff --git a/fuzz/corpus/6b78a49f2f81c15b54c1bc271903f8c721e7667e b/fuzz/corpus/610152b2a025155f15cec304e817b6de274301ed similarity index 65% rename from fuzz/corpus/6b78a49f2f81c15b54c1bc271903f8c721e7667e rename to fuzz/corpus/610152b2a025155f15cec304e817b6de274301ed index 620e4ee97..dcf4ff16e 100644 Binary files a/fuzz/corpus/6b78a49f2f81c15b54c1bc271903f8c721e7667e and b/fuzz/corpus/610152b2a025155f15cec304e817b6de274301ed differ diff --git a/fuzz/corpus/67ddf5d2d4ee8b1862715ef44aeecd9f426dce8c b/fuzz/corpus/67ddf5d2d4ee8b1862715ef44aeecd9f426dce8c new file mode 100644 index 000000000..7838e2e60 Binary files /dev/null and b/fuzz/corpus/67ddf5d2d4ee8b1862715ef44aeecd9f426dce8c differ diff --git a/fuzz/corpus/76dadf69d4d7273035b509c4fb172df1a1e3aa42 b/fuzz/corpus/76dadf69d4d7273035b509c4fb172df1a1e3aa42 new file mode 100644 index 000000000..a5bda3b1c --- /dev/null +++ b/fuzz/corpus/76dadf69d4d7273035b509c4fb172df1a1e3aa42 @@ -0,0 +1,2 @@ +="cackl/Cit䯉 ace_for_ukraine + \ No newline at end of file diff --git a/fuzz/corpus/77a11b466eeb44409a77ffbd135e944a51a0c967 b/fuzz/corpus/77a11b466eeb44409a77ffbd135e944a51a0c967 new file mode 100644 index 000000000..ae54bdd80 Binary files /dev/null and b/fuzz/corpus/77a11b466eeb44409a77ffbd135e944a51a0c967 differ diff --git a/fuzz/corpus/7b328d9887fccefef2d299580e2d8d4b9ba3d901 b/fuzz/corpus/7b328d9887fccefef2d299580e2d8d4b9ba3d901 deleted file mode 100644 index 8f14ba41b..000000000 Binary files a/fuzz/corpus/7b328d9887fccefef2d299580e2d8d4b9ba3d901 and /dev/null differ diff --git a/fuzz/corpus/c7832116c548a936d02dba5ba9ea32ca2a61855b b/fuzz/corpus/7c6eecb1b52a22e187271bcfc4fab3f52c7bf8cd similarity index 57% rename from fuzz/corpus/c7832116c548a936d02dba5ba9ea32ca2a61855b rename to fuzz/corpus/7c6eecb1b52a22e187271bcfc4fab3f52c7bf8cd index f52f859e5..8e3704d47 100644 Binary files a/fuzz/corpus/c7832116c548a936d02dba5ba9ea32ca2a61855b and b/fuzz/corpus/7c6eecb1b52a22e187271bcfc4fab3f52c7bf8cd differ diff --git a/fuzz/corpus/826bcabe227bcaf79bf5238c84be3fcf93827222 b/fuzz/corpus/826bcabe227bcaf79bf5238c84be3fcf93827222 deleted file mode 100644 index 5e97fe373..000000000 Binary files a/fuzz/corpus/826bcabe227bcaf79bf5238c84be3fcf93827222 and /dev/null differ diff --git a/fuzz/corpus/898ebe807cbb9dac8f0223f66cd82b736d79f2bc b/fuzz/corpus/898ebe807cbb9dac8f0223f66cd82b736d79f2bc new file mode 100644 index 000000000..de706fae1 Binary files /dev/null and b/fuzz/corpus/898ebe807cbb9dac8f0223f66cd82b736d79f2bc differ diff --git a/fuzz/corpus/8e97bf37cf7ab00ab5b748b108e331a60394d956 b/fuzz/corpus/8e97bf37cf7ab00ab5b748b108e331a60394d956 new file mode 100644 index 000000000..4fa45b741 --- /dev/null +++ b/fuzz/corpus/8e97bf37cf7ab00ab5b748b108e331a60394d956 @@ -0,0 +1 @@ +pwd : "cace!" diff --git a/fuzz/corpus/8eae777ab67ed2d99960854e6cd0c5a737ad4208 b/fuzz/corpus/8eae777ab67ed2d99960854e6cd0c5a737ad4208 new file mode 100644 index 000000000..0fa560994 Binary files /dev/null and b/fuzz/corpus/8eae777ab67ed2d99960854e6cd0c5a737ad4208 differ diff --git a/fuzz/corpus/90ad557dfb83213e837631d70ed888c7cd8bd2e0 b/fuzz/corpus/90ad557dfb83213e837631d70ed888c7cd8bd2e0 new file mode 100644 index 000000000..eeab189f6 --- /dev/null +++ b/fuzz/corpus/90ad557dfb83213e837631d70ed888c7cd8bd2e0 @@ -0,0 +1 @@ +{"test.domain.io/actual-configuration": "{\"apiVersion\":\"v1\",\"data\":{\"smtp-password\":\"\",\"wordpresdpress-wordpress\",\"chart\":\"wordp:\"wordpress\"},\"name\":\"wordpress-wopaque\"}\n"} \ No newline at end of file diff --git a/fuzz/corpus/05d0262facb829f463770b7b5bebfa0ae2b91cd1 b/fuzz/corpus/91872b929eee06159ae9f2704dd7ce10c46613d0 similarity index 56% rename from fuzz/corpus/05d0262facb829f463770b7b5bebfa0ae2b91cd1 rename to fuzz/corpus/91872b929eee06159ae9f2704dd7ce10c46613d0 index c638cbcc4..b241bef4f 100644 Binary files a/fuzz/corpus/05d0262facb829f463770b7b5bebfa0ae2b91cd1 and b/fuzz/corpus/91872b929eee06159ae9f2704dd7ce10c46613d0 differ diff --git a/fuzz/corpus/9252fd7a390bc4e1eaf6afd6eab7abedda07953e b/fuzz/corpus/9252fd7a390bc4e1eaf6afd6eab7abedda07953e new file mode 100644 index 000000000..0e25d6a84 Binary files /dev/null and b/fuzz/corpus/9252fd7a390bc4e1eaf6afd6eab7abedda07953e differ diff --git a/fuzz/corpus/b272cbeed5b4fd5774ab0c108e3d05bc67ae734f b/fuzz/corpus/97fbb25a4fc896e423c47f54e3b1b8b4c79560e6 similarity index 91% rename from fuzz/corpus/b272cbeed5b4fd5774ab0c108e3d05bc67ae734f rename to fuzz/corpus/97fbb25a4fc896e423c47f54e3b1b8b4c79560e6 index cca02ac52..3bfc49c25 100644 Binary files a/fuzz/corpus/b272cbeed5b4fd5774ab0c108e3d05bc67ae734f and b/fuzz/corpus/97fbb25a4fc896e423c47f54e3b1b8b4c79560e6 differ diff --git a/fuzz/corpus/9e17092d3faa95169b6eb6da9ac36d4650175281 b/fuzz/corpus/9e17092d3faa95169b6eb6da9ac36d4650175281 new file mode 100644 index 000000000..cab2ed3e0 --- /dev/null +++ b/fuzz/corpus/9e17092d3faa95169b6eb6da9ac36d4650175281 @@ -0,0 +1,4 @@ +IREOGIogicr_gireAbody: + WM824c3 +sk_liv,e_gireogicracklea)pGI: !!binary | + H4sICv2xH9UVPREO \ No newline at end of file diff --git a/fuzz/corpus/9e5b0e206c5e57b929e482e6ae5abf8b036367ef b/fuzz/corpus/9e5b0e206c5e57b929e482e6ae5abf8b036367ef new file mode 100644 index 000000000..2aacd9ac5 --- /dev/null +++ b/fuzz/corpus/9e5b0e206c5e57b929e482e6ae5abf8b036367ef @@ -0,0 +1,2 @@ +body: +- stringna6@^ame\":---ordpress-wordp485 diff --git a/fuzz/corpus/6b8737b1e00c72a699151913c883a2a5df41dd76 b/fuzz/corpus/a3bbfcce88dcd5e14053bcb5b772e0df581c4baf similarity index 70% rename from fuzz/corpus/6b8737b1e00c72a699151913c883a2a5df41dd76 rename to fuzz/corpus/a3bbfcce88dcd5e14053bcb5b772e0df581c4baf index 41e69ddf5..ffe184d50 100644 Binary files a/fuzz/corpus/6b8737b1e00c72a699151913c883a2a5df41dd76 and b/fuzz/corpus/a3bbfcce88dcd5e14053bcb5b772e0df581c4baf differ diff --git a/fuzz/corpus/b9c8aa807a927cee3b6f4ec1475679385db74f9a b/fuzz/corpus/b4f57c1be7888ff72e68d0858ba92f9c68066b25 similarity index 84% rename from fuzz/corpus/b9c8aa807a927cee3b6f4ec1475679385db74f9a rename to fuzz/corpus/b4f57c1be7888ff72e68d0858ba92f9c68066b25 index 0960e866c..9c3fe1146 100644 Binary files a/fuzz/corpus/b9c8aa807a927cee3b6f4ec1475679385db74f9a and b/fuzz/corpus/b4f57c1be7888ff72e68d0858ba92f9c68066b25 differ diff --git a/fuzz/corpus/bea99aa4703268b72289e7b88738c31160417b14 b/fuzz/corpus/bea99aa4703268b72289e7b88738c31160417b14 new file mode 100644 index 000000000..7eddd1eb4 Binary files /dev/null and b/fuzz/corpus/bea99aa4703268b72289e7b88738c31160417b14 differ diff --git a/fuzz/corpus/c35f59c3f064a5c96da46a4f88887d41a85dd669 b/fuzz/corpus/c35f59c3f064a5c96da46a4f88887d41a85dd669 new file mode 100644 index 000000000..a0216d14b Binary files /dev/null and b/fuzz/corpus/c35f59c3f064a5c96da46a4f88887d41a85dd669 differ diff --git a/fuzz/corpus/c3dcde8d12dcc6579f2c38bbd54e28a0200babce b/fuzz/corpus/c3dcde8d12dcc6579f2c38bbd54e28a0200babce new file mode 100644 index 000000000..3375270b5 Binary files /dev/null and b/fuzz/corpus/c3dcde8d12dcc6579f2c38bbd54e28a0200babce differ diff --git a/fuzz/corpus/e1182b80ce7fddf42850af97edf022a5fd61e421 b/fuzz/corpus/c7b7936a53fa182a83740574917b635b8647a4fa similarity index 78% rename from fuzz/corpus/e1182b80ce7fddf42850af97edf022a5fd61e421 rename to fuzz/corpus/c7b7936a53fa182a83740574917b635b8647a4fa index ac01e03f0..8b3cc05d4 100644 Binary files a/fuzz/corpus/e1182b80ce7fddf42850af97edf022a5fd61e421 and b/fuzz/corpus/c7b7936a53fa182a83740574917b635b8647a4fa differ diff --git a/fuzz/corpus/ccf24291cf3b79fdb76f5167ce906d88fc4f8f6d b/fuzz/corpus/ccf24291cf3b79fdb76f5167ce906d88fc4f8f6d deleted file mode 100644 index 62a03cb3a..000000000 Binary files a/fuzz/corpus/ccf24291cf3b79fdb76f5167ce906d88fc4f8f6d and /dev/null differ diff --git a/fuzz/corpus/cd4f0538db030c9794809b18f32321b125d67b28 b/fuzz/corpus/cd4f0538db030c9794809b18f32321b125d67b28 deleted file mode 100644 index 4a8226f6a..000000000 --- a/fuzz/corpus/cd4f0538db030c9794809b18f32321b125d67b28 +++ /dev/null @@ -1 +0,0 @@ -PK˙ö \ No newline at end of file diff --git a/fuzz/corpus/c5f4facd0a5b1a657433ca68154e33a7e48bb663 b/fuzz/corpus/cd625ab558a1b28ad90d1083d64b2096a52f401d similarity index 92% rename from fuzz/corpus/c5f4facd0a5b1a657433ca68154e33a7e48bb663 rename to fuzz/corpus/cd625ab558a1b28ad90d1083d64b2096a52f401d index a98b5606e..5dd16dd20 100644 Binary files a/fuzz/corpus/c5f4facd0a5b1a657433ca68154e33a7e48bb663 and b/fuzz/corpus/cd625ab558a1b28ad90d1083d64b2096a52f401d differ diff --git a/fuzz/corpus/eeaaa652952fef8117a481d1d7d2438fc815f646 b/fuzz/corpus/d2ec3276043232db9afb98075728e963a5b8e369 similarity index 65% rename from fuzz/corpus/eeaaa652952fef8117a481d1d7d2438fc815f646 rename to fuzz/corpus/d2ec3276043232db9afb98075728e963a5b8e369 index 043d2955b..9f592e64c 100644 Binary files a/fuzz/corpus/eeaaa652952fef8117a481d1d7d2438fc815f646 and b/fuzz/corpus/d2ec3276043232db9afb98075728e963a5b8e369 differ diff --git a/fuzz/corpus/1188838daba5b306b8c7bf4142b0e33e19567be1 b/fuzz/corpus/dfe1dd788a4b4cad55dd9803c46d0aa272d665d7 similarity index 69% rename from fuzz/corpus/1188838daba5b306b8c7bf4142b0e33e19567be1 rename to fuzz/corpus/dfe1dd788a4b4cad55dd9803c46d0aa272d665d7 index 0b5d5d90f..049a9bcb0 100644 Binary files a/fuzz/corpus/1188838daba5b306b8c7bf4142b0e33e19567be1 and b/fuzz/corpus/dfe1dd788a4b4cad55dd9803c46d0aa272d665d7 differ diff --git a/fuzz/corpus/e10c04a62961bd591639c79e484993930e205132 b/fuzz/corpus/e10c04a62961bd591639c79e484993930e205132 deleted file mode 100644 index 8b4a4d503..000000000 Binary files a/fuzz/corpus/e10c04a62961bd591639c79e484993930e205132 and /dev/null differ diff --git a/fuzz/corpus/e245e2f884af2d01bea253673200a29f51363fa2 b/fuzz/corpus/e245e2f884af2d01bea253673200a29f51363fa2 deleted file mode 100644 index cf8ca01e0..000000000 Binary files a/fuzz/corpus/e245e2f884af2d01bea253673200a29f51363fa2 and /dev/null differ diff --git a/fuzz/corpus/e5a8d3e8ac6b758fde8c6315baf28e825d029ea8 b/fuzz/corpus/e5a8d3e8ac6b758fde8c6315baf28e825d029ea8 deleted file mode 100644 index 3cfa55284..000000000 Binary files a/fuzz/corpus/e5a8d3e8ac6b758fde8c6315baf28e825d029ea8 and /dev/null differ diff --git a/fuzz/corpus/e7f52f8df718abe22a791204ce57b71c6f2638e5 b/fuzz/corpus/e7f52f8df718abe22a791204ce57b71c6f2638e5 deleted file mode 100644 index 4201ea197..000000000 Binary files a/fuzz/corpus/e7f52f8df718abe22a791204ce57b71c6f2638e5 and /dev/null differ diff --git a/fuzz/corpus/e82b7fbdf33f60dbc111d5b05b1f5f9b0e93c1c0 b/fuzz/corpus/e82b7fbdf33f60dbc111d5b05b1f5f9b0e93c1c0 deleted file mode 100644 index ef458cdf8..000000000 Binary files a/fuzz/corpus/e82b7fbdf33f60dbc111d5b05b1f5f9b0e93c1c0 and /dev/null differ diff --git a/fuzz/corpus/f2cb21a8411c6b0dfb2ce1fce303b61126dea477 b/fuzz/corpus/f2cb21a8411c6b0dfb2ce1fce303b61126dea477 new file mode 100644 index 000000000..63baff04f Binary files /dev/null and b/fuzz/corpus/f2cb21a8411c6b0dfb2ce1fce303b61126dea477 differ diff --git a/fuzz/corpus/eaacd823db36dbd3ca4241c26eaf311de7528af4 b/fuzz/corpus/f7cf03173db4f86b52c8d5bd6369387535d4bb60 similarity index 93% rename from fuzz/corpus/eaacd823db36dbd3ca4241c26eaf311de7528af4 rename to fuzz/corpus/f7cf03173db4f86b52c8d5bd6369387535d4bb60 index 2782d6b05..165fe32d9 100644 Binary files a/fuzz/corpus/eaacd823db36dbd3ca4241c26eaf311de7528af4 and b/fuzz/corpus/f7cf03173db4f86b52c8d5bd6369387535d4bb60 differ diff --git a/fuzz/corpus/fd8463c8426b79d944d2ff54554982c98617c3c8 b/fuzz/corpus/fd8463c8426b79d944d2ff54554982c98617c3c8 deleted file mode 100644 index e06a86e49..000000000 Binary files a/fuzz/corpus/fd8463c8426b79d944d2ff54554982c98617c3c8 and /dev/null differ diff --git a/tests/__init__.py b/tests/__init__.py index 038064695..dbc8c0c01 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,7 +1,7 @@ from pathlib import Path # total number of files in test samples, included .gitignore -SAMPLES_FILES_COUNT: int = 52 +SAMPLES_FILES_COUNT: int = 54 # credentials count after scan SAMPLES_CRED_COUNT: int = 51 @@ -12,8 +12,8 @@ # archived credentials that not found without --depth SAMPLES_IN_DEEP_1 = 6 -SAMPLES_IN_DEEP_2 = 7 -SAMPLES_IN_DEEP_3 = 8 +SAMPLES_IN_DEEP_2 = 8 +SAMPLES_IN_DEEP_3 = 9 SAMPLES_FILTERED_BY_POST_COUNT = 1 diff --git a/tests/samples/binary.yaml b/tests/samples/binary.yaml new file mode 100644 index 000000000..4b22e0351 --- /dev/null +++ b/tests/samples/binary.yaml @@ -0,0 +1,17 @@ +body: + string: !!binary | + H4sICIur8mIAA3BlbV9rZXkAbdM3kqNAAEDRnFNMTk3hEQo2oAG1sKIBASIbnLDC29OvifenP37f + 338CClStL8cVv2xH9UVP+dKV19/xjZmqKoWiCkRRB28kDVBf6gclZ5eziCoev5PDXHm1v2+e1K96 + xmZSRN7sYSzJJKa1KA81Qn6/3Bu/PntsazUobD6K9CqDSSU/DO7ZTMsy3T6JdAYAXRzderrZ1CLH + dGHtxxTBVPhUR/xzDnBuIa/N3ZoqfkYcRk2Ua48SqLM0tnLS60kYm5p8OGx29Ug2ijZVFpEIxA6K + t7KqO47HB3hYgkk6/vHjiOGJ47s33IFRYMy8s/7bnEeEB8pbqorO2zqa0U0gLhp0Xx+n7UBkMo2Z + e3q2qrVYprayry8pbbn0NTCh1xl1baycQWO9qvqPmylDXFfcj3jzLw2d4MnndMyAxGM+F1qHkrQz + WnbfMHhE0vlqlBxHtLH72hUJITkTNz4vVRRicKmBymZmFM3sZ0oOuqNo/Xh9spHx+y5TcKunBzxi + +lU0U+LHOhERXIMfFbecPNmf2tjm9qbClmfKBhNrRdwlg7ujmI7RyIKjGxMzaIlCsWkzOp2Hf2GO + G0sV9uRI15bn9bHIHte77WlLxxDXievxaYD7o7lhBmnJM+vW3VS94aaJt7o5HGqJiM3WqoqnqCQF + yTk3djp0+zQh+CkEDpxSRSxMMIBeoddqPY71ULkaC/mzvrhkU+nzTFefg8ZJ0p9ANiINiBqUKPPN + PY6046xN5kHpPEZ7hx0d9168EHkxekIW32vvpLO+wZ5XHyEXnS+qi0w/FEqq5YKnZ9gnfRiaCpCf + hkNhSgjArlzczq1+8mfhX0oqUWAfC0LBWeAVnUEII4y5TBqHpgCftKOz0ozZ78KahsX5vGSeQMt8 + SEzJdnWiEyf4UdLYnvyF/cOjWPJ/Uf0Gdno9KXQDAAA= + secret: | + we5345d0f3da48544z1t1e275y05i161x995q485 diff --git a/tests/samples/struct.json b/tests/samples/struct.json new file mode 100644 index 000000000..873c92034 --- /dev/null +++ b/tests/samples/struct.json @@ -0,0 +1 @@ +{"test.domain.io/actual-configuration": "{\"apiVersion\":\"v1\",\"data\":{\"smtp-password\":\"\",\"wordpress-password\":\"Axt4T0eO0lm9sS==\"},\"kind\":\"Secret\",\"metadata\":{\"annotations\":{},\"labels\":{\"app\":\"wordpress-wordpress\",\"chart\":\"wordpress-5.0.1\",\"heritage\":\"Tiller\",\"release\":\"wordpress\"},\"name\":\"wordpress-wordpress\",\"namespace\":\"argocd\"},\"type\":\"Opaque\"}\n"} \ No newline at end of file diff --git a/tests/test_main.py b/tests/test_main.py index 67a074b0d..27bc6bfc6 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -362,8 +362,57 @@ def test_zip_p(self) -> None: # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # + def test_json_p(self) -> None: + # test for finding credentials in JSON + content_provider: FilesProvider = TextProvider([SAMPLES_DIR / "struct.json"]) + # depth must be set in constructor to remove .zip as ignored extension + cred_sweeper = CredSweeper(depth=5) + cred_sweeper.run(content_provider=content_provider) + found_credentials = cred_sweeper.credential_manager.get_credentials() + assert len(found_credentials) == 1 + assert {"Password"} == set(i.rule_name for i in found_credentials) + assert {"Axt4T0eO0lm9sS=="} == set(i.line_data_list[0].value for i in found_credentials) + + # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # + + def test_json_n(self) -> None: + # test to prove that no credentials are found without depth + content_provider: FilesProvider = TextProvider([SAMPLES_DIR / "struct.json"]) + # depth must be set in constructor to remove .zip as ignored extension + cred_sweeper = CredSweeper() + cred_sweeper.run(content_provider=content_provider) + found_credentials = cred_sweeper.credential_manager.get_credentials() + assert len(found_credentials) == 0 + + # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # + + def test_yaml_p(self) -> None: + # test for finding credentials in YAML + content_provider: FilesProvider = TextProvider([SAMPLES_DIR / "binary.yaml"]) + # depth must be set in constructor to remove .zip as ignored extension + cred_sweeper = CredSweeper(depth=5) + cred_sweeper.run(content_provider=content_provider) + found_credentials = cred_sweeper.credential_manager.get_credentials() + assert len(found_credentials) == 2 + assert {"Secret", "PEM Certificate"} == set(i.rule_name for i in found_credentials) + assert {"we5345d0f3da48544z1t1e275y05i161x995q485\n", "-----BEGIN RSA PRIVATE"} == \ + set(i.line_data_list[0].value for i in found_credentials) + + # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # + + def test_yaml_n(self) -> None: + # test to prove that no credentials are found without depth + content_provider: FilesProvider = TextProvider([SAMPLES_DIR / "binary.yaml"]) + # depth must be set in constructor to remove .zip as ignored extension + cred_sweeper = CredSweeper() + cred_sweeper.run(content_provider=content_provider) + found_credentials = cred_sweeper.credential_manager.get_credentials() + assert len(found_credentials) == 0 + + # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # + def test_encoded_p(self) -> None: - # test for finding credentials in docx + # test for finding credentials in ENCODED data content_provider: FilesProvider = TextProvider([SAMPLES_DIR / "encoded"]) # depth must be set in constructor to remove .zip as ignored extension cred_sweeper = CredSweeper(depth=5)