diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml
index 48affa03f..e5961c2d0 100644
--- a/.github/workflows/fuzz.yml
+++ b/.github/workflows/fuzz.yml
@@ -78,8 +78,9 @@ jobs:
run: |
COVERAGE=$(tail -1 report.txt | awk '{print $6}' | tr --delete '%')
# additionally check correctness of the value - should be an integer
- if ! [ 75 -le ${COVERAGE} ]; then
- echo "Fuzzing coverage '${COVERAGE}' does not satisfy the limit 74%"
+ FUZZ_COVERAGE_LIMIT=75
+ if ! [ ${FUZZ_COVERAGE_LIMIT} -le ${COVERAGE} ]; then
+ echo "Fuzzing coverage '${COVERAGE}' does not satisfy the limit ${FUZZ_COVERAGE_LIMIT}%"
exit 1
fi
diff --git a/cicd/mypy_warnings.txt b/cicd/mypy_warnings.txt
index 503915c85..ce61fd05b 100644
--- a/cicd/mypy_warnings.txt
+++ b/cicd/mypy_warnings.txt
@@ -1 +1 @@
-Success: no issues found in 83 source files
+Success: no issues found in 84 source files
diff --git a/credsweeper/app.py b/credsweeper/app.py
index e0699044c..58fe45440 100644
--- a/credsweeper/app.py
+++ b/credsweeper/app.py
@@ -7,11 +7,12 @@
import signal
import sys
import zipfile
-from typing import List, Optional, Union
+from typing import List, Optional, Union, Tuple, Any
import pandas as pd
-from credsweeper.common.constants import KeyValidationOption, ThresholdPreset, RECURSIVE_SCAN_LIMITATION
+from credsweeper.common.constants import KeyValidationOption, ThresholdPreset, RECURSIVE_SCAN_LIMITATION, \
+ DEFAULT_ENCODING
from credsweeper.config import Config
from credsweeper.credentials import Candidate, CredentialManager
from credsweeper.file_handler.byte_content_provider import ByteContentProvider
@@ -21,6 +22,7 @@
from credsweeper.file_handler.file_path_extractor import FilePathExtractor
from credsweeper.file_handler.files_provider import FilesProvider
from credsweeper.file_handler.string_content_provider import StringContentProvider
+from credsweeper.file_handler.struct_content_provider import StructContentProvider
from credsweeper.file_handler.text_content_provider import TextContentProvider
from credsweeper.scanner import Scanner
from credsweeper.utils import Util
@@ -356,13 +358,20 @@ def data_scan(self, data_provider: DataContentProvider, depth: int, recursive_li
new_limit = recursive_limit_size - len(decoded_data_provider.data)
candidates.extend(self.data_scan(decoded_data_provider, depth, new_limit))
+ elif data_provider.represent_as_structure():
+ struct_data_provider = StructContentProvider(struct=data_provider.structure,
+ file_path=data_provider.file_path,
+ file_type=data_provider.file_type,
+ info=f"{data_provider.info}|STRUCT")
+ candidates.extend(self.struct_scan(struct_data_provider, depth, recursive_limit_size))
+
elif data_provider.represent_as_xml():
- struct_data_provider = StringContentProvider(lines=data_provider.lines,
+ string_data_provider = StringContentProvider(lines=data_provider.lines,
line_numbers=data_provider.line_numbers,
file_path=data_provider.file_path,
file_type=".xml",
info=f"{data_provider.info}|XML")
- candidates.extend(self.file_scan(struct_data_provider))
+ candidates.extend(self.file_scan(string_data_provider))
else:
# finally try scan the data via byte content provider
@@ -378,6 +387,83 @@ def data_scan(self, data_provider: DataContentProvider, depth: int, recursive_li
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
+    def struct_scan(self, struct_provider: StructContentProvider, depth: int, recursive_limit_size: int) -> \
+            List[Candidate]:
+        """Recursive function to scan structured data (nested dicts and lists)
+
+        Nested containers are scanned recursively. Leaves of type bytes and str are passed to data_scan.
+        A str leaf that belongs to a dict is additionally scanned as a `key = "value"` line.
+
+        Args:
+            struct_provider: StructContentProvider object which keeps the dict or list to be scanned
+            depth: maximal level of recursion
+            recursive_limit_size: maximal bytes of opened files to prevent recursive zip-bomb attack
+
+        Return:
+            list of found candidates. The list is empty when the depth is exhausted
+            or the stored object is neither dict nor list
+
+        """
+        candidates: List[Candidate] = []
+        logger.debug("Start struct_scan: depth=%d, limit=%d, path=%s, info=%s", depth, recursive_limit_size,
+                     struct_provider.file_path, struct_provider.info)
+
+        if 0 > depth:
+            # break recursion if maximal depth is reached
+            logger.debug("bottom reached %s recursive_limit_size:%d", struct_provider.file_path, recursive_limit_size)
+            return candidates
+
+        depth -= 1
+
+        struct = struct_provider.struct
+        items: List[Tuple[Union[int, str], Any]] = []
+        if isinstance(struct, dict):
+            items = list(struct.items())
+        elif isinstance(struct, list):
+            # elements of a list are identified with their index
+            items = list(enumerate(struct))
+        else:
+            logger.error("Not supported type:%s val:%s", str(type(struct)), str(struct))
+
+        for key, value in items:
+            if isinstance(value, (dict, list)):
+                # nested container - the size limit is passed unchanged
+                val_struct_provider = StructContentProvider(struct=value,
+                                                            file_path=struct_provider.file_path,
+                                                            file_type=struct_provider.file_type,
+                                                            info=f"{struct_provider.info}|STRUCT:{key}")
+                candidates.extend(self.struct_scan(val_struct_provider, depth, recursive_limit_size))
+
+            elif isinstance(value, bytes):
+                bytes_struct_provider = DataContentProvider(data=value,
+                                                            file_path=struct_provider.file_path,
+                                                            file_type=struct_provider.file_type,
+                                                            info=f"{struct_provider.info}|BYTES:{key}")
+                new_limit = recursive_limit_size - len(value)
+                candidates.extend(self.data_scan(bytes_struct_provider, depth, new_limit))
+
+            elif isinstance(value, str):
+                str_struct_provider = DataContentProvider(data=value.encode(encoding=DEFAULT_ENCODING),
+                                                          file_path=struct_provider.file_path,
+                                                          file_type=struct_provider.file_type,
+                                                          info=f"{struct_provider.info}|STRING:{key}")
+                new_limit = recursive_limit_size - len(str_struct_provider.data)
+                candidates.extend(self.data_scan(str_struct_provider, depth, new_limit))
+
+                # use key = "value" scan for common cases like in Python code
+                if isinstance(struct, dict):
+                    str_provider = StringContentProvider([f"{key} = \"{value}\""],
+                                                         file_path=struct_provider.file_path,
+                                                         file_type=".py",
+                                                         info=f"{struct_provider.info}|STRING:`{key} = \"{value}\"`")
+                    extra_candidates = self.file_scan(str_provider)
+                    if extra_candidates:
+                        # values which are already reported - collected once, before extra candidates are added
+                        found_values = {
+                            line_data.value
+                            for candidate in candidates
+                            for line_data in candidate.line_data_list
+                        }
+                        # keep an extra candidate only if it brings at least one value not seen yet
+                        candidates.extend([
+                            extra_candidate for extra_candidate in extra_candidates
+                            if any(line_data.value not in found_values for line_data in extra_candidate.line_data_list)
+                        ])
+
+            else:
+                logger.debug("Not supported type:%s value(%s)", str(type(value)), str(value))
+
+        return candidates
+
+ # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
+
def post_processing(self) -> None:
"""Machine learning validation for received credential candidates."""
if self._use_ml_validation():
diff --git a/credsweeper/file_handler/data_content_provider.py b/credsweeper/file_handler/data_content_provider.py
index efa21263a..5997d914d 100644
--- a/credsweeper/file_handler/data_content_provider.py
+++ b/credsweeper/file_handler/data_content_provider.py
@@ -1,8 +1,10 @@
import base64
+import json
import logging
import string
from typing import List, Optional
+import yaml
from credsweeper.common.constants import DEFAULT_ENCODING
from credsweeper.file_handler.analysis_target import AnalysisTarget
from credsweeper.file_handler.content_provider import ContentProvider
@@ -28,6 +30,7 @@ def __init__(
info: Optional[str] = None) -> None:
super().__init__(file_path=file_path, file_type=file_type, info=info)
self.data = data
+ self.structure = None
self.decoded: Optional[bytes] = None
self.lines: List[str] = []
self.line_numbers: List[int] = []
@@ -42,6 +45,43 @@ def data(self, data: bytes) -> None:
"""data setter"""
self.__data = data
+    def represent_as_structure(self) -> bool:
+        """Tries to convert data with many parsers (JSON first, then YAML). Stores result to internal structure
+
+        Return:
+            True if a non-empty dict or list was parsed from the data, otherwise False
+
+        """
+        # forget the result of a previous call, so True always describes the current data
+        self.structure = None
+        try:
+            text = self.data.decode(encoding='utf-8', errors='strict')
+        except Exception:
+            # best effort: data which cannot be decoded is simply not a structure
+            return False
+        # JSON
+        try:
+            if "{" in text:
+                self.structure = json.loads(text)
+                logger.debug("CONVERTED from json")
+            else:
+                logger.debug("Data do not contain { - weak JSON")
+        except Exception as exc:
+            logger.debug("Cannot parse as json:%s %s", exc, self.data)
+            self.structure = None
+        if isinstance(self.structure, (dict, list)) and 0 < len(self.structure):
+            return True
+        # # # YAML - almost always recognized
+        try:
+            if ":" in text:
+                # NOTE(review): PyYAML documents FullLoader as not safe for untrusted input.
+                # Consider yaml.SafeLoader if it parses the expected samples - confirm before changing
+                self.structure = yaml.load(text, Loader=yaml.FullLoader)
+                logger.debug("CONVERTED from yaml")
+            else:
+                logger.debug("Data do not contain colon mark - weak YAML")
+        except Exception as exc:
+            logger.debug("Cannot parse as yaml:%s %s", exc, self.data)
+            self.structure = None
+        if isinstance(self.structure, (dict, list)) and 0 < len(self.structure):
+            return True
+        # # # None of above
+        return False
+
def represent_as_xml(self) -> bool:
"""Tries to read data as xml
diff --git a/credsweeper/file_handler/struct_content_provider.py b/credsweeper/file_handler/struct_content_provider.py
new file mode 100644
index 000000000..ad3f93f05
--- /dev/null
+++ b/credsweeper/file_handler/struct_content_provider.py
@@ -0,0 +1,45 @@
+import logging
+from typing import List, Optional, Any
+
+from credsweeper.file_handler.analysis_target import AnalysisTarget
+from credsweeper.file_handler.content_provider import ContentProvider
+
+logger = logging.getLogger(__name__)
+
+
+class StructContentProvider(ContentProvider):
+ """Content provider which only keeps already parsed structured data
+
+ Parameters:
+ struct: object to be stored. The type is not restricted here.
+ file_path: optional string. Might be specified if you know true file name where lines were taken from.
+ file_type: optional string. Passed to ContentProvider as is.
+ info: optional string. Passed to ContentProvider as is.
+
+ """
+
+ def __init__(
+ self, #
+ struct: Any, #
+ file_path: Optional[str] = None, #
+ file_type: Optional[str] = None, #
+ info: Optional[str] = None) -> None:
+ super().__init__(file_path=file_path, file_type=file_type, info=info)
+ self.struct = struct
+
+ @property
+ def struct(self) -> Any:
+ """struct getter"""
+ return self.__struct
+
+ @struct.setter
+ def struct(self, struct: Any) -> None:
+ """struct setter"""
+ self.__struct = struct
+
+ def get_analysis_target(self) -> List[AnalysisTarget]:
+ """Not supported. The class provides only data storage.
+
+ Raise:
+ NotImplementedError - always
+
+ """
+ raise NotImplementedError()
diff --git a/fuzz/corpus/096ec2ed3a11a2c4422fe445f86fc03963adf350 b/fuzz/corpus/096ec2ed3a11a2c4422fe445f86fc03963adf350
deleted file mode 100644
index 786b2d26d..000000000
Binary files a/fuzz/corpus/096ec2ed3a11a2c4422fe445f86fc03963adf350 and /dev/null differ
diff --git a/fuzz/corpus/0d36f4956137486243c60fadb4248f5b2f913d4c b/fuzz/corpus/0d36f4956137486243c60fadb4248f5b2f913d4c
new file mode 100644
index 000000000..218b14a35
Binary files /dev/null and b/fuzz/corpus/0d36f4956137486243c60fadb4248f5b2f913d4c differ
diff --git a/fuzz/corpus/19521a7555bd197646dd224e3890064c2f4cf9bf b/fuzz/corpus/19521a7555bd197646dd224e3890064c2f4cf9bf
new file mode 100644
index 000000000..b3edf8438
--- /dev/null
+++ b/fuzz/corpus/19521a7555bd197646dd224e3890064c2f4cf9bf
@@ -0,0 +1 @@
+{"t:ire\"l#b.ls\":\"ap\n"}
\ No newline at end of file
diff --git a/fuzz/corpus/1df555ea6ab8f834626d3002c2d3eaf7746450ae b/fuzz/corpus/1df555ea6ab8f834626d3002c2d3eaf7746450ae
deleted file mode 100644
index 6ac557290..000000000
--- a/fuzz/corpus/1df555ea6ab8f834626d3002c2d3eaf7746450ae
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/fuzz/corpus/2023cf6be65f362b3892de9f6f1f8b7eec51d3ef b/fuzz/corpus/2023cf6be65f362b3892de9f6f1f8b7eec51d3ef
deleted file mode 100644
index 492ed74ae..000000000
Binary files a/fuzz/corpus/2023cf6be65f362b3892de9f6f1f8b7eec51d3ef and /dev/null differ
diff --git a/fuzz/corpus/248b61d9c284b005868a4c0a80854281f99a7bdb b/fuzz/corpus/248b61d9c284b005868a4c0a80854281f99a7bdb
new file mode 100644
index 000000000..1de8b369d
--- /dev/null
+++ b/fuzz/corpus/248b61d9c284b005868a4c0a80854281f99a7bdb
@@ -0,0 +1 @@
+PKå4
\ No newline at end of file
diff --git a/fuzz/corpus/25d7d0a438a8029f3d3a3d15163310a094573797 b/fuzz/corpus/25d7d0a438a8029f3d3a3d15163310a094573797
new file mode 100644
index 000000000..a56479bd8
Binary files /dev/null and b/fuzz/corpus/25d7d0a438a8029f3d3a3d15163310a094573797 differ
diff --git a/fuzz/corpus/276d98250ec4b5946949b9ac49c4c8649c92b422 b/fuzz/corpus/276d98250ec4b5946949b9ac49c4c8649c92b422
new file mode 100644
index 000000000..5458cdb72
Binary files /dev/null and b/fuzz/corpus/276d98250ec4b5946949b9ac49c4c8649c92b422 differ
diff --git a/fuzz/corpus/2bc2e50780867fcce228c61c332e93862a7b8716 b/fuzz/corpus/2bc2e50780867fcce228c61c332e93862a7b8716
deleted file mode 100644
index 976ae7a0a..000000000
Binary files a/fuzz/corpus/2bc2e50780867fcce228c61c332e93862a7b8716 and /dev/null differ
diff --git a/fuzz/corpus/2e0ed4d19120c0095af1391325e73d763e97268d b/fuzz/corpus/2e0ed4d19120c0095af1391325e73d763e97268d
new file mode 100644
index 000000000..216e4f3a7
--- /dev/null
+++ b/fuzz/corpus/2e0ed4d19120c0095af1391325e73d763e97268d
@@ -0,0 +1 @@
+
diff --git a/fuzz/corpus/4c3dbea65792c1641b2c985d52aa0d2631a2c9c7 b/fuzz/corpus/4c3dbea65792c1641b2c985d52aa0d2631a2c9c7
new file mode 100644
index 000000000..97500787d
Binary files /dev/null and b/fuzz/corpus/4c3dbea65792c1641b2c985d52aa0d2631a2c9c7 differ
diff --git a/fuzz/corpus/4f48be8a6966a19677115dfe5119ac7aa2ae667e b/fuzz/corpus/4f48be8a6966a19677115dfe5119ac7aa2ae667e
deleted file mode 100644
index c73b80a1d..000000000
Binary files a/fuzz/corpus/4f48be8a6966a19677115dfe5119ac7aa2ae667e and /dev/null differ
diff --git a/fuzz/corpus/50127775b46b8b432f85928ab5d21cc9a5b9916a b/fuzz/corpus/50127775b46b8b432f85928ab5d21cc9a5b9916a
deleted file mode 100644
index 42936cac1..000000000
--- a/fuzz/corpus/50127775b46b8b432f85928ab5d21cc9a5b9916a
+++ /dev/null
@@ -1,5 +0,0 @@
-
- cackl/City>
- peace_for_ukraine
-
-
\ No newline at end of file
diff --git a/fuzz/corpus/6b78a49f2f81c15b54c1bc271903f8c721e7667e b/fuzz/corpus/610152b2a025155f15cec304e817b6de274301ed
similarity index 65%
rename from fuzz/corpus/6b78a49f2f81c15b54c1bc271903f8c721e7667e
rename to fuzz/corpus/610152b2a025155f15cec304e817b6de274301ed
index 620e4ee97..dcf4ff16e 100644
Binary files a/fuzz/corpus/6b78a49f2f81c15b54c1bc271903f8c721e7667e and b/fuzz/corpus/610152b2a025155f15cec304e817b6de274301ed differ
diff --git a/fuzz/corpus/67ddf5d2d4ee8b1862715ef44aeecd9f426dce8c b/fuzz/corpus/67ddf5d2d4ee8b1862715ef44aeecd9f426dce8c
new file mode 100644
index 000000000..7838e2e60
Binary files /dev/null and b/fuzz/corpus/67ddf5d2d4ee8b1862715ef44aeecd9f426dce8c differ
diff --git a/fuzz/corpus/76dadf69d4d7273035b509c4fb172df1a1e3aa42 b/fuzz/corpus/76dadf69d4d7273035b509c4fb172df1a1e3aa42
new file mode 100644
index 000000000..a5bda3b1c
--- /dev/null
+++ b/fuzz/corpus/76dadf69d4d7273035b509c4fb172df1a1e3aa42
@@ -0,0 +1,2 @@
+="cackl/Cit䯉 ace_for_ukraine
+
\ No newline at end of file
diff --git a/fuzz/corpus/77a11b466eeb44409a77ffbd135e944a51a0c967 b/fuzz/corpus/77a11b466eeb44409a77ffbd135e944a51a0c967
new file mode 100644
index 000000000..ae54bdd80
Binary files /dev/null and b/fuzz/corpus/77a11b466eeb44409a77ffbd135e944a51a0c967 differ
diff --git a/fuzz/corpus/7b328d9887fccefef2d299580e2d8d4b9ba3d901 b/fuzz/corpus/7b328d9887fccefef2d299580e2d8d4b9ba3d901
deleted file mode 100644
index 8f14ba41b..000000000
Binary files a/fuzz/corpus/7b328d9887fccefef2d299580e2d8d4b9ba3d901 and /dev/null differ
diff --git a/fuzz/corpus/c7832116c548a936d02dba5ba9ea32ca2a61855b b/fuzz/corpus/7c6eecb1b52a22e187271bcfc4fab3f52c7bf8cd
similarity index 57%
rename from fuzz/corpus/c7832116c548a936d02dba5ba9ea32ca2a61855b
rename to fuzz/corpus/7c6eecb1b52a22e187271bcfc4fab3f52c7bf8cd
index f52f859e5..8e3704d47 100644
Binary files a/fuzz/corpus/c7832116c548a936d02dba5ba9ea32ca2a61855b and b/fuzz/corpus/7c6eecb1b52a22e187271bcfc4fab3f52c7bf8cd differ
diff --git a/fuzz/corpus/826bcabe227bcaf79bf5238c84be3fcf93827222 b/fuzz/corpus/826bcabe227bcaf79bf5238c84be3fcf93827222
deleted file mode 100644
index 5e97fe373..000000000
Binary files a/fuzz/corpus/826bcabe227bcaf79bf5238c84be3fcf93827222 and /dev/null differ
diff --git a/fuzz/corpus/898ebe807cbb9dac8f0223f66cd82b736d79f2bc b/fuzz/corpus/898ebe807cbb9dac8f0223f66cd82b736d79f2bc
new file mode 100644
index 000000000..de706fae1
Binary files /dev/null and b/fuzz/corpus/898ebe807cbb9dac8f0223f66cd82b736d79f2bc differ
diff --git a/fuzz/corpus/8e97bf37cf7ab00ab5b748b108e331a60394d956 b/fuzz/corpus/8e97bf37cf7ab00ab5b748b108e331a60394d956
new file mode 100644
index 000000000..4fa45b741
--- /dev/null
+++ b/fuzz/corpus/8e97bf37cf7ab00ab5b748b108e331a60394d956
@@ -0,0 +1 @@
+pwd : "cace!"
diff --git a/fuzz/corpus/8eae777ab67ed2d99960854e6cd0c5a737ad4208 b/fuzz/corpus/8eae777ab67ed2d99960854e6cd0c5a737ad4208
new file mode 100644
index 000000000..0fa560994
Binary files /dev/null and b/fuzz/corpus/8eae777ab67ed2d99960854e6cd0c5a737ad4208 differ
diff --git a/fuzz/corpus/90ad557dfb83213e837631d70ed888c7cd8bd2e0 b/fuzz/corpus/90ad557dfb83213e837631d70ed888c7cd8bd2e0
new file mode 100644
index 000000000..eeab189f6
--- /dev/null
+++ b/fuzz/corpus/90ad557dfb83213e837631d70ed888c7cd8bd2e0
@@ -0,0 +1 @@
+{"test.domain.io/actual-configuration": "{\"apiVersion\":\"v1\",\"data\":{\"smtp-password\":\"\",\"wordpresdpress-wordpress\",\"chart\":\"wordp:\"wordpress\"},\"name\":\"wordpress-wopaque\"}\n"}
\ No newline at end of file
diff --git a/fuzz/corpus/05d0262facb829f463770b7b5bebfa0ae2b91cd1 b/fuzz/corpus/91872b929eee06159ae9f2704dd7ce10c46613d0
similarity index 56%
rename from fuzz/corpus/05d0262facb829f463770b7b5bebfa0ae2b91cd1
rename to fuzz/corpus/91872b929eee06159ae9f2704dd7ce10c46613d0
index c638cbcc4..b241bef4f 100644
Binary files a/fuzz/corpus/05d0262facb829f463770b7b5bebfa0ae2b91cd1 and b/fuzz/corpus/91872b929eee06159ae9f2704dd7ce10c46613d0 differ
diff --git a/fuzz/corpus/9252fd7a390bc4e1eaf6afd6eab7abedda07953e b/fuzz/corpus/9252fd7a390bc4e1eaf6afd6eab7abedda07953e
new file mode 100644
index 000000000..0e25d6a84
Binary files /dev/null and b/fuzz/corpus/9252fd7a390bc4e1eaf6afd6eab7abedda07953e differ
diff --git a/fuzz/corpus/b272cbeed5b4fd5774ab0c108e3d05bc67ae734f b/fuzz/corpus/97fbb25a4fc896e423c47f54e3b1b8b4c79560e6
similarity index 91%
rename from fuzz/corpus/b272cbeed5b4fd5774ab0c108e3d05bc67ae734f
rename to fuzz/corpus/97fbb25a4fc896e423c47f54e3b1b8b4c79560e6
index cca02ac52..3bfc49c25 100644
Binary files a/fuzz/corpus/b272cbeed5b4fd5774ab0c108e3d05bc67ae734f and b/fuzz/corpus/97fbb25a4fc896e423c47f54e3b1b8b4c79560e6 differ
diff --git a/fuzz/corpus/9e17092d3faa95169b6eb6da9ac36d4650175281 b/fuzz/corpus/9e17092d3faa95169b6eb6da9ac36d4650175281
new file mode 100644
index 000000000..cab2ed3e0
--- /dev/null
+++ b/fuzz/corpus/9e17092d3faa95169b6eb6da9ac36d4650175281
@@ -0,0 +1,4 @@
+IREOGIogicr_gireAbody:
+ WM824c3
+sk_liv,e_gireogicracklea)pGI: !!binary |
+ H4sICv2xH9UVPREO
\ No newline at end of file
diff --git a/fuzz/corpus/9e5b0e206c5e57b929e482e6ae5abf8b036367ef b/fuzz/corpus/9e5b0e206c5e57b929e482e6ae5abf8b036367ef
new file mode 100644
index 000000000..2aacd9ac5
--- /dev/null
+++ b/fuzz/corpus/9e5b0e206c5e57b929e482e6ae5abf8b036367ef
@@ -0,0 +1,2 @@
+body:
+- stringna6@^ame\":---ordpress-wordp485
diff --git a/fuzz/corpus/6b8737b1e00c72a699151913c883a2a5df41dd76 b/fuzz/corpus/a3bbfcce88dcd5e14053bcb5b772e0df581c4baf
similarity index 70%
rename from fuzz/corpus/6b8737b1e00c72a699151913c883a2a5df41dd76
rename to fuzz/corpus/a3bbfcce88dcd5e14053bcb5b772e0df581c4baf
index 41e69ddf5..ffe184d50 100644
Binary files a/fuzz/corpus/6b8737b1e00c72a699151913c883a2a5df41dd76 and b/fuzz/corpus/a3bbfcce88dcd5e14053bcb5b772e0df581c4baf differ
diff --git a/fuzz/corpus/b9c8aa807a927cee3b6f4ec1475679385db74f9a b/fuzz/corpus/b4f57c1be7888ff72e68d0858ba92f9c68066b25
similarity index 84%
rename from fuzz/corpus/b9c8aa807a927cee3b6f4ec1475679385db74f9a
rename to fuzz/corpus/b4f57c1be7888ff72e68d0858ba92f9c68066b25
index 0960e866c..9c3fe1146 100644
Binary files a/fuzz/corpus/b9c8aa807a927cee3b6f4ec1475679385db74f9a and b/fuzz/corpus/b4f57c1be7888ff72e68d0858ba92f9c68066b25 differ
diff --git a/fuzz/corpus/bea99aa4703268b72289e7b88738c31160417b14 b/fuzz/corpus/bea99aa4703268b72289e7b88738c31160417b14
new file mode 100644
index 000000000..7eddd1eb4
Binary files /dev/null and b/fuzz/corpus/bea99aa4703268b72289e7b88738c31160417b14 differ
diff --git a/fuzz/corpus/c35f59c3f064a5c96da46a4f88887d41a85dd669 b/fuzz/corpus/c35f59c3f064a5c96da46a4f88887d41a85dd669
new file mode 100644
index 000000000..a0216d14b
Binary files /dev/null and b/fuzz/corpus/c35f59c3f064a5c96da46a4f88887d41a85dd669 differ
diff --git a/fuzz/corpus/c3dcde8d12dcc6579f2c38bbd54e28a0200babce b/fuzz/corpus/c3dcde8d12dcc6579f2c38bbd54e28a0200babce
new file mode 100644
index 000000000..3375270b5
Binary files /dev/null and b/fuzz/corpus/c3dcde8d12dcc6579f2c38bbd54e28a0200babce differ
diff --git a/fuzz/corpus/e1182b80ce7fddf42850af97edf022a5fd61e421 b/fuzz/corpus/c7b7936a53fa182a83740574917b635b8647a4fa
similarity index 78%
rename from fuzz/corpus/e1182b80ce7fddf42850af97edf022a5fd61e421
rename to fuzz/corpus/c7b7936a53fa182a83740574917b635b8647a4fa
index ac01e03f0..8b3cc05d4 100644
Binary files a/fuzz/corpus/e1182b80ce7fddf42850af97edf022a5fd61e421 and b/fuzz/corpus/c7b7936a53fa182a83740574917b635b8647a4fa differ
diff --git a/fuzz/corpus/ccf24291cf3b79fdb76f5167ce906d88fc4f8f6d b/fuzz/corpus/ccf24291cf3b79fdb76f5167ce906d88fc4f8f6d
deleted file mode 100644
index 62a03cb3a..000000000
Binary files a/fuzz/corpus/ccf24291cf3b79fdb76f5167ce906d88fc4f8f6d and /dev/null differ
diff --git a/fuzz/corpus/cd4f0538db030c9794809b18f32321b125d67b28 b/fuzz/corpus/cd4f0538db030c9794809b18f32321b125d67b28
deleted file mode 100644
index 4a8226f6a..000000000
--- a/fuzz/corpus/cd4f0538db030c9794809b18f32321b125d67b28
+++ /dev/null
@@ -1 +0,0 @@
-PK˙ö
\ No newline at end of file
diff --git a/fuzz/corpus/c5f4facd0a5b1a657433ca68154e33a7e48bb663 b/fuzz/corpus/cd625ab558a1b28ad90d1083d64b2096a52f401d
similarity index 92%
rename from fuzz/corpus/c5f4facd0a5b1a657433ca68154e33a7e48bb663
rename to fuzz/corpus/cd625ab558a1b28ad90d1083d64b2096a52f401d
index a98b5606e..5dd16dd20 100644
Binary files a/fuzz/corpus/c5f4facd0a5b1a657433ca68154e33a7e48bb663 and b/fuzz/corpus/cd625ab558a1b28ad90d1083d64b2096a52f401d differ
diff --git a/fuzz/corpus/eeaaa652952fef8117a481d1d7d2438fc815f646 b/fuzz/corpus/d2ec3276043232db9afb98075728e963a5b8e369
similarity index 65%
rename from fuzz/corpus/eeaaa652952fef8117a481d1d7d2438fc815f646
rename to fuzz/corpus/d2ec3276043232db9afb98075728e963a5b8e369
index 043d2955b..9f592e64c 100644
Binary files a/fuzz/corpus/eeaaa652952fef8117a481d1d7d2438fc815f646 and b/fuzz/corpus/d2ec3276043232db9afb98075728e963a5b8e369 differ
diff --git a/fuzz/corpus/1188838daba5b306b8c7bf4142b0e33e19567be1 b/fuzz/corpus/dfe1dd788a4b4cad55dd9803c46d0aa272d665d7
similarity index 69%
rename from fuzz/corpus/1188838daba5b306b8c7bf4142b0e33e19567be1
rename to fuzz/corpus/dfe1dd788a4b4cad55dd9803c46d0aa272d665d7
index 0b5d5d90f..049a9bcb0 100644
Binary files a/fuzz/corpus/1188838daba5b306b8c7bf4142b0e33e19567be1 and b/fuzz/corpus/dfe1dd788a4b4cad55dd9803c46d0aa272d665d7 differ
diff --git a/fuzz/corpus/e10c04a62961bd591639c79e484993930e205132 b/fuzz/corpus/e10c04a62961bd591639c79e484993930e205132
deleted file mode 100644
index 8b4a4d503..000000000
Binary files a/fuzz/corpus/e10c04a62961bd591639c79e484993930e205132 and /dev/null differ
diff --git a/fuzz/corpus/e245e2f884af2d01bea253673200a29f51363fa2 b/fuzz/corpus/e245e2f884af2d01bea253673200a29f51363fa2
deleted file mode 100644
index cf8ca01e0..000000000
Binary files a/fuzz/corpus/e245e2f884af2d01bea253673200a29f51363fa2 and /dev/null differ
diff --git a/fuzz/corpus/e5a8d3e8ac6b758fde8c6315baf28e825d029ea8 b/fuzz/corpus/e5a8d3e8ac6b758fde8c6315baf28e825d029ea8
deleted file mode 100644
index 3cfa55284..000000000
Binary files a/fuzz/corpus/e5a8d3e8ac6b758fde8c6315baf28e825d029ea8 and /dev/null differ
diff --git a/fuzz/corpus/e7f52f8df718abe22a791204ce57b71c6f2638e5 b/fuzz/corpus/e7f52f8df718abe22a791204ce57b71c6f2638e5
deleted file mode 100644
index 4201ea197..000000000
Binary files a/fuzz/corpus/e7f52f8df718abe22a791204ce57b71c6f2638e5 and /dev/null differ
diff --git a/fuzz/corpus/e82b7fbdf33f60dbc111d5b05b1f5f9b0e93c1c0 b/fuzz/corpus/e82b7fbdf33f60dbc111d5b05b1f5f9b0e93c1c0
deleted file mode 100644
index ef458cdf8..000000000
Binary files a/fuzz/corpus/e82b7fbdf33f60dbc111d5b05b1f5f9b0e93c1c0 and /dev/null differ
diff --git a/fuzz/corpus/f2cb21a8411c6b0dfb2ce1fce303b61126dea477 b/fuzz/corpus/f2cb21a8411c6b0dfb2ce1fce303b61126dea477
new file mode 100644
index 000000000..63baff04f
Binary files /dev/null and b/fuzz/corpus/f2cb21a8411c6b0dfb2ce1fce303b61126dea477 differ
diff --git a/fuzz/corpus/eaacd823db36dbd3ca4241c26eaf311de7528af4 b/fuzz/corpus/f7cf03173db4f86b52c8d5bd6369387535d4bb60
similarity index 93%
rename from fuzz/corpus/eaacd823db36dbd3ca4241c26eaf311de7528af4
rename to fuzz/corpus/f7cf03173db4f86b52c8d5bd6369387535d4bb60
index 2782d6b05..165fe32d9 100644
Binary files a/fuzz/corpus/eaacd823db36dbd3ca4241c26eaf311de7528af4 and b/fuzz/corpus/f7cf03173db4f86b52c8d5bd6369387535d4bb60 differ
diff --git a/fuzz/corpus/fd8463c8426b79d944d2ff54554982c98617c3c8 b/fuzz/corpus/fd8463c8426b79d944d2ff54554982c98617c3c8
deleted file mode 100644
index e06a86e49..000000000
Binary files a/fuzz/corpus/fd8463c8426b79d944d2ff54554982c98617c3c8 and /dev/null differ
diff --git a/tests/__init__.py b/tests/__init__.py
index 038064695..dbc8c0c01 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -1,7 +1,7 @@
from pathlib import Path
# total number of files in test samples, included .gitignore
-SAMPLES_FILES_COUNT: int = 52
+SAMPLES_FILES_COUNT: int = 54
# credentials count after scan
SAMPLES_CRED_COUNT: int = 51
@@ -12,8 +12,8 @@
# archived credentials that not found without --depth
SAMPLES_IN_DEEP_1 = 6
-SAMPLES_IN_DEEP_2 = 7
-SAMPLES_IN_DEEP_3 = 8
+SAMPLES_IN_DEEP_2 = 8
+SAMPLES_IN_DEEP_3 = 9
SAMPLES_FILTERED_BY_POST_COUNT = 1
diff --git a/tests/samples/binary.yaml b/tests/samples/binary.yaml
new file mode 100644
index 000000000..4b22e0351
--- /dev/null
+++ b/tests/samples/binary.yaml
@@ -0,0 +1,17 @@
+body:
+ string: !!binary |
+ H4sICIur8mIAA3BlbV9rZXkAbdM3kqNAAEDRnFNMTk3hEQo2oAG1sKIBASIbnLDC29OvifenP37f
+ 338CClStL8cVv2xH9UVP+dKV19/xjZmqKoWiCkRRB28kDVBf6gclZ5eziCoev5PDXHm1v2+e1K96
+ xmZSRN7sYSzJJKa1KA81Qn6/3Bu/PntsazUobD6K9CqDSSU/DO7ZTMsy3T6JdAYAXRzderrZ1CLH
+ dGHtxxTBVPhUR/xzDnBuIa/N3ZoqfkYcRk2Ua48SqLM0tnLS60kYm5p8OGx29Ug2ijZVFpEIxA6K
+ t7KqO47HB3hYgkk6/vHjiOGJ47s33IFRYMy8s/7bnEeEB8pbqorO2zqa0U0gLhp0Xx+n7UBkMo2Z
+ e3q2qrVYprayry8pbbn0NTCh1xl1baycQWO9qvqPmylDXFfcj3jzLw2d4MnndMyAxGM+F1qHkrQz
+ WnbfMHhE0vlqlBxHtLH72hUJITkTNz4vVRRicKmBymZmFM3sZ0oOuqNo/Xh9spHx+y5TcKunBzxi
+ +lU0U+LHOhERXIMfFbecPNmf2tjm9qbClmfKBhNrRdwlg7ujmI7RyIKjGxMzaIlCsWkzOp2Hf2GO
+ G0sV9uRI15bn9bHIHte77WlLxxDXievxaYD7o7lhBmnJM+vW3VS94aaJt7o5HGqJiM3WqoqnqCQF
+ yTk3djp0+zQh+CkEDpxSRSxMMIBeoddqPY71ULkaC/mzvrhkU+nzTFefg8ZJ0p9ANiINiBqUKPPN
+ PY6046xN5kHpPEZ7hx0d9168EHkxekIW32vvpLO+wZ5XHyEXnS+qi0w/FEqq5YKnZ9gnfRiaCpCf
+ hkNhSgjArlzczq1+8mfhX0oqUWAfC0LBWeAVnUEII4y5TBqHpgCftKOz0ozZ78KahsX5vGSeQMt8
+ SEzJdnWiEyf4UdLYnvyF/cOjWPJ/Uf0Gdno9KXQDAAA=
+ secret: |
+ we5345d0f3da48544z1t1e275y05i161x995q485
diff --git a/tests/samples/struct.json b/tests/samples/struct.json
new file mode 100644
index 000000000..873c92034
--- /dev/null
+++ b/tests/samples/struct.json
@@ -0,0 +1 @@
+{"test.domain.io/actual-configuration": "{\"apiVersion\":\"v1\",\"data\":{\"smtp-password\":\"\",\"wordpress-password\":\"Axt4T0eO0lm9sS==\"},\"kind\":\"Secret\",\"metadata\":{\"annotations\":{},\"labels\":{\"app\":\"wordpress-wordpress\",\"chart\":\"wordpress-5.0.1\",\"heritage\":\"Tiller\",\"release\":\"wordpress\"},\"name\":\"wordpress-wordpress\",\"namespace\":\"argocd\"},\"type\":\"Opaque\"}\n"}
\ No newline at end of file
diff --git a/tests/test_main.py b/tests/test_main.py
index 67a074b0d..27bc6bfc6 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -362,8 +362,57 @@ def test_zip_p(self) -> None:
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
+    def test_json_p(self) -> None:
+        # positive case: the password inside the JSON sample is reported when depth is given
+        files_provider: FilesProvider = TextProvider([SAMPLES_DIR / "struct.json"])
+        # depth is passed to the constructor
+        sweeper = CredSweeper(depth=5)
+        sweeper.run(content_provider=files_provider)
+        credentials = sweeper.credential_manager.get_credentials()
+        assert 1 == len(credentials)
+        rule_names = {credential.rule_name for credential in credentials}
+        assert rule_names == {"Password"}
+        values = {credential.line_data_list[0].value for credential in credentials}
+        assert values == {"Axt4T0eO0lm9sS=="}
+
+ # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
+
+    def test_json_n(self) -> None:
+        # negative case: the JSON sample gives no credentials without depth
+        files_provider: FilesProvider = TextProvider([SAMPLES_DIR / "struct.json"])
+        # the constructor is called without depth
+        sweeper = CredSweeper()
+        sweeper.run(content_provider=files_provider)
+        credentials = sweeper.credential_manager.get_credentials()
+        assert 0 == len(credentials)
+
+ # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
+
+    def test_yaml_p(self) -> None:
+        # positive case: both credentials of the YAML sample are reported when depth is given
+        files_provider: FilesProvider = TextProvider([SAMPLES_DIR / "binary.yaml"])
+        # depth is passed to the constructor
+        sweeper = CredSweeper(depth=5)
+        sweeper.run(content_provider=files_provider)
+        credentials = sweeper.credential_manager.get_credentials()
+        assert 2 == len(credentials)
+        rule_names = {credential.rule_name for credential in credentials}
+        assert rule_names == {"Secret", "PEM Certificate"}
+        values = {credential.line_data_list[0].value for credential in credentials}
+        assert values == {"we5345d0f3da48544z1t1e275y05i161x995q485\n", "-----BEGIN RSA PRIVATE"}
+
+ # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
+
+    def test_yaml_n(self) -> None:
+        # negative case: the YAML sample gives no credentials without depth
+        files_provider: FilesProvider = TextProvider([SAMPLES_DIR / "binary.yaml"])
+        # the constructor is called without depth
+        sweeper = CredSweeper()
+        sweeper.run(content_provider=files_provider)
+        credentials = sweeper.credential_manager.get_credentials()
+        assert 0 == len(credentials)
+
+ # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
+
def test_encoded_p(self) -> None:
- # test for finding credentials in docx
+ # test for finding credentials in ENCODED data
content_provider: FilesProvider = TextProvider([SAMPLES_DIR / "encoded"])
# depth must be set in constructor to remove .zip as ignored extension
cred_sweeper = CredSweeper(depth=5)