diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index ce7f637bf..dd7bdef17 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -58,7 +58,7 @@ jobs: - name: Check ml_model.onnx integrity if: ${{ always() && steps.code_checkout.conclusion == 'success' }} run: | - md5sum --binary credsweeper/ml_model/ml_model.onnx | grep 57ec152f6aa740456c742ecd5e7d9ef5 + md5sum --binary credsweeper/ml_model/ml_model.onnx | grep 8f277b2f4a67a9911a9a860f1b5c0489 # # # Python setup diff --git a/cicd/benchmark.txt b/cicd/benchmark.txt index fa8e460af..7c4da1499 100644 --- a/cicd/benchmark.txt +++ b/cicd/benchmark.txt @@ -1,4 +1,4 @@ -DATA: 16998279 interested lines. MARKUP: 63222 items +DATA: 16998279 interested lines. MARKUP: 63226 items FileType FileNumber ValidLines Positives Negatives Templates --------------- ------------ ------------ ----------- ----------- ----------- 194 28318 64 430 87 @@ -83,8 +83,8 @@ FileType FileNumber ValidLines Positives Negatives Templat .java 621 134132 311 1348 169 .jenkinsfile 1 58 1 7 .jinja2 1 64 2 -.js 658 536388 494 2628 338 -.json 860 13670750 817 10952 139 +.js 658 536388 494 2630 338 +.json 860 13670750 817 10953 139 .jsp 13 3202 1 42 .jsx 7 857 19 .jwt 6 8 7 @@ -123,7 +123,7 @@ FileType FileNumber ValidLines Positives Negatives Templat .mqh 1 1023 2 .msg 1 26644 1 1 .mysql 1 36 2 -.ndjson 2 5006 49 324 +.ndjson 2 5006 49 325 .nix 4 211 12 .nolint 1 2 1 .odd 1 1281 57 @@ -223,25 +223,25 @@ FileType FileNumber ValidLines Positives Negatives Templat .yml 418 36162 437 920 374 .zsh 6 872 12 .zsh-theme 1 97 1 -TOTAL: 10335 16998279 8097 60877 5159 -credsweeper result_cnt : 7519, lost_cnt : 0, true_cnt : 6817, false_cnt : 702 +TOTAL: 10335 16998279 8097 60881 5159 +credsweeper result_cnt : 7394, lost_cnt : 0, true_cnt : 6795, false_cnt : 599 Rules Positives Negatives Templates Reported TP FP TN FN FPR FNR ACC PRC RCL F1 ------------------------------ ----------- ----------- ----------- ---------- ---- ---- ----- ---- -------- -------- -------- -------- -------- -------- -API 117 3104 184 112 103 9 3279 14 0.002737 0.119658 0.993245 0.919643 0.880342 0.899563 +API 117 3104 184 105 101 4 3284 16 0.001217 0.136752 0.994126 0.961905 0.863248 0.909910 AWS Client ID 163 13 0 154 154 0 13 9 0.000000 0.055215 0.948864 1.000000 0.944785 0.971609 AWS Multi 71 12 0 83 71 11 1 0 0.916667 0.000000 0.867470 0.865854 1.000000 0.928105 AWS S3 Bucket 61 25 0 87 61 24 1 0 0.960000 0.000000 0.720930 0.717647 1.000000 0.835616 Atlassian Old PAT token 27 211 3 10 3 7 207 24 0.032710 0.888889 0.871369 0.300000 0.111111 0.162162 -Auth 318 2750 87 308 269 39 2798 49 0.013747 0.154088 0.972108 0.873377 0.845912 0.859425 +Auth 318 2750 87 293 267 26 2811 51 0.009165 0.160377 0.975594 0.911263 0.839623 0.873977 Azure Access Token 19 0 0 0 0 0 19 1.000000 0.000000 0.000000 BASE64 Private Key 7 2 0 7 7 0 2 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 BASE64 encoded PEM Private Key 7 0 0 5 5 0 0 2 0.285714 0.714286 1.000000 0.714286 0.833333 Bitbucket Client ID 147 1833 3 41 27 14 1822 120 0.007625 0.816327 0.932426 0.658537 0.183673 0.287234 Bitbucket Client Secret 239 535 0 44 33 11 524 206 0.020561 0.861925 0.719638 0.750000 0.138075 0.233216 -Certificate 22 456 1 20 15 5 452 7 0.010941 0.318182 0.974948 0.750000 0.681818 0.714286 -Credential 31 130 74 29 29 0 204 2 0.000000 0.064516 0.991489 1.000000 0.935484 0.966667 +Certificate 22 456 1 17 16 1 456 6 0.002188 0.272727 0.985386 0.941176 0.727273 0.820513 +Credential 31 130 74 31 28 3 201 3 0.014706 0.096774 0.974468 0.903226 0.903226 0.903226 Docker Swarm Token 2 0 0 2 2 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000 -Dropbox App secret 62 112 0 45 37 7 105 25 0.062500 0.403226 0.816092 0.840909 0.596774 0.698113 +Dropbox App secret 62 114 0 45 37 7 107 25 0.061404 0.403226 0.818182 0.840909 0.596774 0.698113 Facebook Access Token 0 1 0 0 0 1 0 0.000000 1.000000 Firebase Domain 6 1 0 7 6 1 0 0 1.000000 0.000000 0.857143 0.857143 1.000000 0.923077 Github Old Token 1 0 0 1 1 0 0 0 0.000000 1.000000 1.000000 1.000000 1.000000 @@ -253,18 +253,18 @@ Google OAuth Access Token 3 0 0 Grafana Provisioned API Key 22 1 0 1 1 0 1 21 0.000000 0.954545 0.086957 1.000000 0.045455 0.086957 IPv4 691 365 0 1004 691 302 63 0 0.827397 0.000000 0.714015 0.695871 1.000000 0.820665 IPv6 33 135 0 33 33 0 135 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 -JSON Web Token 284 10 2 280 272 8 4 12 0.666667 0.042254 0.932432 0.971429 0.957746 0.964539 +JSON Web Token 284 11 2 280 272 8 5 12 0.615385 0.042254 0.932660 0.971429 0.957746 0.964539 Jira / Confluence PAT token 0 4 0 0 0 4 0 0.000000 1.000000 Jira 2FA 7 6 0 3 3 0 6 4 0.000000 0.571429 0.692308 1.000000 0.428571 0.600000 -Key 427 7871 462 452 389 61 8272 38 0.007320 0.088993 0.988699 0.864444 0.911007 0.887115 -Nonce 43 89 0 60 32 28 61 11 0.314607 0.255814 0.704545 0.533333 0.744186 0.621359 +Key 427 7871 462 415 391 23 8310 36 0.002760 0.084309 0.993265 0.944444 0.915691 0.929845 +Nonce 43 89 0 42 36 6 83 7 0.067416 0.162791 0.901515 0.857143 0.837209 0.847059 PEM Private Key 1019 1483 0 1023 1019 4 1479 0 0.002697 0.000000 0.998401 0.996090 1.000000 0.998041 -Password 1902 7425 2675 1647 1554 93 10007 348 0.009208 0.182965 0.963256 0.943534 0.817035 0.875740 -Salt 42 72 2 42 38 4 70 4 0.054054 0.095238 0.931034 0.904762 0.904762 0.904762 -Secret 1353 29656 873 1264 1235 29 30500 118 0.000950 0.087214 0.995389 0.977057 0.912786 0.943829 +Password 1902 7425 2675 1636 1543 93 10007 359 0.009208 0.188749 0.962340 0.943154 0.811251 0.872244 +Salt 42 72 2 38 38 0 74 4 0.000000 0.095238 0.965517 1.000000 0.904762 0.950000 +Secret 1353 29656 873 1239 1229 10 30519 124 0.000328 0.091648 0.995797 0.991929 0.908352 0.948302 Seed 1 6 0 0 0 6 1 0.000000 1.000000 0.857143 0.000000 Slack Token 4 1 0 4 4 0 1 0 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 -Token 553 3975 448 517 489 28 4395 64 0.006331 0.115732 0.981511 0.945841 0.884268 0.914019 +Token 553 3976 448 499 476 23 4401 77 0.005199 0.139241 0.979908 0.953908 0.860759 0.904943 Twilio API Key 0 5 2 0 0 7 0 0.000000 1.000000 -URL Credentials 167 117 254 143 143 0 371 24 0.000000 0.143713 0.955390 1.000000 0.856287 0.922581 - 8097 60877 5159 7538 6817 702 60175 1280 0.011531 0.158083 0.971265 0.906637 0.841917 0.873079 +URL Credentials 167 117 254 153 149 4 367 18 0.010782 0.107784 0.959108 0.973856 0.892216 0.931250 + 8097 60881 5159 7412 6795 599 60282 1302 0.009839 0.160800 0.972440 0.918988 0.839200 0.877284 diff --git a/credsweeper/app.py b/credsweeper/app.py index 0e09ed2e4..890887cb6 100644 --- a/credsweeper/app.py +++ b/credsweeper/app.py @@ -4,7 +4,7 @@ import signal import sys from pathlib import Path -from typing import Any, List, Optional, Union, Dict, Sequence +from typing import Any, List, Optional, Union, Dict, Sequence, Tuple import pandas as pd @@ -13,7 +13,7 @@ from credsweeper.common.constants import KeyValidationOption, Severity, ThresholdPreset from credsweeper.config import Config -from credsweeper.credentials import Candidate, CredentialManager +from credsweeper.credentials import Candidate, CredentialManager, CandidateKey from credsweeper.deep_scanner.deep_scanner import DeepScanner from credsweeper.file_handler.diff_content_provider import DiffContentProvider from credsweeper.file_handler.file_path_extractor import FilePathExtractor @@ -336,32 +336,33 @@ def post_processing(self) -> None: """Machine learning validation for received credential candidates.""" if self._use_ml_validation(): logger.info(f"Grouping {len(self.credential_manager.candidates)} candidates") - new_cred_list = [] + new_cred_list: List[Candidate] = [] cred_groups = self.credential_manager.group_credentials() - ml_cred_groups = [] + ml_cred_groups: List[Tuple[CandidateKey, List[Candidate]]] = [] for group_key, group_candidates in cred_groups.items(): - # Analyze with ML if all candidates in group require ML + # Analyze with ML if any candidate in group require ML for candidate in group_candidates: - if not candidate.use_ml: + if candidate.use_ml: + ml_cred_groups.append((group_key, group_candidates)) break else: - ml_cred_groups.append((group_key.value, group_candidates)) - continue - # If at least one of credentials in the group do not require ML - automatically report to user - for candidate in group_candidates: - candidate.ml_validation = KeyValidationOption.NOT_AVAILABLE - new_cred_list += group_candidates + # all candidates do not require ML + new_cred_list.extend(group_candidates) # prevent extra ml_validator creation if ml_cred_groups is empty if ml_cred_groups: logger.info(f"Run ML Validation for {len(ml_cred_groups)} groups") is_cred, probability = self.ml_validator.validate_groups(ml_cred_groups, self.ml_batch_size) for i, (_, group_candidates) in enumerate(ml_cred_groups): - if is_cred[i]: - for candidate in group_candidates: - candidate.ml_validation = KeyValidationOption.VALIDATED_KEY - candidate.ml_probability = probability[i] - new_cred_list += group_candidates + for candidate in group_candidates: + if candidate.use_ml: + if is_cred[i]: + candidate.ml_validation = KeyValidationOption.VALIDATED_KEY + candidate.ml_probability = probability[i] + new_cred_list.append(candidate) + else: + candidate.ml_validation = KeyValidationOption.NOT_AVAILABLE + new_cred_list.append(candidate) else: logger.info("Skipping ML validation due not applicable") diff --git a/credsweeper/credentials/candidate_key.py b/credsweeper/credentials/candidate_key.py index 509ca7ec5..4dd65b151 100644 --- a/credsweeper/credentials/candidate_key.py +++ b/credsweeper/credentials/candidate_key.py @@ -12,8 +12,10 @@ class CandidateKey: def __init__(self, line_data: LineData): self.path: str = line_data.path self.line_num: int = line_data.line_num - self.value: str = line_data.value - self.key: Tuple[str, int, str] = (self.path, self.line_num, self.value) + self.value_start: int = line_data.value_start + self.value_end: int = line_data.value_end + self.key: Tuple[str, int, int, int] = (self.path, self.line_num, self.value_start, self.value_end) + self.__line = line_data.line def __hash__(self): return hash(self.key) @@ -23,3 +25,6 @@ def __eq__(self, other): def __ne__(self, other): return not (self == other) + + def __repr__(self) -> str: + return f"{self.key}:{self.__line}" diff --git a/credsweeper/ml_model/features.py b/credsweeper/ml_model/features.py index 7f7183a39..409dd4541 100644 --- a/credsweeper/ml_model/features.py +++ b/credsweeper/ml_model/features.py @@ -146,7 +146,7 @@ class PossibleComment(Feature): r"""Feature is true if candidate line starts with #,\*,/\*? (Possible comment).""" def extract(self, candidate: Candidate) -> bool: - for i in ["#", "*", "/*"]: + for i in ["#", "*", "/*", "//"]: if candidate.line_data_list[0].line.startswith(i): return True return False @@ -260,13 +260,13 @@ class FileExtension(Feature): def __init__(self, extensions: List[str]) -> None: super().__init__() - self.extensions = extensions + self.label_binarizer = LabelBinarizer() + self.label_binarizer.fit(extensions) def __call__(self, candidates: List[Candidate]) -> csr_matrix: - enc = LabelBinarizer() - enc.fit(self.extensions) extensions = [candidate.line_data_list[0].file_type for candidate in candidates] - return enc.transform(extensions) + result = self.label_binarizer.transform(extensions) + return result def extract(self, candidate: Candidate) -> Any: raise NotImplementedError @@ -282,13 +282,13 @@ class RuleName(Feature): def __init__(self, rule_names: List[str]) -> None: super().__init__() - self.rule_names = rule_names + self.label_binarizer = LabelBinarizer() + self.label_binarizer.fit(rule_names) def __call__(self, candidates: List[Candidate]) -> csr_matrix: - enc = LabelBinarizer() - enc.fit(self.rule_names) rule_names = [candidate.rule_name for candidate in candidates] - return enc.transform(rule_names) + result = self.label_binarizer.transform(rule_names) + return result def extract(self, candidate: Candidate) -> Any: raise NotImplementedError diff --git a/credsweeper/ml_model/ml_model.onnx b/credsweeper/ml_model/ml_model.onnx index e4a5b1e68..885d068bd 100644 Binary files a/credsweeper/ml_model/ml_model.onnx and b/credsweeper/ml_model/ml_model.onnx differ diff --git a/credsweeper/ml_model/ml_validator.py b/credsweeper/ml_model/ml_validator.py index e189ea443..857e403a1 100644 --- a/credsweeper/ml_model/ml_validator.py +++ b/credsweeper/ml_model/ml_validator.py @@ -1,13 +1,13 @@ import logging import os import string -from typing import List, Tuple, Union, Any +from typing import List, Tuple, Union import numpy as np import onnxruntime as ort from credsweeper.common.constants import ThresholdPreset -from credsweeper.credentials import Candidate +from credsweeper.credentials import Candidate, CandidateKey from credsweeper.ml_model import features from credsweeper.utils import Util @@ -16,6 +16,11 @@ class MlValidator: """ML validation class""" + HALF_LEN = 80 # limit of variable or value size + MAX_LEN = 2 * HALF_LEN # for whole line limit + NON_ASCII = '\xFF' + CHAR_INDEX = {char: index for index, char in enumerate('\0' + string.printable + NON_ASCII)} + NUM_CLASSES = len(CHAR_INDEX) def __init__(self, threshold: Union[float, ThresholdPreset], azure: bool = False, cuda: bool = False) -> None: """Init @@ -32,10 +37,6 @@ def __init__(self, threshold: Union[float, ThresholdPreset], azure: bool = False else: provider = "CPUExecutionProvider" self.model_session = ort.InferenceSession(model_file_path, providers=[provider]) - char_filtered = string.ascii_lowercase + string.digits + string.punctuation - - self.char_to_index = {char: index + 1 for index, char in enumerate(char_filtered)} - self.char_to_index['NON_ASCII'] = len(self.char_to_index) + 1 model_details = Util.json_load(os.path.join(dir_path, "model_config.json")) if isinstance(threshold, float): @@ -44,7 +45,7 @@ def __init__(self, threshold: Union[float, ThresholdPreset], azure: bool = False self.threshold = model_details["thresholds"][threshold.value] else: self.threshold = 0.5 - self.maxlen = int(model_details.get("max_len", 160)) + self.common_feature_list = [] self.unique_feature_list = [] logger.info("Init ML validator, model file path: %s", model_file_path) @@ -58,34 +59,74 @@ def __init__(self, threshold: Union[float, ThresholdPreset], azure: bool = False try: feature = feature_constructor(**kwargs) except TypeError: - raise TypeError( - f'Error while parsing model details. Cannot create feature "{feature_class}" with kwargs "{kwargs}"' - ) + raise TypeError(f'Error while parsing model details. Cannot create feature "{feature_class}"' + f' with kwargs "{kwargs}"') if feature_definition["type"] in ["RuleName"]: self.unique_feature_list.append(feature) else: self.common_feature_list.append(feature) - def encode(self, line, char_to_index) -> np.ndarray: - """Encodes line to array""" - num_classes = len(char_to_index) + 1 - result_array = np.zeros((self.maxlen, num_classes), dtype=np.float32) - line = line.strip().lower()[-self.maxlen:] - for i in range(self.maxlen): - if i < len(line): - c = line[i] - if c in char_to_index: - result_array[i, char_to_index[c]] = 1 - else: - result_array[i, char_to_index["NON_ASCII"]] = 1 + @staticmethod + def encode(text: str, limit: int) -> np.ndarray: + """Encodes prepared text to array""" + result_array = np.zeros(shape=(limit, MlValidator.NUM_CLASSES), dtype=np.float32) + if text is None: + return result_array + len_text = len(text) + if limit > len_text: + # fill empty part + text += '\0' * (limit - len_text) + for i, c in enumerate(text): + if c in MlValidator.CHAR_INDEX: + result_array[i, MlValidator.CHAR_INDEX[c]] = 1 else: - result_array[i, 0] = 1 + result_array[i, MlValidator.CHAR_INDEX[MlValidator.NON_ASCII]] = 1 return result_array - def _call_model(self, line_input: np.ndarray, feature_input: np.ndarray) -> Any: - line_input = line_input.astype(np.float32) - feature_input = feature_input.astype(np.float32) - return self.model_session.run(None, {"line_input": line_input, "feature_input": feature_input})[0] + @staticmethod + def subtext(text: str, pos: int, hunk_size: int) -> str: + """cut text symmetrically for given position or use remained quota to be fitted in 2x hunk_size""" + left_quota = 0 if hunk_size <= pos else hunk_size - pos + right_remain = len(text) - pos + right_quota = 0 if hunk_size <= right_remain else right_remain - hunk_size + left_pos = pos - hunk_size + right_pos = pos + hunk_size + if left_quota: + left_pos += left_quota + right_pos += left_quota + if right_quota: + left_pos += right_quota + right_pos += right_quota + return text[left_pos:right_pos] + + @staticmethod + def encode_line(text: str, position: int): + """Encodes line with balancing for position""" + offset = len(text) - len(text.lstrip()) + pos = position - offset + stripped = text.strip() + if MlValidator.MAX_LEN < len(stripped): + stripped = MlValidator.subtext(stripped, pos, MlValidator.HALF_LEN) + return MlValidator.encode(stripped, MlValidator.MAX_LEN) + + @staticmethod + def encode_value(text: str) -> np.ndarray: + """Encodes line with balancing for position""" + stripped = text.strip() + return MlValidator.encode(stripped[:MlValidator.HALF_LEN], MlValidator.HALF_LEN) + + def _call_model(self, line_input: np.ndarray, variable_input: np.ndarray, value_input: np.ndarray, + feature_input: np.ndarray) -> np.ndarray: + input_feed = { + "line_input": line_input.astype(np.float32), + "variable_input": variable_input.astype(np.float32), + "value_input": value_input.astype(np.float32), + "feature_input": feature_input.astype(np.float32), + } + result = self.model_session.run(output_names=None, input_feed=input_feed) + if result and isinstance(result[0], np.ndarray): + return result[0] + raise RuntimeError(f"Unexpected type {type(result[0])}") def extract_common_features(self, candidates: List[Candidate]) -> np.ndarray: """Extract features that are guaranteed to be the same for all candidates on the same line with same value.""" @@ -116,31 +157,48 @@ def extract_unique_features(self, candidates: List[Candidate]) -> np.ndarray: feature_array = feature_array | new_feature return feature_array - def validate(self, candidate: Candidate) -> Tuple[bool, float]: - """Validate single credential candidate.""" - sample_as_batch = [(candidate.line_data_list[0].value, [candidate])] - is_cred_batch, probability_batch = self.validate_groups(sample_as_batch, 1) - return is_cred_batch[0], probability_batch[0] - - def get_group_features(self, value: str, candidates: List[Candidate]) -> Tuple[np.ndarray, np.ndarray]: + def get_group_features(self, candidates: List[Candidate]) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: """ `np.newaxis` used to add new dimension if front, so input will be treated as a batch """ - line_input = self.encode(value, self.char_to_index)[np.newaxis] - + # all candidates are from the same line + default_candidate = candidates[0] + line_input = MlValidator.encode_line(default_candidate.line_data_list[0].line, + default_candidate.line_data_list[0].value_start)[np.newaxis] + variable = "" + value = "" + for candidate in candidates: + if not variable and candidate.line_data_list[0].variable: + variable = candidate.line_data_list[0].variable + if not value and candidate.line_data_list[0].value: + value = candidate.line_data_list[0].value + if variable and value: + break + variable_input = MlValidator.encode_value(variable)[np.newaxis] + value_input = MlValidator.encode_value(value)[np.newaxis] + feature_array = self.extract_features(candidates) + return line_input, variable_input, value_input, feature_array + + def extract_features(self, candidates: List[Candidate]) -> np.ndarray: + """extracts common and unique features from list of candidates""" common_features = self.extract_common_features(candidates) unique_features = self.extract_unique_features(candidates) - feature_array = np.hstack([common_features, unique_features]) - feature_array = np.array([feature_array]) - return line_input, feature_array + feature_hstack = np.hstack([common_features, unique_features]) + feature_array = np.array([feature_hstack]) + return feature_array - def _batch_call_model(self, line_inputs, feature_array_list): + def _batch_call_model(self, line_input_list, variable_input_list, value_input_list, features_list) -> np.ndarray: """auxiliary method to invoke twice""" - line_inputs_stack = np.vstack(line_inputs) - feature_array_vstack = np.vstack(feature_array_list) - return self._call_model(line_inputs_stack, feature_array_vstack)[:, 0] - - def validate_groups(self, group_list: List[Tuple[str, List[Candidate]]], + line_inputs_vstack = np.vstack(line_input_list) + variable_inputs_vstack = np.vstack(variable_input_list) + value_inputs_vstack = np.vstack(value_input_list) + feature_array_vstack = np.vstack(features_list) + result_call = self._call_model(line_inputs_vstack, variable_inputs_vstack, value_inputs_vstack, + feature_array_vstack) + result = result_call[:, 0] + return result + + def validate_groups(self, group_list: List[Tuple[CandidateKey, List[Candidate]]], batch_size: int) -> Tuple[np.ndarray, np.ndarray]: """Use ml model on list of candidate groups. @@ -154,24 +212,33 @@ def validate_groups(self, group_list: List[Tuple[str, List[Candidate]]], """ line_input_list = [] + variable_input_list = [] + value_input_list = [] features_list = [] - probability = np.zeros(len(group_list)) + probability = np.zeros(len(group_list), dtype=np.float32) head = tail = 0 - for (value, candidates) in group_list: - line_input, feature_array = self.get_group_features(value, candidates) + for group_key, candidates in group_list: + line_input, variable_input, value_input, feature_array = self.get_group_features(candidates) line_input_list.append(line_input) + variable_input_list.append(variable_input) + value_input_list.append(value_input) features_list.append(feature_array) tail += 1 if 0 == tail % batch_size: # use the approach to reduce memory consumption for huge candidates list - probability[head:tail] = self._batch_call_model(line_input_list, features_list) + probability[head:tail] = self._batch_call_model(line_input_list, variable_input_list, value_input_list, + features_list) head = tail line_input_list.clear() + variable_input_list.clear() + value_input_list.clear() features_list.clear() if head != tail: - probability[head:tail] = self._batch_call_model(line_input_list, features_list) + probability[head:tail] = self._batch_call_model(line_input_list, variable_input_list, value_input_list, + features_list) is_cred = probability > self.threshold for i in range(len(is_cred)): logger.debug("ML decision: %s with prediction: %s for value: %s", is_cred[i], round(probability[i], 8), group_list[i][0]) - return is_cred, probability + # apply cast to float to avoid json export issue + return is_cred, probability.astype(float) diff --git a/credsweeper/ml_model/model_config.json b/credsweeper/ml_model/model_config.json index 55f092b94..24cdf21b7 100644 --- a/credsweeper/ml_model/model_config.json +++ b/credsweeper/ml_model/model_config.json @@ -6,7 +6,6 @@ "high": 0.79791, "highest": 0.92996 }, - "max_len": 160, "features": [ { "type": "WordInVariable", @@ -302,88 +301,140 @@ "kwargs": { "extensions": [ "", + ".1", ".adoc", ".asciidoc", + ".axaml", ".bash", ".bat", ".bats", + ".bazel", + ".bundle", + ".bzl", ".c", ".cc", - ".cfg", + ".cf", + ".cjs", + ".cljc", + ".cmd", ".cnf", + ".coffee", ".conf", ".config", ".cpp", + ".creds", + ".crt", ".cs", - ".diff", + ".csp", + ".dist", + ".doc", + ".dockerfile", + ".eex", ".env", + ".erb", + ".erl", ".ex", ".example", - ".ex", - ".tf", ".exs", ".ext", + ".gml", + ".gni", ".go", ".golden", ".gradle", ".groovy", ".h", - ".hpp", + ".haml", ".hs", ".html", + ".iml", ".in", - ".inc", ".ini", + ".j", ".j2", ".java", + ".jenkinsfile", ".js", ".json", ".jsp", + ".jsx", + ".jwt", ".kt", + ".las", + ".ldif", + ".ldml", + ".libsonnet", + ".lock", + ".log", ".lua", ".m", - ".markdown", ".markerb", ".md", + ".mjs", + ".mk", + ".ml", + ".mlir", + ".moo", ".ndjson", + ".nix", + ".odd", + ".patch", ".php", ".pl", + ".pm", + ".po", ".pod", ".postinst", ".pp", + ".ppk", ".properties", ".proto", ".ps1", ".pxd", ".py", + ".pyx", ".r", ".rb", + ".rexx", ".rnh", + ".rrc", ".rs", ".rsp", ".rst", ".sample", ".sbt", ".scala", - ".scss", + ".secrets", ".sh", ".slim", ".snap", ".sql", + ".storyboard", + ".strings", + ".swift", ".t", + ".tdf", ".template", + ".test", ".tf", ".tfstate", + ".tfvars", ".tl", ".tmpl", + ".token", ".toml", ".travis", ".ts", ".tsx", ".txt", ".vue", + ".xaml", + ".xib", + ".xml", ".yaml", - ".yml" + ".yml", + ".zsh", + ".zsh-theme" ] } }, @@ -391,21 +442,21 @@ "type": "RuleName", "kwargs": { "rule_names": [ - "Token", - "Secret", - "Github Old Token", "API", - "Credential", - "Password", - "Key", "Auth", + "Certificate", + "Credential", + "Github Old Token", "JSON Web Token", - "URL Credentials", + "Key", "Nonce", + "Password", "Salt", - "Certificate" + "Secret", + "Token", + "URL Credentials" ] } } ] -} +} \ No newline at end of file diff --git a/credsweeper/rules/config.yaml b/credsweeper/rules/config.yaml index d2ebc89c3..f3a234562 100644 --- a/credsweeper/rules/config.yaml +++ b/credsweeper/rules/config.yaml @@ -658,7 +658,7 @@ confidence: moderate type: pattern values: - - (?P["'])?\w{2,80}://[\w%.:-]*(?P:)(?P[^\s/\@:]{3,80})@[\w.-]+\\*(?P["'])? + - (?P["'])?(?P\w{2,80}://)[\w%.:-]*(?P:)(?P[^\s/\@:]{3,80})@[\w.-]+\\*(?P["'])? filter_type: UrlCredentialsGroup use_ml: true required_substrings: diff --git a/experiment/main.py b/experiment/main.py index a524800f1..980a7e5c3 100644 --- a/experiment/main.py +++ b/experiment/main.py @@ -1,6 +1,8 @@ import os import pathlib import random +import subprocess +import sys from argparse import ArgumentParser from datetime import datetime from typing import List @@ -12,12 +14,11 @@ from sklearn.model_selection import train_test_split from sklearn.utils import compute_class_weight -from credsweeper.app import APP_PATH -from credsweeper.utils import Util from experiment.plot import save_plot from experiment.src.data_loader import read_detected_data, read_metadata, join_label, get_y_labels from experiment.src.features import prepare_data -from experiment.src.lstm_model import get_model_string_features +from experiment.src.lstm_model import get_model +from experiment.src.model_config_preprocess import model_config_preprocess from experiment.src.prepare_data import prepare_train_data @@ -50,41 +51,56 @@ def evaluate_model(thresholds: dict, keras_model: Model, x_data: List[np.ndarray def main(cred_data_location: str, jobs: int) -> str: current_time = datetime.now().strftime("%Y%m%d_%H%M%S") - model_config = Util.json_load(APP_PATH / "ml_model" / "model_config.json") - thresholds = model_config["thresholds"] - assert isinstance(thresholds, dict), thresholds - print(f"Load thresholds: {thresholds}") - prepare_train_data(_cred_data_location, jobs) print(f"Train model on data from {cred_data_location}") # detected data means which data is passed to ML validator of credsweeper after filters with RuleName - detected_data = read_detected_data("data/result.json") + detected_data = read_detected_data("detected_data.json") print(f"CredSweeper detected {len(detected_data)} credentials without ML") # all markup data meta_data = read_metadata(f"{cred_data_location}/meta") print(f"Metadata markup: {len(meta_data)} items") df_all = join_label(detected_data, meta_data) + # to prevent extra memory consumption - delete unnecessary objects del detected_data del meta_data + # workaround for CI step + for i in range(3): + # there are 2 times possible fails due ml config was updated + try: + thresholds = model_config_preprocess(df_all) + break + except RuntimeError as exc: + if "RESTART:" in str(exc): + continue + else: + raise + else: + raise RuntimeError("Something went wrong") + print(f"Common dataset: {len(df_all)} items") - df_all = df_all.drop_duplicates(subset=["line", "type", "ext"]) + df_all = df_all.drop_duplicates(subset=["line", "variable", "value", "type", "ext"]) print(f"Common dataset: {len(df_all)} items after drop duplicates") # random split - df_train, df_test = train_test_split(df_all, test_size=0.2, random_state=42) + lucky_number = random.randint(1, 1 << 32) + print(f"Lucky number: {lucky_number}") + df_train, df_test = train_test_split(df_all, test_size=0.15, random_state=lucky_number) len_df_train = len(df_train) print(f"Train size: {len_df_train}") len_df_test = len(df_test) print(f"Test size: {len_df_test}") - x_eval_value, x_eval_features = prepare_data(df_all) - y_eval = get_y_labels(df_all) + + print(f"Prepare full data") + x_full_line, x_full_variable, x_full_value, x_full_features = prepare_data(df_all) + y_full = get_y_labels(df_all) del df_all - x_train_value, x_train_features = prepare_data(df_train) + print(f"Prepare train data") + x_train_line, x_train_variable, x_train_value, x_train_features = prepare_data(df_train) print("x_train_value dtype ", x_train_value.dtype) # dbg print("x_train_features dtype", x_train_features.dtype) # dbg y_train = get_y_labels(df_train) @@ -93,24 +109,30 @@ def main(cred_data_location: str, jobs: int) -> str: print(f"Class-1 prop on train: {np.mean(y_train):.4f}") - class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(y_train), y=y_train) - class_weight = dict(enumerate(class_weights)) + classes = np.unique(y_train) + class_weights = compute_class_weight(class_weight='balanced', classes=classes, y=y_train) + max_weight = max(class_weights) + class_weights = [weight / max_weight for weight in class_weights] + print(f"y_train size:{len(y_train)}, 0: {np.count_nonzero(y_train == 0)}, 1: {np.count_nonzero(y_train == 1)}") + class_weight = dict(zip(classes, class_weights)) print(f"class_weight: {class_weight}") # information about class weights - print(f"y_train size:{len(y_train)}, 1: {np.count_nonzero(y_train == 1)}, 0: {np.count_nonzero(y_train == 0)}") - x_test_value, x_test_features = prepare_data(df_test) + print(f"Prepare test data") + x_test_line, x_test_variable, x_test_value, x_test_features = prepare_data(df_test) y_test = get_y_labels(df_test) print(f"Class-1 prop on test: {np.mean(y_test):.4f}") - keras_model = get_model_string_features(x_train_value.shape[-1], x_train_features.shape[-1]) - batch_size = 256 + keras_model = get_model(x_full_line.shape, x_full_variable.shape, x_full_value.shape, x_full_features.shape) + batch_size = 2048 + epochs = 16 - fit_history = keras_model.fit(x=[x_train_value, x_train_features], + fit_history = keras_model.fit(x=[x_train_line, x_train_variable, x_train_value, x_train_features], y=y_train, batch_size=batch_size, - epochs=17, + epochs=epochs, verbose=2, - validation_data=([x_test_value, x_test_features], y_test), + validation_data=([x_test_line, x_test_variable, x_test_value, + x_test_features], y_test), class_weight=class_weight, use_multiprocessing=True) @@ -119,19 +141,33 @@ def main(cred_data_location: str, jobs: int) -> str: model_file_name = dir_path / f"ml_model_at-{current_time}" keras_model.save(model_file_name, include_optimizer=False) - print("Validate results on the test subset") - print(f"Test size: {len(y_test)}") - print(f"Class-1 prop on eval: {np.mean(y_test):.4f}") - evaluate_model(thresholds, keras_model, [x_test_value, x_test_features], y_test) - - print("Validate results on the full set") - print(f"Test size: {len(y_eval)}") - print(f"Class-1 prop on eval: {np.mean(y_eval):.4f}") - evaluate_model(thresholds, keras_model, [x_eval_value, x_eval_features], y_eval) + print(f"Validate results on the train subset. Size: {len(y_train)} {np.mean(y_train):.4f}") + evaluate_model(thresholds, keras_model, [x_train_line, x_train_variable, x_train_value, x_train_features], y_train) + del x_train_line + del x_train_variable + del x_train_value + del x_train_features + del y_train + + print(f"Validate results on the test subset. Size: {len(y_test)} {np.mean(y_test):.4f}") + evaluate_model(thresholds, keras_model, [x_test_line, x_test_variable, x_test_value, x_test_features], y_test) + del x_test_line + del x_test_variable + del x_test_value + del x_test_features + del y_test + + print(f"Validate results on the full set. Size: {len(y_full)} {np.mean(y_full):.4f}") + evaluate_model(thresholds, keras_model, [x_full_line, x_full_variable, x_full_value, x_full_features], y_full) + del x_full_line + del x_full_variable + del x_full_value + del x_full_features + del y_full # ml history analysis save_plot(stamp=current_time, - title=f"batch:{batch_size} train:{len_df_train} test:{len(df_test)} weights:{class_weights}", + title=f"batch:{batch_size} train:{len_df_train} test:{len_df_test} weights:{class_weights}", history=fit_history, dir_path=dir_path) @@ -165,6 +201,10 @@ def main(cred_data_location: str, jobs: int) -> str: _jobs = int(args.jobs) _model_file_name = main(_cred_data_location, _jobs) - # print in last line result model - print(f"\nYou can find your model in: \n{_model_file_name}") + # print in last line the name + print(f"\nYou can find your model in:\n{_model_file_name}") + + command = f"{sys.executable} -m tf2onnx.convert --saved-model {_model_file_name}" \ + f" --output {pathlib.Path(__file__).parent.parent}/credsweeper/ml_model/ml_model.onnx --verbose" + subprocess.check_call(command, shell=True, cwd=pathlib.Path(__file__).parent) # python -m tf2onnx.convert --saved-model results/ml_model_at-20240201_073238 --output ../credsweeper/ml_model/ml_model.onnx --verbose diff --git a/experiment/main.sh b/experiment/main.sh index b37f4c933..5252a42df 100755 --- a/experiment/main.sh +++ b/experiment/main.sh @@ -1,22 +1,22 @@ #!/usr/bin/env bash -set -e +set -ex CREDSWEEPER_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/.." > /dev/null 2>&1 && pwd )" export PYTHONPATH=${CREDSWEEPER_DIR}:$PYTHONPATH echo $PYTHONPATH -python -m credsweeper --banner +${CREDSWEEPER_DIR}/.venv/bin/python -m credsweeper --banner -rm -rf data +# rm -rf data -python main.py --data ~/q/DataCred/CredData -j 32 +${CREDSWEEPER_DIR}/.venv/bin/python main.py --data ~/q/DataCred/CredData --jobs 32 | tee train.log -tf_model=$(tail -n1 main.log) +#last_tf_model=$(cat train.log | tail -n1) -echo $tf_model +#echo $last_tf_model -pwd +#pwd -python -m tf2onnx.convert --saved-model $tf_model --output ../credsweeper/ml_model/ml_model.onnx --verbose +#python -m tf2onnx.convert --saved-model results/$last_tf_model --output ../credsweeper/ml_model/ml_model.onnx --verbose diff --git a/experiment/plot.py b/experiment/plot.py index 9da9369cb..6f788d44f 100644 --- a/experiment/plot.py +++ b/experiment/plot.py @@ -6,6 +6,10 @@ def save_plot(stamp: str, title: str, history: History, dir_path: pathlib.Path): + #dbg + with open('history.pkl', 'wb') as f: + pickle.dump(history, f) + #dbg plt.clf() fig, axes = plt.subplots(2, 2) @@ -28,3 +32,12 @@ def save_plot(stamp: str, title: str, history: History, dir_path: pathlib.Path): plt.gcf().set_size_inches(16, 9) plt.savefig(dir_path / f"{stamp}.png", dpi=96) + + +# dbg +if __name__ == "__main__": + _dir_path = pathlib.Path("results") + current_time = "20240321_190401" + with open(f"results/history-{current_time}.pickle", "rb") as f: + fit_history = pickle.load(f) + save_plot(current_time, fit_history, _dir_path) diff --git a/experiment/src/data_loader.py b/experiment/src/data_loader.py index c2d1a51e1..986504c41 100644 --- a/experiment/src/data_loader.py +++ b/experiment/src/data_loader.py @@ -7,44 +7,49 @@ import numpy as np import pandas as pd -identifier = Tuple[str, int] +from credsweeper.utils import Util -ml_categories = [ - "Authentication Credentials", # - "Cryptographic Primitives", # - "Generic Secret", # - "Generic Token", # - "Password", # - "Predefined Pattern", # -] +# path, line, val_start, val_end +identifier = Tuple[str, int, int, int] -def strip_data_path(file_path, split="CredData/"): +def transform_to_meta_path(file_path): + """Transform any path to 'data/xxxxxxxx/[type]/yyyyyyyy.ext' to find in meta markup""" file_path = pathlib.Path(file_path).as_posix() - return file_path.split(split, 1)[-1] + path_list = file_path.split('/') + meta_path = '/'.join(["data", path_list[-3], path_list[-2], path_list[-1]]) + return meta_path -def read_detected_data(file_path: str, split="CredData/") -> Dict[identifier, Dict]: +def read_detected_data(file_path: str) -> Dict[identifier, Dict]: print(f"Reading detections from {file_path}") with open(file_path) as f: detections = json.load(f) detected_lines = {} - for detection in detections: - if 1 != len(detection["line_data_list"]): - continue - for line_data in detection["line_data_list"]: - relative_path = strip_data_path(line_data["path"], split) - index = relative_path, line_data["line_num"] - data_to_save = deepcopy(line_data) - data_to_save["path"] = relative_path - data_to_save["RuleName"] = [detection["rule"]] - - if index not in detected_lines: - detected_lines[index] = data_to_save - else: - detected_lines[index]["RuleName"].append(detection["rule"]) + for cred in detections: + rule_name = cred["rule"] + # skip not ML values like private keys and so on. Unsupported for ml train. "use_ml" rules ONLY + assert 1 == len(cred["line_data_list"]), cred + line_data = deepcopy(cred["line_data_list"][0]) + line_data.pop("entropy_validation") + line_data.pop("info") + line = line_data["line"].lstrip() + offset = len(line_data["line"]) - len(line) + line_data["line"] = line.rstrip() + line_data["value_start"] -= offset + line_data["value_end"] -= offset + assert line_data["value"] == line_data["line"][line_data["value_start"]:line_data["value_end"]], line_data + meta_path = transform_to_meta_path(line_data["path"]) + line_data["path"] = meta_path + line_data["RuleName"] = [rule_name] + + index = meta_path, line_data["line_num"], line_data["value_start"], line_data["value_end"] + if index not in detected_lines: + detected_lines[index] = line_data + else: + detected_lines[index]["RuleName"].append(rule_name) print(f"Detected {len(detected_lines)} unique lines!") print(f"{len(detections)} detections in total") @@ -52,7 +57,7 @@ def read_detected_data(file_path: str, split="CredData/") -> Dict[identifier, Di return detected_lines -def read_metadata(meta_dir: str, split="CredData/") -> Dict[identifier, Dict]: +def read_metadata(meta_dir: str) -> Dict[identifier, Dict]: print(f"Reading meta from {meta_dir}") meta_lines = {} j = 0 @@ -62,29 +67,41 @@ def read_metadata(meta_dir: str, split="CredData/") -> Dict[identifier, Dict]: if not file_path.endswith(".csv"): print(f"skip garbage: {csv_file}") continue - file_meta = pd.read_csv(csv_file, dtype={'RepoName': str, 'GroundTruth': str}) - for i, row in file_meta.iterrows(): + df = pd.read_csv(csv_file, + dtype={ + "RepoName": str, + "GroundTruth": str, + "Category": str, + "LineStart": "Int64", + "LineEnd": "Int64", + "ValueStart": "Int64", + "ValueEnd": "Int64", + }) + # Int64 is important to change with NaN + df["LineStart"] = df["LineStart"].fillna(-1).astype(int) + df["LineEnd"] = df["LineEnd"].fillna(-1).astype(int) + df["ValueStart"] = df["ValueStart"].fillna(-1).astype(int) + df["ValueEnd"] = df["ValueEnd"].fillna(-1).astype(int) + # all templates are false + df.loc[df["GroundTruth"] == "Template", "GroundTruth"] = 'F' + for _, row in df.iterrows(): j += 1 - line_start = int(row["LineStart"]) - line_end = int(row["LineEnd"]) - if "Template" == row["GroundTruth"]: - print(f"WARNING: transform Template to FALSE\n{row}") - row["GroundTruth"] = "F" - if row["Category"] not in ml_categories: - print(f"WARNING: skip not ml category {row['FilePath']},{line_start},{line_end}" - f",{row['GroundTruth']},{row['Category']}") - continue - if line_start != line_end: - print(f"WARNING: skip multiline as train or test data {row}") + if row["LineStart"] != row["LineEnd"] or any(x in row["Category"] for x in ["AWS Multi", "Google Multi"]): + # print(f"WARNING: skip not ml category {row['FilePath']},{line_start},{line_end}" + # f",{row['GroundTruth']},{row['Category']}") continue - relative_path = strip_data_path(row["FilePath"], split) - index = relative_path, line_start + assert 'F' == row["GroundTruth"] or 'T' == row["GroundTruth"] and 0 <= row["ValueStart"], row + + meta_path = transform_to_meta_path(row["FilePath"]) + index = meta_path, row['LineStart'], row['ValueStart'], row['ValueEnd'] if index not in meta_lines: row_data = row.to_dict() - row_data["FilePath"] = relative_path + row_data["Used"] = False + row_data["FilePath"] = meta_path meta_lines[index] = row_data else: - print(f"WARNING: {index} already in meta_lines {row['GroundTruth']} {row['Category']}") + print(f"WARNING: {index} already in meta_lines {row['GroundTruth']} {row['Category']}" + f"\n{meta_lines[index]}") print(f"Loaded {len(meta_lines)} lines from meta of {j} total") @@ -94,22 +111,46 @@ def read_metadata(meta_dir: str, split="CredData/") -> Dict[identifier, Dict]: def join_label(detected_data: Dict[identifier, Dict], meta_data: Dict[identifier, Dict]) -> pd.DataFrame: values = [] for index, line_data in detected_data.items(): + if not line_data["value"]: + print(f"WARNING: empty value\n{line_data}") + continue label = False - if index not in meta_data: - print(f"WARNING: {index} is not in meta!!!\n{line_data}") - elif meta_data[index]["Category"] not in ml_categories: - # skip not ML values like private keys and so on - print(f"WARNING: {line_data} is not ML category! {meta_data[index]}") - else: - if 'T' == meta_data[index]["GroundTruth"]: + if markup := meta_data.get(index): + # it means index in meta_data with exactly match + if 'T' == markup["GroundTruth"]: + label = True + markup["Used"] = True + if not set(markup["Category"].split(':')).intersection(set(line_data["RuleName"])): + print("1.CHECK CATEGORIES", set(markup["Category"].split(':')), set(line_data["RuleName"]), str(markup)) + elif markup := meta_data.get((index[0], index[1], index[2], -1)): + # perhaps, the line has only start markup - so value end position is -1 + if 'T' == markup["GroundTruth"]: label = True + markup["Used"] = True + if not set(markup["Category"].split(':')).intersection(set(line_data["RuleName"])): + print("2.CHECK CATEGORIES", set(markup["Category"].split(':')), set(line_data["RuleName"]), str(markup)) + elif markup := meta_data.get((index[0], index[1], -1, -1)): + # perhaps, the line has false markup - so value start-end position is -1, -1 + if 'T' == markup["GroundTruth"]: + raise RuntimeError(f"ERROR: markup {markup} cannot be TRUE\n{line_data}") + markup["Used"] = True + if not set(markup["Category"].split(':')).intersection(set(line_data["RuleName"])): + print("3.CHECK CATEGORIES", set(markup["Category"].split(':')), set(line_data["RuleName"]), str(markup)) + else: + print(f"WARNING: {index} is not in meta!!!\n{line_data}") + continue + line = line_data["line"] + # the line in detected data mus be striped + assert line == line.strip(), line_data + # check the value in detected data + assert line[line_data["value_start"]:line_data["value_end"]] == line_data["value"] + # todo: variable input has to be markup in meta too, or/and new feature "VariableExists" created ??? line_data["GroundTruth"] = label + line_data["ext"] = Util.get_extension(line_data["path"]) + line_data["type"] = line_data["path"].split('/')[-2] values.append(line_data) - # values = list(detected_data.values()) + df = pd.DataFrame(values) - df["repo"] = [repo.split("/")[1] for repo in df["path"]] - df["ext"] = [os.path.splitext(ext)[-1] for ext in df["path"]] - df["type"] = [repo.split("/")[2] for repo in df["path"]] # src, test, other return df diff --git a/experiment/src/features.py b/experiment/src/features.py index 75adf3c2e..bdfdba5ff 100644 --- a/experiment/src/features.py +++ b/experiment/src/features.py @@ -9,8 +9,6 @@ from credsweeper.ml_model import MlValidator from credsweeper.utils import Util -ml_validator = MlValidator(0.5) # Initialize global MLValidator object - class CustomLineData(LineData): """Object that allows to create LineData from scanner results""" @@ -30,36 +28,77 @@ def get_candidates(line_data: dict): line_data["variable"]) candidates = [] for rule in line_data["RuleName"]: - candidates.append(Candidate([ld], [], rule, Severity.MEDIUM, None, None, True)) - + candidates.append( + Candidate( + line_data_list=[ld], + patterns=[], + rule_name=rule, + severity=Severity.MEDIUM, + use_ml=True, + )) return candidates -def get_features(line_data: Union[dict, pd.Series]): +def get_features(line_data: Union[dict, pd.Series], + ml_validator: MlValidator) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: """Get features from a single detection using CredSweeper.MlValidator module""" - value = line_data["value"] + candidates = get_candidates(line_data) - line_input = ml_validator.encode(value, ml_validator.char_to_index) + line_input = MlValidator.encode_line(line_data["line"], line_data["value_start"]) + if variable := line_data["variable"]: + if len(variable) > MlValidator.HALF_LEN: + variable = variable[:MlValidator.HALF_LEN] + variable_input = MlValidator.encode_value(variable) + else: + variable_input = MlValidator.encode_value('') - common_features = ml_validator.extract_common_features(candidates) - unique_features = ml_validator.extract_unique_features(candidates) + if value := line_data["value"]: + if len(value) > MlValidator.HALF_LEN: + value = value[:MlValidator.HALF_LEN] + value_input = MlValidator.encode_value(value) + else: + raise RuntimeError(f"Empty value is not allowed {line_data}") - extracted_features = np.hstack([common_features, unique_features]) + line = line_data["line"] + assert line[line_data["value_start"]:].startswith(line_data["value"]), line_data - return line_input, extracted_features + extracted_features = ml_validator.extract_features(candidates) + return line_input, variable_input, value_input, extracted_features -def prepare_data(df: pd.DataFrame) -> Tuple[np.ndarray, np.ndarray]: + +def prepare_data(df: pd.DataFrame) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: """Get features from a DataFrame detection using CredSweeper.MlValidator module""" + + ml_validator = MlValidator(0.5) # MLValidator object loads config (MAY be updated!) with features + x_size = len(df) - x_values = np.zeros([x_size, 160, 70], dtype=np.float32) - x_features = np.zeros([x_size, 131], dtype=np.float32) + x_line_input = np.zeros(shape=[x_size, MlValidator.MAX_LEN, MlValidator.NUM_CLASSES], dtype=np.float32) + x_variable_input = np.zeros(shape=[x_size, MlValidator.HALF_LEN, MlValidator.NUM_CLASSES], dtype=np.float32) + x_value_input = np.zeros(shape=[x_size, MlValidator.HALF_LEN, MlValidator.NUM_CLASSES], dtype=np.float32) + # features size preprocess to calculate the dimension automatically + features = get_features( # + line_data={ # + "path": "", # + "line_num": 1, # + "line": "ABC123", # + "value": "123", # + "value_start": 3, # + "variable": None, # + "RuleName": ["API"], # + }, # + ml_validator=ml_validator) + features_size = features[3].shape[1] + print(f"Features size: {features_size}", flush=True) + x_features = np.zeros(shape=[x_size, features_size], dtype=np.float32) n = 0 for i, row in df.iterrows(): - assert row["line"] is not None, row - line_input, extracted_features = get_features(row) - x_values[n] = line_input + assert bool(row["line"]) and bool(row["value"]), row + line_input, variable_input, value_input, extracted_features = get_features(row, ml_validator) + x_line_input[n] = line_input + x_variable_input[n] = variable_input + x_value_input[n] = value_input x_features[n] = extracted_features n += 1 - return x_values, x_features + return x_line_input, x_variable_input, x_value_input, x_features diff --git a/experiment/src/lstm_model.py b/experiment/src/lstm_model.py index e0a4aa1f4..31e47b9c5 100644 --- a/experiment/src/lstm_model.py +++ b/experiment/src/lstm_model.py @@ -1,38 +1,53 @@ -import tensorflow as tf -from tensorflow.keras.layers import Dense, LSTM, Bidirectional, Input, Concatenate +from tensorflow.keras.layers import Dense, LSTM, Bidirectional, Input, Concatenate, Dropout from tensorflow.keras.models import Model +from tensorflow.python.keras.metrics import BinaryAccuracy, Precision, Recall -DEFAULT_METRICS = [tf.keras.metrics.BinaryAccuracy(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall()] +from credsweeper import MlValidator -def get_model_string_features(vocab_size: int, feature_size: int) -> Model: - """Get keras model with string and feature input and single binary out +def get_model( + line_shape: tuple, + variable_shape: tuple, + value_shape: tuple, + feature_shape: tuple, +) -> Model: + """Get keras model with string and feature input and single binary out""" + d_type = "float32" - Args: - vocab_size: Datasets vocabulary size - feature_size: numbers of features used for training + line_input = Input(shape=(None, line_shape[2]), name="line_input", dtype=d_type) + line_lstm = LSTM(units=line_shape[1], dtype=d_type) + line_bidirectional = Bidirectional(layer=line_lstm) + line_lstm_branch = line_bidirectional(line_input) - Return: - Keras model - """ - d_type = "float32" - lstm_input = Input(shape=(None, vocab_size), name="line_input", dtype=d_type) - bidirectional = Bidirectional(layer=LSTM(units=123, dtype=d_type)) - lstm_branch = bidirectional(lstm_input) + variable_input = Input(shape=(None, variable_shape[2]), name="variable_input", dtype=d_type) + variable_lstm = LSTM(units=variable_shape[1], dtype=d_type) + variable_bidirectional = Bidirectional(layer=variable_lstm) + variable_lstm_branch = variable_bidirectional(variable_input) + + value_input = Input(shape=(None, value_shape[2]), name="value_input", dtype=d_type) + value_lstm = LSTM(units=value_shape[1], dtype=d_type) + value_bidirectional = Bidirectional(layer=value_lstm) + value_lstm_branch = value_bidirectional(value_input) + + feature_input = Input(shape=(feature_shape[1], ), name="feature_input", dtype=d_type) - feature_input = Input(shape=(feature_size, ), name="feature_input", dtype=d_type) + joined_features = Concatenate()([line_lstm_branch, variable_lstm_branch, value_lstm_branch, feature_input]) - concatenation = Concatenate() - joined_features = concatenation([lstm_branch, feature_input]) - dense_a = Dense(units=63, activation='relu', name="dense", dtype=d_type) + # 3 bidirectional + features + dense_units = 2 * MlValidator.MAX_LEN + 2 * 2 * MlValidator.HALF_LEN + feature_shape[1] + # check after model compilation. Should be matched the combined size. + dense_a = Dense(units=dense_units, activation='relu', name="dense", dtype=d_type) joined_layers = dense_a(joined_features) + dropout = Dropout(0.33) + dropout_layer = dropout(joined_layers) dense_b = Dense(units=1, activation='sigmoid', name="prediction", dtype=d_type) - output = dense_b(joined_layers) + output = dense_b(dropout_layer) - model = Model(inputs=[lstm_input, feature_input], outputs=output) + model = Model(inputs=[line_input, variable_input, value_input, feature_input], outputs=output) - model.compile(optimizer='adam', loss='binary_crossentropy', metrics=DEFAULT_METRICS) + metrics = [BinaryAccuracy(name="binary_accuracy"), Precision(name="precision"), Recall(name="recall")] + model.compile(optimizer="adam", loss='binary_crossentropy', metrics=metrics) - model.summary() + model.summary(line_length=120, expand_nested=True, show_trainable=True) return model diff --git a/experiment/src/model_config_preprocess.py b/experiment/src/model_config_preprocess.py new file mode 100644 index 000000000..4ad50b30d --- /dev/null +++ b/experiment/src/model_config_preprocess.py @@ -0,0 +1,70 @@ +from typing import Set, Dict + +import pandas as pd + +from credsweeper.app import APP_PATH +from credsweeper.utils import Util + + +def model_config_preprocess(df_all: pd.DataFrame) -> Dict[str, float]: + model_config_path = APP_PATH / "ml_model" / "model_config.json" + model_config = Util.json_load(model_config_path) + + # check whether all extensions from meta are in model_config.json + + for x in model_config["features"]: + if "FileExtension" == x["type"]: + config_extensions = x["kwargs"]["extensions"] + config_extensions_set = set(config_extensions) + if len(config_extensions) != len(config_extensions_set): + print("WARNING: duplicates in config extensions list") + if any(x != x.lower() for x in config_extensions_set): + print("WARNING: file extensions in config must be in lowercase") + break + else: + raise RuntimeError(f"FileExtension was not found in config ({model_config_path}) features!") + + data_extension_set = set(df_all["ext"].unique()) + + if config_extensions_set != data_extension_set: + for x in model_config["features"]: + if "FileExtension" == x["type"]: + x["kwargs"]["extensions"] = sorted(list(data_extension_set)) + Util.json_dump(model_config, model_config_path) + break + # the process must be restarted with updated config + raise RuntimeError(f"RESTART: differences in extensions:" + f"\nconfig:{config_extensions_set.difference(data_extension_set)}" + f"\ndata:{data_extension_set.difference(config_extensions_set)}" + f"\nFile {model_config_path} was updated.") + + # append all rule names for the feature + + for x in model_config["features"]: + if "RuleName" == x["type"]: + config_rules = x["kwargs"]["rule_names"] + config_rules_set = set(config_rules) + if len(config_rules) != len(config_rules_set): + print("WARNING: duplicates in config rule_names list") + break + else: + raise RuntimeError(f"FileExtension was not found in config ({model_config_path}) features!") + + data_rules_set = set(df_all["RuleName"].explode().unique()) + + if config_rules_set != data_rules_set: + for x in model_config["features"]: + if "RuleName" == x["type"]: + x["kwargs"]["rule_names"] = sorted(list(data_rules_set)) + Util.json_dump(model_config, model_config_path) + break + # the process must be restarted with updated config + raise RuntimeError(f"RESTART: differences in extensions:" + f"\nconfig:{config_rules_set.difference(data_rules_set)}" + f"\ndata:{data_rules_set.difference(config_rules_set)}" + f"\nFile {model_config_path} was updated.") + + thresholds = model_config["thresholds"] + assert isinstance(thresholds, dict), thresholds + print(f"Load thresholds: {thresholds}") + return thresholds diff --git a/experiment/src/prepare_data.py b/experiment/src/prepare_data.py index e67cb2e8c..e4139c44f 100644 --- a/experiment/src/prepare_data.py +++ b/experiment/src/prepare_data.py @@ -16,16 +16,15 @@ def execute_scanner(dataset_location: str, result_location_str, j): def prepare_train_data(cred_data_location: str, j: int): print("Start train data preparation...") - os.makedirs("data", exist_ok=True) if not os.path.exists("train_config.yaml"): - # use only rules which marked as use_ml may be valuable + # use pattern or keyword type rules = Util.yaml_load("../credsweeper/rules/config.yaml") new_rules = [x for x in rules if x.get("use_ml")] Util.yaml_dump(new_rules, "train_config.yaml") - if not os.path.exists("data/result.json"): + if not os.path.exists("detected_data.json"): print(f"Get CredSweeper results from {cred_data_location}. May take some time") - execute_scanner(cred_data_location, "data/result.json", j) + execute_scanner(cred_data_location, "detected_data.json", j) print("Train data prepared!") diff --git a/tests/__init__.py b/tests/__init__.py index 669ba3190..b3e8973a9 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -7,18 +7,18 @@ NEGLIGIBLE_ML_THRESHOLD = 0.00001 # credentials count after scan -SAMPLES_CRED_COUNT: int = 412 -SAMPLES_CRED_LINE_COUNT: int = 429 +SAMPLES_CRED_COUNT: int = 409 +SAMPLES_CRED_LINE_COUNT: int = 426 # credentials count after post-processing -SAMPLES_POST_CRED_COUNT: int = 397 +SAMPLES_POST_CRED_COUNT: int = 377 # with option --doc SAMPLES_IN_DOC = 404 # archived credentials that are not found without --depth SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 21 -SAMPLES_IN_DEEP_2 = SAMPLES_IN_DEEP_1 + 19 +SAMPLES_IN_DEEP_2 = SAMPLES_IN_DEEP_1 + 18 SAMPLES_IN_DEEP_3 = SAMPLES_IN_DEEP_2 + 1 # well known string with all latin letters diff --git a/tests/data/depth_3.json b/tests/data/depth_3.json index 291004b97..94fd46af6 100644 --- a/tests/data/depth_3.json +++ b/tests/data/depth_3.json @@ -112,7 +112,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99316, + "ml_probability": 0.9997, "rule": "API", "severity": "medium", "confidence": "moderate", @@ -187,7 +187,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99154, + "ml_probability": 0.92134, "rule": "Auth", "severity": "medium", "confidence": "moderate", @@ -212,7 +212,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99315, + "ml_probability": 0.99778, "rule": "Auth", "severity": "medium", "confidence": "moderate", @@ -237,7 +237,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99528, + "ml_probability": 0.99717, "rule": "Auth", "severity": "medium", "confidence": "moderate", @@ -262,7 +262,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99667, + "ml_probability": 0.99902, "rule": "Auth", "severity": "medium", "confidence": "moderate", @@ -287,7 +287,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99711, + "ml_probability": 0.98929, "rule": "Auth", "severity": "medium", "confidence": "moderate", @@ -312,7 +312,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99711, + "ml_probability": 0.98929, "rule": "JSON Web Token", "severity": "medium", "confidence": "moderate", @@ -541,8 +541,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.83144, "rule": "Key", "severity": "medium", "confidence": "moderate", @@ -616,8 +616,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.99078, "rule": "JSON Web Token", "severity": "medium", "confidence": "moderate", @@ -667,7 +667,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9998, + "ml_probability": 0.99927, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -817,7 +817,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98984, + "ml_probability": 0.99899, "rule": "Certificate", "severity": "medium", "confidence": "moderate", @@ -917,7 +917,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99879, + "ml_probability": 0.9974, "rule": "Credential", "severity": "medium", "confidence": "moderate", @@ -992,7 +992,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.94217, + "ml_probability": 0.95881, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1017,57 +1017,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.78111, - "rule": "Password", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "ID:master PW:dipPr11Gg!", - "line_num": 2, - "path": "tests/samples/doc_id_pair_passwd_pair", - "info": "tests/samples/doc_id_pair_passwd_pair|RAW", - "value": "dipPr11Gg!", - "value_start": 13, - "value_end": 23, - "variable": "PW", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 2.7897352853986264, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.81375, - "rule": "Password", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "ANYID:master PW:dipPr12Gg!", - "line_num": 3, - "path": "tests/samples/doc_id_pair_passwd_pair", - "info": "tests/samples/doc_id_pair_passwd_pair|RAW", - "value": "dipPr12Gg!", - "value_start": 16, - "value_end": 26, - "variable": "PW", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 2.989735285398626, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97553, + "ml_probability": 0.95202, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1092,7 +1042,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98957, + "ml_probability": 0.98333, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1117,7 +1067,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98178, + "ml_probability": 0.85971, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1142,7 +1092,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97951, + "ml_probability": 0.98429, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1167,7 +1117,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98937, + "ml_probability": 0.9944, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1192,7 +1142,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99239, + "ml_probability": 0.98281, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1217,7 +1167,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.96152, + "ml_probability": 0.93926, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1242,7 +1192,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.94337, + "ml_probability": 0.9076, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1267,7 +1217,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.90819, + "ml_probability": 0.73841, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1292,7 +1242,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.96069, + "ml_probability": 0.97226, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1317,7 +1267,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9855, + "ml_probability": 0.98255, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1342,7 +1292,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99273, + "ml_probability": 0.9886, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1367,7 +1317,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97227, + "ml_probability": 0.97937, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1392,7 +1342,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98513, + "ml_probability": 0.97742, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1417,7 +1367,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98967, + "ml_probability": 0.98383, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1442,7 +1392,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98433, + "ml_probability": 0.98626, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1467,7 +1417,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.96661, + "ml_probability": 0.98951, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1492,7 +1442,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9157, + "ml_probability": 0.97504, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1517,7 +1467,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.90242, + "ml_probability": 0.78353, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1542,7 +1492,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.93864, + "ml_probability": 0.90892, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1567,7 +1517,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99393, + "ml_probability": 0.9669, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1592,7 +1542,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98374, + "ml_probability": 0.95874, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1617,7 +1567,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9868, + "ml_probability": 0.97931, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1642,7 +1592,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97173, + "ml_probability": 0.94527, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1667,7 +1617,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98572, + "ml_probability": 0.97648, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1692,7 +1642,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.96866, + "ml_probability": 0.8991, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1717,7 +1667,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97155, + "ml_probability": 0.95024, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1742,7 +1692,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9814, + "ml_probability": 0.93358, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1767,7 +1717,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9683, + "ml_probability": 0.97012, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1792,7 +1742,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98092, + "ml_probability": 0.93757, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1817,7 +1767,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98723, + "ml_probability": 0.98501, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1842,7 +1792,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.995, + "ml_probability": 0.98351, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1867,7 +1817,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99217, + "ml_probability": 0.99495, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1892,7 +1842,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99366, + "ml_probability": 0.9885, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1917,7 +1867,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98726, + "ml_probability": 0.96882, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1942,7 +1892,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98166, + "ml_probability": 0.99279, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1967,7 +1917,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98929, + "ml_probability": 0.98884, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1992,7 +1942,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9941, + "ml_probability": 0.99041, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2017,7 +1967,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99533, + "ml_probability": 0.99487, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2042,7 +1992,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99094, + "ml_probability": 0.98717, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2067,7 +2017,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99616, + "ml_probability": 0.99065, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2092,7 +2042,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98161, + "ml_probability": 0.96843, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2142,7 +2092,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.93832, + "ml_probability": 0.78091, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2167,7 +2117,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.96866, + "ml_probability": 0.86764, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2192,7 +2142,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97171, + "ml_probability": 0.8841, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2217,7 +2167,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99545, + "ml_probability": 0.99083, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2242,7 +2192,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99749, + "ml_probability": 0.99799, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2267,7 +2217,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99451, + "ml_probability": 0.99342, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2292,7 +2242,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99606, + "ml_probability": 0.99672, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2317,7 +2267,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99761, + "ml_probability": 0.99586, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2342,7 +2292,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99759, + "ml_probability": 0.98982, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2367,7 +2317,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99134, + "ml_probability": 0.97956, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2392,7 +2342,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97394, + "ml_probability": 0.98157, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2417,7 +2367,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97418, + "ml_probability": 0.96885, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2442,7 +2392,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98534, + "ml_probability": 0.91279, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2467,7 +2417,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99043, + "ml_probability": 0.99518, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2492,7 +2442,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99413, + "ml_probability": 0.99354, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2517,7 +2467,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98612, + "ml_probability": 0.94077, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2542,7 +2492,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9925, + "ml_probability": 0.99246, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2567,7 +2517,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98443, + "ml_probability": 0.97722, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2592,7 +2542,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98844, + "ml_probability": 0.97789, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2617,7 +2567,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99429, + "ml_probability": 0.99818, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2642,7 +2592,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99609, + "ml_probability": 0.99392, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2667,7 +2617,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99555, + "ml_probability": 0.98892, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2692,7 +2642,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98078, + "ml_probability": 0.98848, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2717,7 +2667,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99698, + "ml_probability": 0.99372, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2742,7 +2692,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99697, + "ml_probability": 0.99115, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2767,7 +2717,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99288, + "ml_probability": 0.9926, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2792,7 +2742,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98647, + "ml_probability": 0.95203, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2817,7 +2767,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.96062, + "ml_probability": 0.74213, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2842,7 +2792,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98849, + "ml_probability": 0.93994, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2867,7 +2817,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98849, + "ml_probability": 0.93994, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2892,7 +2842,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99555, + "ml_probability": 0.99221, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2917,7 +2867,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98872, + "ml_probability": 0.95412, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2942,7 +2892,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9858, + "ml_probability": 0.96542, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2967,7 +2917,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97663, + "ml_probability": 0.80353, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3017,7 +2967,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97658, + "ml_probability": 0.9681, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3067,7 +3017,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98485, + "ml_probability": 0.97278, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3092,7 +3042,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9931, + "ml_probability": 0.89492, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3117,132 +3067,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98537, - "rule": "Password", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "id:master@example.com,pw:IHQSB1GG!", - "line_num": 102, - "path": "tests/samples/doc_id_pair_passwd_pair", - "info": "tests/samples/doc_id_pair_passwd_pair|RAW", - "value": "IHQSB1GG!", - "value_start": 25, - "value_end": 34, - "variable": "master@example.com,pw", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 2.595488890170944, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.82012, - "rule": "Password", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "ID/PW:master/iPp0@GRq", - "line_num": 1, - "path": "tests/samples/doc_id_passwd_pair", - "info": "tests/samples/doc_id_passwd_pair|RAW", - "value": "master/iPp0@GRq", - "value_start": 6, - "value_end": 21, - "variable": "PW", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.64643122256795, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.853, - "rule": "Password", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "ID/Password:master/iPp2@GRq", - "line_num": 3, - "path": "tests/samples/doc_id_passwd_pair", - "info": "tests/samples/doc_id_passwd_pair|RAW", - "value": "master/iPp2@GRq", - "value_start": 12, - "value_end": 27, - "variable": "Password", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.64643122256795, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.93163, - "rule": "Password", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "ID/Pass:master/iPp3@GRq", - "line_num": 4, - "path": "tests/samples/doc_id_passwd_pair", - "info": "tests/samples/doc_id_passwd_pair|RAW", - "value": "master/iPp3@GRq", - "value_start": 8, - "value_end": 23, - "variable": "Pass", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.64643122256795, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.94939, - "rule": "Password", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "ID/PW=master/iPp5@GRq", - "line_num": 6, - "path": "tests/samples/doc_id_passwd_pair", - "info": "tests/samples/doc_id_passwd_pair|RAW", - "value": "master/iPp5@GRq", - "value_start": 6, - "value_end": 21, - "variable": "PW", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.64643122256795, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.96971, + "ml_probability": 0.95141, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3267,7 +3092,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9804, + "ml_probability": 0.94637, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3314,56 +3139,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.81258, - "rule": "Password", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "\uc544\uc774\ub514/PW:master/iPp16@GRq", - "line_num": 17, - "path": "tests/samples/doc_id_passwd_pair", - "info": "tests/samples/doc_id_passwd_pair|RAW", - "value": "master/iPp16@GRq", - "value_start": 7, - "value_end": 23, - "variable": "PW", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.75, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.90956, - "rule": "Password", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "\uacc4\uc815/PW:master/iPp17@GRq", - "line_num": 18, - "path": "tests/samples/doc_id_passwd_pair", - "info": "tests/samples/doc_id_passwd_pair|RAW", - "value": "master/iPp17@GRq", - "value_start": 6, - "value_end": 22, - "variable": "PW", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.75, - "valid": false - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -3392,32 +3167,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86488, - "rule": "Password", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "98.76.54.32 id/pw:master/iPp19@GRq", - "line_num": 20, - "path": "tests/samples/doc_id_passwd_pair", - "info": "tests/samples/doc_id_passwd_pair|RAW", - "value": "master/iPp19@GRq", - "value_start": 18, - "value_end": 34, - "variable": "pw", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.75, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.88303, + "ml_probability": 0.64725, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3442,57 +3192,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.83284, - "rule": "Password", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "ID/PWD:master/iPp21@GRq", - "line_num": 22, - "path": "tests/samples/doc_id_passwd_pair", - "info": "tests/samples/doc_id_passwd_pair|RAW", - "value": "master/iPp21@GRq", - "value_start": 7, - "value_end": 23, - "variable": "PWD", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.75, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.83065, - "rule": "Password", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "user/pwd:master/iPp22@GRq", - "line_num": 23, - "path": "tests/samples/doc_id_passwd_pair", - "info": "tests/samples/doc_id_passwd_pair|RAW", - "value": "master/iPp22@GRq", - "value_start": 9, - "value_end": 25, - "variable": "pwd", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.625, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98612, + "ml_probability": 0.77219, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3517,7 +3217,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97221, + "ml_probability": 0.89744, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3542,32 +3242,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.94576, - "rule": "Password", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "ID/PW:master/iPp28@GRq", - "line_num": 29, - "path": "tests/samples/doc_id_passwd_pair", - "info": "tests/samples/doc_id_passwd_pair|RAW", - "value": "master/iPp28@GRq", - "value_start": 6, - "value_end": 22, - "variable": "PW", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.75, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.97248, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3592,7 +3267,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.6758, + "ml_probability": 0.83211, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3617,7 +3292,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.9785, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3642,7 +3317,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.95095, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3667,7 +3342,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.9891, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3692,7 +3367,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.96869, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3717,7 +3392,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.9862, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3767,7 +3442,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.75257, + "ml_probability": 0.98415, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3792,7 +3467,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.6758, + "ml_probability": 0.92058, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3817,7 +3492,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.7933, + "ml_probability": 0.96353, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3842,7 +3517,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.98606, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3867,7 +3542,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.7933, + "ml_probability": 0.9317, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3892,7 +3567,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.99035, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3917,7 +3592,32 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.76605, + "rule": "Password", + "severity": "medium", + "confidence": "moderate", + "line_data_list": [ + { + "line": "Password:Prl23Db#@,pwd=Prl23Db#@", + "line_num": 32, + "path": "tests/samples/doc_passwd_pair", + "info": "tests/samples/doc_passwd_pair|RAW", + "value": "Prl23Db#@,pwd=Prl23Db#@", + "value_start": 9, + "value_end": 32, + "variable": "Password", + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 2.931483269957663, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.98411, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3942,7 +3642,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.98308, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3967,7 +3667,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.7933, + "ml_probability": 0.93944, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3992,7 +3692,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.7933, + "ml_probability": 0.9219, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -4017,7 +3717,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.94673, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -4042,7 +3742,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.99169, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -4067,7 +3767,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.99226, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -4092,7 +3792,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.9922, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -4117,7 +3817,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99405, + "ml_probability": 0.99781, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -4142,7 +3842,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99422, + "ml_probability": 0.99411, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -4167,7 +3867,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99416, + "ml_probability": 0.99745, "rule": "Key", "severity": "medium", "confidence": "moderate", @@ -4192,7 +3892,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99416, + "ml_probability": 0.99745, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -4217,7 +3917,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99893, + "ml_probability": 0.99944, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -4242,7 +3942,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99885, + "ml_probability": 0.99933, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -4267,7 +3967,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99349, + "ml_probability": 0.99836, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -4292,7 +3992,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99863, + "ml_probability": 0.99931, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -4317,7 +4017,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99902, + "ml_probability": 0.99866, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -4342,7 +4042,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99908, + "ml_probability": 0.99897, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -4367,7 +4067,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99572, + "ml_probability": 0.98914, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -4392,7 +4092,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99809, + "ml_probability": 0.99814, "rule": "API", "severity": "medium", "confidence": "moderate", @@ -4417,7 +4117,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99809, + "ml_probability": 0.99814, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -4442,7 +4142,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.96195, + "ml_probability": 0.98817, "rule": "Key", "severity": "medium", "confidence": "moderate", @@ -4467,7 +4167,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99482, + "ml_probability": 0.99834, "rule": "Key", "severity": "medium", "confidence": "moderate", @@ -4492,7 +4192,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99482, + "ml_probability": 0.99834, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -4517,7 +4217,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97555, + "ml_probability": 0.98394, "rule": "Key", "severity": "medium", "confidence": "moderate", @@ -4542,7 +4242,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9969, + "ml_probability": 0.99717, "rule": "Key", "severity": "medium", "confidence": "moderate", @@ -4567,7 +4267,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9969, + "ml_probability": 0.99717, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -4592,7 +4292,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99157, + "ml_probability": 0.99626, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -4617,7 +4317,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99506, + "ml_probability": 0.99912, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -4642,7 +4342,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.95206, + "ml_probability": 0.98197, "rule": "Auth", "severity": "medium", "confidence": "moderate", @@ -4667,7 +4367,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.95206, + "ml_probability": 0.98197, "rule": "Key", "severity": "medium", "confidence": "moderate", @@ -4767,7 +4467,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.85092, + "ml_probability": 0.97276, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -4792,7 +4492,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98975, + "ml_probability": 0.91921, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -4817,7 +4517,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.7574, + "ml_probability": 0.96022, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -4867,7 +4567,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.78754, + "ml_probability": 0.98548, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -4917,7 +4617,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99678, + "ml_probability": 0.99913, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -4967,7 +4667,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.91394, + "ml_probability": 0.99924, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5042,7 +4742,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99678, + "ml_probability": 0.99691, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5067,7 +4767,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.78926, + "ml_probability": 0.85317, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5117,7 +4817,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.7517, + "ml_probability": 0.91066, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5167,7 +4867,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98813, + "ml_probability": 0.95322, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5192,7 +4892,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98975, + "ml_probability": 0.96597, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5217,7 +4917,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99678, + "ml_probability": 0.9762, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5267,7 +4967,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.78754, + "ml_probability": 0.98493, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5317,7 +5017,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.78754, + "ml_probability": 0.97065, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5342,7 +5042,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99678, + "ml_probability": 0.98331, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5392,7 +5092,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.91394, + "ml_probability": 0.99901, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5442,7 +5142,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99678, + "ml_probability": 0.99388, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5467,7 +5167,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99714, + "ml_probability": 0.99155, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5492,7 +5192,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.92567, + "ml_probability": 0.80778, "rule": "Key", "severity": "medium", "confidence": "moderate", @@ -5517,7 +5217,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.92567, + "ml_probability": 0.80778, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -5567,7 +5267,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9819, + "ml_probability": 0.98799, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5617,7 +5317,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.7517, + "ml_probability": 0.83698, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5667,7 +5367,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.91394, + "ml_probability": 0.99524, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5692,7 +5392,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99563, + "ml_probability": 0.99514, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5742,17 +5442,17 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.78926, + "ml_probability": 0.92603, "rule": "Password", "severity": "medium", "confidence": "moderate", "line_data_list": [ { - "line": "id:xxxx(ANYpw:IhqSb1Gg)", + "line": "id:xxxx(ANYpw:IhqSb1Ga)", "line_num": 46, "path": "tests/samples/doc_various", "info": "tests/samples/doc_various|RAW", - "value": "IhqSb1Gg)", + "value": "IhqSb1Ga)", "value_start": 14, "value_end": 23, "variable": "ANYpw", @@ -5792,7 +5492,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99334, + "ml_probability": 0.89208, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5917,7 +5617,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.7517, + "ml_probability": 0.9446, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5942,7 +5642,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.7574, + "ml_probability": 0.90454, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5992,7 +5692,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98975, + "ml_probability": 0.98522, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6042,7 +5742,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99563, + "ml_probability": 0.99809, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6067,7 +5767,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99563, + "ml_probability": 0.99768, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6092,7 +5792,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.70183, + "ml_probability": 0.98499, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6117,7 +5817,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99563, + "ml_probability": 0.99273, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6142,7 +5842,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.92685, + "ml_probability": 0.9906, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6189,6 +5889,31 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.72512, + "rule": "Password", + "severity": "medium", + "confidence": "moderate", + "line_data_list": [ + { + "line": "(ID&PWD):master/IhqSb1Gg", + "line_num": 71, + "path": "tests/samples/doc_various", + "info": "tests/samples/doc_various|RAW", + "value": "master/IhqSb1Gg", + "value_start": 9, + "value_end": 24, + "variable": "PWD)", + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 3.906890595608518, + "valid": false + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -6217,7 +5942,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.85092, + "ml_probability": 0.97276, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6267,7 +5992,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99334, + "ml_probability": 0.86693, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6317,7 +6042,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9819, + "ml_probability": 0.96165, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6342,7 +6067,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99082, + "ml_probability": 0.99898, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6417,7 +6142,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98975, + "ml_probability": 0.99288, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6442,7 +6167,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98975, + "ml_probability": 0.98185, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6492,7 +6217,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98975, + "ml_probability": 0.98044, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6542,7 +6267,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98975, + "ml_probability": 0.9976, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6617,7 +6342,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99261, + "ml_probability": 0.99285, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6717,7 +6442,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98975, + "ml_probability": 0.99448, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6817,7 +6542,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.91394, + "ml_probability": 0.99751, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6867,7 +6592,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.91394, + "ml_probability": 0.99652, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6917,7 +6642,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99678, + "ml_probability": 0.99617, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6967,7 +6692,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.91394, + "ml_probability": 0.98978, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -7042,7 +6767,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98975, + "ml_probability": 0.99415, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -7092,7 +6817,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99678, + "ml_probability": 0.99783, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -7117,7 +6842,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99678, + "ml_probability": 0.9983, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -7167,7 +6892,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99678, + "ml_probability": 0.99943, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -7192,7 +6917,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98975, + "ml_probability": 0.94807, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -7217,7 +6942,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98975, + "ml_probability": 0.98603, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -7292,7 +7017,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.78636, + "ml_probability": 0.98327, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -7430,32 +7155,7 @@ "value": "EAACEdEose0cBAlGy7KeQ5Yna9Coup39tiYdoQ4jHF", "value_start": 28, "value_end": 70, - "variable": null, - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 4.766968315481371, - "valid": true - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, - "rule": "Token", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "GI_REO_GI_FACEBOOK_TOKEN = \"EAACEdEose0cBAlGy7KeQ5Yna9Coup39tiYdoQ4jHF\"", - "line_num": 1, - "path": "tests/samples/facebook_key", - "info": "tests/samples/facebook_key|RAW", - "value": "EAACEdEose0cBAlGy7KeQ5Yna9Coup39tiYdoQ4jHF", - "value_start": 28, - "value_end": 70, - "variable": "GI_REO_GI_FACEBOOK_TOKEN", + "variable": null, "entropy_validation": { "iterator": "BASE64_CHARS", "entropy": 4.766968315481371, @@ -7567,7 +7267,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99798, + "ml_probability": 0.9999, "rule": "Github Old Token", "severity": "high", "confidence": "moderate", @@ -7592,7 +7292,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99798, + "ml_probability": 0.9999, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -7919,56 +7619,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, - "rule": "Auth", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "google_oauth_key = \"ya29.gi_reo_gi_crackle_ln22\"", - "line_num": 1, - "path": "tests/samples/google_oauth_key", - "info": "tests/samples/google_oauth_key|RAW", - "value": "ya29.gi_reo_gi_crackle_ln22", - "value_start": 20, - "value_end": 47, - "variable": "google_oauth_key", - "entropy_validation": { - "iterator": "BASE36_CHARS", - "entropy": 3.1797273164975133, - "valid": true - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, - "rule": "Key", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "google_oauth_key = \"ya29.gi_reo_gi_crackle_ln22\"", - "line_num": 1, - "path": "tests/samples/google_oauth_key", - "info": "tests/samples/google_oauth_key|RAW", - "value": "ya29.gi_reo_gi_crackle_ln22", - "value_start": 20, - "value_end": 47, - "variable": "google_oauth_key", - "entropy_validation": { - "iterator": "BASE36_CHARS", - "entropy": 3.1797273164975133, - "valid": true - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -8021,8 +7671,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.99849, "rule": "JSON Web Token", "severity": "medium", "confidence": "moderate", @@ -8372,7 +8022,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99545, + "ml_probability": 0.98178, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -8497,7 +8147,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99105, + "ml_probability": 0.98196, "rule": "JSON Web Token", "severity": "medium", "confidence": "moderate", @@ -8522,7 +8172,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99911, + "ml_probability": 0.95345, "rule": "Key", "severity": "medium", "confidence": "moderate", @@ -8547,7 +8197,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99907, + "ml_probability": 0.99918, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -8572,7 +8222,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99958, + "ml_probability": 0.9992, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -8597,7 +8247,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99955, + "ml_probability": 0.9993, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -8622,7 +8272,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98676, + "ml_probability": 0.96582, "rule": "Key", "severity": "medium", "confidence": "moderate", @@ -8647,7 +8297,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98973, + "ml_probability": 0.99946, "rule": "API", "severity": "medium", "confidence": "moderate", @@ -8672,7 +8322,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98973, + "ml_probability": 0.99946, "rule": "Key", "severity": "medium", "confidence": "moderate", @@ -8812,7 +8462,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99792, + "ml_probability": 0.9987, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -8902,7 +8552,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99792, + "ml_probability": 0.9987, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -8927,7 +8577,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9812, + "ml_probability": 0.99839, "rule": "Nonce", "severity": "medium", "confidence": "moderate", @@ -9002,7 +8652,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98995, + "ml_probability": 0.99062, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -9027,7 +8677,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99448, + "ml_probability": 0.99437, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -9052,57 +8702,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.92289, - "rule": "Password", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "def connect(passwd: str = \"cq2tPr1a2\"): # python default arg", - "line_num": 4, - "path": "tests/samples/pass_valid", - "info": "tests/samples/pass_valid|RAW", - "value": "cq2tPr1a2", - "value_start": 27, - "value_end": 36, - "variable": "passwd", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 2.9477027792200903, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.96972, - "rule": "Password", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "if passworsd == \"q4c1a2oPd\": # __eq__ separator", - "line_num": 5, - "path": "tests/samples/pass_valid", - "info": "tests/samples/pass_valid|RAW", - "value": "q4c1a2oPd", - "value_start": 17, - "value_end": 26, - "variable": "passworsd", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.169925001442312, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99646, + "ml_probability": 0.97424, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -9127,7 +8727,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99933, + "ml_probability": 0.9998, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -9152,7 +8752,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99674, + "ml_probability": 0.99048, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -9177,7 +8777,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99674, + "ml_probability": 0.99164, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -9202,7 +8802,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.66622, + "ml_probability": 0.88851, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -9227,7 +8827,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99545, + "ml_probability": 0.98178, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -9252,7 +8852,32 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99545, + "ml_probability": 0.99745, + "rule": "Password", + "severity": "medium", + "confidence": "moderate", + "line_data_list": [ + { + "line": "password = \"MYPSWRD!@#$%^&*\"", + "line_num": 1, + "path": "tests/samples/password.tfvars", + "info": "tests/samples/password.tfvars|RAW", + "value": "MYPSWRD!@#$%^&*", + "value_start": 12, + "value_end": 27, + "variable": "password", + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 1.8232156112839757, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.98178, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -9277,7 +8902,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99077, + "ml_probability": 0.93797, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -9302,7 +8927,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.70641, + "ml_probability": 0.80933, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -9327,7 +8952,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.77103, + "ml_probability": 0.74784, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -9352,7 +8977,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.68544, + "ml_probability": 0.74784, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -10534,31 +10159,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.66622, - "rule": "Password", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "pwd = \"cackle!\"", - "line_num": 1, - "path": "tests/samples/pwd.gradle", - "info": "tests/samples/pwd.gradle|RAW", - "value": "cackle!", - "value_start": 7, - "value_end": 14, - "variable": "pwd", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 2.120589933192232, - "valid": false - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -10587,32 +10187,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99266, - "rule": "Salt", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "var Himalayan_salt = \"$hal$1te$TnnGdhednJsdQ5nfetwZ\";", - "line_num": 1, - "path": "tests/samples/salt.hs", - "info": "tests/samples/salt.hs|RAW", - "value": "$hal$1te$TnnGdhednJsdQ5nfetwZ", - "value_start": 22, - "value_end": 51, - "variable": "Himalayan_salt", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.613714857551378, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98571, + "ml_probability": 0.7833, "rule": "Salt", "severity": "medium", "confidence": "moderate", @@ -10637,32 +10212,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.721, - "rule": "Salt", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "salt2 = r\"\"\"\\0x12\\0x3s\"\"\"", - "line_num": 2, - "path": "tests/samples/salt.py", - "info": "tests/samples/salt.py|RAW", - "value": "\\0x12\\0x3s", - "value_start": 12, - "value_end": 22, - "variable": "salt2", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 2.25754247590989, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98487, + "ml_probability": 0.90088, "rule": "Salt", "severity": "medium", "confidence": "moderate", @@ -10687,7 +10237,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.69654, + "ml_probability": 0.94635, "rule": "Salt", "severity": "medium", "confidence": "moderate", @@ -10712,7 +10262,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.90913, + "ml_probability": 0.80989, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -10762,7 +10312,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.90913, + "ml_probability": 0.80989, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -10812,7 +10362,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99934, + "ml_probability": 0.99997, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -10862,7 +10412,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98391, + "ml_probability": 0.99855, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -11212,7 +10762,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99532, + "ml_probability": 0.9996, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -11287,7 +10837,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.85719, + "ml_probability": 0.99739, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -11312,7 +10862,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.89814, + "ml_probability": 0.99002, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -11337,7 +10887,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99958, + "ml_probability": 0.93902, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -11362,7 +10912,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97834, + "ml_probability": 0.97259, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -11387,7 +10937,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99981, + "ml_probability": 0.99976, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -11412,32 +10962,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99891, - "rule": "Token", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "token = H72gsdv2dswPneHduwhfd", - "line_num": 65, - "path": "tests/samples/test.html", - "info": "tests/samples/test.html|HTML", - "value": "H72gsdv2dswPneHduwhfd", - "value_start": 8, - "value_end": 29, - "variable": "token", - "entropy_validation": { - "iterator": "BASE36_CHARS", - "entropy": 3.0981768385722708, - "valid": true - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99769, + "ml_probability": 0.99681, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -11661,8 +11186,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.9941, "rule": "JSON Web Token", "severity": "medium", "confidence": "moderate", @@ -11686,8 +11211,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.9941, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -11736,8 +11261,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.98435, "rule": "JSON Web Token", "severity": "medium", "confidence": "moderate", @@ -11761,8 +11286,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.98435, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -11787,57 +11312,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99401, - "rule": "Token", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "gi_reo_gi_token = \"G1Re06G1BdgNseiJDN21Z094M\"", - "line_num": 1, - "path": "tests/samples/token.toml", - "info": "tests/samples/token.toml|RAW", - "value": "G1Re06G1BdgNseiJDN21Z094M", - "value_start": 19, - "value_end": 44, - "variable": "gi_reo_gi_token", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 4.133660689688186, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98366, - "rule": "Token", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "Token-> DemoToken: Nxs094M3ed2s1Re0F4M3ed2GZ8M= <- for User : demo", - "line_num": 2, - "path": "tests/samples/token.toml", - "info": "tests/samples/token.toml|RAW", - "value": "Nxs094M3ed2s1Re0F4M3ed2GZ8M=", - "value_start": 19, - "value_end": 47, - "variable": "DemoToken", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 4.039148671903071, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99833, + "ml_probability": 0.99767, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -11887,7 +11362,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99877, + "ml_probability": 0.99501, "rule": "URL Credentials", "severity": "high", "confidence": "moderate", @@ -11900,7 +11375,7 @@ "value": "dh3sjr8b", "value_start": 22, "value_end": 30, - "variable": null, + "variable": "mongodb://", "entropy_validation": { "iterator": "BASE64_CHARS", "entropy": 3.0, @@ -11912,7 +11387,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99634, + "ml_probability": 0.99957, "rule": "URL Credentials", "severity": "high", "confidence": "moderate", @@ -11925,7 +11400,7 @@ "value": "5WdF4f2jE76a", "value_start": 55, "value_end": 67, - "variable": null, + "variable": "dbconnection://", "entropy_validation": { "iterator": "BASE64_CHARS", "entropy": 3.584962500721156, @@ -11962,7 +11437,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.87752, + "ml_probability": 0.90119, "rule": "Password", "severity": "medium", "confidence": "moderate", diff --git a/tests/data/doc.json b/tests/data/doc.json index adc664b66..60c3741c2 100644 --- a/tests/data/doc.json +++ b/tests/data/doc.json @@ -8728,11 +8728,11 @@ "confidence": "moderate", "line_data_list": [ { - "line": "id:xxxx(ANYpw:IhqSb1Gg)", + "line": "id:xxxx(ANYpw:IhqSb1Ga)", "line_num": 46, "path": "tests/samples/doc_various", "info": "tests/samples/doc_various|RAW", - "value": "IhqSb1Gg)", + "value": "IhqSb1Ga)", "value_start": 14, "value_end": 23, "variable": "pw", @@ -9842,7 +9842,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99798, + "ml_probability": 0.99929, "rule": "Github Old Token", "severity": "high", "confidence": "moderate", diff --git a/tests/data/ml_threshold.json b/tests/data/ml_threshold.json index 389648435..e8baa1d9c 100644 --- a/tests/data/ml_threshold.json +++ b/tests/data/ml_threshold.json @@ -27,7 +27,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99316, + "ml_probability": 0.9997, "rule": "API", "severity": "medium", "confidence": "moderate", @@ -102,7 +102,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99154, + "ml_probability": 0.92134, "rule": "Auth", "severity": "medium", "confidence": "moderate", @@ -127,7 +127,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99315, + "ml_probability": 0.99778, "rule": "Auth", "severity": "medium", "confidence": "moderate", @@ -152,7 +152,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99528, + "ml_probability": 0.99717, "rule": "Auth", "severity": "medium", "confidence": "moderate", @@ -177,7 +177,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99667, + "ml_probability": 0.99902, "rule": "Auth", "severity": "medium", "confidence": "moderate", @@ -202,7 +202,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99711, + "ml_probability": 0.98929, "rule": "Auth", "severity": "medium", "confidence": "moderate", @@ -227,7 +227,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99711, + "ml_probability": 0.98929, "rule": "JSON Web Token", "severity": "medium", "confidence": "moderate", @@ -456,8 +456,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.83144, "rule": "Key", "severity": "medium", "confidence": "moderate", @@ -531,8 +531,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.99078, "rule": "JSON Web Token", "severity": "medium", "confidence": "moderate", @@ -707,7 +707,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98984, + "ml_probability": 0.99899, "rule": "Certificate", "severity": "medium", "confidence": "moderate", @@ -732,7 +732,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99879, + "ml_probability": 0.9974, "rule": "Credential", "severity": "medium", "confidence": "moderate", @@ -807,7 +807,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.94217, + "ml_probability": 0.95881, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -832,7 +832,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.78111, + "ml_probability": 0.58204, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -857,7 +857,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.81375, + "ml_probability": 0.57547, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -882,7 +882,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97553, + "ml_probability": 0.95202, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -907,7 +907,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98957, + "ml_probability": 0.98333, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -932,7 +932,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98178, + "ml_probability": 0.85971, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -957,7 +957,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97951, + "ml_probability": 0.98429, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -982,7 +982,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98937, + "ml_probability": 0.9944, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1007,7 +1007,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99239, + "ml_probability": 0.98281, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1032,7 +1032,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.96152, + "ml_probability": 0.93926, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1057,7 +1057,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.94337, + "ml_probability": 0.9076, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1082,7 +1082,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.90819, + "ml_probability": 0.73841, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1107,7 +1107,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.96069, + "ml_probability": 0.97226, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1132,7 +1132,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9855, + "ml_probability": 0.98255, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1157,7 +1157,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99273, + "ml_probability": 0.9886, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1182,7 +1182,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97227, + "ml_probability": 0.97937, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1207,7 +1207,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98513, + "ml_probability": 0.97742, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1232,7 +1232,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98967, + "ml_probability": 0.98383, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1257,7 +1257,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98433, + "ml_probability": 0.98626, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1282,7 +1282,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.96661, + "ml_probability": 0.98951, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1307,7 +1307,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9157, + "ml_probability": 0.97504, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1332,7 +1332,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.90242, + "ml_probability": 0.78353, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1357,7 +1357,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.93864, + "ml_probability": 0.90892, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1382,7 +1382,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99393, + "ml_probability": 0.9669, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1407,7 +1407,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98374, + "ml_probability": 0.95874, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1432,7 +1432,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9868, + "ml_probability": 0.97931, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1457,7 +1457,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97173, + "ml_probability": 0.94527, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1482,7 +1482,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98572, + "ml_probability": 0.97648, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1507,7 +1507,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.96866, + "ml_probability": 0.8991, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1532,7 +1532,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97155, + "ml_probability": 0.95024, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1557,7 +1557,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9814, + "ml_probability": 0.93358, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1582,7 +1582,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9683, + "ml_probability": 0.97012, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1607,7 +1607,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98092, + "ml_probability": 0.93757, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1632,7 +1632,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98723, + "ml_probability": 0.98501, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1657,7 +1657,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.995, + "ml_probability": 0.98351, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1682,7 +1682,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99217, + "ml_probability": 0.99495, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1707,7 +1707,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99366, + "ml_probability": 0.9885, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1732,7 +1732,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98726, + "ml_probability": 0.96882, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1757,7 +1757,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98166, + "ml_probability": 0.99279, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1782,7 +1782,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98929, + "ml_probability": 0.98884, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1807,7 +1807,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9941, + "ml_probability": 0.99041, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1832,7 +1832,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99533, + "ml_probability": 0.99487, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1857,7 +1857,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99094, + "ml_probability": 0.98717, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1882,7 +1882,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99616, + "ml_probability": 0.99065, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1907,7 +1907,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98161, + "ml_probability": 0.96843, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1957,7 +1957,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.93832, + "ml_probability": 0.78091, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1982,7 +1982,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.96866, + "ml_probability": 0.86764, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2007,7 +2007,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97171, + "ml_probability": 0.8841, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2032,7 +2032,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99545, + "ml_probability": 0.99083, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2057,7 +2057,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99749, + "ml_probability": 0.99799, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2082,7 +2082,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99451, + "ml_probability": 0.99342, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2107,7 +2107,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99606, + "ml_probability": 0.99672, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2132,7 +2132,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99761, + "ml_probability": 0.99586, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2157,7 +2157,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99759, + "ml_probability": 0.98982, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2182,7 +2182,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99134, + "ml_probability": 0.97956, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2207,7 +2207,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97394, + "ml_probability": 0.98157, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2232,7 +2232,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97418, + "ml_probability": 0.96885, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2257,7 +2257,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98534, + "ml_probability": 0.91279, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2282,7 +2282,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99043, + "ml_probability": 0.99518, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2307,7 +2307,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99413, + "ml_probability": 0.99354, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2332,7 +2332,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98612, + "ml_probability": 0.94077, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2357,7 +2357,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9925, + "ml_probability": 0.99246, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2382,7 +2382,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98443, + "ml_probability": 0.97722, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2407,7 +2407,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98844, + "ml_probability": 0.97789, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2432,7 +2432,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99429, + "ml_probability": 0.99818, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2457,7 +2457,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99609, + "ml_probability": 0.99392, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2482,7 +2482,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99555, + "ml_probability": 0.98892, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2507,7 +2507,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98078, + "ml_probability": 0.98848, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2532,7 +2532,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99698, + "ml_probability": 0.99372, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2557,7 +2557,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99697, + "ml_probability": 0.99115, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2582,7 +2582,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99288, + "ml_probability": 0.9926, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2607,7 +2607,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98647, + "ml_probability": 0.95203, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2632,7 +2632,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.96062, + "ml_probability": 0.74213, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2657,7 +2657,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98849, + "ml_probability": 0.93994, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2682,7 +2682,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98849, + "ml_probability": 0.93994, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2707,7 +2707,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99555, + "ml_probability": 0.99221, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2732,7 +2732,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98872, + "ml_probability": 0.95412, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2757,7 +2757,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9858, + "ml_probability": 0.96542, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2782,7 +2782,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97663, + "ml_probability": 0.80353, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2832,7 +2832,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97658, + "ml_probability": 0.9681, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2882,7 +2882,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98485, + "ml_probability": 0.97278, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2907,7 +2907,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9931, + "ml_probability": 0.89492, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2932,7 +2932,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98537, + "ml_probability": 0.44221, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2957,7 +2957,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.82012, + "ml_probability": 0.21898, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2982,7 +2982,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.853, + "ml_probability": 0.5764, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3007,7 +3007,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.93163, + "ml_probability": 0.40505, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3032,7 +3032,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.46234, + "ml_probability": 0.35361, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3057,7 +3057,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.94939, + "ml_probability": 0.35132, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3082,7 +3082,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.96971, + "ml_probability": 0.95141, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3107,7 +3107,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9804, + "ml_probability": 0.94637, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3157,7 +3157,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.36505, + "ml_probability": 0.56867, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3182,7 +3182,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.81258, + "ml_probability": 0.11576, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3207,7 +3207,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.90956, + "ml_probability": 0.30599, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3257,7 +3257,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86488, + "ml_probability": 0.56475, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3282,7 +3282,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.88303, + "ml_probability": 0.64725, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3307,7 +3307,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.83284, + "ml_probability": 0.1686, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3332,7 +3332,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.83065, + "ml_probability": 0.18081, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3357,7 +3357,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98612, + "ml_probability": 0.77219, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3382,7 +3382,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97221, + "ml_probability": 0.89744, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3407,7 +3407,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.94576, + "ml_probability": 0.19146, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3432,7 +3432,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.97248, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3457,7 +3457,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.6758, + "ml_probability": 0.83211, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3482,7 +3482,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.9785, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3507,7 +3507,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.95095, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3532,7 +3532,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.9891, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3557,7 +3557,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.96869, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3582,7 +3582,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.9862, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3632,7 +3632,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.75257, + "ml_probability": 0.98415, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3657,7 +3657,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.6758, + "ml_probability": 0.92058, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3682,7 +3682,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.7933, + "ml_probability": 0.96353, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3707,7 +3707,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.98606, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3732,7 +3732,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.7933, + "ml_probability": 0.9317, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3757,7 +3757,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.00758, + "ml_probability": 0.00717, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3782,7 +3782,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.11604, + "ml_probability": 0.40041, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3807,7 +3807,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.99035, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3832,7 +3832,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.00654, + "ml_probability": 0.312, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3857,7 +3857,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.00839, + "ml_probability": 0.76605, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3882,7 +3882,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.98411, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3907,7 +3907,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.98308, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3932,7 +3932,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.7933, + "ml_probability": 0.93944, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3957,7 +3957,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.7933, + "ml_probability": 0.9219, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3982,7 +3982,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.94673, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -4007,7 +4007,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.99169, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -4032,7 +4032,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.99226, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -4057,7 +4057,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.9922, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -4082,7 +4082,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.31407, + "ml_probability": 0.20901, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -4107,7 +4107,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99405, + "ml_probability": 0.99781, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -4132,7 +4132,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99422, + "ml_probability": 0.99411, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -4157,7 +4157,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99416, + "ml_probability": 0.99745, "rule": "Key", "severity": "medium", "confidence": "moderate", @@ -4182,7 +4182,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99416, + "ml_probability": 0.99745, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -4207,7 +4207,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99893, + "ml_probability": 0.99944, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -4232,7 +4232,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99885, + "ml_probability": 0.99933, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -4257,7 +4257,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99349, + "ml_probability": 0.99836, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -4282,7 +4282,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99863, + "ml_probability": 0.99931, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -4307,7 +4307,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99902, + "ml_probability": 0.99866, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -4332,7 +4332,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99908, + "ml_probability": 0.99897, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -4357,7 +4357,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99572, + "ml_probability": 0.98914, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -4382,7 +4382,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99809, + "ml_probability": 0.99814, "rule": "API", "severity": "medium", "confidence": "moderate", @@ -4407,7 +4407,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99809, + "ml_probability": 0.99814, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -4432,7 +4432,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.96195, + "ml_probability": 0.98817, "rule": "Key", "severity": "medium", "confidence": "moderate", @@ -4457,7 +4457,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99482, + "ml_probability": 0.99834, "rule": "Key", "severity": "medium", "confidence": "moderate", @@ -4482,7 +4482,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99482, + "ml_probability": 0.99834, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -4507,7 +4507,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97555, + "ml_probability": 0.98394, "rule": "Key", "severity": "medium", "confidence": "moderate", @@ -4532,7 +4532,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9969, + "ml_probability": 0.99717, "rule": "Key", "severity": "medium", "confidence": "moderate", @@ -4557,7 +4557,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9969, + "ml_probability": 0.99717, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -4582,7 +4582,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99157, + "ml_probability": 0.99626, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -4607,7 +4607,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99506, + "ml_probability": 0.99912, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -4632,7 +4632,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.95206, + "ml_probability": 0.98197, "rule": "Auth", "severity": "medium", "confidence": "moderate", @@ -4657,7 +4657,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.95206, + "ml_probability": 0.98197, "rule": "Key", "severity": "medium", "confidence": "moderate", @@ -4682,7 +4682,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.01957, + "ml_probability": 0.00319, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -4704,31 +4704,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.00012, - "rule": "Key", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "\"key\":\"attached_file_0\"", - "line_num": 41, - "path": "tests/samples/doc_secret_pair", - "info": "", - "value": "attached_file_0", - "value_start": 7, - "value_end": 22, - "variable": "key", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 2.985971849527383, - "valid": false - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -4807,7 +4782,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.85092, + "ml_probability": 0.97276, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -4832,7 +4807,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98975, + "ml_probability": 0.91921, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -4857,7 +4832,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.7574, + "ml_probability": 0.96022, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -4907,7 +4882,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.78754, + "ml_probability": 0.98548, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -4957,7 +4932,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99678, + "ml_probability": 0.99913, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5007,7 +4982,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.91394, + "ml_probability": 0.99924, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5082,7 +5057,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99678, + "ml_probability": 0.99691, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5107,7 +5082,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.78926, + "ml_probability": 0.85317, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5157,7 +5132,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.7517, + "ml_probability": 0.91066, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5207,7 +5182,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98813, + "ml_probability": 0.95322, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5232,7 +5207,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98975, + "ml_probability": 0.96597, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5257,7 +5232,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99678, + "ml_probability": 0.9762, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5307,7 +5282,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.78754, + "ml_probability": 0.98493, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5357,7 +5332,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.78754, + "ml_probability": 0.97065, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5382,7 +5357,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99678, + "ml_probability": 0.98331, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5432,7 +5407,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.91394, + "ml_probability": 0.99901, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5482,7 +5457,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99678, + "ml_probability": 0.99388, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5507,7 +5482,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99714, + "ml_probability": 0.99155, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5532,7 +5507,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.92567, + "ml_probability": 0.80778, "rule": "Key", "severity": "medium", "confidence": "moderate", @@ -5557,7 +5532,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.92567, + "ml_probability": 0.80778, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -5607,7 +5582,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9819, + "ml_probability": 0.98799, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5657,7 +5632,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.7517, + "ml_probability": 0.83698, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5707,7 +5682,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.91394, + "ml_probability": 0.99524, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5732,7 +5707,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99563, + "ml_probability": 0.99514, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5782,17 +5757,17 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.78926, + "ml_probability": 0.92603, "rule": "Password", "severity": "medium", "confidence": "moderate", "line_data_list": [ { - "line": "id:xxxx(ANYpw:IhqSb1Gg)", + "line": "id:xxxx(ANYpw:IhqSb1Ga)", "line_num": 46, "path": "tests/samples/doc_various", "info": "", - "value": "IhqSb1Gg)", + "value": "IhqSb1Ga)", "value_start": 14, "value_end": 23, "variable": "ANYpw", @@ -5832,7 +5807,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99334, + "ml_probability": 0.89208, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5957,7 +5932,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.7517, + "ml_probability": 0.9446, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5982,7 +5957,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.7574, + "ml_probability": 0.90454, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6032,7 +6007,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98975, + "ml_probability": 0.98522, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6082,7 +6057,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99563, + "ml_probability": 0.99809, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6107,7 +6082,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99563, + "ml_probability": 0.99768, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6132,7 +6107,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.70183, + "ml_probability": 0.98499, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6157,7 +6132,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99563, + "ml_probability": 0.99273, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6182,7 +6157,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.92685, + "ml_probability": 0.9906, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6232,7 +6207,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.00461, + "ml_probability": 0.22877, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6257,7 +6232,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.51005, + "ml_probability": 0.72512, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6307,7 +6282,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.85092, + "ml_probability": 0.97276, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6357,7 +6332,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99334, + "ml_probability": 0.86693, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6407,7 +6382,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9819, + "ml_probability": 0.96165, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6432,7 +6407,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99082, + "ml_probability": 0.99898, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6507,7 +6482,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98975, + "ml_probability": 0.99288, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6532,7 +6507,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98975, + "ml_probability": 0.98185, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6582,7 +6557,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98975, + "ml_probability": 0.98044, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6632,7 +6607,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98975, + "ml_probability": 0.9976, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6707,7 +6682,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99261, + "ml_probability": 0.99285, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6807,7 +6782,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98975, + "ml_probability": 0.99448, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6907,7 +6882,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.91394, + "ml_probability": 0.99751, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6957,7 +6932,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.91394, + "ml_probability": 0.99652, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -7007,7 +6982,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99678, + "ml_probability": 0.99617, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -7057,7 +7032,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.91394, + "ml_probability": 0.98978, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -7132,7 +7107,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98975, + "ml_probability": 0.99415, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -7182,7 +7157,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99678, + "ml_probability": 0.99783, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -7207,7 +7182,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99678, + "ml_probability": 0.9983, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -7257,7 +7232,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99678, + "ml_probability": 0.99943, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -7282,7 +7257,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98975, + "ml_probability": 0.94807, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -7307,7 +7282,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98975, + "ml_probability": 0.98603, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -7382,7 +7357,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.78636, + "ml_probability": 0.98327, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -7531,8 +7506,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.13134, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -7657,7 +7632,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99798, + "ml_probability": 0.9999, "rule": "Github Old Token", "severity": "high", "confidence": "moderate", @@ -7682,7 +7657,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99798, + "ml_probability": 0.9999, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -8009,56 +7984,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, - "rule": "Auth", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "google_oauth_key = \"ya29.gi_reo_gi_crackle_ln22\"", - "line_num": 1, - "path": "tests/samples/google_oauth_key", - "info": "", - "value": "ya29.gi_reo_gi_crackle_ln22", - "value_start": 20, - "value_end": 47, - "variable": "google_oauth_key", - "entropy_validation": { - "iterator": "BASE36_CHARS", - "entropy": 3.1797273164975133, - "valid": true - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, - "rule": "Key", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "google_oauth_key = \"ya29.gi_reo_gi_crackle_ln22\"", - "line_num": 1, - "path": "tests/samples/google_oauth_key", - "info": "", - "value": "ya29.gi_reo_gi_crackle_ln22", - "value_start": 20, - "value_end": 47, - "variable": "google_oauth_key", - "entropy_validation": { - "iterator": "BASE36_CHARS", - "entropy": 3.1797273164975133, - "valid": true - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -8111,8 +8036,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.99849, "rule": "JSON Web Token", "severity": "medium", "confidence": "moderate", @@ -8462,7 +8387,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99545, + "ml_probability": 0.98178, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -8587,7 +8512,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99105, + "ml_probability": 0.98196, "rule": "JSON Web Token", "severity": "medium", "confidence": "moderate", @@ -8612,7 +8537,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99911, + "ml_probability": 0.95345, "rule": "Key", "severity": "medium", "confidence": "moderate", @@ -8637,7 +8562,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99907, + "ml_probability": 0.99918, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -8662,7 +8587,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99958, + "ml_probability": 0.9992, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -8687,7 +8612,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99955, + "ml_probability": 0.9993, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -8712,7 +8637,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98676, + "ml_probability": 0.96582, "rule": "Key", "severity": "medium", "confidence": "moderate", @@ -8852,7 +8777,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99792, + "ml_probability": 0.9987, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -8942,7 +8867,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99792, + "ml_probability": 0.9987, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -8967,7 +8892,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9812, + "ml_probability": 0.99839, "rule": "Nonce", "severity": "medium", "confidence": "moderate", @@ -9042,7 +8967,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.30171, + "ml_probability": 0.05275, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -9067,7 +8992,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98995, + "ml_probability": 0.99062, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -9092,7 +9017,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99448, + "ml_probability": 0.99437, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -9117,7 +9042,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.92289, + "ml_probability": 0.44523, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -9142,7 +9067,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.96972, + "ml_probability": 0.40746, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -9167,7 +9092,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99646, + "ml_probability": 0.97424, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -9192,7 +9117,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99933, + "ml_probability": 0.9998, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -9217,7 +9142,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99674, + "ml_probability": 0.99048, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -9242,7 +9167,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99674, + "ml_probability": 0.99164, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -9267,7 +9192,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.66622, + "ml_probability": 0.88851, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -9292,7 +9217,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99545, + "ml_probability": 0.98178, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -9317,7 +9242,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.24405, + "ml_probability": 0.99745, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -9342,7 +9267,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99545, + "ml_probability": 0.98178, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -9367,7 +9292,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99077, + "ml_probability": 0.93797, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -9682,7 +9607,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.66622, + "ml_probability": 0.38979, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -9732,7 +9657,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99266, + "ml_probability": 0.0002, "rule": "Salt", "severity": "medium", "confidence": "moderate", @@ -9757,7 +9682,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98571, + "ml_probability": 0.7833, "rule": "Salt", "severity": "medium", "confidence": "moderate", @@ -9782,7 +9707,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.721, + "ml_probability": 0.29383, "rule": "Salt", "severity": "medium", "confidence": "moderate", @@ -9807,7 +9732,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98487, + "ml_probability": 0.90088, "rule": "Salt", "severity": "medium", "confidence": "moderate", @@ -9832,7 +9757,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.69654, + "ml_probability": 0.94635, "rule": "Salt", "severity": "medium", "confidence": "moderate", @@ -9857,7 +9782,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.4468, + "ml_probability": 0.28329, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -10232,7 +10157,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99704, + "ml_probability": 0.90542, "rule": "JSON Web Token", "severity": "medium", "confidence": "moderate", @@ -10281,8 +10206,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.98435, "rule": "JSON Web Token", "severity": "medium", "confidence": "moderate", @@ -10306,8 +10231,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.98435, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -10332,7 +10257,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99401, + "ml_probability": 0.564, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -10357,7 +10282,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98366, + "ml_probability": 0.16929, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -10382,7 +10307,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99833, + "ml_probability": 0.99767, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -10432,7 +10357,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99877, + "ml_probability": 0.99501, "rule": "URL Credentials", "severity": "high", "confidence": "moderate", @@ -10445,7 +10370,7 @@ "value": "dh3sjr8b", "value_start": 22, "value_end": 30, - "variable": null, + "variable": "mongodb://", "entropy_validation": { "iterator": "BASE64_CHARS", "entropy": 3.0, @@ -10457,7 +10382,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99634, + "ml_probability": 0.99957, "rule": "URL Credentials", "severity": "high", "confidence": "moderate", @@ -10470,7 +10395,7 @@ "value": "5WdF4f2jE76a", "value_start": 55, "value_end": 67, - "variable": null, + "variable": "dbconnection://", "entropy_validation": { "iterator": "BASE64_CHARS", "entropy": 3.584962500721156, @@ -10507,7 +10432,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.87752, + "ml_probability": 0.90119, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -10532,7 +10457,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.00852, + "ml_probability": 0.0007, "rule": "Password", "severity": "medium", "confidence": "moderate", diff --git a/tests/data/output.json b/tests/data/output.json index b32f55041..c81d54e28 100644 --- a/tests/data/output.json +++ b/tests/data/output.json @@ -27,7 +27,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99316, + "ml_probability": 0.9997, "rule": "API", "severity": "medium", "confidence": "moderate", @@ -102,7 +102,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99154, + "ml_probability": 0.92134, "rule": "Auth", "severity": "medium", "confidence": "moderate", @@ -127,7 +127,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99315, + "ml_probability": 0.99778, "rule": "Auth", "severity": "medium", "confidence": "moderate", @@ -152,7 +152,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99528, + "ml_probability": 0.99717, "rule": "Auth", "severity": "medium", "confidence": "moderate", @@ -177,7 +177,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99667, + "ml_probability": 0.99902, "rule": "Auth", "severity": "medium", "confidence": "moderate", @@ -202,7 +202,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99711, + "ml_probability": 0.98929, "rule": "Auth", "severity": "medium", "confidence": "moderate", @@ -227,7 +227,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99711, + "ml_probability": 0.98929, "rule": "JSON Web Token", "severity": "medium", "confidence": "moderate", @@ -456,8 +456,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.83144, "rule": "Key", "severity": "medium", "confidence": "moderate", @@ -531,8 +531,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.99078, "rule": "JSON Web Token", "severity": "medium", "confidence": "moderate", @@ -707,7 +707,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98984, + "ml_probability": 0.99899, "rule": "Certificate", "severity": "medium", "confidence": "moderate", @@ -732,7 +732,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99879, + "ml_probability": 0.9974, "rule": "Credential", "severity": "medium", "confidence": "moderate", @@ -807,7 +807,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.94217, + "ml_probability": 0.95881, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -832,57 +832,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.78111, - "rule": "Password", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "ID:master PW:dipPr11Gg!", - "line_num": 2, - "path": "tests/samples/doc_id_pair_passwd_pair", - "info": "", - "value": "dipPr11Gg!", - "value_start": 13, - "value_end": 23, - "variable": "PW", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 2.7897352853986264, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.81375, - "rule": "Password", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "ANYID:master PW:dipPr12Gg!", - "line_num": 3, - "path": "tests/samples/doc_id_pair_passwd_pair", - "info": "", - "value": "dipPr12Gg!", - "value_start": 16, - "value_end": 26, - "variable": "PW", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 2.989735285398626, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97553, + "ml_probability": 0.95202, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -907,7 +857,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98957, + "ml_probability": 0.98333, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -932,7 +882,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98178, + "ml_probability": 0.85971, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -957,7 +907,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97951, + "ml_probability": 0.98429, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -982,7 +932,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98937, + "ml_probability": 0.9944, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1007,7 +957,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99239, + "ml_probability": 0.98281, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1032,7 +982,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.96152, + "ml_probability": 0.93926, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1057,7 +1007,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.94337, + "ml_probability": 0.9076, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1082,7 +1032,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.90819, + "ml_probability": 0.73841, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1107,7 +1057,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.96069, + "ml_probability": 0.97226, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1132,7 +1082,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9855, + "ml_probability": 0.98255, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1157,7 +1107,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99273, + "ml_probability": 0.9886, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1182,7 +1132,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97227, + "ml_probability": 0.97937, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1207,7 +1157,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98513, + "ml_probability": 0.97742, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1232,7 +1182,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98967, + "ml_probability": 0.98383, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1257,7 +1207,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98433, + "ml_probability": 0.98626, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1282,7 +1232,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.96661, + "ml_probability": 0.98951, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1307,7 +1257,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9157, + "ml_probability": 0.97504, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1332,7 +1282,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.90242, + "ml_probability": 0.78353, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1357,7 +1307,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.93864, + "ml_probability": 0.90892, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1382,7 +1332,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99393, + "ml_probability": 0.9669, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1407,7 +1357,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98374, + "ml_probability": 0.95874, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1432,7 +1382,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9868, + "ml_probability": 0.97931, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1457,7 +1407,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97173, + "ml_probability": 0.94527, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1482,7 +1432,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98572, + "ml_probability": 0.97648, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1507,7 +1457,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.96866, + "ml_probability": 0.8991, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1532,7 +1482,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97155, + "ml_probability": 0.95024, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1557,7 +1507,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9814, + "ml_probability": 0.93358, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1582,7 +1532,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9683, + "ml_probability": 0.97012, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1607,7 +1557,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98092, + "ml_probability": 0.93757, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1632,7 +1582,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98723, + "ml_probability": 0.98501, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1657,7 +1607,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.995, + "ml_probability": 0.98351, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1682,7 +1632,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99217, + "ml_probability": 0.99495, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1707,7 +1657,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99366, + "ml_probability": 0.9885, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1732,7 +1682,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98726, + "ml_probability": 0.96882, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1757,7 +1707,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98166, + "ml_probability": 0.99279, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1782,7 +1732,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98929, + "ml_probability": 0.98884, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1807,7 +1757,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9941, + "ml_probability": 0.99041, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1832,7 +1782,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99533, + "ml_probability": 0.99487, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1857,7 +1807,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99094, + "ml_probability": 0.98717, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1882,7 +1832,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99616, + "ml_probability": 0.99065, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1907,7 +1857,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98161, + "ml_probability": 0.96843, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1957,7 +1907,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.93832, + "ml_probability": 0.78091, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -1982,7 +1932,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.96866, + "ml_probability": 0.86764, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2007,7 +1957,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97171, + "ml_probability": 0.8841, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2032,7 +1982,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99545, + "ml_probability": 0.99083, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2057,7 +2007,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99749, + "ml_probability": 0.99799, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2082,7 +2032,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99451, + "ml_probability": 0.99342, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2107,7 +2057,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99606, + "ml_probability": 0.99672, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2132,7 +2082,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99761, + "ml_probability": 0.99586, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2157,7 +2107,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99759, + "ml_probability": 0.98982, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2182,7 +2132,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99134, + "ml_probability": 0.97956, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2207,7 +2157,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97394, + "ml_probability": 0.98157, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2232,7 +2182,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97418, + "ml_probability": 0.96885, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2257,7 +2207,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98534, + "ml_probability": 0.91279, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2282,7 +2232,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99043, + "ml_probability": 0.99518, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2307,7 +2257,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99413, + "ml_probability": 0.99354, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2332,7 +2282,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98612, + "ml_probability": 0.94077, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2357,7 +2307,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9925, + "ml_probability": 0.99246, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2382,7 +2332,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98443, + "ml_probability": 0.97722, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2407,7 +2357,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98844, + "ml_probability": 0.97789, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2432,7 +2382,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99429, + "ml_probability": 0.99818, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2457,7 +2407,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99609, + "ml_probability": 0.99392, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2482,7 +2432,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99555, + "ml_probability": 0.98892, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2507,7 +2457,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98078, + "ml_probability": 0.98848, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2532,7 +2482,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99698, + "ml_probability": 0.99372, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2557,7 +2507,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99697, + "ml_probability": 0.99115, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2582,7 +2532,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99288, + "ml_probability": 0.9926, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2607,7 +2557,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98647, + "ml_probability": 0.95203, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2632,7 +2582,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.96062, + "ml_probability": 0.74213, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2657,7 +2607,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98849, + "ml_probability": 0.93994, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2682,7 +2632,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98849, + "ml_probability": 0.93994, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2707,7 +2657,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99555, + "ml_probability": 0.99221, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2732,7 +2682,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98872, + "ml_probability": 0.95412, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2757,7 +2707,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9858, + "ml_probability": 0.96542, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2782,7 +2732,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97663, + "ml_probability": 0.80353, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2832,7 +2782,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97658, + "ml_probability": 0.9681, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2882,7 +2832,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98485, + "ml_probability": 0.97278, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2907,7 +2857,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9931, + "ml_probability": 0.89492, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -2932,132 +2882,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98537, - "rule": "Password", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "id:master@example.com,pw:IHQSB1GG!", - "line_num": 102, - "path": "tests/samples/doc_id_pair_passwd_pair", - "info": "", - "value": "IHQSB1GG!", - "value_start": 25, - "value_end": 34, - "variable": "master@example.com,pw", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 2.595488890170944, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.82012, - "rule": "Password", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "ID/PW:master/iPp0@GRq", - "line_num": 1, - "path": "tests/samples/doc_id_passwd_pair", - "info": "", - "value": "master/iPp0@GRq", - "value_start": 6, - "value_end": 21, - "variable": "PW", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.64643122256795, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.853, - "rule": "Password", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "ID/Password:master/iPp2@GRq", - "line_num": 3, - "path": "tests/samples/doc_id_passwd_pair", - "info": "", - "value": "master/iPp2@GRq", - "value_start": 12, - "value_end": 27, - "variable": "Password", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.64643122256795, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.93163, - "rule": "Password", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "ID/Pass:master/iPp3@GRq", - "line_num": 4, - "path": "tests/samples/doc_id_passwd_pair", - "info": "", - "value": "master/iPp3@GRq", - "value_start": 8, - "value_end": 23, - "variable": "Pass", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.64643122256795, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.94939, - "rule": "Password", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "ID/PW=master/iPp5@GRq", - "line_num": 6, - "path": "tests/samples/doc_id_passwd_pair", - "info": "", - "value": "master/iPp5@GRq", - "value_start": 6, - "value_end": 21, - "variable": "PW", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.64643122256795, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.96971, + "ml_probability": 0.95141, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3082,7 +2907,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9804, + "ml_probability": 0.94637, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3129,56 +2954,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.81258, - "rule": "Password", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "\uc544\uc774\ub514/PW:master/iPp16@GRq", - "line_num": 17, - "path": "tests/samples/doc_id_passwd_pair", - "info": "", - "value": "master/iPp16@GRq", - "value_start": 7, - "value_end": 23, - "variable": "PW", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.75, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.90956, - "rule": "Password", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "\uacc4\uc815/PW:master/iPp17@GRq", - "line_num": 18, - "path": "tests/samples/doc_id_passwd_pair", - "info": "", - "value": "master/iPp17@GRq", - "value_start": 6, - "value_end": 22, - "variable": "PW", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.75, - "valid": false - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -3207,32 +2982,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86488, - "rule": "Password", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "98.76.54.32 id/pw:master/iPp19@GRq", - "line_num": 20, - "path": "tests/samples/doc_id_passwd_pair", - "info": "", - "value": "master/iPp19@GRq", - "value_start": 18, - "value_end": 34, - "variable": "pw", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.75, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.88303, + "ml_probability": 0.64725, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3257,57 +3007,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.83284, - "rule": "Password", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "ID/PWD:master/iPp21@GRq", - "line_num": 22, - "path": "tests/samples/doc_id_passwd_pair", - "info": "", - "value": "master/iPp21@GRq", - "value_start": 7, - "value_end": 23, - "variable": "PWD", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.75, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.83065, - "rule": "Password", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "user/pwd:master/iPp22@GRq", - "line_num": 23, - "path": "tests/samples/doc_id_passwd_pair", - "info": "", - "value": "master/iPp22@GRq", - "value_start": 9, - "value_end": 25, - "variable": "pwd", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.625, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98612, + "ml_probability": 0.77219, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3332,7 +3032,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97221, + "ml_probability": 0.89744, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3357,32 +3057,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.94576, - "rule": "Password", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "ID/PW:master/iPp28@GRq", - "line_num": 29, - "path": "tests/samples/doc_id_passwd_pair", - "info": "", - "value": "master/iPp28@GRq", - "value_start": 6, - "value_end": 22, - "variable": "PW", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.75, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.97248, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3407,7 +3082,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.6758, + "ml_probability": 0.83211, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3432,7 +3107,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.9785, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3457,7 +3132,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.95095, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3482,7 +3157,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.9891, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3507,7 +3182,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.96869, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3532,7 +3207,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.9862, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3582,7 +3257,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.75257, + "ml_probability": 0.98415, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3607,7 +3282,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.6758, + "ml_probability": 0.92058, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3632,7 +3307,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.7933, + "ml_probability": 0.96353, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3657,7 +3332,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.98606, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3682,7 +3357,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.7933, + "ml_probability": 0.9317, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3707,7 +3382,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.99035, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3732,7 +3407,32 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.76605, + "rule": "Password", + "severity": "medium", + "confidence": "moderate", + "line_data_list": [ + { + "line": "Password:Prl23Db#@,pwd=Prl23Db#@", + "line_num": 32, + "path": "tests/samples/doc_passwd_pair", + "info": "", + "value": "Prl23Db#@,pwd=Prl23Db#@", + "value_start": 9, + "value_end": 32, + "variable": "Password", + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 2.931483269957663, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.98411, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3757,7 +3457,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.98308, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3782,7 +3482,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.7933, + "ml_probability": 0.93944, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3807,7 +3507,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.7933, + "ml_probability": 0.9219, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3832,7 +3532,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.94673, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3857,7 +3557,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.99169, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3882,7 +3582,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.99226, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3907,7 +3607,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.86693, + "ml_probability": 0.9922, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -3932,7 +3632,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99405, + "ml_probability": 0.99781, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -3957,7 +3657,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99422, + "ml_probability": 0.99411, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -3982,7 +3682,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99416, + "ml_probability": 0.99745, "rule": "Key", "severity": "medium", "confidence": "moderate", @@ -4007,7 +3707,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99416, + "ml_probability": 0.99745, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -4032,7 +3732,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99893, + "ml_probability": 0.99944, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -4057,7 +3757,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99885, + "ml_probability": 0.99933, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -4082,7 +3782,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99349, + "ml_probability": 0.99836, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -4107,7 +3807,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99863, + "ml_probability": 0.99931, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -4132,7 +3832,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99902, + "ml_probability": 0.99866, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -4157,7 +3857,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99908, + "ml_probability": 0.99897, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -4182,7 +3882,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99572, + "ml_probability": 0.98914, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -4207,7 +3907,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99809, + "ml_probability": 0.99814, "rule": "API", "severity": "medium", "confidence": "moderate", @@ -4232,7 +3932,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99809, + "ml_probability": 0.99814, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -4257,7 +3957,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.96195, + "ml_probability": 0.98817, "rule": "Key", "severity": "medium", "confidence": "moderate", @@ -4282,7 +3982,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99482, + "ml_probability": 0.99834, "rule": "Key", "severity": "medium", "confidence": "moderate", @@ -4307,7 +4007,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99482, + "ml_probability": 0.99834, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -4332,7 +4032,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.97555, + "ml_probability": 0.98394, "rule": "Key", "severity": "medium", "confidence": "moderate", @@ -4357,7 +4057,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9969, + "ml_probability": 0.99717, "rule": "Key", "severity": "medium", "confidence": "moderate", @@ -4382,7 +4082,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9969, + "ml_probability": 0.99717, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -4407,7 +4107,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99157, + "ml_probability": 0.99626, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -4432,7 +4132,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99506, + "ml_probability": 0.99912, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -4457,7 +4157,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.95206, + "ml_probability": 0.98197, "rule": "Auth", "severity": "medium", "confidence": "moderate", @@ -4482,7 +4182,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.95206, + "ml_probability": 0.98197, "rule": "Key", "severity": "medium", "confidence": "moderate", @@ -4582,7 +4282,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.85092, + "ml_probability": 0.97276, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -4607,7 +4307,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98975, + "ml_probability": 0.91921, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -4632,7 +4332,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.7574, + "ml_probability": 0.96022, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -4682,7 +4382,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.78754, + "ml_probability": 0.98548, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -4732,7 +4432,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99678, + "ml_probability": 0.99913, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -4782,7 +4482,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.91394, + "ml_probability": 0.99924, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -4857,7 +4557,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99678, + "ml_probability": 0.99691, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -4882,7 +4582,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.78926, + "ml_probability": 0.85317, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -4932,7 +4632,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.7517, + "ml_probability": 0.91066, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -4982,7 +4682,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98813, + "ml_probability": 0.95322, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5007,7 +4707,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98975, + "ml_probability": 0.96597, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5032,7 +4732,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99678, + "ml_probability": 0.9762, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5082,7 +4782,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.78754, + "ml_probability": 0.98493, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5132,7 +4832,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.78754, + "ml_probability": 0.97065, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5157,7 +4857,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99678, + "ml_probability": 0.98331, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5207,7 +4907,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.91394, + "ml_probability": 0.99901, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5257,7 +4957,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99678, + "ml_probability": 0.99388, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5282,7 +4982,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99714, + "ml_probability": 0.99155, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5307,7 +5007,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.92567, + "ml_probability": 0.80778, "rule": "Key", "severity": "medium", "confidence": "moderate", @@ -5332,7 +5032,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.92567, + "ml_probability": 0.80778, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -5382,7 +5082,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9819, + "ml_probability": 0.98799, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5432,7 +5132,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.7517, + "ml_probability": 0.83698, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5482,7 +5182,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.91394, + "ml_probability": 0.99524, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5507,7 +5207,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99563, + "ml_probability": 0.99514, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5557,17 +5257,17 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.78926, + "ml_probability": 0.92603, "rule": "Password", "severity": "medium", "confidence": "moderate", "line_data_list": [ { - "line": "id:xxxx(ANYpw:IhqSb1Gg)", + "line": "id:xxxx(ANYpw:IhqSb1Ga)", "line_num": 46, "path": "tests/samples/doc_various", "info": "", - "value": "IhqSb1Gg)", + "value": "IhqSb1Ga)", "value_start": 14, "value_end": 23, "variable": "ANYpw", @@ -5607,7 +5307,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99334, + "ml_probability": 0.89208, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5732,7 +5432,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.7517, + "ml_probability": 0.9446, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5757,7 +5457,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.7574, + "ml_probability": 0.90454, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5807,7 +5507,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98975, + "ml_probability": 0.98522, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5857,7 +5557,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99563, + "ml_probability": 0.99809, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5882,7 +5582,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99563, + "ml_probability": 0.99768, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5907,7 +5607,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.70183, + "ml_probability": 0.98499, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5932,7 +5632,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99563, + "ml_probability": 0.99273, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -5957,7 +5657,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.92685, + "ml_probability": 0.9906, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6004,6 +5704,31 @@ } ] }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.72512, + "rule": "Password", + "severity": "medium", + "confidence": "moderate", + "line_data_list": [ + { + "line": "(ID&PWD):master/IhqSb1Gg", + "line_num": 71, + "path": "tests/samples/doc_various", + "info": "", + "value": "master/IhqSb1Gg", + "value_start": 9, + "value_end": 24, + "variable": "PWD)", + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 3.906890595608518, + "valid": false + } + } + ] + }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -6032,7 +5757,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.85092, + "ml_probability": 0.97276, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6082,7 +5807,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99334, + "ml_probability": 0.86693, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6132,7 +5857,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9819, + "ml_probability": 0.96165, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6157,7 +5882,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99082, + "ml_probability": 0.99898, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6232,7 +5957,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98975, + "ml_probability": 0.99288, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6257,7 +5982,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98975, + "ml_probability": 0.98185, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6307,7 +6032,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98975, + "ml_probability": 0.98044, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6357,7 +6082,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98975, + "ml_probability": 0.9976, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6432,7 +6157,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99261, + "ml_probability": 0.99285, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6532,7 +6257,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98975, + "ml_probability": 0.99448, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6632,7 +6357,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.91394, + "ml_probability": 0.99751, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6682,7 +6407,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.91394, + "ml_probability": 0.99652, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6732,7 +6457,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99678, + "ml_probability": 0.99617, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6782,7 +6507,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.91394, + "ml_probability": 0.98978, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6857,7 +6582,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98975, + "ml_probability": 0.99415, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6907,7 +6632,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99678, + "ml_probability": 0.99783, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6932,7 +6657,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99678, + "ml_probability": 0.9983, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -6982,7 +6707,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99678, + "ml_probability": 0.99943, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -7007,7 +6732,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98975, + "ml_probability": 0.94807, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -7032,7 +6757,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98975, + "ml_probability": 0.98603, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -7107,7 +6832,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.78636, + "ml_probability": 0.98327, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -7254,31 +6979,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, - "rule": "Token", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "GI_REO_GI_FACEBOOK_TOKEN = \"EAACEdEose0cBAlGy7KeQ5Yna9Coup39tiYdoQ4jHF\"", - "line_num": 1, - "path": "tests/samples/facebook_key", - "info": "", - "value": "EAACEdEose0cBAlGy7KeQ5Yna9Coup39tiYdoQ4jHF", - "value_start": 28, - "value_end": 70, - "variable": "GI_REO_GI_FACEBOOK_TOKEN", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 4.766968315481371, - "valid": true - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -7382,7 +7082,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99798, + "ml_probability": 0.9999, "rule": "Github Old Token", "severity": "high", "confidence": "moderate", @@ -7407,7 +7107,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99798, + "ml_probability": 0.9999, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -7734,56 +7434,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, - "rule": "Auth", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "google_oauth_key = \"ya29.gi_reo_gi_crackle_ln22\"", - "line_num": 1, - "path": "tests/samples/google_oauth_key", - "info": "", - "value": "ya29.gi_reo_gi_crackle_ln22", - "value_start": 20, - "value_end": 47, - "variable": "google_oauth_key", - "entropy_validation": { - "iterator": "BASE36_CHARS", - "entropy": 3.1797273164975133, - "valid": true - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, - "rule": "Key", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "google_oauth_key = \"ya29.gi_reo_gi_crackle_ln22\"", - "line_num": 1, - "path": "tests/samples/google_oauth_key", - "info": "", - "value": "ya29.gi_reo_gi_crackle_ln22", - "value_start": 20, - "value_end": 47, - "variable": "google_oauth_key", - "entropy_validation": { - "iterator": "BASE36_CHARS", - "entropy": 3.1797273164975133, - "valid": true - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -7836,8 +7486,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.99849, "rule": "JSON Web Token", "severity": "medium", "confidence": "moderate", @@ -8187,7 +7837,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99545, + "ml_probability": 0.98178, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -8312,7 +7962,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99105, + "ml_probability": 0.98196, "rule": "JSON Web Token", "severity": "medium", "confidence": "moderate", @@ -8337,7 +7987,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99911, + "ml_probability": 0.95345, "rule": "Key", "severity": "medium", "confidence": "moderate", @@ -8362,7 +8012,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99907, + "ml_probability": 0.99918, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -8387,7 +8037,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99958, + "ml_probability": 0.9992, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -8412,7 +8062,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99955, + "ml_probability": 0.9993, "rule": "Secret", "severity": "medium", "confidence": "moderate", @@ -8437,7 +8087,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98676, + "ml_probability": 0.96582, "rule": "Key", "severity": "medium", "confidence": "moderate", @@ -8577,7 +8227,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99792, + "ml_probability": 0.9987, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -8667,7 +8317,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99792, + "ml_probability": 0.9987, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -8692,7 +8342,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.9812, + "ml_probability": 0.99839, "rule": "Nonce", "severity": "medium", "confidence": "moderate", @@ -8767,7 +8417,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98995, + "ml_probability": 0.99062, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -8792,7 +8442,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99448, + "ml_probability": 0.99437, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -8817,57 +8467,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.92289, - "rule": "Password", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "def connect(passwd: str = \"cq2tPr1a2\"): # python default arg", - "line_num": 4, - "path": "tests/samples/pass_valid", - "info": "", - "value": "cq2tPr1a2", - "value_start": 27, - "value_end": 36, - "variable": "passwd", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 2.9477027792200903, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.96972, - "rule": "Password", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "if passworsd == \"q4c1a2oPd\": # __eq__ separator", - "line_num": 5, - "path": "tests/samples/pass_valid", - "info": "", - "value": "q4c1a2oPd", - "value_start": 17, - "value_end": 26, - "variable": "passworsd", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.169925001442312, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99646, + "ml_probability": 0.97424, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -8892,7 +8492,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99933, + "ml_probability": 0.9998, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -8917,7 +8517,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99674, + "ml_probability": 0.99048, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -8942,7 +8542,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99674, + "ml_probability": 0.99164, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -8967,7 +8567,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.66622, + "ml_probability": 0.88851, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -8992,7 +8592,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99545, + "ml_probability": 0.98178, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -9017,7 +8617,32 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99545, + "ml_probability": 0.99745, + "rule": "Password", + "severity": "medium", + "confidence": "moderate", + "line_data_list": [ + { + "line": "password = \"MYPSWRD!@#$%^&*\"", + "line_num": 1, + "path": "tests/samples/password.tfvars", + "info": "", + "value": "MYPSWRD!@#$%^&*", + "value_start": 12, + "value_end": 27, + "variable": "password", + "entropy_validation": { + "iterator": "BASE64_CHARS", + "entropy": 1.8232156112839757, + "valid": false + } + } + ] + }, + { + "api_validation": "NOT_AVAILABLE", + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.98178, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -9042,7 +8667,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99077, + "ml_probability": 0.93797, "rule": "Password", "severity": "medium", "confidence": "moderate", @@ -9354,31 +8979,6 @@ } ] }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.66622, - "rule": "Password", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "pwd = \"cackle!\"", - "line_num": 1, - "path": "tests/samples/pwd.gradle", - "info": "", - "value": "cackle!", - "value_start": 7, - "value_end": 14, - "variable": "pwd", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 2.120589933192232, - "valid": false - } - } - ] - }, { "api_validation": "NOT_AVAILABLE", "ml_validation": "NOT_AVAILABLE", @@ -9407,32 +9007,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99266, - "rule": "Salt", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "var Himalayan_salt = \"$hal$1te$TnnGdhednJsdQ5nfetwZ\";", - "line_num": 1, - "path": "tests/samples/salt.hs", - "info": "", - "value": "$hal$1te$TnnGdhednJsdQ5nfetwZ", - "value_start": 22, - "value_end": 51, - "variable": "Himalayan_salt", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 3.613714857551378, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98571, + "ml_probability": 0.7833, "rule": "Salt", "severity": "medium", "confidence": "moderate", @@ -9457,32 +9032,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.721, - "rule": "Salt", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "salt2 = r\"\"\"\\0x12\\0x3s\"\"\"", - "line_num": 2, - "path": "tests/samples/salt.py", - "info": "", - "value": "\\0x12\\0x3s", - "value_start": 12, - "value_end": 22, - "variable": "salt2", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 2.25754247590989, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98487, + "ml_probability": 0.90088, "rule": "Salt", "severity": "medium", "confidence": "moderate", @@ -9507,7 +9057,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.69654, + "ml_probability": 0.94635, "rule": "Salt", "severity": "medium", "confidence": "moderate", @@ -9882,7 +9432,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99704, + "ml_probability": 0.90542, "rule": "JSON Web Token", "severity": "medium", "confidence": "moderate", @@ -9931,8 +9481,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.98435, "rule": "JSON Web Token", "severity": "medium", "confidence": "moderate", @@ -9956,8 +9506,8 @@ }, { "api_validation": "NOT_AVAILABLE", - "ml_validation": "NOT_AVAILABLE", - "ml_probability": null, + "ml_validation": "VALIDATED_KEY", + "ml_probability": 0.98435, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -9982,57 +9532,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99401, - "rule": "Token", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "gi_reo_gi_token = \"G1Re06G1BdgNseiJDN21Z094M\"", - "line_num": 1, - "path": "tests/samples/token.toml", - "info": "", - "value": "G1Re06G1BdgNseiJDN21Z094M", - "value_start": 19, - "value_end": 44, - "variable": "gi_reo_gi_token", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 4.133660689688186, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.98366, - "rule": "Token", - "severity": "medium", - "confidence": "moderate", - "line_data_list": [ - { - "line": "Token-> DemoToken: Nxs094M3ed2s1Re0F4M3ed2GZ8M= <- for User : demo", - "line_num": 2, - "path": "tests/samples/token.toml", - "info": "", - "value": "Nxs094M3ed2s1Re0F4M3ed2GZ8M=", - "value_start": 19, - "value_end": 47, - "variable": "DemoToken", - "entropy_validation": { - "iterator": "BASE64_CHARS", - "entropy": 4.039148671903071, - "valid": false - } - } - ] - }, - { - "api_validation": "NOT_AVAILABLE", - "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99833, + "ml_probability": 0.99767, "rule": "Token", "severity": "medium", "confidence": "moderate", @@ -10082,7 +9582,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99877, + "ml_probability": 0.99501, "rule": "URL Credentials", "severity": "high", "confidence": "moderate", @@ -10095,7 +9595,7 @@ "value": "dh3sjr8b", "value_start": 22, "value_end": 30, - "variable": null, + "variable": "mongodb://", "entropy_validation": { "iterator": "BASE64_CHARS", "entropy": 3.0, @@ -10107,7 +9607,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.99634, + "ml_probability": 0.99957, "rule": "URL Credentials", "severity": "high", "confidence": "moderate", @@ -10120,7 +9620,7 @@ "value": "5WdF4f2jE76a", "value_start": 55, "value_end": 67, - "variable": null, + "variable": "dbconnection://", "entropy_validation": { "iterator": "BASE64_CHARS", "entropy": 3.584962500721156, @@ -10157,7 +9657,7 @@ { "api_validation": "NOT_AVAILABLE", "ml_validation": "VALIDATED_KEY", - "ml_probability": 0.87752, + "ml_probability": 0.90119, "rule": "Password", "severity": "medium", "confidence": "moderate", diff --git a/tests/ml_model/test_ml_validator.py b/tests/ml_model/test_ml_validator.py index 76a209e80..e469ef261 100644 --- a/tests/ml_model/test_ml_validator.py +++ b/tests/ml_model/test_ml_validator.py @@ -1,18 +1,23 @@ +import copy import unittest +from typing import Tuple + +import numpy as np from credsweeper import ThresholdPreset from credsweeper.app import APP_PATH from credsweeper.config import Config -from credsweeper.credentials import Candidate +from credsweeper.credentials import Candidate, CandidateKey from credsweeper.ml_model import MlValidator from credsweeper.utils import Util +from tests import AZ_STRING class TestMlValidator(unittest.TestCase): - def test_ml_validator_simple_p(self): - ml_validator = MlValidator(threshold=ThresholdPreset.medium) - assert ml_validator is not None + def setUp(self): + self.ml_validator = MlValidator(threshold=ThresholdPreset.medium) + assert self.ml_validator is not None file_name = APP_PATH / "secret" / "config.json" config_dict = Util.json_load(file_name) config_dict["validation"] = {} @@ -23,26 +28,66 @@ def test_ml_validator_simple_p(self): config_dict["doc"] = False config_dict["find_by_ext_list"] = [] config_dict["size_limit"] = None - config = Config(config_dict) - candidate = Candidate.get_dummy_candidate(config, "main.py", ".py", "test_info") + self.config = Config(config_dict) + + def test_ml_validator_simple_p(self): + + def validate(_candidate: Candidate) -> Tuple[bool, float]: + """Validate single credential candidate.""" + candidate_key = CandidateKey(_candidate.line_data_list[0]) + sample_as_batch = [(candidate_key, [_candidate])] + is_cred_batch, probability_batch = self.ml_validator.validate_groups(sample_as_batch, 1) + return is_cred_batch[0], probability_batch[0] + + candidate = Candidate.get_dummy_candidate(self.config, "main.py", ".py", "info") + candidate.rule_name = "Password" candidate.line_data_list[0].line = 'password="Ahga%$FiQ@Ei8"' candidate.line_data_list[0].variable = "password" + candidate.line_data_list[0].value_start = 16 + candidate.line_data_list[0].value_end = 25 candidate.line_data_list[0].value = "Ahga%$FiQ@Ei8" - decision, probability = ml_validator.validate(candidate) - self.assertAlmostEqual(probability, 0.9676, delta=0.0001) + decision, probability = validate(candidate) + self.assertAlmostEqual(probability, 0.9980274438858032, delta=0.0001) candidate.line_data_list[0].path = "sample.py" candidate.line_data_list[0].file_type = ".yaml" - decision, probability = ml_validator.validate(candidate) - self.assertAlmostEqual(probability, 0.9548, delta=0.0001) + decision, probability = validate(candidate) + self.assertAlmostEqual(probability, 0.9974609613418579, delta=0.0001) candidate.line_data_list[0].path = "test.zip" candidate.line_data_list[0].file_type = ".zip" - decision, probability = ml_validator.validate(candidate) - self.assertAlmostEqual(probability, 0.9308, delta=0.0001) + decision, probability = validate(candidate) + self.assertAlmostEqual(probability, 0.9963459372520447, delta=0.0001) candidate.line_data_list[0].path = "other.txt" candidate.line_data_list[0].file_type = ".txt" - decision, probability = ml_validator.validate(candidate) - self.assertAlmostEqual(probability, 0.8263, delta=0.0001) + decision, probability = validate(candidate) + self.assertAlmostEqual(probability, 0.9911893606185913, delta=0.0001) + + def test_subtext_n(self): + self.assertEqual("", MlValidator.subtext("", 0, 0)) + + def test_subtext_p(self): + self.assertEqual("The quick ", MlValidator.subtext(AZ_STRING, 0, 5)) + self.assertEqual("The quick ", MlValidator.subtext(AZ_STRING, 3, 5)) + self.assertEqual(" fox jumps", MlValidator.subtext(AZ_STRING, 20, 5)) + self.assertEqual("e lazy dog", MlValidator.subtext(AZ_STRING, len(AZ_STRING) - 2, 5)) + self.assertEqual("the lazy dog", MlValidator.subtext(AZ_STRING, len(AZ_STRING) - 2, 6)) + + def test_extract_features_p(self): + candidate1 = Candidate.get_dummy_candidate(self.config, "main.py", ".py", "info") + candidate1.line_data_list[0].line = 'ABC123' + candidate1.line_data_list[0].variable = "ABC" + candidate1.line_data_list[0].value_start = 3 + candidate1.line_data_list[0].value_end = 6 + candidate1.line_data_list[0].value = "123" + candidate1.rule_name = "Password" + features1 = self.ml_validator.extract_features([candidate1]) + self.assertEqual(15, np.count_nonzero(features1)) + candidate2 = copy.deepcopy(candidate1) + features2 = self.ml_validator.extract_features([candidate1, candidate2]) + self.assertEqual(15, np.count_nonzero(features2)) + candidate2.rule_name = "Secret" + features3 = self.ml_validator.extract_features([candidate1, candidate2]) + self.assertEqual(16, np.count_nonzero(features3)) diff --git a/tests/samples/doc_various b/tests/samples/doc_various index c912bbced..cdcfec8ef 100644 --- a/tests/samples/doc_various +++ b/tests/samples/doc_various @@ -43,7 +43,7 @@ ssh -p 2222 # port number - not a password password for master:IhqSb1Gg gildong.hong@98.76.54.32(master/IhqSb1Gg) ID/Pass:xxxx:master/IhqSb1Gg xxxx:master/IhqSb1Gg -id:xxxx(ANYpw:IhqSb1Gg) +id:xxxx(ANYpw:IhqSb1Ga) gildong.hong@98.76.54.32,pw:IhqSb1Gg 98.76.54.32(master/IhqSb1Gg,master/IhqSb1Gg) 98.76.54.32(master/IhqSb1Gg master/IhqSb1Gg) diff --git a/tests/test_main.py b/tests/test_main.py index 4c01070a9..19a649c4f 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -798,7 +798,7 @@ def test_param_n(self) -> None: def test_param_p(self) -> None: # internal parametrized tests for quick debug items = [ # - ("prod.py", b"secret_api_key='Ah\\tga%$FiQ@Ei8'", "secret_api_key", "Ah\\tga%$FiQ@Ei8"), # + ("prod.py", b"secret_api_key='Ahga%$FiQ@Ei8'", "secret_api_key", "Ahga%$FiQ@Ei8"), # ("x.sh", b"connect 'odbc:proto://localhost:3289/connectrfs;user=admin1;password=bdsi73hsa;super=true", "password", "bdsi73hsa"), # ("main.sh", b" otpauth://totp/alice%40google.com?secretik=JK2XPEH0BYXA3DPP&digits=8 ", "secretik",