Skip to content

Commit

Permalink
Use RegexBasedDetector.assign_regex_generator (Yelp#215)
Browse files Browse the repository at this point in the history
* feat: use assign regex in cloudant

* feat: use assign regex in db2

* feat: use assign regex in gh

* feat: use assign regex in iam

* feat: use assign regex in sl

* address comments

* address comments

* address comments
  • Loading branch information
XIANJUN ZHU authored and justineyster committed Jun 24, 2020
1 parent 5558ea0 commit 7b91d0e
Show file tree
Hide file tree
Showing 10 changed files with 137 additions and 221 deletions.
2 changes: 1 addition & 1 deletion detect_secrets/plugins/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ def assign_regex_generator(prefix_regex, password_keyword_regex, password_regex)
opt_close_square_bracket = r'(?:\]|)'
opt_dash_undrscr = r'(?:_|-|)'
opt_space = r'(?: *)'
assignment = r'(?:=|:|:=|=>|::)'
assignment = r'(?:=|:|:=|=>| +|::)'
return re.compile(
r'{begin}{opt_open_square_bracket}{opt_quote}{prefix_regex}{opt_dash_undrscr}'
'{password_keyword_regex}{opt_quote}{opt_close_square_bracket}{opt_space}'
Expand Down
27 changes: 7 additions & 20 deletions detect_secrets/plugins/cloudant.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ class CloudantDetector(RegexBasedDetector):

def verify(self, token, content, potential_secret=None):

hosts = get_host(content)
hosts = find_host(content)
if not hosts:
return VerifiedResult.UNVERIFIED

Expand All @@ -77,28 +77,15 @@ def verify(self, token, content, potential_secret=None):
return VerifiedResult.VERIFIED_FALSE


def get_host(content):

# opt means optional
opt_quote = r'(?:"|\'|)'
opt_cl = r'(?:cloudant|cl|)'
opt_dash_undrscr = r'(?:_|-|)'
def find_host(content):
opt_hostname_keyword = r'(?:hostname|host|username|id|user|userid|user-id|user-name|' \
'name|user_id|user_name|uname)'
opt_space = r'(?: |)'
assignment = r'(?:\=|:|:=|=>)+'
hostname = r'(\w(?:\w|_|-)+)'
regex = re.compile(
r'{opt_quote}{opt_cl}{opt_dash_undrscr}{opt_hostname_keyword}{opt_space}{opt_quote}'
'{assignment}{opt_space}{opt_quote}{hostname}{opt_quote}'.format(
opt_quote=opt_quote,
opt_cl=opt_cl,
opt_dash_undrscr=opt_dash_undrscr,
opt_hostname_keyword=opt_hostname_keyword,
opt_space=opt_space,
hostname=hostname,
assignment=assignment,
), flags=re.IGNORECASE,

regex = RegexBasedDetector.assign_regex_generator(
prefix_regex=CloudantDetector.cl,
password_keyword_regex=opt_hostname_keyword,
password_regex=hostname,
)

return [
Expand Down
22 changes: 4 additions & 18 deletions detect_secrets/plugins/db2.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,24 +133,10 @@ def verify_db2_credentials(


def find_other_factor(content, factor_keyword_regex, factor_regex):
begin = r'(?:(?<=\W)|(?<=^))'
opt_quote = r'(?:"|\'|)'
opt_db = r'(?:db2|dashdb|db|)'
opt_dash_undrscr = r'(?:_|-|)'
opt_space = r'(?: *)'
assignment = r'(?:=|:|:=|=>|::)'
regex = re.compile(
r'{begin}{opt_quote}{opt_db}{opt_dash_undrscr}{factor_keyword}{opt_quote}{opt_space}'
'{assignment}{opt_space}{opt_quote}{factor}{opt_quote}'.format(
begin=begin,
opt_quote=opt_quote,
opt_db=opt_db,
opt_dash_undrscr=opt_dash_undrscr,
factor_keyword=factor_keyword_regex,
opt_space=opt_space,
assignment=assignment,
factor=factor_regex,
), flags=re.IGNORECASE,
regex = RegexBasedDetector.assign_regex_generator(
prefix_regex=DB2Detector.opt_db,
password_keyword_regex=factor_keyword_regex,
password_regex=factor_regex,
)

return [
Expand Down
22 changes: 5 additions & 17 deletions detect_secrets/plugins/gh.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,32 +12,20 @@ class GHDetector(RegexBasedDetector):

secret_type = 'GitHub Credentials'

opt_github = r'(?:github|gh|ghe|git|)'
opt_github_prefix = r'(?:github|gh|ghe|git|)(?:_|-|)(?:api|)'
opt_space = r'(?: *)'
opt_quote = r'(?:"|\'|)'
opt_assignment = r'(?:=|:|:=|=>|)'
opt_dash_undrscr = r'(?:_|-|)'
opt_api = r'(?:api|)'
header_keyword = r'(?:token|bearer|Basic)'
key_or_pass = r'(?:key|pwd|password|pass|token|oauth)'
api_endpoint = r'(?:github.ibm.com|api.github.ibm.com)'
forty_hex = r'(?:(?<=\W)|(?<=^))([0-9a-f]{40})(?:(?=\W)|(?=$))'
b64_encoded_token = r'(?:(?<=\W)|(?<=^))([A-Za-z0-9+/]{55}=)(?:(?=\W)|(?=$))'
opt_username = r'(?:[a-zA-Z0-9-]+:|)'
denylist = [
re.compile(
r'{opt_quote}{opt_github}{opt_dash_undrscr}{opt_api}{opt_dash_undrscr}{key_or_pass}'
'{opt_quote}{opt_space}{opt_assignment}{opt_space}{opt_quote}{forty_hex}'
'{opt_quote}'.format(
opt_quote=opt_quote,
opt_github=opt_github,
opt_dash_undrscr=opt_dash_undrscr,
opt_api=opt_api,
key_or_pass=key_or_pass,
opt_space=opt_space,
opt_assignment=opt_assignment,
forty_hex=forty_hex,
), flags=re.IGNORECASE,
RegexBasedDetector.assign_regex_generator(
prefix_regex=opt_github_prefix,
password_keyword_regex=key_or_pass,
password_regex=forty_hex,
),
re.compile(
r'https://{opt_username}{forty_hex}@{api_endpoint}'.format(
Expand Down
24 changes: 4 additions & 20 deletions detect_secrets/plugins/ibm_cloud_iam.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from __future__ import absolute_import

import re

import requests

from .base import RegexBasedDetector
Expand All @@ -13,31 +11,17 @@ class IBMCloudIAMDetector(RegexBasedDetector):
secret_type = 'IBM Cloud IAM Key'

# opt means optional
opt_quote = r'(?:"|\'|)'
opt_dashes = r'(?:--|)'
ibm_cloud_iam = r'(?:ibm(?:_|-|)cloud(?:_|-|)iam|cloud(?:_|-|)iam|' + \
r'ibm(?:_|-|)cloud|ibm(?:_|-|)iam|ibm|iam|cloud)'
opt_dash_undrscr = r'(?:_|-|)'
opt_api = r'(?:api|)'
key_or_pass = r'(?:key|pwd|password|pass|token)'
opt_space = r'(?: *)'
opt_assignment = r'(?:=|:|:=|=>|)'
secret = r'([a-zA-z0-9_\-]{44})'
denylist = [
re.compile(
r'{opt_quote}{opt_dashes}{ibm_cloud_iam}{opt_dash_undrscr}{opt_api}{opt_dash_undrscr}'
'{key_or_pass}{opt_quote}{opt_space}{opt_assignment}{opt_space}{opt_quote}'
'{secret}{opt_quote}'.format(
opt_quote=opt_quote,
opt_dashes=opt_dashes,
ibm_cloud_iam=ibm_cloud_iam,
opt_dash_undrscr=opt_dash_undrscr,
opt_api=opt_api,
key_or_pass=key_or_pass,
opt_space=opt_space,
opt_assignment=opt_assignment,
secret=secret,
), flags=re.IGNORECASE,
RegexBasedDetector.assign_regex_generator(
prefix_regex=ibm_cloud_iam + opt_dash_undrscr + opt_api,
password_keyword_regex=key_or_pass,
password_regex=secret,
),
]

Expand Down
36 changes: 18 additions & 18 deletions detect_secrets/plugins/ibm_cos_hmac.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,25 +28,8 @@ class IBMCosHmacDetector(RegexBasedDetector):
),
)

def get_access_key_id(self, content):
    """Find access key ID candidates assigned anywhere in ``content``.

    A pattern is built with ``RegexBasedDetector.assign_regex_generator``
    from this detector's ``token_prefix``, an access-key-id keyword
    expression and a 32-character ``[a-f0-9]`` value expression. The
    pattern is then applied to each line of ``content`` separately.

    :param content: text to search; it is split with ``splitlines()``.
    :returns: list with every ``findall`` result, in line order.
    """
    keyword_expr = r'(?:access[-_]?(?:key)?[-_]?(?:id)?|key[-_]?id)'
    value_expr = r'([a-f0-9]{32})'

    matcher = RegexBasedDetector.assign_regex_generator(
        prefix_regex=self.token_prefix,
        password_keyword_regex=keyword_expr,
        password_regex=value_expr,
    )

    # Search line by line so a match can never span a line break.
    lines = content.splitlines()
    return [hit for text in lines for hit in matcher.findall(text)]

def verify(self, token, content, potential_secret=None):

key_id_matches = self.get_access_key_id(content)
key_id_matches = find_access_key_id(content)

if not key_id_matches:
return VerifiedResult.UNVERIFIED
Expand All @@ -65,6 +48,23 @@ def verify(self, token, content, potential_secret=None):
return VerifiedResult.VERIFIED_FALSE


def find_access_key_id(content):
    """Collect access key ID candidates that appear in ``content``.

    The search pattern comes from
    ``RegexBasedDetector.assign_regex_generator``, combining
    ``IBMCosHmacDetector.token_prefix`` with an access-key-id keyword
    expression and a 32-character ``[a-f0-9]`` value expression.

    :param content: text to scan; each line is searched independently.
    :returns: list of all ``findall`` results, in the order encountered.
    """
    key_id_pattern = RegexBasedDetector.assign_regex_generator(
        prefix_regex=IBMCosHmacDetector.token_prefix,
        password_keyword_regex=r'(?:access[-_]?(?:key)?[-_]?(?:id)?|key[-_]?id)',
        password_regex=r'([a-f0-9]{32})',
    )

    found = []
    for text_line in content.splitlines():
        # findall returns a (possibly empty) list for this line.
        found.extend(key_id_pattern.findall(text_line))
    return found


def hash(key, msg):
    """Return the raw HMAC-SHA256 digest of ``msg`` keyed with ``key``.

    :param key: secret key, passed unchanged to ``hmac.new``.
    :param msg: message to authenticate. Text is encoded as UTF-8;
        ``bytes``/``bytearray`` values are used as they are.
    :returns: the binary digest (32 bytes for SHA-256).
    """
    # NOTE: this name shadows the builtin ``hash`` inside this module; it is
    # kept because existing callers refer to it by this name.
    if not isinstance(msg, (bytes, bytearray)):
        # Only text needs encoding; calling .encode() on bytes would raise.
        msg = msg.encode('utf-8')
    return hmac.new(key, msg, hashlib.sha256).digest()

Expand Down
58 changes: 15 additions & 43 deletions detect_secrets/plugins/softlayer.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,39 +11,24 @@ class SoftLayerDetector(RegexBasedDetector):
secret_type = 'SoftLayer Credentials'

# opt means optional
opt_quote = r'(?:"|\'|)'
opt_dashes = r'(?:--|)'
sl = r'(?:softlayer|sl)'
opt_dash_undrscr = r'(?:_|-|)'
opt_api = r'(?:api|)'
sl = r'(?:softlayer|sl)(?:_|-|)(?:api|)'
key_or_pass = r'(?:key|pwd|password|pass|token)'
opt_space = r'(?: *)'
opt_assignment = r'(?:=|:|:=|=>|)'
secret = r'([a-z0-9]{64})'
denylist = [
re.compile(
r'{opt_quote}{opt_dashes}{sl}{opt_dash_undrscr}{opt_api}{opt_dash_undrscr}{key_or_pass}'
'{opt_quote}{opt_space}{opt_assignment}{opt_space}{opt_quote}{secret}'
'{opt_quote}'.format(
opt_quote=opt_quote,
opt_dashes=opt_dashes,
sl=sl,
opt_dash_undrscr=opt_dash_undrscr,
opt_api=opt_api,
key_or_pass=key_or_pass,
opt_space=opt_space,
opt_assignment=opt_assignment,
secret=secret,
), flags=re.IGNORECASE,
RegexBasedDetector.assign_regex_generator(
prefix_regex=sl,
password_keyword_regex=key_or_pass,
password_regex=secret,
),

re.compile(
r'(?:http|https)://api.softlayer.com/soap/(?:v3|v3.1)/([a-z0-9]{64})',
flags=re.IGNORECASE,
),
]

def verify(self, token, content, potential_secret=None):
usernames = get_username(content)
usernames = find_username(content)
if not usernames:
return VerifiedResult.UNVERIFIED

Expand All @@ -53,30 +38,17 @@ def verify(self, token, content, potential_secret=None):
return VerifiedResult.VERIFIED_FALSE


def get_username(content):
def find_username(content):
# opt means optional
opt_quote = r'(?:"|\'|)'
opt_dashes = r'(?:--|)'
opt_sl = r'(?:softlayer|sl|)'
opt_dash_undrscr = r'(?:_|-|)'
opt_api = r'(?:api|)'
username_keyword = r'(?:username|id|user|userid|user-id|user-name|name|user_id|user_name|uname)'
opt_space = r'(?: |)'
seperator = r'(?: |=|:|:=|=>)+'
username_keyword = r'(?:username|id|user|userid|user-id|user-name|' + \
r'name|user_id|user_name|uname)'
username = r'(\w(?:\w|_|@|\.|-)+)'
regex = re.compile(
r'{opt_quote}{opt_dashes}{opt_sl}{opt_dash_undrscr}{opt_api}{opt_dash_undrscr}'
'{username_keyword}{opt_quote}{seperator}{opt_quote}{username}{opt_quote}'.format(
opt_quote=opt_quote,
opt_dashes=opt_dashes,
opt_sl=opt_sl,
opt_dash_undrscr=opt_dash_undrscr,
opt_api=opt_api,
username_keyword=username_keyword,
opt_space=opt_space,
username=username,
seperator=seperator,
), flags=re.IGNORECASE,
RegexBasedDetector.assign_regex_generator(
prefix_regex=SoftLayerDetector.sl,
password_keyword_regex=username_keyword,
password_regex=username,
),
)

return [
Expand Down
81 changes: 40 additions & 41 deletions tests/plugins/cloudant_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from detect_secrets.core.constants import VerifiedResult
from detect_secrets.core.potential_secret import PotentialSecret
from detect_secrets.plugins.cloudant import CloudantDetector
from detect_secrets.plugins.cloudant import get_host
from detect_secrets.plugins.cloudant import find_host

CL_HOST = 'testy_test' # also called user
# only 64-character hex passwords generated by Cloudant are detected
Expand Down Expand Up @@ -111,50 +111,49 @@ def test_verify_no_secret(self):
'no_un={}'.format(CL_HOST),
) == VerifiedResult.UNVERIFIED


@pytest.mark.parametrize(
'content, expected_output',
(
@pytest.mark.parametrize(
'content, expected_output',
(
textwrap.dedent("""
--cloudant-hostname = {}
""")[1:-1].format(
CL_HOST,
(
textwrap.dedent("""
--cloudant-hostname = {}
""")[1:-1].format(
CL_HOST,
),
[CL_HOST],
),
[CL_HOST],
),
# With quotes
(
textwrap.dedent("""
cl_host = "{}"
""")[1:-1].format(
CL_HOST,
# With quotes
(
textwrap.dedent("""
cl_host = "{}"
""")[1:-1].format(
CL_HOST,
),
[CL_HOST],
),
[CL_HOST],
),
# multiple candidates
(
textwrap.dedent("""
cloudant_id = '{}'
cl-user = '{}'
CLOUDANT_USERID = '{}'
cloudant-uname: {}
""")[1:-1].format(
CL_HOST,
'test2_testy_test',
'test3-testy-testy',
'notanemail',
# multiple candidates
(
textwrap.dedent("""
cloudant_id = '{}'
cl-user = '{}'
CLOUDANT_USERID = '{}'
cloudant-uname: {}
""")[1:-1].format(
CL_HOST,
'test2_testy_test',
'test3-testy-testy',
'notanemail',
),
[
CL_HOST,
'test2_testy_test',
'test3-testy-testy',
'notanemail',
],
),
[
CL_HOST,
'test2_testy_test',
'test3-testy-testy',
'notanemail',
],
),
),
)
def test_get_host(content, expected_output):
assert get_host(content) == expected_output
)
def test_find_host(self, content, expected_output):
assert find_host(content) == expected_output
Loading

0 comments on commit 7b91d0e

Please sign in to comment.