Skip to content

Commit

Permalink
Tighter GH detector
Browse files Browse the repository at this point in the history
* Use GHDetectorV2

Supports git-defenders/detect-secrets-discuss#166

Cover additional token contexts in GitHub V2 detector (Yelp#183)

Turn on GHDetectorV2 (Yelp#184)

Supports git-defenders/detect-secrets-discuss#124

Delete GHDetector V1 (Yelp#186)
  • Loading branch information
justineyster committed Sep 9, 2020
1 parent 2544118 commit da33255
Show file tree
Hide file tree
Showing 6 changed files with 104 additions and 22 deletions.
4 changes: 2 additions & 2 deletions detect_secrets/core/usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -509,8 +509,8 @@ class PluginOptions:
PluginDescriptor(
classname='GHDetector',
disable_flag_text='--no-gh-scan',
disable_help_text='Disable scanning for GH credentials',
is_default=False,
disable_help_text='Disable v2 scanner for GH credentials',
is_default=True,
),
PluginDescriptor(
classname='SoftLayerDetector',
Expand Down
53 changes: 51 additions & 2 deletions detect_secrets/plugins/gh.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,58 @@ class GHDetector(RegexBasedDetector):

secret_type = 'GitHub Credentials'

# Regex fragments that the denylist patterns below are assembled from.
# Fragments named ``opt_*`` end with an empty alternative, so each of them
# also matches the empty string (i.e. the element is optional).
opt_github = r'(?:github|gh|ghe|git|)'
opt_space = r'(?: |)'
opt_quote = r'(?:"|\'|)'
opt_assignment = r'(?:=|:|:=|=>|)'
opt_dash_undrscr = r'(?:_|-|)'
opt_api = r'(?:api|)'
# Scheme keywords accepted after an ``Authorization`` header name.
header_keyword = r'(?:token|bearer|Basic)'
# Keywords that name a credential in an assignment-like context.
key_or_pass = r'(?:key|pwd|password|pass|token|oauth)'
# Host names that may follow ``<token>@`` in a URL. The dots are escaped so
# they match only a literal '.', not an arbitrary character.
api_endpoint = r'(?:github\.ibm\.com|api\.github\.ibm\.com)'
# Exactly 40 hex characters (captured), bounded on both sides by a non-word
# character or by the start/end of the string.
forty_hex = r'(?:(?<=\W)|(?<=^))([0-9a-f]{40})(?:(?=\W)|(?=$))'
# 55 base64-alphabet characters followed by one '=' pad (captured), with the
# same boundary rules as ``forty_hex``.
b64_encoded_token = r'(?:(?<=\W)|(?<=^))([A-Za-z0-9+/]{55}=)(?:(?=\W)|(?=$))'
# Optional ``username:`` prefix placed before the token in a URL.
opt_username = r'(?:[a-zA-Z0-9-]+:|)'
# Compiled patterns for GitHub credentials; each one captures the token text
# in its single capturing group.
denylist = [
# GitHub tokens (PAT & OAuth) are 40 hex characters
# NOTE(review): this entry matches a bare 40-hex string with no surrounding
# keyword context and has no IGNORECASE flag (lowercase a-f only). Confirm it
# is still intended alongside the context-specific patterns below.
re.compile(r'(?:(?<=\W)|(?<=^))([0-9a-f]{40})(?:(?=\W)|(?=$))'), # 40 hex
# Keyword context: an optionally quoted name such as ``github-api-key``
# (the github / api parts and the separators are all optional), then an
# optional assignment operator, then an optionally quoted 40-hex token.
re.compile(
r'{opt_quote}{opt_github}{opt_dash_undrscr}{opt_api}{opt_dash_undrscr}{key_or_pass}'
'{opt_quote}{opt_space}{opt_assignment}{opt_space}{opt_quote}{forty_hex}'
'{opt_quote}'.format(
opt_quote=opt_quote,
opt_github=opt_github,
opt_dash_undrscr=opt_dash_undrscr,
opt_api=opt_api,
key_or_pass=key_or_pass,
opt_space=opt_space,
opt_assignment=opt_assignment,
forty_hex=forty_hex,
), flags=re.IGNORECASE,
),
# URL context: ``https://[username:]<40-hex token>@<api_endpoint host>``.
re.compile(
r'https://{opt_username}{forty_hex}@{api_endpoint}'.format(
forty_hex=forty_hex,
api_endpoint=api_endpoint,
opt_username=opt_username,
), flags=re.IGNORECASE,
),
# Header context: ``Authorization: <token|bearer|Basic> <40-hex token>``,
# with optional quotes around the header name and value.
re.compile(
r'{opt_quote}Authorization{opt_quote}{opt_space}:{opt_space}{opt_quote}'
'{header_keyword}{opt_space}{forty_hex}{opt_quote}'.format(
opt_quote=opt_quote,
opt_space=opt_space,
header_keyword=header_keyword,
forty_hex=forty_hex,
), flags=re.IGNORECASE,
),
# Header context: ``Authorization: Basic <base64 value>`` where the value
# has the shape described by ``b64_encoded_token``.
# NOTE(review): ``header_keyword`` is passed to format() below but the
# template has no ``{header_keyword}`` placeholder (it hard-codes ``Basic``);
# str.format ignores the unused keyword argument.
re.compile(
r'{opt_quote}Authorization{opt_quote}{opt_space}:{opt_space}{opt_quote}'
'Basic{opt_space}{b64_encoded_token}{opt_quote}'.format(
opt_quote=opt_quote,
opt_space=opt_space,
header_keyword=header_keyword,
b64_encoded_token=b64_encoded_token,
), flags=re.IGNORECASE,
),
]

def verify(self, token):
Expand Down
11 changes: 5 additions & 6 deletions detect_secrets/plugins/softlayer.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,22 +15,23 @@ class SoftLayerDetector(RegexBasedDetector):
sl = r'(?:softlayer|sl)'
opt_dash_undrscr = r'(?:_|-|)'
opt_api = r'(?:api|)'
key_or_pass = r'(?:key|pwd|password|pass)'
key_or_pass = r'(?:key|pwd|password|pass|token)'
opt_space = r'(?: |)'
opt_equals = r'(?:=|:|:=|=>|)'
opt_assignment = r'(?:=|:|:=|=>|)'
secret = r'([a-z0-9]{64})'
denylist = [
re.compile(
r'{opt_quote}{opt_dashes}{sl}{opt_dash_undrscr}{opt_api}{opt_dash_undrscr}{key_or_pass}'
'{opt_quote}{opt_space}{opt_equals}{opt_space}{opt_quote}{secret}{opt_quote}'.format(
'{opt_quote}{opt_space}{opt_assignment}{opt_space}{opt_quote}{secret}'
'{opt_quote}'.format(
opt_quote=opt_quote,
opt_dashes=opt_dashes,
sl=sl,
opt_dash_undrscr=opt_dash_undrscr,
opt_api=opt_api,
key_or_pass=key_or_pass,
opt_space=opt_space,
opt_equals=opt_equals,
opt_assignment=opt_assignment,
secret=secret,
), flags=re.IGNORECASE,
),
Expand Down Expand Up @@ -60,7 +61,6 @@ def get_username(content):
opt_api = r'(?:api|)'
username_keyword = r'(?:username|id|user|userid|user-id|user-name|name|user_id|user_name|uname)'
opt_space = r'(?: |)'
opt_equals = r'(?:=|:|:=|=>|)'
seperator = r'(?: |=|:|:=|=>)+'
username = r'(\w(?:\w|_|@|\.|-)+)'
regex = re.compile(
Expand All @@ -73,7 +73,6 @@ def get_username(content):
opt_api=opt_api,
username_keyword=username_keyword,
opt_space=opt_space,
opt_equals=opt_equals,
username=username,
seperator=seperator,
), flags=re.IGNORECASE,
Expand Down
4 changes: 4 additions & 0 deletions tests/main_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ def test_scan_string_basic_default(
AWSKeyDetector : False
ArtifactoryDetector: False
BasicAuthDetector : False
GHDetector : False
PrivateKeyDetector : False
SlackDetector : False
StripeDetector : False
Expand Down Expand Up @@ -659,6 +660,9 @@ def test_scan_with_default_plugin(self):
{
'name': 'BasicAuthDetector',
},
{
'name': 'GHDetector',
},
{
'name': 'PrivateKeyDetector',
},
Expand Down
46 changes: 37 additions & 9 deletions tests/plugins/gh_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from detect_secrets.core.constants import VerifiedResult
from detect_secrets.plugins.gh import GHDetector


# Placeholder token value: 40 lowercase hex characters.
GHE_TOKEN = 'abcdef0123456789' * 2 + 'abcdef01'
# The same value as bytes.
GHE_TOKEN_BYTES = GHE_TOKEN.encode('ascii')

Expand All @@ -15,15 +16,42 @@ class TestGHDetector(object):
@pytest.mark.parametrize(
'payload, should_flag',
[
('2764d47e6bf540911b7da8fe55caa9451e783549', True), # not real key
('key :53d49d5081266d939bac57a3d86c517ded974b19', True), # not real key
('53d49dnotakeyata9bac57a3d86c517ded974b19', False), # has non-hex
('a654fd9e3758a65235c765cf51e10df0c80b7a9', False), # only 39
('a654fd9e3758a65235c765cf51e10df0c80b7a923', False), # 41
('2764d47e6bf540911b7da8fe55caa9451e783549 ', True), # not real key
('2764d47e6bf540911b7da8fe55caa9451e7835492 ', False), # not real key
('2764d47e6bf540911b7da8fe55caa9451e783549_ ', False), # not real key
('2764d47e6bf540911b7da8fe55caa9451e783549z ', False), # not real key
('github-key 2764d47e6bf540911b7da8fe55caa9451e783549', True),
('github_pwd :53d49d5081266d939bac57a3d86c517ded974b19', True),
('gh-api-key=2764d47e6bf540911b7da8fe55caa9451e783549 ', True),
('git-token => "abcdef0123456789abcdef0123456789abcdef01"', True),
('"GHE_API_KEY": "abcdef0123456789abcdef0123456789abcdef01"', True),
('GITHUB_API_TOKEN := "abcdef0123456789abcdef0123456789abcdef01"', True),
('https://username:abcdef0123456789abcdef0123456789abcdef01@github.ibm.com', True,),
(
'https://username:abcdef0123456789abcdef0123456789abcdef01@'
'api.github.ibm.com', True,
),
('Authorization: token abcdef0123456789abcdef0123456789abcdef01', True),
(
'Authorization: Basic '
'YWJjZWRmYWJlZmQzMzMzMTQ1OTA4YWJjZGRmY2JkZGUxMTQ1Njc4OQo=', True,
),
('password abcdef0123456789abcdef0123456789abcdef01', True,),
('git+https://abcdef0123456789abcdef0123456789abcdef01@github.ibm.com', True,),
('sonar.github.oauth=abcdef0123456789abcdef0123456789abcdef01', True,),
(
'https://x-oauth-basic:abcdef0123456789abcdef0123456789abcdef01'
'@github.ibm.com/org/repo.git', True,
),
('abcdef0123456789abcdef0123456789abcdef01', False), # no keyword prefix
('gh-token=53d49dnotakeyata9bac57a3d86c517ded974b19', False), # has non-hex
('GIT-KEY: a654fd9e3758a65235c765cf51e10df0c80b7a9', False), # only 39
('github_api_key: a654fd9e3758a65235c765cf51e10df0c80b7a923', False), # 41
('gh_key:=2764d47e6bf540911b7da8fe55caa9451e7835492 ', False),
('github-api-token: 2764d47e6bf540911b7da8fe55caa9451e783549_ ', False),
('git_key=2764d47e6bf540911b7da8fe55caa9451e783549z ', False),
('https://<fake-username>:<fake-pass>@github.ibm.com', False),
(
'Authorization: llama '
'YWJjZWRmYWJlZmQzMzMzMTQ1OTA4YWJjZGRmY2JkZGUxMTQ1Njc4OQo=', False,
),
('Authorization: token %s', False),
],
)
def test_analyze_string(self, payload, should_flag):
Expand Down
8 changes: 5 additions & 3 deletions tests/pre_commit_hook_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,15 +242,17 @@ def test_baseline_gets_updated(
'name': 'Base64HighEntropyString',
},
{
<<<<<<< HEAD
=======
'name': 'BasicAuthDetector',
},
{
'name': 'GHDetector',
},
{
>>>>>>> Add GH detector
'name': 'GHDetectorV2',
},
{
=======
>>>>>>> Delete GHDetector V1 (#186)
'hex_limit': 3,
'name': 'HexHighEntropyString',
},
Expand Down

0 comments on commit da33255

Please sign in to comment.