Skip to content
This repository has been archived by the owner on Jun 5, 2023. It is now read-only.

Commit

Permalink
Support bindings in BigQuery rules (#1977)
Browse files Browse the repository at this point in the history
* Default non-existent fields in bigquery acl to glob to make it easier to write rules

* use dict .get for default value instead

* Update tests to use bindings

* add comment to specify which fields default to glob

* update remaining tests

* implement bindings

* uncomment tests

* fix test files

* support backwards compatibility

* make whitelists and blacklists work correctly for unset fields

* make dataset_id required

* update doc

* check for bindings earlier

* simplify applicable rules check

* bring back old bindings check, simplify has applicable rules check

* simplify get members

* lint fixes

* lint fixes

* lint fixes, merge, remove unnecessary pop

* allow only one member field to be set

* minor refactoring

* lint fixes
  • Loading branch information
umairidris authored and joecheuk committed Sep 5, 2018
1 parent af66fd5 commit b105396
Show file tree
Hide file tree
Showing 13 changed files with 293 additions and 100 deletions.
3 changes: 3 additions & 0 deletions google/cloud/forseti/common/util/regular_exp.py
Expand Up @@ -35,6 +35,9 @@ def escape_and_globify(pattern_string):
str: The pattern string, escaped except for the "*", which is
transformed into ".+" (match on one or more characters).
"""
if pattern_string is None:
return None

# pylint: enable=anomalous-backslash-in-string
if pattern_string == '*':
return '^.*$'
Expand Down
222 changes: 145 additions & 77 deletions google/cloud/forseti/scanner/audit/bigquery_rules_engine.py
Expand Up @@ -16,10 +16,7 @@
import collections
import enum
import itertools
import json

from google.cloud.forseti.common.gcp_type import (
bigquery_access_controls as bq_acls)
from google.cloud.forseti.common.gcp_type import resource_util
from google.cloud.forseti.common.gcp_type import resource as resource_mod
from google.cloud.forseti.common.util import logger
Expand All @@ -37,8 +34,14 @@ class Mode(enum.Enum):
BLACKLIST = 'blacklist'


# Rule definition wrappers.
# TODO: allow for multiple dataset ids.
RuleReference = collections.namedtuple(
'RuleReference', ['bigquery_acl', 'mode'])
'RuleReference', ['mode', 'dataset_id', 'bindings'])
Binding = collections.namedtuple('Binding', ['role', 'members'])
Member = collections.namedtuple(
'Member', ['domain', 'group_email', 'user_email', 'special_group'],
)


class BigqueryRulesEngine(bre.BaseRulesEngine):
Expand Down Expand Up @@ -124,57 +127,113 @@ def add_rules(self, rule_defs):
self.add_rule(rule, i)

@classmethod
def _build_rule(cls, rule_def, rule_index, raw_resource):
def _build_rule(cls, rule_def, rule_index):
"""Build a rule.
Args:
rule_def (dict): A dictionary containing rule definition
properties.
rule_index (int): The index of the rule from the rule definitions.
Assigned automatically when the rule book is built.
raw_resource (dict): Raw dict representing the resources the
rules apply for.
Returns:
Rule: rule for the given definition.
"""
dataset_id = rule_def.get('dataset_id', '*')
special_group = rule_def.get('special_group', '*')
user_email = rule_def.get('user_email', '*')
domain = rule_def.get('domain', '*')
group_email = rule_def.get('group_email', '*')
role = rule_def.get('role', '*')

def_mode = rule_def.get('mode')
if def_mode:
mode = Mode(def_mode)
else:
# Default mode to blacklist for backwards compatibility as that was
# the behaviour before mode was configurable.
# TODO: make mode required?
mode = Mode.BLACKLIST

rule_def_resource = RuleReference(
bigquery_acl=bq_acls.BigqueryAccessControls(
project_id='',
dataset_id=regular_exp.escape_and_globify(dataset_id),
full_name='',
special_group=regular_exp.escape_and_globify(special_group),
user_email=regular_exp.escape_and_globify(user_email),
domain=regular_exp.escape_and_globify(domain),
group_email=regular_exp.escape_and_globify(group_email),
role=regular_exp.escape_and_globify(role.upper()),
view={},
raw_json=json.dumps(raw_resource)),
mode=mode,
)
if 'dataset_id' not in rule_def:
raise audit_errors.InvalidRulesSchemaError(
'Missing dataset_id in rule {}'.format(rule_index))

dataset_id = regular_exp.escape_and_globify(rule_def['dataset_id'])

bindings = []

# TODO: stop supporting this.
binding = cls._get_binding_from_old_syntax(rule_def)
if binding:
bindings.append(binding)

# Default mode to blacklist for backwards compatibility as that was
# the behaviour before mode was configurable.
# TODO: make mode required?
mode = Mode(rule_def.get('mode', 'blacklist'))

for raw_binding in rule_def.get('bindings', []):
if 'role' not in raw_binding:
raise audit_errors.InvalidRulesSchemaError(
'Missing role in binding in rule {}'.format(rule_index))
role = regular_exp.escape_and_globify(raw_binding['role'])

if 'members' not in raw_binding:
raise audit_errors.InvalidRulesSchemaError(
'Missing members in binding in rule {}'.format(rule_index))

members = []
for raw_member in raw_binding['members']:
fields = {
field: regular_exp.escape_and_globify(raw_member.get(field))
for field in [
'domain', 'group_email', 'user_email', 'special_group'
]
}

# only one key should be set per member
num_fields_set = sum(
[val is not None for val in fields.values()]
)
if num_fields_set != 1:
raise audit_errors.InvalidRulesSchemaError(
'At most one member field may be set in rule {}'.format(
rule_index))
members.append(Member(**fields))

bindings.append(Binding(role, members))

if not bindings:
raise audit_errors.InvalidRulesSchemaError(
'Missing bindings in rule {}'.format(rule_index))

rule = Rule(rule_name=rule_def.get('name'),
rule_index=rule_index,
rules=rule_def_resource)
rule_reference=RuleReference(
dataset_id=dataset_id,
bindings=bindings,
mode=mode))

return rule

@classmethod
def _get_binding_from_old_syntax(cls, rule_def):
    """Build a binding from a rule written in the legacy flat syntax.

    The legacy syntax places the role and the member fields directly on
    the rule instead of under `bindings`. When at least one of those
    fields is present, every legacy field that is absent falls back to
    the catch-all glob '*'.

    Args:
        rule_def (dict): raw rule definition.

    Returns:
        Binding: a single binding holding a single member if any legacy
            field is present in the rule definition, otherwise None.
    """
    legacy_fields = (
        'role', 'domain', 'group_email', 'user_email', 'special_group',
    )
    if not any(field in rule_def for field in legacy_fields):
        return None

    def _globified(field):
        # Absent legacy fields default to the catch-all glob.
        return regular_exp.escape_and_globify(rule_def.get(field, '*'))

    # NOTE(review): the role is used exactly as written in the rule (no
    # case normalisation) -- confirm this matches the casing of ACL roles.
    role_pattern = _globified('role')

    # Positional order follows the Member fields:
    # domain, group_email, user_email, special_group.
    legacy_member = Member(
        _globified('domain'),
        _globified('group_email'),
        _globified('user_email'),
        _globified('special_group'),
    )
    return Binding(role=role_pattern, members=[legacy_member])

def add_rule(self, rule_def, rule_index):
"""Add a rule to the rule book.
Expand All @@ -193,8 +252,7 @@ def add_rule(self, rule_def, rule_index):
raise audit_errors.InvalidRulesSchemaError(
'Missing resource ids in rule {}'.format(rule_index))

rule = self._build_rule(
rule_def, rule_index, raw_resource)
rule = self._build_rule(rule_def, rule_index)

resource_type = raw_resource.get('type')
for resource_id in resource_ids:
Expand Down Expand Up @@ -247,35 +305,18 @@ class Rule(object):
'RuleViolation',
frozen_rule_attributes)

def __init__(self, rule_name, rule_index, rules):
def __init__(self, rule_name, rule_index, rule_reference):
"""Initialize.
Args:
rule_name (str): Name of the loaded rule.
rule_index (int): The index of the rule from the rule definitions.
rules (RuleReference): The rules from the file and corresponding
values.
rule_reference (RuleReference): The rules from the file and
corresponding values.
"""
self.rule_name = rule_name
self.rule_index = rule_index
self.rules = rules

def _is_applicable(self, bigquery_acl):
"""Determine whether the rules are applicable to the given acl.
Args:
bigquery_acl (BigqueryAccessControls): BigQuery ACL resource.
Returns:
bool: True if the rules are applicable to the given acl, False
otherwise.
"""
rule_bigquery_acl = self.rules.bigquery_acl
rule_regex_to_val = {
rule_bigquery_acl.dataset_id: bigquery_acl.dataset_id,
rule_bigquery_acl.role: bigquery_acl.role,
}

return regular_exp.all_match(rule_regex_to_val)
self.rule_reference = rule_reference

# TODO: The naming is confusing and needs to be fixed in all scanners.
def find_policy_violations(self, bigquery_acl):
Expand All @@ -287,23 +328,33 @@ def find_policy_violations(self, bigquery_acl):
Yields:
namedtuple: Returns RuleViolation named tuple.
"""
if not self._is_applicable(bigquery_acl):
return

rule_bigquery_acl = self.rules.bigquery_acl
rule_regex_to_val = {
rule_bigquery_acl.special_group: bigquery_acl.special_group,
rule_bigquery_acl.user_email: bigquery_acl.user_email,
rule_bigquery_acl.domain: bigquery_acl.domain,
rule_bigquery_acl.group_email: bigquery_acl.group_email,
}

all_matched = regular_exp.all_match(rule_regex_to_val)

has_violation = self.rules.mode == Mode.BLACKLIST and all_matched or (
self.rules.mode == Mode.WHITELIST and not all_matched)
matches = []

has_applicable_rules = False
for binding in self.rule_reference.bindings:
if not self._is_binding_applicable(binding, bigquery_acl):
continue

has_applicable_rules = True

for member in binding.members:
rule_regex_to_val = {
member.domain: bigquery_acl.domain,
member.user_email: bigquery_acl.user_email,
member.group_email: bigquery_acl.group_email,
member.special_group: bigquery_acl.special_group,
}

# only compare fields that were set
rule_regex_to_val.pop(None, None)
matches.append(regular_exp.all_match(rule_regex_to_val))

has_violation = (
self.rule_reference.mode == Mode.BLACKLIST and any(matches) or
self.rule_reference.mode == Mode.WHITELIST and not any(matches)
)

if has_violation:
if has_applicable_rules and has_violation:
yield self.RuleViolation(
resource_type=resource_mod.ResourceType.BIGQUERY,
resource_id=bigquery_acl.dataset_id,
Expand All @@ -320,3 +371,20 @@ def find_policy_violations(self, bigquery_acl):
view=bigquery_acl.view,
resource_data=bigquery_acl.json,
)

def _is_binding_applicable(self, binding, bigquery_acl):
    """Check whether a rule binding applies to the given ACL.

    Pairs the rule's dataset_id pattern with the ACL's dataset id and the
    binding's role pattern with the ACL's role, then hands that mapping to
    regular_exp.all_match.

    Args:
        binding (Binding): rule binding to check against.
        bigquery_acl (BigqueryAccessControls): BigQuery ACL resource.

    Returns:
        bool: True if the binding is applicable to the given acl, False
            otherwise.
    """
    dataset_pattern = self.rule_reference.dataset_id
    acl_dataset_id = bigquery_acl.dataset_id
    role_pattern = binding.role
    acl_role = bigquery_acl.role

    # The mapping is keyed by pattern, so two identical patterns collapse
    # into one entry and only the later value (the role) is checked.
    return regular_exp.all_match({
        dataset_pattern: acl_dataset_id,
        role_pattern: acl_role,
    })
24 changes: 19 additions & 5 deletions rules/bigquery_rules.yaml
Expand Up @@ -12,28 +12,42 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# The following fields will default to a catch-all glob ('*') if not present:
# - dataset_id, special_group, user_email, domain, group_email, role.
# Each binding member must set exactly one of the following fields:
# domain, group_email, user_email, special_group.
# Only the member field that is set is considered when searching for
# violations.

rules:
- name: BigQuery rule to search for public datasets
mode: blacklist
special_group: 'allAuthenticatedUsers'
resource:
- type: organization
resource_ids:
- {ORGANIZATION_ID}
dataset_id: '*'
bindings:
- role: '*'
members:
- special_group: 'allAuthenticatedUsers'
- name: BigQuery rule to search for datasets accessible by users with gmail.com addresses
mode: blacklist
user_email: '*@gmail.com'
resource:
- type: organization
resource_ids:
- {ORGANIZATION_ID}
dataset_id: '*'
bindings:
- role: '*'
members:
- user_email: '*@gmail.com'
- name: BigQuery rule to search for datasets accessible by groups with googlegroups.com addresses
mode: blacklist
group_email: '*@googlegroups.com'
resource:
- type: organization
resource_ids:
- {ORGANIZATION_ID}
dataset_id: '*'
bindings:
- role: '*'
members:
- group_email: '*@googlegroups.com'

0 comments on commit b105396

Please sign in to comment.