Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add DLP code samples for custom info types #1524

Merged
merged 18 commits into from
Jul 3, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 33 additions & 6 deletions dlp/deid.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@


# [START dlp_deidentify_masking]
def deidentify_with_mask(project, string, masking_character=None,
def deidentify_with_mask(project, string, info_types, masking_character=None,
number_to_mask=0):
"""Uses the Data Loss Prevention API to deidentify sensitive data in a
string by masking it with a character.
Expand All @@ -44,6 +44,11 @@ def deidentify_with_mask(project, string, masking_character=None,
# Convert the project id into a full resource id.
parent = dlp.project_path(project)

# Construct inspect configuration dictionary
inspect_config = {
'info_types': [{'name': info_type} for info_type in info_types]
}

# Construct deidentify configuration dictionary
deidentify_config = {
'info_type_transformations': {
Expand All @@ -65,15 +70,16 @@ def deidentify_with_mask(project, string, masking_character=None,

# Call the API
response = dlp.deidentify_content(
parent, deidentify_config=deidentify_config, item=item)
parent, inspect_config=inspect_config,
deidentify_config=deidentify_config, item=item)

# Print out the results.
print(response.item.value)
# [END dlp_deidentify_masking]


# [START dlp_deidentify_fpe]
def deidentify_with_fpe(project, string, alphabet=None,
def deidentify_with_fpe(project, string, info_types, alphabet=None,
surrogate_type=None, key_name=None, wrapped_key=None):
"""Uses the Data Loss Prevention API to deidentify sensitive data in a
string using Format Preserving Encryption (FPE).
Expand Down Expand Up @@ -127,6 +133,11 @@ def deidentify_with_fpe(project, string, alphabet=None,
'name': surrogate_type
}

# Construct inspect configuration dictionary
inspect_config = {
'info_types': [{'name': info_type} for info_type in info_types]
}

# Construct deidentify configuration dictionary
deidentify_config = {
'info_type_transformations': {
Expand All @@ -146,7 +157,8 @@ def deidentify_with_fpe(project, string, alphabet=None,

# Call the API
response = dlp.deidentify_content(
parent, deidentify_config=deidentify_config, item=item)
parent, inspect_config=inspect_config,
deidentify_config=deidentify_config, item=item)

# Print results
print(response.item.value)
Expand Down Expand Up @@ -404,6 +416,13 @@ def write_data(data):
'deid_mask',
help='Deidentify sensitive data in a string by masking it with a '
'character.')
# Register the repeatable --info_types option (each occurrence contributes one
# info type name) on mask_parser -- presumably the 'deid_mask' sub-parser;
# its creation is not visible in this hunk.
# NOTE(review): argparse's 'append' action appends command-line values to a
# non-empty default list instead of replacing it, so passing --info_types
# yields the three defaults PLUS the user's values. The help text ("If
# unspecified, ... will be used") suggests replacement was intended -- confirm,
# and if so use default=None and substitute the defaults after parsing.
mask_parser.add_argument(
'--info_types', action='append',
help='Strings representing info types to look for. A full list of '
'info categories and types is available from the API. Examples '
'include "FIRST_NAME", "LAST_NAME", "EMAIL_ADDRESS". '
'If unspecified, the three above examples will be used.',
default=['FIRST_NAME', 'LAST_NAME', 'EMAIL_ADDRESS'])
mask_parser.add_argument(
'project',
help='The Google Cloud project id to use as a parent resource.')
Expand All @@ -423,6 +442,13 @@ def write_data(data):
'deid_fpe',
help='Deidentify sensitive data in a string using Format Preserving '
'Encryption (FPE).')
# Register the repeatable --info_types option (each occurrence contributes one
# info type name) on fpe_parser -- presumably the 'deid_fpe' sub-parser;
# its creation is not visible in this hunk.
# NOTE(review): argparse's 'append' action appends command-line values to a
# non-empty default list instead of replacing it, so passing --info_types
# yields the three defaults PLUS the user's values. The help text ("If
# unspecified, ... will be used") suggests replacement was intended -- confirm,
# and if so use default=None and substitute the defaults after parsing.
fpe_parser.add_argument(
'--info_types', action='append',
help='Strings representing info types to look for. A full list of '
'info categories and types is available from the API. Examples '
'include "FIRST_NAME", "LAST_NAME", "EMAIL_ADDRESS". '
'If unspecified, the three above examples will be used.',
default=['FIRST_NAME', 'LAST_NAME', 'EMAIL_ADDRESS'])
fpe_parser.add_argument(
'project',
help='The Google Cloud project id to use as a parent resource.')
Expand Down Expand Up @@ -532,11 +558,12 @@ def write_data(data):
args = parser.parse_args()

if args.content == 'deid_mask':
deidentify_with_mask(args.project, args.item,
deidentify_with_mask(args.project, args.item, args.info_types,
masking_character=args.masking_character,
number_to_mask=args.number_to_mask)
elif args.content == 'deid_fpe':
deidentify_with_fpe(args.project, args.item, alphabet=args.alphabet,
deidentify_with_fpe(args.project, args.item, args.info_types,
alphabet=args.alphabet,
wrapped_key=args.wrapped_key,
key_name=args.key_name,
surrogate_type=args.surrogate_type)
Expand Down
10 changes: 8 additions & 2 deletions dlp/deid_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ def tempdir():


def test_deidentify_with_mask(capsys):
deid.deidentify_with_mask(GCLOUD_PROJECT, HARMFUL_STRING)
deid.deidentify_with_mask(GCLOUD_PROJECT, HARMFUL_STRING,
['US_SOCIAL_SECURITY_NUMBER'])

out, _ = capsys.readouterr()
assert 'My SSN is *********' in out
Expand All @@ -60,14 +61,17 @@ def test_deidentify_with_mask_masking_character_specified(capsys):
deid.deidentify_with_mask(
GCLOUD_PROJECT,
HARMFUL_STRING,
['US_SOCIAL_SECURITY_NUMBER'],
masking_character='#')

out, _ = capsys.readouterr()
assert 'My SSN is #########' in out


def test_deidentify_with_mask_masking_number_specified(capsys):
deid.deidentify_with_mask(GCLOUD_PROJECT, HARMFUL_STRING, number_to_mask=7)
deid.deidentify_with_mask(GCLOUD_PROJECT, HARMFUL_STRING,
['US_SOCIAL_SECURITY_NUMBER'],
number_to_mask=7)

out, _ = capsys.readouterr()
assert 'My SSN is *******27' in out
Expand All @@ -77,6 +81,7 @@ def test_deidentify_with_fpe(capsys):
deid.deidentify_with_fpe(
GCLOUD_PROJECT,
HARMFUL_STRING,
['US_SOCIAL_SECURITY_NUMBER'],
alphabet='NUMERIC',
wrapped_key=WRAPPED_KEY,
key_name=KEY_NAME)
Expand All @@ -90,6 +95,7 @@ def test_deidentify_with_fpe_uses_surrogate_info_types(capsys):
deid.deidentify_with_fpe(
GCLOUD_PROJECT,
HARMFUL_STRING,
['US_SOCIAL_SECURITY_NUMBER'],
alphabet='NUMERIC',
wrapped_key=WRAPPED_KEY,
key_name=KEY_NAME,
Expand Down
Loading