In [106]:
!which jupyter

/home/patrick/anaconda3/envs/anonym/bin/jupyter


In [107]:
!python -V

Python 3.6.7 :: Anaconda, Inc.


In [108]:
import os
import json
from pathlib import Path
from dotenv import load_dotenv

import google.auth
from google.oauth2 import service_account
from google.cloud import dlp_v2
from google.cloud.dlp_v2 import enums

## Create a service account to authenticate
https://cloud.google.com/docs/authentication/getting-started

* Select or create a GCP project.
* enable billing and DLP API
* Generate the JSON file for authentication (`service_account_credentials.json`)


### Get the required permissions
https://cloud.google.com/dlp/docs/quickstart-json

Inspect content requires the `serviceusage.services.use` permission for the project that's specified in parent. The `roles/editor`, `roles/owner`, and `roles.dlp.user` roles contain the required permission 

#### Programmatically

1. To add a single binding to the project's Cloud IAM policy, type the following command. Replace project-id with your _**project ID**_.

`gcloud projects add-iam-policy-binding project-id --member group:test@example.com --role roles/dlp.user`

2. The command writes the updated policy to the console window:

```
bindings:
- members:
  - group:test@example.com
    role: roles/dlp.user
 ```

In [93]:
# Store and read authentication credentials
env_path = Path('.') / 'env_var'
load_dotenv(dotenv_path=env_path, verbose=True)

# where `env_var` contains your credentials:
# REDIS_ADDRESS=localhost:6379
# MEANING_OF_LIFE=42
# MULTILINE_VAR="hello\nworld"
# JSON_CREDENTIALS="/home/patrick/.../service_account_credentials.json"

PATH_JSON_CREDENTIALS = os.getenv("JSON_CREDENTIALS")
print(PATH_JSON_CREDENTIALS)
json_credentials=open(PATH_JSON_CREDENTIALS).read()
# print(json_data)
data = json.loads(json_credentials)
print(data["project_id"])

/home/patrick/Documents/COMPUTAS/dev/pseudonymization_service_eHelse/service_account_credentials.json
computas-pseudonymisation


In [95]:
# Generate OAuth2 object for your service account from Services JSON file
credentials = service_account.Credentials.from_service_account_file(PATH_JSON_CREDENTIALS)
credentials

<google.oauth2.service_account.Credentials at 0x7f28192a6ba8>

In [96]:
# Build the service with your newly created credentials 
client = dlp_v2.DlpServiceClient(credentials=credentials)

## Inspect a string for sensitive information

In [119]:
# Data for DLP Inspection just for test, used Google Example
name = 'EMAIL_ADDRESS'
info_types_element = {'name': name}
info_types = [info_types_element]
inspect_config = {'info_types': info_types}
print(inspect_config)

type_ = 'text/plain'
value = 'My email is example@example.com.'
items_element = {'type': type_, 'value': value}
items = [items_element]
print(items)

item = {'value': 'My email is example@example.com.'}
parent = client.project_path(project_id)
response = client.inspect_content(parent, inspect_config, item)
print(response)

{'info_types': [{'name': 'EMAIL_ADDRESS'}]}
[{'type': 'text/plain', 'value': 'My email is example@example.com.'}]
result {
  findings {
    info_type {
      name: "EMAIL_ADDRESS"
    }
    likelihood: LIKELY
    location {
      byte_range {
        start: 12
        end: 31
      }
      codepoint_range {
        start: 12
        end: 31
      }
    }
    create_time {
      seconds: 1542978932
      nanos: 198000000
    }
  }
}



In [125]:
# If you want to inspect several items at once, you need to provide the data as Union[dict, ContentItem]
type_ = 'text/plain'
value = 'My email is example@example.com.'
items_element = {'type': type_, 'value': value}
items = [items_element, items_element]
parent = client.project_path(project_id)
response = client.inspect_content(parent, inspect_config, items)
print(response)

TypeError: Parameter to MergeFrom() must be instance of same class: expected google.privacy.dlp.v2.ContentItem got list.

## Inspect a fields to infer their type and sensitivity

Define your `infoType` detectors

In [104]:
# client = dlp_v2.DlpServiceClient(credentials=credentials)
min_likelihood = enums.Likelihood.POSSIBLE
info_types = [{'name': 'FIRST_NAME'}, {'name': 'LAST_NAME'}]
inspect_config = {
    'info_types': info_types,
    'min_likelihood': min_likelihood,
}

In [105]:
item = {'value': 'Robert Frost'}
parent = client.project_path(project_id)
response = client.inspect_content(parent, inspect_config, item)
print(response)

result {
  findings {
    info_type {
      name: "FIRST_NAME"
    }
    likelihood: LIKELY
    location {
      byte_range {
        end: 6
      }
      codepoint_range {
        end: 6
      }
    }
    create_time {
      seconds: 1542974273
      nanos: 53000000
    }
  }
  findings {
    info_type {
      name: "LAST_NAME"
    }
    likelihood: LIKELY
    location {
      byte_range {
        start: 7
        end: 12
      }
      codepoint_range {
        start: 7
        end: 12
      }
    }
    create_time {
      seconds: 1542974273
      nanos: 53000000
    }
  }
}



## References

### Documentation for the Google Cloud library:

* https://media.readthedocs.org/pdf/google-cloud-python/latest/google-cloud-python.pdf
* https://github.com/googleapis/google-cloud-python/blob/a9f5d17ae480757f8da07a77a2ffd665790fb5df/dlp/tests/system/gapic/v2/test_system_dlp_service_v2.py
* https://cloud.google.com/dlp/docs/inspecting-text#dlp-inspect-text-python
* https://cloud.google.com/dlp/docs/deidentify-sensitive-data