### Deny list Test

In [11]:
from presidio_analyzer import PatternRecognizer

# Honorifics and titles to be flagged through a deny-list.
titles_list = [
    "Sir", "Ma'am", "Madam",
    "Mr.", "Mrs.", "Ms.", "Miss",
    "Dr.", "Professor",
]

# Recognizer that reports deny-list matches under the TITLE entity type.
titles_recognizer = PatternRecognizer(
    supported_entity="TITLE",
    deny_list=titles_list,
)

text1 = "I suspect Professor Plum, in the Dining Room, with the candlestick"

# Call the recognizer directly, outside of any AnalyzerEngine.
result = titles_recognizer.analyze(text1, entities=["TITLE"])
print(f"Result:\n {result}")

Result:
 [type: TITLE, start: 10, end: 19, score: 1.0]


In [12]:
from presidio_analyzer import AnalyzerEngine

# Build an analyzer engine and register the custom TITLE deny-list
# recognizer (`titles_recognizer`, created in an earlier cell) on its
# registry, so later `analyzer.analyze` calls can also report TITLE.
analyzer = AnalyzerEngine()
analyzer.registry.add_recognizer(titles_recognizer)

In [13]:
# Analyze the sample sentence as English text; no `entities` filter is passed.
results = analyzer.analyze(text=text1, language="en")

In [14]:
# Show the label and the raw list of recognizer results on separate lines.
print("Results:", results, sep="\n")

Results:
[type: TITLE, start: 10, end: 19, score: 1.0, type: PERSON, start: 20, end: 24, score: 0.85, type: LOCATION, start: 29, end: 44, score: 0.85, type: DATE_TIME, start: 55, end: 66, score: 0.85]


In [15]:
# List each detected span next to its entity type. The span text is
# recovered by slicing the analyzed sentence with the result's
# start/end character offsets.
# NOTE(review): the recorded output labels "candlestick" as DATE_TIME, which
# looks like a false positive from one of the engine's recognizers — confirm.
print("Identified these PII entities:")
for result in results:
    print(f"- {text1[result.start:result.end]} as {result.entity_type}")

Identified these PII entities:
- Professor as TITLE
- Plum as PERSON
- the Dining Room as LOCATION
- candlestick as DATE_TIME


### Allow list Test

In [16]:
# Example website domains; both appear in the allow-list sample sentence.
websites_list = ["bing.com", "microsoft.com"]

In [17]:
from presidio_analyzer import AnalyzerEngine

text1 = "My favorite website is bing.com, his is microsoft.com"

# A new engine instance; no custom recognizer is added to it in this cell.
analyzer = AnalyzerEngine()

# Baseline run with no allow_list.
result = analyzer.analyze(text=text1, language="en")
print(f"Result: \n {result}")

Result: 
 [type: URL, start: 23, end: 31, score: 0.85, type: URL, start: 40, end: 53, score: 0.85]


In [18]:
# Repeat the analysis, passing "bing.com" as an allowed value.
result = analyzer.analyze(
    text=text1,
    language="en",
    allow_list=["bing.com"],
)
print(f"Result:\n {result}")

Result:
 [type: URL, start: 40, end: 53, score: 0.85]


In [19]:
from presidio_anonymizer import AnonymizerEngine

# Baseline: analyze the sentence with no allow_list.
result_without = analyzer.analyze(text=text1, language="en")
print("Result without allow_list:", result_without, sep="\n")

# Same analysis, this time passing "bing.com" as an allowed value.
result_with_allow = analyzer.analyze(
    text=text1,
    language="en",
    allow_list=["bing.com"],
)
print("\nResult with allow_list ['bing.com']:", result_with_allow, sep="\n")

# A single anonymizer instance handles both result sets.
anonymizer = AnonymizerEngine()

# Anonymize the text using the baseline findings.
anonymized_result_without = anonymizer.anonymize(
    text=text1,
    analyzer_results=result_without,
)
print(
    "\nText with masked information (without allow_list):",
    anonymized_result_without.text,
    sep="\n",
)

# Anonymize the text using the allow_list findings.
anonymized_result_with = anonymizer.anonymize(
    text=text1,
    analyzer_results=result_with_allow,
)
print(
    "\nText with masked information (with allow_list):",
    anonymized_result_with.text,
    sep="\n",
)

Result without allow_list:
[type: URL, start: 23, end: 31, score: 0.85, type: URL, start: 40, end: 53, score: 0.85]

Result with allow_list ['bing.com']:
[type: URL, start: 40, end: 53, score: 0.85]

Text with masked information (without allow_list):
My favorite website is <URL>, his is <URL>

Text with masked information (with allow_list):
My favorite website is bing.com, his is <URL>
