In [None]:
# download presidio
!pip install presidio_analyzer presidio_anonymizer -q
!python -m spacy download en_core_web_lg -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m587.7/587.7 MB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_lg')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


# Presidio Analyzer
- https://microsoft.github.io/presidio/samples/python/customizing_presidio_analyzer/

- The Presidio-Anonymizer package contains both Anonymizers and Deanonymizers.

- **Anonymizers** are used to replace a PII entity text with some other value.
- **Deanonymizers** are used to revert the anonymization operation. For example, to decrypt an encrypted text.
- https://github.com/microsoft/presidio/tree/main/presidio-anonymizer

In [None]:
from typing import List
import pprint

from presidio_analyzer import (
    AnalyzerEngine,
    PatternRecognizer,
    EntityRecognizer,
    Pattern,
    RecognizerResult,
)
from presidio_analyzer.recognizer_registry import RecognizerRegistry
from presidio_analyzer.nlp_engine import NlpEngine, SpacyNlpEngine, NlpArtifacts
from presidio_analyzer.context_aware_enhancers import LemmaContextAwareEnhancer

## Function to Analyze

In [None]:
# Helper method to print results nicely


def print_analyzer_results(results: List[RecognizerResult], text: str):
    """Pretty-print analyzer findings, one numbered entry per result.

    Each entry shows the result followed by the slice of ``text`` between its
    ``start`` and ``end`` offsets. If the result carries an
    ``analysis_explanation``, its ``textual_explanation`` is printed as well.
    """
    index = 0
    for finding in results:
        print(f"Result {index}:")
        matched_text = text[finding.start:finding.end]
        print(f" {finding}, text: {matched_text}")

        explanation = finding.analysis_explanation
        if explanation is not None:
            print(f" {explanation.textual_explanation}")
        index += 1

## Example 1: Deny-list based PII recognition
- In this example, we will pass a short list of tokens which should be marked as PII if detected. First, let's define the tokens we want to treat as PII. In this case it would be a list of titles:


In [None]:
# Deny-list of titles/honorifics that should be treated as PII when they appear.
titles_list = ["Sir", "Ma'am", "Madam", "Mr.", "Mrs.", "Ms.", "Miss", "Dr.", "Professor"]

In [None]:
# Recognizer that reports every deny-listed title as a TITLE entity.
titles_recognizer = PatternRecognizer(
    deny_list=titles_list,
    supported_entity="TITLE",
)

In [None]:
# Call the recognizer on its own (no AnalyzerEngine involved yet).
text1 = "I suspect Professor Plum, in the Dining Room, with the candlestick"
result = titles_recognizer.analyze(text=text1, entities=["TITLE"])
print("Result:\n", result)

Result:
 [type: TITLE, start: 10, end: 19, score: 1.0]


> Finally, let's add this new recognizer to the list of recognizers used by the Presidio AnalyzerEngine:

### Add the new recognizer via `add_recognizer`

In [None]:
# Default engine; its registry additionally receives the custom TITLE recognizer.
analyzer = AnalyzerEngine()
recognizer_registry = analyzer.registry
recognizer_registry.add_recognizer(titles_recognizer)



### Run
- Let's run the analyzer with the new recognizer in place:

In [None]:
# Analyze the sentence with the engine and print every finding.
results = analyzer.analyze(language="en", text=text1)
print_analyzer_results(results=results, text=text1)

Result 0:
 type: TITLE, start: 10, end: 19, score: 1.0, text: Professor
Result 1:
 type: PERSON, start: 20, end: 24, score: 0.85, text: Plum
Result 2:
 type: LOCATION, start: 29, end: 44, score: 0.85, text: the Dining Room


In [None]:
# One bullet per finding: the matched substring and the entity type assigned to it.
print("Identified these PII entities:")
for result in results:
    span = text1[result.start:result.end]
    print(f"- {span} as {result.entity_type}")

Identified these PII entities:
- Professor as TITLE
- Plum as PERSON
- the Dining Room as LOCATION


## Example 2: Regex based PII recognition
- Another simple recognizer we can add is based on regular expressions. Let's assume we want to be extremely conservative and treat any token which contains a number as PII.

In [None]:
# Define the regex pattern in a Presidio `Pattern` object.
# A raw string keeps `\d` a regex escape rather than an invalid Python string escape.
numbers_pattern = Pattern(name="numbers_pattern", regex=r"\d+", score=0.5)

# Define the recognizer with one or more patterns
number_recognizer = PatternRecognizer(
    supported_entity="NUMBER", patterns=[numbers_pattern]
)

In [None]:
text2 = "I live in 510 Broad st."

# Call the recognizer directly, asking only for NUMBER entities.
numbers_result = number_recognizer.analyze(entities=["NUMBER"], text=text2)
print("Result:", numbers_result, sep="\n")

Result:
[type: NUMBER, start: 10, end: 13, score: 0.5]


> It's important to mention that recognizers are likely to have errors, both false positives and false negatives, which would impact the overall performance of Presidio. Consider testing each recognizer on a representative dataset prior to integrating it into Presidio. For more info, see the best practices for developing recognizers documentation.

## Example 3: Rule based logic recognizer
- Taking the numbers recognizer one step further, let's say we also would like to detect numbers within words, e.g. "Number One". We can leverage the underlying spaCy token attributes, or write our own logic to detect such entities.

In [None]:
class MyRecognizer(EntityRecognizer):
    """Skeleton of a custom recognizer: fill in ``load`` and ``analyze``."""

    def load(self) -> None:
        """Nothing needs to be loaded."""

    def analyze(
        self, text: str, entities: List[str], nlp_artifacts: NlpArtifacts
    ) -> List[RecognizerResult]:
        """Placeholder for the PII detection logic; currently returns ``None``."""
        return None

For example, detecting numbers in either numerical or alphabetic (e.g. Forty five) form:

In [None]:
class NumbersRecognizer(EntityRecognizer):
    """Flags number-like tokens, whether written as digits or as words."""

    # Score attached to every NUMBER result this recognizer emits.
    expected_confidence_level = 0.7

    def load(self) -> None:
        """Nothing needs to be loaded."""

    def analyze(
        self, text: str, entities: List[str], nlp_artifacts: NlpArtifacts
    ) -> List[RecognizerResult]:
        """Return one NUMBER result per token whose ``like_num`` attribute is truthy.

        ``text`` and ``entities`` are not consulted; detection relies only on
        ``nlp_artifacts.tokens``. Each result spans the token, from ``token.idx``
        to ``token.idx + len(token)``.
        """
        return [
            RecognizerResult(
                entity_type="NUMBER",
                start=tok.idx,
                end=tok.idx + len(tok),
                score=self.expected_confidence_level,
            )
            for tok in nlp_artifacts.tokens
            if tok.like_num
        ]

In [None]:
# Instantiate the custom recognizer, declaring the entity type it supports.
new_numbers_recognizer = NumbersRecognizer(
    supported_entities=["NUMBER"],
)

### Add the new recognizer via `add_recognizer`

In [None]:
# Fresh engine whose registry also contains the custom NUMBER recognizer.
analyzer = AnalyzerEngine()
analyzer.registry.add_recognizer(new_numbers_recognizer)

# The sentence mixes a spelled-out number ("Five") with a numeric one ("10").
text3 = "Roberto lives in Five 10 Broad st."
numbers_results2 = analyzer.analyze(language="en", text=text3)
print_analyzer_results(results=numbers_results2, text=text3)



Result 0:
 type: PERSON, start: 0, end: 7, score: 0.85, text: Roberto
Result 1:
 type: LOCATION, start: 25, end: 34, score: 0.85, text: Broad st.
Result 2:
 type: NUMBER, start: 17, end: 21, score: 0.7, text: Five
Result 3:
 type: NUMBER, start: 22, end: 24, score: 0.7, text: 10


## Example 4: Calling an external service for PII detection
- In a similar way to example 3, we can write logic to call external services for PII detection. For a detailed example, see:
- https://microsoft.github.io/presidio/analyzer/adding_recognizers/#creating-a-remote-recognizer

This is a sample implementation of such remote recognizer.
- https://github.com/microsoft/presidio/blob/main/docs/samples/python/example_remote_recognizer.py


## Example 5: Supporting new languages
Two main parts in Presidio handle the text, and should be adapted if a new language is required:

The NlpEngine containing the NLP model which performs tokenization, lemmatization, Named Entity Recognition and other NLP tasks.
The different PII recognizers (EntityRecognizer objects) should be adapted or created.

In [None]:
from presidio_analyzer.nlp_engine import NlpEngineProvider

# import spacy
# spacy.cli.download("es_core_news_md")

# One spaCy model per language the engine should handle.
spacy_models = [
    {"lang_code": "es", "model_name": "es_core_news_md"},
    {"lang_code": "en", "model_name": "en_core_web_lg"},
]
configuration = {"nlp_engine_name": "spacy", "models": spacy_models}

# Build the NLP engine from that configuration.
provider = NlpEngineProvider(nlp_configuration=configuration)
nlp_engine_with_spanish = provider.create_engine()

# The analyzer receives both the engine and the list of supported languages.
analyzer = AnalyzerEngine(
    supported_languages=["en", "es"],
    nlp_engine=nlp_engine_with_spanish,
)

# Analyze one sentence per language.
results_spanish = analyzer.analyze(language="es", text="Mi nombre es Morris")
print("Results from Spanish request:", results_spanish, sep="\n")

results_english = analyzer.analyze(language="en", text="My name is Morris")
print("Results from English request:", results_english, sep="\n")

- Results from Spanish request:
[type: PERSON, start: 13, end: 19, score: 0.85]
- Results from English request:
[type: PERSON, start: 11, end: 17, score: 0.85]

## Example 6: Using context words
- Presidio has an internal mechanism for leveraging context words. This mechanism increases the detection confidence of a PII entity when a specific word appears before or after it.

- In this example we first implement a zip code recognizer without context, and then add context to see how the confidence changes. Zip regex patterns (essentially 5 digits) are very weak, so we want the initial confidence to be low, and to increase when context words are present.

- In this example we first implement a zip code recognizer without context, and then add context to see how the confidence changes. Zip regex patterns (essentially 5 digits) are very weak, so we want the initial confidence to be low, and to increase when context words are present.

In [None]:
# Five digits with an optional "-dddd" suffix: a deliberately weak pattern,
# hence the very low base score.
regex = r"(\b\d{5}(?:\-\d{4})?\b)"
zipcode_pattern = Pattern(score=0.01, name="zip code (weak)", regex=regex)

# Recognizer built from that single pattern, without any context words.
zipcode_recognizer = PatternRecognizer(
    patterns=[zipcode_pattern],
    supported_entity="US_ZIP_CODE",
)

# Engine whose registry contains only the zip code recognizer.
registry = RecognizerRegistry()
registry.add_recognizer(zipcode_recognizer)
analyzer = AnalyzerEngine(registry=registry)

# Test
text = "My zip code is 90210"
results = analyzer.analyze(language="en", text=text)
print_analyzer_results(results=results, text=text)

Result 0:
 type: US_ZIP_CODE, start: 15, end: 20, score: 0.01, text: 90210


> So this is working, but would catch any 5 digit string. This is why we set the score to 0.01. Let's use context words to increase score:

In [None]:
# Same pattern as before, now paired with context words.
zip_context_words = ["zip", "zipcode"]
zipcode_recognizer = PatternRecognizer(
    context=zip_context_words,
    patterns=[zipcode_pattern],
    supported_entity="US_ZIP_CODE",
)

> When creating an AnalyzerEngine we can provide our own context enhancement logic by passing it to the `context_aware_enhancer` parameter. If none is passed, AnalyzerEngine creates a LemmaContextAwareEnhancer by default, which enhances the score of each matched result if its recognizer holds context words and those words are found in the context of the matched entity.

In [None]:
# Rebuild the registry and engine around the current `zipcode_recognizer`.
registry = RecognizerRegistry()
registry.add_recognizer(recognizer=zipcode_recognizer)
analyzer = AnalyzerEngine(registry=registry)

In [None]:
# Test
# Bind the sentence to `text` in this cell so the offsets printed below always
# refer to the string that was analyzed, even if another cell reassigned `text`.
text = "My zip code is 90210"
results = analyzer.analyze(text=text, language="en")
print("Result:")
print_analyzer_results(results, text=text)

Result:
Result 0:
 type: US_ZIP_CODE, start: 15, end: 20, score: 0.4, text: 90210


### context_aware_enhancer, context_similarity_factor

> The confidence score is now 0.4 instead of 0.01, because the default context similarity factor of LemmaContextAwareEnhancer is 0.35 and its default minimum score with context similarity is 0.4. We can change that by passing other values for the `context_similarity_factor` and `min_score_with_context_similarity` parameters of LemmaContextAwareEnhancer, for example:

In [None]:
# Enhancer with a non-default similarity factor (0.45) and a minimum score of 0.4.
custom_enhancer = LemmaContextAwareEnhancer(
    min_score_with_context_similarity=0.4,
    context_similarity_factor=0.45,
)

registry = RecognizerRegistry()
registry.add_recognizer(zipcode_recognizer)
analyzer = AnalyzerEngine(context_aware_enhancer=custom_enhancer, registry=registry)

In [None]:
# Test
# Bind the sentence to `text` in this cell so the offsets printed below always
# refer to the string that was analyzed, even if another cell reassigned `text`.
text = "My zip code is 90210"
results = analyzer.analyze(text=text, language="en")
print("Result:")
print_analyzer_results(results, text=text)

Result:
Result 0:
 type: US_ZIP_CODE, start: 15, end: 20, score: 0.46, text: 90210


> Presidio supports passing a list of outer context words at the analyzer level. This is useful if the text comes from a specific column, a specific user input, etc. Notice how the "zip" context word doesn't appear in the text but still enhances the confidence score from 0.01 to 0.4:

In [None]:
# Zip code recognizer with its own context words.
zipcode_recognizer = PatternRecognizer(
    context=["zip", "zipcode"],
    patterns=[zipcode_pattern],
    supported_entity="US_ZIP_CODE",
)

# Engine (default enhancer) containing only that recognizer.
registry = RecognizerRegistry()
registry.add_recognizer(zipcode_recognizer)
analyzer = AnalyzerEngine(registry=registry)

# Test: the sentence itself has no "zip"; the word is passed as outer context.
text = "My code is 90210"
result = analyzer.analyze(context=["zip"], language="en", text=text)
print("Result:")
print_analyzer_results(results=result, text=text)

Result:
Result 0:
 type: US_ZIP_CODE, start: 11, end: 16, score: 0.4, text: 90210


## Example 7: Tracing the decision process

- Presidio-analyzer's decision process exposes information on why a specific PII was detected. Such information could contain:

  - Which recognizer detected the entity
  - Which regex pattern was used
  - Interpretability mechanisms in ML models
  - Which context words improved the score
  - Confidence scores before and after each step
  - And more.

For more information, refer to the decision process documentation.

Let's use the decision process output to understand how the zip code value was detected:

In [None]:
# Re-run the analysis, asking Presidio to record how each decision was made.
results = analyzer.analyze(
    language="en", text="My zip code is 90210", return_decision_process=True
)
# The explanation object of the first result carries the trace.
decision_process = results[0].analysis_explanation

print("Decision process output:\n")
pp = pprint.PrettyPrinter()
pp.pprint(vars(decision_process))

Decision process output:

{'original_score': 0.01,
 'pattern': '(\\b\\d{5}(?:\\-\\d{4})?\\b)',
 'pattern_name': 'zip code (weak)',
 'recognizer': 'PatternRecognizer',
 'regex_flags': regex.I|M|S,
 'score': 0.4,
 'score_context_improvement': 0.39,
 'supportive_context_word': 'zip',
 'textual_explanation': 'Detected by `PatternRecognizer` using pattern `zip '
                        'code (weak)`',
 'validation_result': None}


## Example 8: Passing a list of words to keep
- We will use the built in recognizers that include the URLRecognizer and the NLP model EntityRecognizer and see the default functionality if we don't specify any list of words for the detector to allow to keep in the text.

In [None]:
websites_list = ["bing.com", "microsoft.com"]
text1 = "Bill's favorite website is bing.com, David's is microsoft.com"

# Default engine, no allow-list: shows what gets flagged out of the box.
analyzer = AnalyzerEngine()
results = analyzer.analyze(return_decision_process=True, language="en", text=text1)
print_analyzer_results(results=results, text=text1)



Result 0:
 type: PERSON, start: 0, end: 4, score: 0.85, text: Bill
 Identified as PERSON by Spacy's Named Entity Recognition
Result 1:
 type: URL, start: 27, end: 35, score: 0.85, text: bing.com
 Detected by `UrlRecognizer` using pattern `Non schema URL`
Result 2:
 type: PERSON, start: 37, end: 42, score: 0.85, text: David
 Identified as PERSON by Spacy's Named Entity Recognition
Result 3:
 type: URL, start: 48, end: 61, score: 0.85, text: microsoft.com
 Detected by `UrlRecognizer` using pattern `Non schema URL`


In [None]:
# Same sentence, this time with an allow-list of values to keep in the text.
allowed_values = ["bing.com", "google.com"]
results = analyzer.analyze(
    allow_list=allowed_values,
    return_decision_process=True,
    language="en",
    text=text1,
)
print_analyzer_results(results=results, text=text1)

Result 0:
 type: PERSON, start: 0, end: 4, score: 0.85, text: Bill
 Identified as PERSON by Spacy's Named Entity Recognition
Result 1:
 type: PERSON, start: 37, end: 42, score: 0.85, text: David
 Identified as PERSON by Spacy's Named Entity Recognition
Result 2:
 type: URL, start: 48, end: 61, score: 0.85, text: microsoft.com
 Detected by `UrlRecognizer` using pattern `Non schema URL`


# Simple Analyzer example
- Refer to this for more.
- https://github.com/microsoft/presidio/tree/main/presidio-anonymizer


In [None]:
# download presidio
!pip install presidio_analyzer presidio_anonymizer -q

In [None]:
from presidio_analyzer import AnalyzerEngine
from presidio_anonymizer import AnonymizerEngine

## 1. Detect sensitive data in text

In [None]:
# Helper method to print results nicely

def print_analyzer_results(results: List[RecognizerResult], text: str):
    """Print each analyzer result with the text span it covers.

    For every result a numbered header is printed, then the result together
    with ``text[start:end]``, and finally the textual explanation when the
    result has an ``analysis_explanation``.
    """
    for position, item in enumerate(results):
        print(f"Result {position}:")
        covered = text[item.start:item.end]
        print(f" {item}, text: {covered}")

        details = item.analysis_explanation
        if details is None:
            continue
        print(f" {details.textual_explanation}")

In [None]:
text = "My name is Prabha I born in Singapore, my phone no is 91125393 and email id prababardwaj@gmail.com myy id is 123456"

analyzer = AnalyzerEngine()

# Restrict the analysis to these predefined entity types.
predefined_entities = ["PHONE_NUMBER", "PERSON", "EMAIL_ADDRESS", "LOCATION"]
analyzer_result = analyzer.analyze(
    language="en",
    entities=predefined_entities,
    text=text,
)
print_analyzer_results(results=analyzer_result, text=text)



Result 0:
 type: EMAIL_ADDRESS, start: 76, end: 98, score: 1.0, text: prababardwaj@gmail.com
Result 1:
 type: PERSON, start: 11, end: 17, score: 0.85, text: Prabha
Result 2:
 type: LOCATION, start: 28, end: 37, score: 0.85, text: Singapore


In [None]:
# Show the raw list of results returned by the analyzer.
analyzer_result

[type: EMAIL_ADDRESS, start: 76, end: 98, score: 1.0,
 type: PERSON, start: 11, end: 17, score: 0.85,
 type: LOCATION, start: 28, end: 37, score: 0.85]

## 2. Add Custom Entity

In [None]:
from presidio_analyzer import (
    AnalyzerEngine,
    PatternRecognizer,
    # EntityRecognizer,
    Pattern,
    # RecognizerResult,
)

In [None]:
# Raw strings keep `\d` a regex escape. The `\b` word boundaries make each pattern
# match only a standalone run of exactly that many digits, so the 6-digit ID
# pattern no longer matches inside the 8-digit phone number.
id_pattern = Pattern(name="id_pattern", regex=r"\b\d{6}\b", score=0.5)
# NOTE(review): the context entries are multi-word phrases; presumably the default
# lemma-based enhancer compares single tokens, so they may never raise the score - confirm.
id_recognizer = PatternRecognizer(
    supported_entity="ID",
    patterns=[id_pattern],
    context=["id no", "ID number"],
)

sg_phone_pattern = Pattern(name="ph_pattern", regex=r"\b\d{8}\b", score=0.5)
sg_recognizer = PatternRecognizer(
    supported_entity="SG_PHONE_NUMBER",
    patterns=[sg_phone_pattern],
    context=["phone no", "phone number"],
)

# Add the custom recognizers to the analyzer
analyzer.registry.add_recognizer(id_recognizer)
analyzer.registry.add_recognizer(sg_recognizer)


In [None]:
# Request the built-in entity types together with the custom ID and SG_PHONE_NUMBER.
requested_entities = [
    "PHONE_NUMBER",
    "PERSON",
    "EMAIL_ADDRESS",
    "LOCATION",
    "ID",
    "SG_PHONE_NUMBER",
]
analyzer_result = analyzer.analyze(language="en", entities=requested_entities, text=text)
print_analyzer_results(results=analyzer_result, text=text)

Result 0:
 type: EMAIL_ADDRESS, start: 76, end: 98, score: 1.0, text: prababardwaj@gmail.com
Result 1:
 type: PERSON, start: 11, end: 17, score: 0.85, text: Prabha
Result 2:
 type: LOCATION, start: 28, end: 37, score: 0.85, text: Singapore
Result 3:
 type: SG_PHONE_NUMBER, start: 54, end: 62, score: 0.5, text: 91125393
Result 4:
 type: ID, start: 54, end: 60, score: 0.5, text: 911253
Result 5:
 type: SG_PHONE_NUMBER, start: 109, end: 115, score: 0.5, text: 123456
Result 6:
 type: ID, start: 109, end: 115, score: 0.5, text: 123456


In [None]:
# Show the results of the analysis above; `result` is a leftover from an
# unrelated earlier cell and would display stale findings.
analyzer_result

[type: EMAIL_ADDRESS, start: 76, end: 98, score: 1.0,
 type: PERSON, start: 11, end: 17, score: 0.85,
 type: LOCATION, start: 28, end: 37, score: 0.85,
 type: SG_PHONE_NUMBER, start: 54, end: 62, score: 0.5,
 type: ID, start: 54, end: 60, score: 0.5,
 type: SG_PHONE_NUMBER, start: 109, end: 115, score: 0.5,
 type: ID, start: 109, end: 115, score: 0.5]

> Now ID is also detected as PII

## 3. Anonymize PII Data

In [None]:
anonymizer = AnonymizerEngine()

# Use `analyzer_result` (the findings for `text`); the `result` variable is a
# leftover from an unrelated earlier cell and describes a different sentence.
anonymized_result = anonymizer.anonymize(
    text=text,
    analyzer_results=analyzer_result,
)

# The engine result already renders as "text: ..." followed by the items,
# so it is printed without an extra "text: " prefix.
print(anonymized_result)

text: text: My name is <PERSON> I born in <LOCATION>, my phone no is <SG_PHONE_NUMBER> and email id <EMAIL_ADDRESS> myy id is <ID>
items:
[
    {'start': 114, 'end': 118, 'entity_type': 'ID', 'text': '<ID>', 'operator': 'replace'},
    {'start': 88, 'end': 103, 'entity_type': 'EMAIL_ADDRESS', 'text': '<EMAIL_ADDRESS>', 'operator': 'replace'},
    {'start': 57, 'end': 74, 'entity_type': 'SG_PHONE_NUMBER', 'text': '<SG_PHONE_NUMBER>', 'operator': 'replace'},
    {'start': 30, 'end': 40, 'entity_type': 'LOCATION', 'text': '<LOCATION>', 'operator': 'replace'},
    {'start': 11, 'end': 19, 'entity_type': 'PERSON', 'text': '<PERSON>', 'operator': 'replace'}
]



## 3. Custom Anonymize PII Data
- Mask: Replaces the PII with a sequence of a given character.

  - Parameters:

    - chars_to_mask: The amount of characters out of the PII that should be replaced.
    - masking_char: The character to be replaced with.
    - from_end: Whether to mask the PII from its end.

In [None]:
from presidio_anonymizer.entities import OperatorConfig


# Per-entity operators: mask the Singapore phone number with "*", and replace
# every other entity with a fixed placeholder via the "DEFAULT" key.
operator = {
    "SG_PHONE_NUMBER": OperatorConfig(
        "mask",
        {"type": "mask", "masking_char": "*", "chars_to_mask": 9, "from_end": True},
    ),
    "DEFAULT": OperatorConfig("replace", {"new_value": "<ANONYMIZED>"}),
}

anonymized_result = anonymizer.anonymize(
    text=text,
    analyzer_results=analyzer_result,
    operators=operator,
)

# The engine result already renders as "text: ..." followed by the items,
# so it is printed without an extra "text: " prefix.
print(anonymized_result)

text: text: My name is <ANONYMIZED> I born in <ANONYMIZED>, my phone no is ******** and email id <ANONYMIZED> myy id is <ANONYMIZED>
items:
[
    {'start': 108, 'end': 120, 'entity_type': 'ID', 'text': '<ANONYMIZED>', 'operator': 'replace'},
    {'start': 85, 'end': 97, 'entity_type': 'EMAIL_ADDRESS', 'text': '<ANONYMIZED>', 'operator': 'replace'},
    {'start': 63, 'end': 71, 'entity_type': 'SG_PHONE_NUMBER', 'text': '********', 'operator': 'mask'},
    {'start': 34, 'end': 46, 'entity_type': 'LOCATION', 'text': '<ANONYMIZED>', 'operator': 'replace'},
    {'start': 11, 'end': 23, 'entity_type': 'PERSON', 'text': '<ANONYMIZED>', 'operator': 'replace'}
]



## 4. Encrypt and Deanonymize PII data using a specific crypto_key

- **How to get a unique crypto_key?**

- Presidio deanonymizer currently contains one operator:

- Decrypt: Replace the encrypted text with decrypted text. Uses Advanced Encryption Standard (AES) as the encryption algorithm, also known as Rijndael.
Parameters:
- key - a cryptographic key used for the encryption. The length of the key needs to be of 128, 192 or 256 bits, in a string format.
- Please notice: you can use "DEFAULT" as an operator key to define an operator over all entities.

In [None]:
from presidio_anonymizer import AnonymizerEngine
from presidio_anonymizer.entities import RecognizerResult, OperatorConfig

# Create the anonymizer engine.
engine = AnonymizerEngine()

# Hand-written analyzer results marking the two PERSON spans in the sentence.
person_spans = [
    RecognizerResult(entity_type="PERSON", start=11, end=15, score=0.8),
    RecognizerResult(entity_type="PERSON", start=17, end=27, score=0.8),
]
# Every PERSON entity is replaced with the fixed value "BIP".
replace_with_bip = {"PERSON": OperatorConfig("replace", {"new_value": "BIP"})}

result = engine.anonymize(
    text="My name is Bond, James Bond",
    analyzer_results=person_spans,
    operators=replace_with_bip,
)

print(result)

text: My name is BIP, BIP
items:
[
    {'start': 16, 'end': 19, 'entity_type': 'PERSON', 'text': 'BIP', 'operator': 'replace'},
    {'start': 11, 'end': 14, 'entity_type': 'PERSON', 'text': 'BIP', 'operator': 'replace'}
]



### Now encrypt it with encrypt feature

In [None]:
crypto_key = "WmZq4t7w!z%C&F)J"

engine = AnonymizerEngine()

# Encrypt the single PERSON span (characters 11-21) with `crypto_key`.
person_span = RecognizerResult(entity_type="PERSON", start=11, end=21, score=0.8)
encrypt_person = {"PERSON": OperatorConfig("encrypt", {"key": crypto_key})}
anonymize_result = engine.anonymize(
    text="My name is James Bond",
    analyzer_results=[person_span],
    operators=encrypt_person,
)

anonymize_result

text: My name is iA0mK67RyJ64E4Hr7F7zfYgriAw4Smk3mZ42HxWN1/s=
items:
[
    {'start': 11, 'end': 55, 'entity_type': 'PERSON', 'text': 'iA0mK67RyJ64E4Hr7F7zfYgriAw4Smk3mZ42HxWN1/s=', 'operator': 'encrypt'}
]

> This example takes the output of the AnonymizerEngine with encrypted PII entities, and decrypts it back to the original text:

In [None]:
from presidio_anonymizer import DeanonymizeEngine
from presidio_anonymizer.entities import OperatorResult, OperatorConfig

# Create the deanonymizer engine.
engine = DeanonymizeEngine()

# The encrypted PERSON value occupies characters 11-55 of the anonymized text;
# the "DEFAULT" operator decrypts every listed entity with the given key.
encrypted_span = OperatorResult(start=11, end=55, entity_type="PERSON")
decrypt_all = {"DEFAULT": OperatorConfig("decrypt", {"key": "WmZq4t7w!z%C&F)J"})}
result = engine.deanonymize(
    text="My name is iA0mK67RyJ64E4Hr7F7zfYgriAw4Smk3mZ42HxWN1/s=",
    entities=[encrypted_span],
    operators=decrypt_all,
)

print(result)


text: My name is James Bond
items:
[
    {'start': 11, 'end': 21, 'entity_type': 'PERSON', 'text': 'James Bond', 'operator': 'decrypt'}
]



### Another Example

In [None]:
crypto_key = "WmZq4t7w!z%C&F)J"

engine = AnonymizerEngine()

# Encrypt the PERSON span (characters 11-27) with `crypto_key`.
person_span = RecognizerResult(entity_type="PERSON", start=11, end=27, score=0.8)
encrypt_person = {"PERSON": OperatorConfig("encrypt", {"key": crypto_key})}
anonymize_result = engine.anonymize(
    text="My name is PRABHA BHARADWAJ",
    analyzer_results=[person_span],
    operators=encrypt_person,
)

anonymize_result

text: My name is FUZgO8VQHXIO0lcnbAO6Hzt+bSqQCViP/WkoIDLaX5ycPHxVPkrr5VSUW4X6Je3b
items:
[
    {'start': 11, 'end': 75, 'entity_type': 'PERSON', 'text': 'FUZgO8VQHXIO0lcnbAO6Hzt+bSqQCViP/WkoIDLaX5ycPHxVPkrr5VSUW4X6Je3b', 'operator': 'encrypt'}
]

In [None]:
# Pull the anonymized text and the anonymized entities out of the result.
anonymized_text, anonymized_entities = anonymize_result.text, anonymize_result.items
(anonymized_text, anonymized_entities)

('My name is FUZgO8VQHXIO0lcnbAO6Hzt+bSqQCViP/WkoIDLaX5ycPHxVPkrr5VSUW4X6Je3b',
 [{'start': 11, 'end': 75, 'entity_type': 'PERSON', 'text': 'FUZgO8VQHXIO0lcnbAO6Hzt+bSqQCViP/WkoIDLaX5ycPHxVPkrr5VSUW4X6Je3b', 'operator': 'encrypt'}])

#### Presidio Anonymizer: Decrypt

In [None]:
# Create the deanonymizer engine.
engine = DeanonymizeEngine()

# Decrypt every anonymized entity with the key that was used for encryption.
decrypt_all = {"DEFAULT": OperatorConfig("decrypt", {"key": crypto_key})}
deanonymized_result = engine.deanonymize(
    operators=decrypt_all,
    entities=anonymized_entities,
    text=anonymized_text,
)

deanonymized_result

text: My name is PRABHA BHARADWAJ
items:
[
    {'start': 11, 'end': 27, 'entity_type': 'PERSON', 'text': 'PRABHA BHARADWAJ', 'operator': 'decrypt'}
]

### Alternatively, call the Decrypt operator directly

In [None]:
from presidio_anonymizer.operators import Decrypt

# Take the encrypted value of the first anonymized item from the previous stage.
first_item = anonymize_result.items[0]
encrypted_entity_value = first_item.text

# Decrypt it directly with the operator, using the same key.
decrypt_params = {"key": crypto_key}
Decrypt().operate(params=decrypt_params, text=encrypted_entity_value)

'PRABHA BHARADWAJ'