In [2]:
# download presidio
! pip install presidio_analyzer presidio_anonymizer
! python -m spacy download en_core_web_lg

Collecting presidio_analyzer
  Downloading presidio_analyzer-2.2.353-py3-none-any.whl.metadata (2.6 kB)
Collecting presidio_anonymizer
  Downloading presidio_anonymizer-2.2.353-py3-none-any.whl.metadata (8.0 kB)
Collecting spacy<4.0.0,>=3.4.4 (from presidio_analyzer)
  Downloading spacy-3.7.4-cp310-cp310-win_amd64.whl.metadata (27 kB)
Collecting tldextract (from presidio_analyzer)
  Downloading tldextract-5.1.1-py3-none-any.whl.metadata (11 kB)
Collecting phonenumbers<9.0.0,>=8.12 (from presidio_analyzer)
  Downloading phonenumbers-8.13.30-py2.py3-none-any.whl.metadata (11 kB)
Collecting pycryptodome>=3.10.1 (from presidio_anonymizer)
  Downloading pycryptodome-3.20.0-cp35-abi3-win_amd64.whl.metadata (3.4 kB)
Collecting spacy-legacy<3.1.0,>=3.0.11 (from spacy<4.0.0,>=3.4.4->presidio_analyzer)
  Downloading spacy_legacy-3.0.12-py2.py3-none-any.whl (29 kB)
Collecting spacy-loggers<2.0.0,>=1.0.0 (from spacy<4.0.0,>=3.4.4->presidio_analyzer)
  Downloading spacy_loggers-1.0.5-py3-none-any.w

In [3]:
import json
from pprint import pprint
import logging
from presidio_analyzer import AnalyzerEngine, PatternRecognizer
from presidio_anonymizer import AnonymizerEngine
from presidio_anonymizer.entities import OperatorConfig

In [10]:
def analyze_and_anonymize(text_to_anonymize):
    # Analyzer engine setup
    analyzer = AnalyzerEngine()
    analyzer_results = analyzer.analyze(text=text_to_anonymize, entities=["PHONE_NUMBER"], language='en')
    print(f'Analyzer results: {analyzer_results}')

    # Add custom recognizers
    titles_recognizer = PatternRecognizer(supported_entity="TITLE", deny_list=["Mr.", "Mrs.", "Miss"])
    pronoun_recognizer = PatternRecognizer(supported_entity="PRONOUN", deny_list=["he", "He", "his", "His", "she", "She", "hers", "Hers"])
    analyzer.registry.add_recognizer(titles_recognizer)
    analyzer.registry.add_recognizer(pronoun_recognizer)

    # Analyze text
    analyzer_results = analyzer.analyze(text=text_to_anonymize, entities=["TITLE", "PRONOUN"], language="en")
    print(f'Analyzer results with custom recognizers: {analyzer_results}')
    
    analyzer_results = analyzer.analyze(text=text_to_anonymize, language='en')


    # Anonymize text
    anonymizer = AnonymizerEngine()
    anonymized_results = anonymizer.anonymize(
        text=text_to_anonymize,
        analyzer_results=analyzer_results,    
        operators={
            "DEFAULT": OperatorConfig("replace", {"new_value": "**********"}), 
            "PHONE_NUMBER": OperatorConfig("mask", {"type": "mask", "masking_char" : "*", "chars_to_mask" : 12, "from_end" : True}),
            "TITLE": OperatorConfig("redact", {})
        }
    )

    print(f'Text after anonymization: {anonymized_results.text}')
    print("Detailed response:")
    pprint(json.loads(anonymized_results.to_json()))

In [13]:
if __name__ == "__main__":
    text_to_anonymize = "Hello, Mr.Prashant Khoragade, Thank you ! your contact number is 8889994470 and email id is pras778@gmail.com"
    analyze_and_anonymize(text_to_anonymize)

Analyzer results: [type: PHONE_NUMBER, start: 65, end: 75, score: 0.75]
Analyzer results with custom recognizers: []
Text after anonymization: Hello, ********************, Thank you ! your contact number is ********** and email id is **********
Detailed response:
{'items': [{'end': 101,
            'entity_type': 'EMAIL_ADDRESS',
            'operator': 'replace',
            'start': 91,
            'text': '**********'},
           {'end': 74,
            'entity_type': 'PHONE_NUMBER',
            'operator': 'mask',
            'start': 64,
            'text': '**********'},
           {'end': 27,
            'entity_type': 'PERSON',
            'operator': 'replace',
            'start': 17,
            'text': '**********'},
           {'end': 17,
            'entity_type': 'URL',
            'operator': 'replace',
            'start': 7,
            'text': '**********'}],
 'text': 'Hello, ********************, Thank you ! your contact number is '
         '********** and email i

In [14]:
from flask import Flask, request, jsonify


In [15]:
app = Flask(__name__)


@app.route('/analyze_and_anonymize', methods=['POST'])
def analyze_and_anonymize_route():
    data = request.json
    text_to_anonymize = data.get('text_to_anonymize')

    if text_to_anonymize:
        anonymized_text = analyze_and_anonymize(text_to_anonymize)
        return jsonify({'anonymized_text': anonymized_text}), 200
    else:
        return jsonify({'error': 'Text to anonymize is missing in the request.'}), 400


In [19]:
if __name__ == '__main__':
    app.run(debug=True)


 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
 * Restarting with stat


SystemExit: 1