In [1]:
# %pip install "presidio_analyzer[transformers]"
from presidio_analyzer import AnalyzerEngine, RecognizerRegistry
from presidio_analyzer.nlp_engine import NlpEngineProvider
from presidio_anonymizer import AnonymizerEngine
from transformers import AutoTokenizer, AutoModelForTokenClassification
from huggingface_hub import snapshot_download

# Fetch the Hugging Face model repository, then instantiate the tokenizer and
# the token-classification model once. The loaded objects are not kept;
# presumably this only warms the local cache before the NLP engine is built
# -- confirm.
transformer_model = "obi/deid_roberta_i2b2"
snapshot_download(repo_id=transformer_model)
for loader in (AutoTokenizer, AutoModelForTokenClassification):
    loader.from_pretrained(transformer_model)

# The NLP engine is created from this YAML configuration file
# (its contents are not shown in this notebook cell).
conf_file_path = "Config/sample.yaml"
provider = NlpEngineProvider(conf_file=conf_file_path)
nlp_engine = provider.create_engine()

# Analyzer backed by the configured NLP engine.
# NOTE(review): "es" is declared as supported, but only "en" is exercised
# below -- confirm the configuration actually covers both languages.
analyzer = AnalyzerEngine(nlp_engine=nlp_engine, supported_languages=["en", "es"])

# Demo: analyze a single English sentence and print every detected span
# together with its entity type and character offsets.
text = "John Doe visited the hospital on January 3rd, 2020."
results = analyzer.analyze(text=text, language="en")

for result in results:
    print(
        f"Entity: {result.entity_type}, "
        f"Text: {text[result.start:result.end]}, "
        f"Start: {result.start}, End: {result.end}"
    )


Note: you may need to restart the kernel to use updated packages.


  from .autonotebook import tqdm as notebook_tqdm
Fetching 9 files: 100%|██████████| 9/9 [00:00<00:00, 98560.67it/s]


Entity: DATE_TIME, Text: January 3rd,, Start: 33, End: 45
Entity: DATE_TIME, Text: 2020, Start: 46, End: 50
Entity: PERSON, Text: Doe, Start: 5, End: 8
Entity: PERSON, Text: John, Start: 0, End: 4
