In [1]:
import os
from azure.ai.textanalytics import TextAnalyticsClient
from azure.core.credentials import AzureKeyCredential
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
# This example requires environment variables named "LANGUAGE_KEY" and "LANGUAGE_ENDPOINT"
language_endpoint = os.getenv("CONTENT_SAFETY_ENDPOINT")
language_key = os.getenv("CONTENT_SAFETY_KEY")

함수 정의

In [3]:
# Authenticate the client using your key and endpoint 
def authenticate_client():
    ta_credential = AzureKeyCredential(language_key)
    text_analytics_client = TextAnalyticsClient(
            endpoint=language_endpoint, 
            credential=ta_credential)
    return text_analytics_client

# Example method for detecting sensitive information (PII) from text 
def pii_recognition_example(client, documents):
    response = client.recognize_pii_entities(documents, language="en")
    result = [doc for doc in response if not doc.is_error]
    for doc in result:
        print("Redacted Text: {}".format(doc.redacted_text))
        for entity in doc.entities:
            print("Entity: {}".format(entity.text))
            print("\tCategory: {}".format(entity.category))
            print("\tConfidence Score: {}".format(entity.confidence_score))
            print("\tOffset: {}".format(entity.offset))
            print("\tLength: {}".format(entity.length))

실행

In [4]:
client = authenticate_client()
documents = [
    "The employee's SSN is 859-98-0987.",
    "The employee's phone number is 555-555-5555.",
    "고객 이름은 김민수이고 전화번호는 010-1234-5678입니다."
]
pii_recognition_example(client, documents)

Redacted Text: The ********'s SSN is ***********.
Entity: employee
	Category: PersonType
	Confidence Score: 0.97
	Offset: 4
	Length: 8
Entity: 859-98-0987
	Category: USSocialSecurityNumber
	Confidence Score: 0.85
	Offset: 22
	Length: 11
Redacted Text: The ********'s phone number is ************.
Entity: employee
	Category: PersonType
	Confidence Score: 0.98
	Offset: 4
	Length: 8
Entity: 555-555-5555
	Category: PhoneNumber
	Confidence Score: 0.8
	Offset: 31
	Length: 12
Redacted Text: ** 이름은 ***이고 전화번호는 010-1234-5678입니다.
Entity: 고객
	Category: PersonType
	Confidence Score: 0.85
	Offset: 0
	Length: 2
Entity: 김민수
	Category: Person
	Confidence Score: 1.0
	Offset: 7
	Length: 3
