In [None]:
import spacy

In [None]:
# Load spaCy's "en_core_web_sm" pipeline once; mask_pii() calls it to find named entities.
nlp = spacy.load("en_core_web_sm")

In [None]:
import re

In [None]:
# Regexes for PII that has a fixed textual shape. Each key doubles as the
# label written into the mask placeholder (e.g. "[EMAIL]").
PII_PATTERNS = {
    # The TLD length is open-ended ({2,}) so that addresses under long TLDs
    # such as ".museum" are matched as well.
    "EMAIL": r"\b[\w\.-]+@[\w\.-]+\.\w{2,}\b",
    # The pattern opens with a lookbehind instead of \b: a word boundary can
    # never sit between whitespace and "+" or "(", so with \b the match
    # started at the first digit and left the leading "+" / "(" unmasked.
    "PHONE": r"(?<!\w)(?:\+?1[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b",
}

# spaCy entity labels that are treated as PII and masked.
NER_LABELS = ["PERSON", "GPE", "ORG", "DATE"]

In [None]:
def mask_pii(text):
    """Replace PII found in ``text`` with bracketed label placeholders.

    Candidate spans come from two sources: every match of the regexes in
    ``PII_PATTERNS`` (labelled with the pattern's key) and every entity in
    ``nlp(text).ents`` whose label is listed in ``NER_LABELS``.

    Spans from the two sources may overlap (for example when the NER model
    tags part of a regex match). Overlapping spans are merged into a single
    region that is masked once, using the label of the earliest-starting
    span (the longest one on a tie), so no fragment of a detected span is
    left in the output.

    Args:
        text: The string to scan.

    Returns:
        A copy of ``text`` in which every detected region is replaced by
        ``[LABEL]``; text outside the detected regions is unchanged.
    """
    # Regex-based detections: (start, end, label) character offsets.
    pii_spans = [
        (match.start(), match.end(), label)
        for label, pattern in PII_PATTERNS.items()
        for match in re.finditer(pattern, text)
    ]

    # NER-based detections, restricted to the labels configured as PII.
    doc = nlp(text)
    pii_spans.extend(
        (ent.start_char, ent.end_char, ent.label_)
        for ent in doc.ents
        if ent.label_ in NER_LABELS
    )

    # Order by start offset; on a tie the longer span comes first so that
    # its label is the one kept for the merged region.
    pii_spans.sort(key=lambda span: (span[0], -span[1]))

    # Merge overlapping spans. Without this, a span starting inside the
    # previous one would move the cursor backwards and re-emit text that
    # had already been masked.
    merged = []
    for start, end, label in pii_spans:
        if merged and start < merged[-1][1]:
            merged[-1][1] = max(merged[-1][1], end)
        else:
            merged.append([start, end, label])

    pieces = []
    last_idx = 0
    for start, end, label in merged:
        # Keep the untouched text before the region, then the placeholder.
        pieces.append(text[last_idx:start])
        pieces.append(f"[{label}]")
        # Advance the cursor past the masked region.
        last_idx = end

    # Append whatever follows the last masked region.
    pieces.append(text[last_idx:])

    return "".join(pieces)

In [None]:
# Sample input for the demo: it mentions a person, organisations, a place,
# a year, an e-mail address and a phone number.
text = """
Hi, I'm Sarah Johnson and I work at Microsoft in Seattle.
I was born in 1988 and graduated from Stanford University.
You can reach me at sarah.johnson@example.com or call me at +1 555 123 4567.
"""

In [None]:
# Run the masker on the sample text and print the result under a header line.
print("🔒 Masked Text:\n")
print(mask_pii(text))

🔒 Masked Text:


Hi, I'm [PERSON] and I work at [ORG] in [GPE]. 
I was born in [DATE] and graduated from [ORG].
You can reach me at [EMAIL] or call me at +[PHONE].

