In [2]:
import spacy
from spacy.tokens import Token

nlp = spacy.load("en_core_web_sm")

def resolve_token_type(token: Token) -> str:
    """Return the bracketed placeholder label for a single token.

    Args:
        token: The token to classify. Only its ``like_num`` and
            ``ent_type_`` attributes are read.

    Returns:
        ``"[NUMBER]"`` when ``token.like_num`` is truthy, otherwise the
        token's entity type wrapped in square brackets (e.g. ``"[GPE]"``),
        or ``"[O]"`` when the token carries no entity type.
    """
    # The numeric check comes first, so a number-like token is labelled
    # NUMBER even when it also carries an entity type.
    label = "NUMBER" if token.like_num else (token.ent_type_ or "O")
    return f"[{label}]"

# Entity types that are replaced by a bracketed placeholder such as "[GPE]".
_MASKED_ENTITY_TYPES = frozenset(
    {"PERSON", "ORG", "GPE", "LOC", "DATE", "TIME", "MONEY", "QUANTITY", "PERCENT"}
)


def mask_ner_and_numbers(text: str) -> str:
    """Replace selected named entities and number-like tokens with placeholders.

    The text is run through the module-level ``nlp`` pipeline. Tokens that
    belong to one of the masked entity types are replaced by a single
    ``"[TYPE]"`` placeholder per consecutive run of that type; number-like
    tokens outside those entities become ``"[NUMBER]"``; every other token is
    kept verbatim.

    Args:
        text: The raw input text.

    Returns:
        The masked tokens joined by single spaces. Original whitespace is
        not preserved.
    """
    doc = nlp(text)
    masked_tokens: list[str] = []
    # Label of the masked entity the previous token belonged to, or None when
    # the previous token was not part of a masked entity. Tracking this
    # explicitly (instead of inspecting the last output string) keeps the
    # de-duplication independent of what text happens to be in the output.
    open_entity_label = None
    for token in doc:
        if token.ent_type_ in _MASKED_ENTITY_TYPES:
            # Inside a masked entity the entity label wins over "[NUMBER]",
            # so "5 January 2020" and "January 5 2020" both yield one
            # "[DATE]" rather than "[NUMBER] [DATE]" for the former.
            entity_label = f"[{token.ent_type_}]"
            if entity_label != open_entity_label:
                masked_tokens.append(entity_label)
            open_entity_label = entity_label
        elif token.like_num:
            # Stand-alone numbers are masked one placeholder per token.
            masked_tokens.append(resolve_token_type(token))
            open_entity_label = None
        else:
            masked_tokens.append(token.text)
            open_entity_label = None
    return " ".join(masked_tokens)

# Example usage: mask a short query and print the result.
# In the recorded run shown below, the loaded model tagged "NA" as a GPE
# entity, so it was replaced by "[GPE]"; other model versions may tag it
# differently.
text = "Look up all existing orders in NA region"

# Runs the spaCy pipeline on `text` and substitutes placeholders.
masked_text = mask_ner_and_numbers(text)
print(masked_text)

Look up all existing orders in [GPE] region
