In [None]:
import spacy
from spacy.language import Language
from spacy.tokens import Doc, Span
from spacy.util import filter_spans
from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline

# Path to the locally saved token-classification checkpoint directory.
model_name = "../bert-large-mp-local"

# Both artifacts are read from the same directory; the two loads are
# independent of each other.
model = AutoModelForTokenClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Hugging Face NER pipeline; with an aggregation strategy set, the results
# carry "entity_group", "start" and "end" keys (consumed by hf_ner_pipe).
ner_pipeline = pipeline(
    "ner",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
)



In [None]:
# Define spaCy component
def hf_ner_pipe(doc):
    """Annotate ``doc.ents`` with entities from the Hugging Face NER pipeline.

    Runs the module-level ``ner_pipeline`` on ``doc.text`` and converts each
    prediction into a spaCy ``Span`` labelled with its ``entity_group``.

    Args:
        doc: The spaCy ``Doc`` to annotate.

    Returns:
        The same ``doc``, with ``doc.ents`` replaced by the predicted entities.
    """
    spans = []
    for ent in ner_pipeline(doc.text):
        # The pipeline reports "start"/"end" as character offsets into the
        # text, not token indices, so they must be mapped onto token
        # boundaries. "expand" snaps a span that starts or ends inside a
        # spaCy token outwards to cover the whole token instead of dropping
        # the entity.
        span = doc.char_span(
            ent["start"],
            ent["end"],
            label=ent["entity_group"],
            alignment_mode="expand",
        )
        if span is not None:
            spans.append(span)

    # Expanding to token boundaries can make neighbouring predictions
    # overlap, and assigning overlapping spans to doc.ents raises; keep the
    # longest non-overlapping ones.
    doc.ents = filter_spans(spans)
    return doc

# Load spaCy blank model
nlp = spacy.blank("en")  # You can change "en" to your desired language

# Add custom pipeline.
# add_pipe() only accepts the string name of a registered component (it has
# no `factory` keyword), so register the function under "hf_ner" first and
# then add it by name.
Language.component("hf_ner", func=hf_ner_pipe)
nlp.add_pipe("hf_ner", first=True)

# Test it
text = "Elon Musk is the CEO of SpaceX."
doc = nlp(text)

# Print entities
for ent in doc.ents:
    print(ent.text, ent.label_)
