# In [1]:
import spacy
from spacy.language import Language
from spacy.pipeline import EntityRecognizer
from spacy.tokens import Doc, Span
from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline

# Directory holding the saved token-classification model and its tokenizer
model_name = "bert-large-mp-local"

# Restore the model and the matching tokenizer from that directory
model = AutoModelForTokenClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Build the Hugging Face NER pipeline; results are read downstream via the
# "start" / "end" / "entity_group" keys of each returned entity
ner_pipeline = pipeline(
    "ner",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
)

# Define a function to process text with the Hugging Face model
def hf_ner_pipe(doc: Doc) -> Doc:
    """Set ``doc.ents`` from the entities predicted by ``ner_pipeline``.

    The Hugging Face pipeline reports entities as character offsets into
    ``doc.text``. Those offsets are mapped onto spaCy tokens with
    ``Doc.char_span`` instead of counting Hugging Face sub-word tokens:
    the two tokenizers split text differently (and the Hugging Face encoding
    adds special tokens), so sub-word counts are not valid spaCy token indices.

    Args:
        doc: The spaCy document to annotate.

    Returns:
        The same ``doc`` with ``doc.ents`` replaced by the predicted entities.
    """
    spans = []
    for ent in ner_pipeline(doc.text):
        start_char, end_char = ent.get("start"), ent.get("end")
        # Offsets may be missing when the tokenizer cannot provide them;
        # without them the entity cannot be located in the text.
        if start_char is None or end_char is None:
            continue

        # "expand" snaps the character range outwards to whole spaCy tokens,
        # so an entity boundary that falls inside a token still yields a span.
        span = doc.char_span(
            start_char,
            end_char,
            label=ent["entity_group"],
            alignment_mode="expand",
        )
        if span is not None:
            spans.append(span)

    # doc.ents rejects overlapping spans; expansion to token boundaries can
    # make neighbouring entities overlap, so keep the longest non-overlapping.
    doc.ents = spacy.util.filter_spans(spans)
    return doc

# Create a blank spaCy pipeline
nlp = spacy.blank("en")

# Add the custom NER component.
# spaCy v3 only accepts the string name of a registered component in
# add_pipe (passing the callable raises an error), so register the function
# under "hf_ner" first and then add it by name.
Language.component("hf_ner", func=hf_ner_pipe)
nlp.add_pipe("hf_ner", first=True)

# Test it
text = "Elon Musk is the CEO of SpaceX."
doc = nlp(text)

# Print extracted entities
for ent in doc.ents:
    print(ent.text, ent.label_)
