In [None]:
from transformers import BertTokenizer, BertForTokenClassification
from transformers import pipeline
import torch

In [None]:
# WordPiece tokenizer for cased BERT; it turns raw text into the token IDs the model consumes.
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

# `model` is referenced by get_named_entities() and by the pipeline cell, but the
# notebook never created it, so Restart & Run All failed with a NameError.
# Load a BERT token-classification checkpoint so the later cells have a model to use.
# NOTE(review): the original notebook did not name a checkpoint. This one is a
# cased BERT fine-tuned for English NER; swap in the intended model if different,
# and make sure its vocabulary matches the tokenizer above.
NER_CHECKPOINT = 'dbmdz/bert-large-cased-finetuned-conll03-english'
model = BertForTokenClassification.from_pretrained(NER_CHECKPOINT)

In [None]:
def get_named_entities(text):
    """
    Run token-level Named Entity Recognition (NER) on ``text``.

    Uses the notebook-level ``tokenizer`` and ``model`` objects, so both must be
    defined before this function is called.

    Args:
        text: Input string to tag. Input longer than the tokenizer's maximum
            length is truncated rather than crashing inside the model.

    Returns:
        A list of ``(token, label)`` tuples, one per token produced by the
        tokenizer, in order. Whatever tokens the tokenizer emits are returned
        as-is, so any special tokens it adds and any sub-word pieces appear in
        the list with their own predicted label.
    """
    # Tokenize into PyTorch tensors; truncate so over-long text cannot exceed
    # the model's maximum sequence length.
    inputs = tokenizer(text, return_tensors="pt", truncation=True)

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        logits = model(**inputs).logits

    # Highest-scoring class per token; [0] selects the single sequence in the batch.
    predicted_ids = logits.argmax(dim=-1)[0].tolist()

    # Translate class indices into human-readable label names.
    labels = [model.config.id2label[class_id] for class_id in predicted_ids]

    # Recover the token strings that correspond to each input ID.
    tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0].tolist())

    return list(zip(tokens, labels))

In [None]:
# Sample sentence passed to both get_named_entities() and the NER pipeline in later cells.
text = "John Doe works at OpenAI and lives in San Francisco."

In [None]:
# Tag the sample sentence with the hand-written NER helper.
entities = get_named_entities(text)

# Header first, then one "token: label" line per tagged token.
print("\nNamed Entities in the input text:")
for pair in entities:
    token, label = pair
    print(f"{token}: {label}")


In [None]:
# Wrap the same model and tokenizer in a ready-made "ner" pipeline.
ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer)

# Run the pipeline on the same sample text, then report every detected entity.
pipeline_results = ner_pipeline(text)
print("\nNER Pipeline Output:")
for entity in pipeline_results:
    print(f"{entity['word']} -> {entity['entity']} (Score: {entity['score']:.2f})")