In [25]:
from transformers import pipeline

In [26]:
# Load a pre-trained model specifically fine-tuned for NER tasks.
# aggregation_strategy="simple" is what analyze_text relies on: it reads the
# 'entity_group' key of each result and expects whole names such as
# "Satya Nadella" rather than per-token pieces.
ner_pipeline = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", aggregation_strategy="simple")

Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [27]:
# a function to analyze the text for named entities
def analyze_text(text):
    """
    Find the person names mentioned in ``text`` and report them.

    The text is run through the notebook-level ``ner_pipeline``. Every result
    whose ``entity_group`` is ``'PER'`` is printed on its own line together
    with its confidence score; all other entity types are ignored.

    Parameters:
    text (str): The input text to analyze.

    Returns:
    dict: Maps each recognized person name to its confidence score. If the
    same name is reported more than once, the score of the last occurrence
    is kept. The dict is empty when no person is found.
    """
    entities = ner_pipeline(text)  # Expects aggregated results (with an 'entity_group' key)

    recognized_entities = {}  # person name -> confidence score

    for entity in entities:
        name = entity['word']  # The recognized entity (e.g., a person's name)
        label = entity['entity_group']  # The type of entity (e.g., 'PER' for person)
        score = entity['score']  # Confidence score for the recognized entity

        if label != 'PER':  # Only persons are reported
            continue

        recognized_entities[name] = score
        print(f"Entity: {name}, Label: {label}, Score: {score:.2f}")

    # Previously the collected dict was discarded; returning it lets callers
    # reuse the result instead of parsing the printed lines.
    return recognized_entities


In [28]:
# Example input mentioning companies, a place and one person;
# analyze_text only reports entities labeled 'PER'.
text = "Apple is looking at buying U.K. startup for $1 billion. The CEO of Microsoft, Satya Nadella, visited Apple."
analyze_text(text)  # Call the function with the example text

Entity: Satya Nadella, Label: PER, Score: 1.00


In [29]:
# Second example: a longer passage naming two people alongside film titles.
# Note that this rebinds the notebook-level name `text` used by the previous example.
text = "Martin Scorsese is known for directing classic films such as Goodfellas and The Irishman. His unique storytelling style and ability to portray complex characters have earned him numerous accolades, including Academy Awards. Quentin Tarantino, another acclaimed director, is famous for films like Pulp Fiction and Inglourious Basterds."
analyze_text(text)

Entity: Martin Scorsese, Label: PER, Score: 0.99
Entity: Quentin Tarantino, Label: PER, Score: 1.00


1. dbmdz/bert-large-cased-finetuned-conll03-english: Fine-tuned on the CoNLL-03 dataset.
2. dbmdz/bert-base-cased-finetuned-conll03-english: A smaller version of the above model.
3. huggingface/transformers: Not a model checkpoint but the library that provides the `pipeline` API used here; it supports multiple tasks, including NER.

In [32]:
from transformers import pipeline

# Load the pre-trained NER model
# NOTE(review): this rebinds the notebook-wide name `ner_pipeline`, and does so
# without aggregation_strategy. analyze_text (defined in an earlier cell) reads
# entity['entity_group'], while the results of this pipeline are read through
# entity['entity'] with per-token labels such as B-PER / I-PER. Re-running
# analyze_text after this cell will presumably fail with a KeyError — confirm,
# and consider a distinct variable name for this pipeline.
ner_pipeline = pipeline("ner", model="dslim/bert-base-NER")

def recognize_entities(text):
    """
    Function to recognize entities in the provided text.

    Runs the notebook-level ``ner_pipeline`` on ``text`` and flattens each
    result into a tuple. The label is taken from ``'entity_group'`` when the
    pipeline aggregates tokens into entities, and from ``'entity'`` otherwise,
    so the function works with either pipeline configuration.

    Parameters:
    text (str): The input text for entity recognition.

    Returns:
    list: A list of ``(word, label, score)`` tuples, one per pipeline result,
    in the order the pipeline produced them.

    Raises:
    KeyError: If a result lacks 'word', 'score', or both label keys.
    """
    entities = ner_pipeline(text)

    recognized = []
    for entity in entities:
        # Aggregating pipelines report the label under 'entity_group';
        # token-level pipelines report it under 'entity'.
        if 'entity_group' in entity:
            label = entity['entity_group']
        else:
            label = entity['entity']
        recognized.append((entity['word'], label, entity['score']))
    return recognized

# Example usage: run the recognizer on a short sentence.
text_input = "Barack Obama was born in Hawaii. He was elected president in 2008."
results = recognize_entities(text_input)  # list of (word, label, score) tuples

# Display results, one line per recognized entity; the score is shown with two decimals.
for entity, label, score in results:
    print(f"Entity: {entity}, Label: {label}, Score: {score:.2f}")


Some weights of the model checkpoint at dslim/bert-base-NER were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Entity: Barack, Label: B-PER, Score: 1.00
Entity: Obama, Label: I-PER, Score: 1.00
Entity: Hawaii, Label: B-LOC, Score: 1.00
