# Named Entity Recognition (NER) System

This notebook builds a Named Entity Recognition (NER) system using spaCy and evaluates it using Accuracy, Precision, Recall, and F1-score.

In [1]:

# Install required libraries (run once)
!pip install spacy scikit-learn pandas
!python -m spacy download en_core_web_sm


Collecting spacy
  Downloading spacy-3.8.11-cp312-cp312-win_amd64.whl.metadata (28 kB)
Collecting spacy-legacy<3.1.0,>=3.0.11 (from spacy)
  Downloading spacy_legacy-3.0.12-py2.py3-none-any.whl.metadata (2.8 kB)
Collecting spacy-loggers<2.0.0,>=1.0.0 (from spacy)
  Downloading spacy_loggers-1.0.5-py3-none-any.whl.metadata (23 kB)
Collecting murmurhash<1.1.0,>=0.28.0 (from spacy)
  Downloading murmurhash-1.0.15-cp312-cp312-win_amd64.whl.metadata (2.3 kB)
Collecting cymem<2.1.0,>=2.0.2 (from spacy)
  Downloading cymem-2.0.13-cp312-cp312-win_amd64.whl.metadata (9.9 kB)
Collecting preshed<3.1.0,>=3.0.2 (from spacy)
  Downloading preshed-3.0.12-cp312-cp312-win_amd64.whl.metadata (2.6 kB)
Collecting thinc<8.4.0,>=8.3.4 (from spacy)
  Downloading thinc-8.3.10-cp312-cp312-win_amd64.whl.metadata (15 kB)
Collecting wasabi<1.2.0,>=0.9.1 (from spacy)
  Downloading wasabi-1.1.3-py3-none-any.whl.metadata (28 kB)
Collecting srsly<3.0.0,>=2.4.3 (from spacy)
  Downloading srsly-2.5.2-cp312-cp312-win_am


[notice] A new release of pip is available: 25.2 -> 26.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip



   ------------------------------------- -- 15/16 [spacy]
   ------------------------------------- -- 15/16 [spacy]
   ------------------------------------- -- 15/16 [spacy]
   ------------------------------------- -- 15/16 [spacy]
   ------------------------------------- -- 15/16 [spacy]
   ------------------------------------- -- 15/16 [spacy]
   ------------------------------------- -- 15/16 [spacy]
   ------------------------------------- -- 15/16 [spacy]
   ------------------------------------- -- 15/16 [spacy]
   ------------------------------------- -- 15/16 [spacy]
   ------------------------------------- -- 15/16 [spacy]
   ------------------------------------- -- 15/16 [spacy]
   ------------------------------------- -- 15/16 [spacy]
   ------------------------------------- -- 15/16 [spacy]
   ------------------------------------- -- 15/16 [spacy]
   ------------------------------------- -- 15/16 [spacy]
   ------------------------------------- -- 15/16 [spacy]
   ----------


[notice] A new release of pip is available: 25.2 -> 26.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:

import spacy
import pandas as pd
from sklearn.metrics import classification_report, accuracy_score, precision_recall_fscore_support


In [3]:

# Load spaCy NER model
nlp = spacy.load("en_core_web_sm")


In [4]:

# Real-world text samples (news & social media)
texts = [
    "Elon Musk is the CEO of Tesla and SpaceX.",
    "Google was founded in California.",
    "Narendra Modi visited the United States.",
    "Apple launched the new iPhone in India.",
    "Microsoft acquired LinkedIn for $26 billion."
]


In [5]:

# Ground truth entities (manually labeled for evaluation)
true_entities = [
    [("Elon Musk", "PERSON"), ("Tesla", "ORG"), ("SpaceX", "ORG")],
    [("Google", "ORG"), ("California", "GPE")],
    [("Narendra Modi", "PERSON"), ("United States", "GPE")],
    [("Apple", "ORG"), ("India", "GPE")],
    [("Microsoft", "ORG"), ("LinkedIn", "ORG")]
]


In [6]:

# Function to extract entities using spaCy
def extract_entities(text):
    doc = nlp(text)
    return [(ent.text, ent.label_) for ent in doc.ents]


In [7]:

# Generate predictions
predicted_entities = [extract_entities(text) for text in texts]

for t, p in zip(texts, predicted_entities):
    print("\nText:", t)
    print("Predicted Entities:", p)



Text: Elon Musk is the CEO of Tesla and SpaceX.
Predicted Entities: [('Elon Musk', 'PERSON'), ('Tesla', 'ORG')]

Text: Google was founded in California.
Predicted Entities: [('Google', 'ORG'), ('California', 'GPE')]

Text: Narendra Modi visited the United States.
Predicted Entities: [('Narendra Modi', 'PERSON'), ('the United States', 'GPE')]

Text: Apple launched the new iPhone in India.
Predicted Entities: [('Apple', 'ORG'), ('India', 'GPE')]

Text: Microsoft acquired LinkedIn for $26 billion.
Predicted Entities: [('Microsoft', 'ORG'), ('LinkedIn', 'ORG'), ('$26 billion', 'MONEY')]


In [8]:

# Prepare labels for evaluation
true_labels = []
pred_labels = []

for true, pred in zip(true_entities, predicted_entities):
    true_dict = dict(true)
    pred_dict = dict(pred)
    
    for ent, label in true_dict.items():
        true_labels.append(label)
        pred_labels.append(pred_dict.get(ent, "O"))  # O = Not detected


In [9]:

# Evaluation metrics
accuracy = accuracy_score(true_labels, pred_labels)

precision, recall, f1, _ = precision_recall_fscore_support(
    true_labels, pred_labels, average='weighted'
)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)


Accuracy: 0.8181818181818182
Precision: 1.0
Recall: 0.8181818181818182
F1-score: 0.8958677685950414


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [10]:

# Detailed classification report
print(classification_report(true_labels, pred_labels))


              precision    recall  f1-score   support

         GPE       1.00      0.67      0.80         3
           O       0.00      0.00      0.00         0
         ORG       1.00      0.83      0.91         6
      PERSON       1.00      1.00      1.00         2

    accuracy                           0.82        11
   macro avg       0.75      0.62      0.68        11
weighted avg       1.00      0.82      0.90        11



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
