In [1]:
!pip install scikit-learn
!pip install -U spacy



In [3]:
# Imports

import spacy
from spacy import displacy
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd

In [4]:
!python -m spacy download en_core_web_md
nlp = spacy.load("en_core_web_md")

Collecting en-core-web-md==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.8.0/en_core_web_md-3.8.0-py3-none-any.whl (33.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m33.5/33.5 MB[0m [31m51.8 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_md')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [5]:
# Text

news_text = """
The Indian Space Research Organisation (ISRO) is set to launch the Chandrayaan-4 mission from Sriharikota in 2026.
Prime Minister Narendra Modi announced a budget of ₹2,000 crore for the project during a summit in New Delhi.
Meanwhile, Tata Consultancy Services and Infosys are expanding their AI research centers in Bengaluru and Hyderabad
to compete with global giants like Google and Microsoft.
"""

# Process the text
doc = nlp(news_text)

# Visualize the entities in the notebook
displacy.render(doc, style="ent", jupyter=True)

In [6]:
# Evaluation

expected_entities = [
    ("Indian Space Research Organisation", "ORG"),
    ("ISRO", "ORG"),
    ("Sriharikota", "GPE"),
    ("2026", "DATE"),
    ("Narendra Modi", "PERSON"),
    ("New Delhi", "GPE"),
    ("Tata Consultancy Services", "ORG"),
    ("Infosys", "ORG"),
    ("Bengaluru", "GPE"),
    ("Hyderabad", "GPE"),
    ("Google", "ORG"),
    ("Microsoft", "ORG")
]

# Extract Predicted Entities
predicted_entities = [(ent.text, ent.label_) for ent in doc.ents]

# Calculation Logic
y_true = [1] * len(expected_entities)
y_pred = []

for text, label in expected_entities:
    # Check if the expected entity exists in the predicted list
    if any(text in p_text and label == p_label for p_text, p_label in predicted_entities):
        y_pred.append(1)
    else:
        y_pred.append(0)

In [8]:
# Metrics

accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)

metrics_df = pd.DataFrame({
    "Metric": ["Accuracy", "Precision", "Recall", "F1-Score"],
    "Value": [f"{accuracy:.2%}", f"{precision:.2%}", f"{recall:.2%}", f"{f1:.2%}"]
})

print("\n--- NER System Performance Report ---")
print(metrics_df)


--- NER System Performance Report ---
      Metric    Value
0   Accuracy   75.00%
1  Precision  100.00%
2     Recall   75.00%
3   F1-Score   85.71%
