In [36]:
import json
import spacy
import pandas as pd
from collections import Counter

In [37]:
# Load SpaCy model
nlp = spacy.load("en_core_web_sm")

In [38]:
# Define relevant entity tags
relevant_entities = ["PERSON", "NORP", "ORG", "GPE", "LOC", "DATE", "MONEY"]

In [40]:
### PART 1: COUNT TAGS IN MANUAL JSON FILE ###
manual_json_file = "annotations.json"  # Update with actual file path
with open(manual_json_file, "r", encoding="utf-8") as f:
    manual_data = json.load(f)

# Extract annotations from manual JSON
annotations = manual_data.get("annotations", [])
manual_entity_counter = Counter()
manual_entities = []  # List to store entities from manual annotations

# Count manual entities and store for comparison
for idx, (text, data) in enumerate(annotations):
    entities = data.get("entities", [])
    for start, end, label in entities:
        if label in relevant_entities:
            manual_entities.append((idx, start, end, label))
            manual_entity_counter[label] += 1

# Convert manual counts to DataFrame for display
manual_df = pd.DataFrame(manual_entity_counter.items(), columns=["Tag", "Count"])
manual_df = manual_df.sort_values(by="Tag")

print("\nManual Annotation Counts:")
print(manual_df)


Manual Annotation Counts:
      Tag  Count
0    DATE    140
2     GPE    200
6     LOC     64
3   MONEY     25
4    NORP     96
1     ORG    102
5  PERSON     96


In [41]:
### PART 2: SPACY ANNOTATION ###
tweets_text_file = "Sentences.txt"  # Update with actual file path
with open(tweets_text_file, "r", encoding="utf-8") as f:
    lines = f.readlines()[:300]  # Take only first 300 tweets

spacy_entity_counter = Counter()
spacy_entities = []  # List to store entities detected by SpaCy

# Collect SpaCy entities and count
for idx, sentence in enumerate(lines):
    doc = nlp(sentence.strip())  # Process with SpaCy
    for ent in doc.ents:
        if ent.label_ in relevant_entities:
            spacy_entities.append((idx, ent.start_char, ent.end_char, ent.label_))
            spacy_entity_counter[ent.label_] += 1

# Convert SpaCy counts to DataFrame for display
spacy_df = pd.DataFrame(spacy_entity_counter.items(), columns=["Tag", "Count"])
spacy_df = spacy_df.sort_values(by="Tag")

print("\nSpaCy Annotation Counts:")
print(spacy_df)


SpaCy Annotation Counts:
      Tag  Count
0    DATE    172
2     GPE    215
6     LOC     33
3   MONEY     17
4    NORP    105
1     ORG    113
5  PERSON     99


In [47]:
### PART 3: ALIGN AND EVALUATE ###
def match_entities(manual_entities, spacy_entities):
    tp = Counter()
    fp = Counter()
    fn = Counter()

    matched_spacy = set()

    # True Positives (TP) and False Negatives (FN)
    for m_idx, m_start, m_end, m_label in manual_entities:
        match_found = False
        for s_idx, s_start, s_end, s_label in spacy_entities:
            if (
                m_idx == s_idx
                and m_label == s_label
                and not (s_end <= m_start or s_start >= m_end)  # Overlapping entities
            ):
                tp[m_label] += 1
                matched_spacy.add((s_idx, s_start, s_end, s_label))
                match_found = True
                break
        if not match_found:
            fn[m_label] += 1

    # False Positives (FP)
    for s_idx, s_start, s_end, s_label in spacy_entities:
        if (s_idx, s_start, s_end, s_label) not in matched_spacy:
            fp[s_label] += 1

    return tp, fp, fn

# Compute metrics
tp, fp, fn = match_entities(manual_entities, spacy_entities)

### PART 4: GENERATE CLASSIFICATION REPORT ###
evaluation_data = []
for tag in relevant_entities:
    # Calculate Precision, Recall, and F1 Score
    precision = tp[tag] / (tp[tag] + fp[tag]) if (tp[tag] + fp[tag]) > 0 else 0
    recall = tp[tag] / (tp[tag] + fn[tag]) if (tp[tag] + fn[tag]) > 0 else 0
    f1_score = (
        2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0
    )

    # Add data for each tag
    evaluation_data.append({
        "Tag": tag,
        "TP": tp[tag],
        "FP": fp[tag],
        "FN": fn[tag],
        "Precision": round(precision, 3),
        "Recall": round(recall, 3),
        "F1 Score": round(f1_score, 3),
    })

# Convert evaluation results to DataFrame
evaluation_df = pd.DataFrame(evaluation_data)

print("\nClassification Report:")
print(evaluation_df)


Classification Report:
      Tag   TP  FP  FN  Precision  Recall  F1 Score
0  PERSON   81  18  15      0.818   0.844     0.831
1    NORP   85  20  11      0.810   0.885     0.846
2     ORG   81  32  21      0.717   0.794     0.753
3     GPE  177  38  23      0.823   0.885     0.853
4     LOC   26   7  38      0.788   0.406     0.536
5    DATE  137  36   3      0.792   0.979     0.875
6   MONEY   16   1   9      0.941   0.640     0.762


<h3>Performance analysis of SpaCy’s NER model:</h3><ol>
    <li> Precision: A Precision score is high, it indicates that SpaCy’s model predicts few false positives.
What if Precision is low, it means SpaCy is incorrectly labeling entities where there are none.
“MONEY” tag has high Precision. So, there are very less False Positives predicted by SpaCy.
</li>
    <li>Recall: A high recall score indicates SpaCy was able to correctly identify most of entities
present in the manually annotated data. A low recall indicates that SpaCy missed many entities.
“LOC” tag has low Recall. So, the SpaCy did not predict many LOC entities.
</li>
<li>F1 Score: It reflects how well SpaCy’s model is performing in terms of both recognizing
entities correctly and covering all of them.

</li>
    
</ol>

<h4>My SpaCy model well performed on annotating “DATE” tag which has high F1 Score.</h4>

F1 Score.