# **NERC: Named Entity Recognition and Classification**

In [1]:
import warnings

import pandas as pd
import spacy
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder

# spaCy pipeline whose entity predictions (doc.ents) are evaluated in this notebook.
nlp = spacy.load('en_core_web_sm')

# Sentence-level table; the 'text' column is what gets fed to spaCy.
# NOTE(review): the dtype key is 'sentence id' (with a space) while the annotation
# table below uses 'sentence_id' -- confirm against the TSV header.
df_sentences = pd.read_csv(
    'sentiment-topic-test.tsv',
    dtype={'sentence id': str},
    sep='\t',
)

# Token-level annotations. The first line of the file is skipped and the four
# columns are named explicitly rather than taken from the file.
df_annotations = pd.read_csv(
    'NER-test.tsv',
    names=['sentence_id', 'token_id', 'token', 'BIO_NER_tag'],
    dtype={'sentence_id': str, 'token': str, 'BIO_NER_tag': str},
    header=None,
    skiprows=1,
    sep='\t',
)

def assign_bio_tags(doc):
    """Return one BIO tag per token of ``doc``, derived from ``doc.ents``.

    Tokens outside every entity are tagged ``'O'``. The first token of an
    entity is tagged ``'B-<label>'`` and each following token of that entity
    ``'I-<label>'``, where ``<label>`` is the entity's ``label_``.

    Args:
        doc: An object supporting ``len()`` and exposing ``ents``, an iterable
            of spans with ``start``, ``end`` (exclusive) and ``label_``.

    Returns:
        A list of tag strings with ``len(doc)`` elements.
    """
    tags = ['O' for _ in range(len(doc))]
    for entity in doc.ents:
        label = entity.label_
        first = entity.start
        # The opening token always gets the B- prefix.
        tags[first] = f"B-{label}"
        # Every remaining token of the span continues the entity.
        position = first + 1
        while position < entity.end:
            tags[position] = f"I-{label}"
            position += 1
    return tags

# Run each sentence through the spaCy pipeline and store its per-token BIO tags
# (one list per row) next to the original text.
df_sentences['spacy_bio_tags'] = df_sentences['text'].apply(
    lambda sentence: assign_bio_tags(nlp(sentence))
)



In [2]:
# Flatten the per-sentence spaCy tag lists into one token-level sequence and
# take the gold tags in file order, so both can be compared position by position.
predicted_tags_flat = [tag for sublist in df_sentences['spacy_bio_tags'] for tag in sublist]
annotated_tags_flat = df_annotations['BIO_NER_tag'].tolist()

# The comparison is purely positional. If spaCy tokenized the text differently
# from the annotation file, the sequences have different lengths and every tag
# after the first divergence may be compared against the wrong token. Truncation
# is kept so the cell still runs, but the mismatch is no longer silent.
if len(predicted_tags_flat) != len(annotated_tags_flat):
    warnings.warn(
        f"Token count mismatch: {len(predicted_tags_flat)} predicted tags vs "
        f"{len(annotated_tags_flat)} annotated tags. Both sequences are truncated "
        "to the shorter length; the scores below may be based on misaligned tokens."
    )

length = min(len(predicted_tags_flat), len(annotated_tags_flat))
predicted_tags_flat = predicted_tags_flat[:length]
annotated_tags_flat = annotated_tags_flat[:length]

# Fit the encoder on the union of both sequences so that every tag seen in
# either one has an integer id and a matching entry in le.classes_.
le = LabelEncoder()
le.fit(predicted_tags_flat + annotated_tags_flat)
encoded_predicted = le.transform(predicted_tags_flat)
encoded_annotated = le.transform(annotated_tags_flat)

# zero_division=0 reports 0.00 (instead of warning) for tags that have no
# predicted or no gold instances.
print(classification_report(encoded_annotated, encoded_predicted, target_names=le.classes_, zero_division=0))

               precision    recall  f1-score   support

       B-DATE       1.00      1.00      1.00         1
        B-GPE       0.00      0.00      0.00         0
        B-LAW       0.00      0.00      0.00         0
        B-ORG       1.00      0.67      0.80         3
     B-PERSON       1.00      1.00      1.00         6
B-WORK_OF_ART       1.00      0.50      0.67         4
       I-DATE       0.25      1.00      0.40         1
        I-LAW       0.00      0.00      0.00         0
        I-ORG       1.00      0.33      0.50         6
     I-PERSON       1.00      1.00      1.00         3
I-WORK_OF_ART       1.00      0.67      0.80         9
            O       0.96      0.98      0.97       160

     accuracy                           0.93       193
    macro avg       0.68      0.60      0.59       193
 weighted avg       0.96      0.93      0.94       193

