In [1]:
import pandas as pd
from simpletransformers.ner import NERModel



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
file_path = 'NER-test.tsv'
df = pd.read_csv(file_path, sep='\t', quoting=3)

In [18]:
sentences_df = df.groupby('sentence_id')['token'].apply(list).reset_index()
test_sentences = [" ".join(tokens) for tokens in sentences_df['token']]

for i, sentence in enumerate(test_sentences):
    print(f"{i}: {sentence}")

0: If you're visiting Paris , make sure to see the Louvre , as they exhibit the Mona Lisa !
1: Amazon , Google and Meta control a huge share of the technology market globally .
2: Did you hear Pharoah Sanders recorded an album with Floating Points ?
3: Madvillainy is still my favourite MF DOOM record .
4: My friend Kevin just finished watching Succession , and won't stop talking about Kieran Culkin 's performance .
5: Venus Williams has always been overshadowed by her sister .
6: Since Queen Elizabeth died , King Charles has been the head of the British Royal Family .
7: I stayed up all night playing Dark Souls again .
8: Speaking of great movies - do you remember Once Upon a Time in America ?
9: Michael Jordan is considered one of the best players in the history of the NBA .
10: They used to call it New Amsterdam before they called it New York .
11: MMA is not my favourite , but seeing John Cena is not something you get to do every day .
12: OK Computer is supposed to be one of the de

In [4]:
englishmodel = NERModel(
    model_type="bert",
    model_name="dslim/bert-base-NER",
    use_cuda=False
)

predictions, raw_outputs = englishmodel.predict(test_sentences)
predictions

Some weights of the model checkpoint at dslim/bert-base-NER were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 1/1 [00:05<00:00,  5.29s/it]
Running Prediction: 100%|██████████| 1/1 [00:02<00:00,  2.41s/it]


[[{'If': 'O'},
  {"you're": 'O'},
  {'visiting': 'O'},
  {'Paris': 'B-LOC'},
  {',': 'O'},
  {'make': 'O'},
  {'sure': 'O'},
  {'to': 'O'},
  {'see': 'O'},
  {'the': 'O'},
  {'Louvre': 'B-ORG'},
  {',': 'O'},
  {'as': 'O'},
  {'they': 'O'},
  {'exhibit': 'O'},
  {'the': 'O'},
  {'Mona': 'B-MISC'},
  {'Lisa': 'I-MISC'},
  {'!': 'O'}],
 [{'Amazon': 'B-ORG'},
  {',': 'O'},
  {'Google': 'B-ORG'},
  {'and': 'O'},
  {'Meta': 'B-ORG'},
  {'control': 'O'},
  {'a': 'O'},
  {'huge': 'O'},
  {'share': 'O'},
  {'of': 'O'},
  {'the': 'O'},
  {'technology': 'O'},
  {'market': 'O'},
  {'globally': 'O'},
  {'.': 'O'}],
 [{'Did': 'O'},
  {'you': 'O'},
  {'hear': 'O'},
  {'Pharoah': 'B-PER'},
  {'Sanders': 'I-PER'},
  {'recorded': 'O'},
  {'an': 'O'},
  {'album': 'O'},
  {'with': 'O'},
  {'Floating': 'B-ORG'},
  {'Points': 'I-ORG'},
  {'?': 'O'}],
 [{'Madvillainy': 'O'},
  {'is': 'O'},
  {'still': 'O'},
  {'my': 'O'},
  {'favourite': 'O'},
  {'MF': 'O'},
  {'DOOM': 'O'},
  {'record': 'O'},
  {'.': 'O'}]

In [5]:

from sklearn.metrics import classification_report
import numpy as np

y_pred_flat = []
for sentence_predictions in predictions:
    for token_prediction_dict in sentence_predictions:
        y_pred_flat.append(list(token_prediction_dict.values())[0])

print(f"Number of predicted tages: {len(y_pred_flat)}")

file_path = 'NER-test.tsv'
df_true = pd.read_csv(file_path, sep='\t', quoting=3)


tag_mapping = {}

tag_mapping = {
    'O': 'O',
    'B-LOCATION': 'B-LOC',
    'I-LOCATION': 'I-LOC',
    'B-ORG': 'B-ORG',
    'I-ORG': 'I-ORG',
    'B-WORK_OF_ART': 'B-WORK_OF_ART',
    'I-WORK_OF_ART': 'I-WORK_OF_ART',
    'B-PERSON': 'B-PER',
    'I-PERSON': 'I-PER'
}


df_true['mapped_BIO_NER_tag'] = df_true['BIO_NER_tag'].replace(tag_mapping)
y_true_flat=df_true['mapped_BIO_NER_tag'].tolist()

if len(y_true_flat) == len(y_pred_flat):
        all_labels = sorted(list(set(y_true_flat + y_pred_flat)))
        
        report = classification_report(
            y_true_flat,
            y_pred_flat,
            labels=all_labels,
            digits=2,
            zero_division=0
        )
        print(report)

else:
    print("\nError: Length mismatch between true and predicted labels.")

Number of predicted tages: 216
               precision    recall  f1-score   support

        B-LOC       0.75      1.00      0.86         3
       B-MISC       0.00      0.00      0.00         0
        B-ORG       0.75      0.75      0.75         8
        B-PER       0.80      0.67      0.73        12
B-WORK_OF_ART       0.00      0.00      0.00         6
        I-LOC       1.00      1.00      1.00         2
       I-MISC       0.00      0.00      0.00         0
        I-ORG       0.60      0.60      0.60         5
        I-PER       1.00      0.69      0.82        13
I-WORK_OF_ART       0.00      0.00      0.00         8
            O       0.96      0.99      0.98       159

     accuracy                           0.88       216
    macro avg       0.53      0.52      0.52       216
 weighted avg       0.88      0.88      0.87       216



In [6]:
token_column_in_df_true = 'token'
tokens = df_true[token_column_in_df_true].tolist()

print ("\n" + "="*80)
print("token-by-token check")
print("="*80)
print(f"{'Token':<25} | {'True Tag':<15} | {'Predicted Tag':<15} | Check")
print("-"*80)

errors = 0
for i in range(len(y_true_flat)):
    token = str(tokens[i])
    true_tag = str(y_true_flat[i])
    pred_tag = str(y_pred_flat[i])
    
    if true_tag == pred_tag:
        status = "ok"
    else:
        status = "not ok"
        errors += 1
    print(f"{token:<25} | {true_tag:<15} | {pred_tag:<15} | {status}")
    
print(f"number of errors: {errors}")


token-by-token check
Token                     | True Tag        | Predicted Tag   | Check
--------------------------------------------------------------------------------
If                        | O               | O               | ok
you're                    | O               | O               | ok
visiting                  | O               | O               | ok
Paris                     | B-LOC           | B-LOC           | ok
,                         | O               | O               | ok
make                      | O               | O               | ok
sure                      | O               | O               | ok
to                        | O               | O               | ok
see                       | O               | O               | ok
the                       | O               | O               | ok
Louvre                    | B-ORG           | B-ORG           | ok
,                         | O               | O               | ok
as                     

In [None]:
#The following cells contain the code run to try other transformer models for the NERC technique. Due to their relatively underwhelming
#performance, they were not considered for the purpose of final results analysis (only the bert-base-NER model, whose results are in the
# code cells above, was considered for final results analysis).

In [7]:
#Configuration of the RobertaForTokenClassification model with a classification head suitable for 37 labels for models trained on ontonotes.

ontonotes_labels = [
    "O",
    "B-CARDINAL", "I-CARDINAL", "B-DATE", "I-DATE", "B-EVENT", "I-EVENT",
    "B-FAC", "I-FAC", "B-GPE", "I-GPE", "B-LANGUAGE", "I-LANGUAGE",
    "B-LAW", "I-LAW", "B-LOC", "I-LOC", "B-MONEY", "I-MONEY",
    "B-NORP", "I-NORP", "B-ORDINAL", "I-ORDINAL", "B-ORG", "I-ORG",
    "B-PERCENT", "I-PERCENT", "B-PERSON", "I-PERSON", "B-PRODUCT", "I-PRODUCT",
    "B-QUANTITY", "I-QUANTITY", "B-TIME", "I-TIME", "B-WORK_OF_ART", "I-WORK_OF_ART"
]

print(f"Number of labels defined: {len(ontonotes_labels)}")

Number of labels defined: 37


In [8]:
englishmodel_3 = NERModel(
    model_type="roberta",
    model_name="guishe/nuner-v1_ontonotes5",
    labels=ontonotes_labels,
    use_cuda=False
)

predictions, raw_outputs = englishmodel_3.predict(test_sentences)
predictions    

100%|██████████| 1/1 [00:05<00:00,  5.44s/it]
Running Prediction: 100%|██████████| 1/1 [00:02<00:00,  2.37s/it]


[[{'If': 'O'},
  {"you're": 'O'},
  {'visiting': 'O'},
  {'Paris': 'B-FAC'},
  {',': 'O'},
  {'make': 'O'},
  {'sure': 'O'},
  {'to': 'O'},
  {'see': 'O'},
  {'the': 'I-NORP'},
  {'Louvre': 'I-QUANTITY'},
  {',': 'O'},
  {'as': 'O'},
  {'they': 'O'},
  {'exhibit': 'O'},
  {'the': 'I-MONEY'},
  {'Mona': 'B-NORP'},
  {'Lisa': 'B-NORP'},
  {'!': 'O'}],
 [{'Amazon': 'B-LANGUAGE'},
  {',': 'O'},
  {'Google': 'B-LANGUAGE'},
  {'and': 'O'},
  {'Meta': 'B-LANGUAGE'},
  {'control': 'O'},
  {'a': 'O'},
  {'huge': 'O'},
  {'share': 'O'},
  {'of': 'O'},
  {'the': 'O'},
  {'technology': 'O'},
  {'market': 'O'},
  {'globally': 'O'},
  {'.': 'O'}],
 [{'Did': 'O'},
  {'you': 'O'},
  {'hear': 'O'},
  {'Pharoah': 'I-DATE'},
  {'Sanders': 'B-EVENT'},
  {'recorded': 'O'},
  {'an': 'O'},
  {'album': 'O'},
  {'with': 'O'},
  {'Floating': 'I-MONEY'},
  {'Points': 'B-NORP'},
  {'?': 'O'}],
 [{'Madvillainy': 'I-MONEY'},
  {'is': 'O'},
  {'still': 'O'},
  {'my': 'O'},
  {'favourite': 'O'},
  {'MF': 'B-LANGUAGE'

In [9]:
from sklearn.metrics import classification_report
import numpy as np

y_pred_flat = []
for sentence_predictions in predictions:
    for token_prediction_dict in sentence_predictions:
        y_pred_flat.append(list(token_prediction_dict.values())[0])

print(f"Number of predicted tages: {len(y_pred_flat)}")

file_path = 'NER-test.tsv'
df_true = pd.read_csv(file_path, sep='\t', quoting=3)

tag_mapping = {}

tag_mapping = {
    'O': 'O',
    'B-LOCATION': 'B-LOC',
    'I-LOCATION': 'I-LOC',
    'B-ORG': 'B-ORG',
    'I-ORG': 'I-ORG',
    'B-WORK_OF_ART': 'B-WORK_OF_ART',
    'I-WORK_OF_ART': 'I-WORK_OF_ART',
    'B-PERSON': 'B-PERSON',
    'I-PERSON': 'I-PERSON'
}

df_true['mapped_BIO_NER_tag'] = df_true['BIO_NER_tag'].replace(tag_mapping)
y_true_flat=df_true['mapped_BIO_NER_tag'].tolist()

if len(y_true_flat) == len(y_pred_flat):
        all_labels = sorted(list(set(y_true_flat + y_pred_flat)))
        
        report = classification_report(
            y_true_flat,
            y_pred_flat,
            labels=all_labels,
            digits=2,
            zero_division=0
        )
        print(report)

else:
    print("\nError: Length mismatch between true and predicted labels.")

Number of predicted tages: 216
               precision    recall  f1-score   support

   B-CARDINAL       0.00      0.00      0.00         0
       B-DATE       0.00      0.00      0.00         0
      B-EVENT       0.00      0.00      0.00         0
        B-FAC       0.00      0.00      0.00         0
   B-LANGUAGE       0.00      0.00      0.00         0
        B-LOC       0.00      0.00      0.00         3
       B-NORP       0.00      0.00      0.00         0
    B-ORDINAL       0.00      0.00      0.00         0
        B-ORG       0.00      0.00      0.00         8
     B-PERSON       0.00      0.00      0.00        12
    B-PRODUCT       0.00      0.00      0.00         0
   B-QUANTITY       0.00      0.00      0.00         0
B-WORK_OF_ART       0.00      0.00      0.00         6
   I-CARDINAL       0.00      0.00      0.00         0
       I-DATE       0.00      0.00      0.00         0
      I-EVENT       0.00      0.00      0.00         0
        I-FAC       0.00      0.0

In [10]:
englishmodel_4 = NERModel(
    model_type="bert",
    model_name="arnabdhar/bert-tiny-ontonotes",
    labels=ontonotes_labels,
    use_cuda=False
)

predictions, raw_outputs = englishmodel_4.predict(test_sentences)
predictions

100%|██████████| 1/1 [00:05<00:00,  5.17s/it]
Running Prediction: 100%|██████████| 1/1 [00:00<00:00, 20.07it/s]


[[{'If': 'O'},
  {"you're": 'O'},
  {'visiting': 'O'},
  {'Paris': 'O'},
  {',': 'O'},
  {'make': 'O'},
  {'sure': 'O'},
  {'to': 'O'},
  {'see': 'O'},
  {'the': 'O'},
  {'Louvre': 'O'},
  {',': 'O'},
  {'as': 'O'},
  {'they': 'O'},
  {'exhibit': 'O'},
  {'the': 'O'},
  {'Mona': 'O'},
  {'Lisa': 'O'},
  {'!': 'O'}],
 [{'Amazon': 'O'},
  {',': 'O'},
  {'Google': 'O'},
  {'and': 'O'},
  {'Meta': 'O'},
  {'control': 'O'},
  {'a': 'O'},
  {'huge': 'O'},
  {'share': 'O'},
  {'of': 'O'},
  {'the': 'O'},
  {'technology': 'O'},
  {'market': 'O'},
  {'globally': 'O'},
  {'.': 'O'}],
 [{'Did': 'O'},
  {'you': 'O'},
  {'hear': 'O'},
  {'Pharoah': 'O'},
  {'Sanders': 'O'},
  {'recorded': 'O'},
  {'an': 'O'},
  {'album': 'O'},
  {'with': 'O'},
  {'Floating': 'O'},
  {'Points': 'O'},
  {'?': 'O'}],
 [{'Madvillainy': 'O'},
  {'is': 'O'},
  {'still': 'O'},
  {'my': 'O'},
  {'favourite': 'O'},
  {'MF': 'O'},
  {'DOOM': 'O'},
  {'record': 'O'},
  {'.': 'O'}],
 [{'My': 'O'},
  {'friend': 'O'},
  {'Kevin'

In [11]:
from sklearn.metrics import classification_report
import numpy as np

y_pred_flat = []
for sentence_predictions in predictions:
    for token_prediction_dict in sentence_predictions:
        y_pred_flat.append(list(token_prediction_dict.values())[0])

print(f"Number of predicted tages: {len(y_pred_flat)}")

file_path = 'NER-test.tsv'
df_true = pd.read_csv(file_path, sep='\t', quoting=3)

tag_mapping = {}

tag_mapping = {
    'O': 'O',
    'B-LOCATION': 'B-LOC',
    'I-LOCATION': 'I-LOC',
    'B-ORG': 'B-ORG',
    'I-ORG': 'I-ORG',
    'B-WORK_OF_ART': 'B-WORK_OF_ART',
    'I-WORK_OF_ART': 'I-WORK_OF_ART',
    'B-PERSON': 'B-PERSON',
    'I-PERSON': 'I-PERSON'
}

df_true['mapped_BIO_NER_tag'] = df_true['BIO_NER_tag'].replace(tag_mapping)
y_true_flat=df_true['mapped_BIO_NER_tag'].tolist()

if len(y_true_flat) == len(y_pred_flat):
        all_labels = sorted(list(set(y_true_flat + y_pred_flat)))
        
        report = classification_report(
            y_true_flat,
            y_pred_flat,
            labels=all_labels,
            digits=2,
            zero_division=0
        )
        print(report)

else:
    print("\nError: Length mismatch between true and predicted labels.")

Number of predicted tages: 216
               precision    recall  f1-score   support

   B-CARDINAL       0.00      0.00      0.00         0
      B-EVENT       0.00      0.00      0.00         0
        B-LOC       0.00      0.00      0.00         3
        B-ORG       0.00      0.00      0.00         8
     B-PERSON       0.00      0.00      0.00        12
B-WORK_OF_ART       0.00      0.00      0.00         6
   I-CARDINAL       0.00      0.00      0.00         0
       I-DATE       0.00      0.00      0.00         0
        I-LOC       0.00      0.00      0.00         2
        I-ORG       0.00      0.00      0.00         5
     I-PERSON       0.00      0.00      0.00        13
I-WORK_OF_ART       0.00      0.00      0.00         8
            O       0.75      0.97      0.84       159

     accuracy                           0.72       216
    macro avg       0.06      0.07      0.06       216
 weighted avg       0.55      0.72      0.62       216



In [12]:
englishmodel_5 = NERModel(
    model_type="roberta",
    model_name="tner/roberta-large-ontonotes5",
    labels=ontonotes_labels,
    use_cuda=False
)

predictions, raw_outputs = englishmodel_5.predict(test_sentences)
predictions

100%|██████████| 1/1 [00:05<00:00,  5.56s/it]
Running Prediction: 100%|██████████| 1/1 [00:03<00:00,  3.93s/it]


[[{'If': 'O'},
  {"you're": 'O'},
  {'visiting': 'O'},
  {'Paris': 'B-FAC'},
  {',': 'O'},
  {'make': 'O'},
  {'sure': 'O'},
  {'to': 'O'},
  {'see': 'O'},
  {'the': 'O'},
  {'Louvre': 'I-NORP'},
  {',': 'O'},
  {'as': 'O'},
  {'they': 'O'},
  {'exhibit': 'O'},
  {'the': 'O'},
  {'Mona': 'O'},
  {'Lisa': 'O'},
  {'!': 'O'}],
 [{'Amazon': 'B-LANGUAGE'},
  {',': 'O'},
  {'Google': 'B-LANGUAGE'},
  {'and': 'O'},
  {'Meta': 'B-LANGUAGE'},
  {'control': 'O'},
  {'a': 'O'},
  {'huge': 'O'},
  {'share': 'O'},
  {'of': 'O'},
  {'the': 'O'},
  {'technology': 'O'},
  {'market': 'O'},
  {'globally': 'O'},
  {'.': 'O'}],
 [{'Did': 'O'},
  {'you': 'O'},
  {'hear': 'O'},
  {'Pharoah': 'O'},
  {'Sanders': 'O'},
  {'recorded': 'O'},
  {'an': 'O'},
  {'album': 'O'},
  {'with': 'O'},
  {'Floating': 'O'},
  {'Points': 'O'},
  {'?': 'O'}],
 [{'Madvillainy': 'O'},
  {'is': 'O'},
  {'still': 'O'},
  {'my': 'O'},
  {'favourite': 'O'},
  {'MF': 'O'},
  {'DOOM': 'O'},
  {'record': 'O'},
  {'.': 'O'}],
 [{'My':

In [13]:
from sklearn.metrics import classification_report
import numpy as np

y_pred_flat = []
for sentence_predictions in predictions:
    for token_prediction_dict in sentence_predictions:
        y_pred_flat.append(list(token_prediction_dict.values())[0])

print(f"Number of predicted tages: {len(y_pred_flat)}")

file_path = 'NER-test.tsv'
df_true = pd.read_csv(file_path, sep='\t', quoting=3)

tag_mapping = {}

tag_mapping = {
    'O': 'O',
    'B-LOCATION': 'B-LOC',
    'I-LOCATION': 'I-LOC',
    'B-ORG': 'B-ORG',
    'I-ORG': 'I-ORG',
    'B-WORK_OF_ART': 'B-WORK_OF_ART',
    'I-WORK_OF_ART': 'I-WORK_OF_ART',
    'B-PERSON': 'B-PERSON',
    'I-PERSON': 'I-PERSON'
}

df_true['mapped_BIO_NER_tag'] = df_true['BIO_NER_tag'].replace(tag_mapping)
y_true_flat=df_true['mapped_BIO_NER_tag'].tolist()

if len(y_true_flat) == len(y_pred_flat):
        all_labels = sorted(list(set(y_true_flat + y_pred_flat)))
        
        report = classification_report(
            y_true_flat,
            y_pred_flat,
            labels=all_labels,
            digits=2,
            zero_division=0
        )
        print(report)

else:
    print("\nError: Length mismatch between true and predicted labels.")

Number of predicted tages: 216
               precision    recall  f1-score   support

      B-EVENT       0.00      0.00      0.00         0
        B-FAC       0.00      0.00      0.00         0
   B-LANGUAGE       0.00      0.00      0.00         0
        B-LOC       0.00      0.00      0.00         3
       B-NORP       0.00      0.00      0.00         0
        B-ORG       0.00      0.00      0.00         8
     B-PERSON       0.00      0.00      0.00        12
   B-QUANTITY       0.00      0.00      0.00         0
B-WORK_OF_ART       0.00      0.00      0.00         6
       I-DATE       0.00      0.00      0.00         0
        I-FAC       0.00      0.00      0.00         0
   I-LANGUAGE       0.00      0.00      0.00         0
        I-LOC       0.00      0.00      0.00         2
      I-MONEY       0.00      0.00      0.00         0
       I-NORP       0.00      0.00      0.00         0
        I-ORG       0.00      0.00      0.00         5
     I-PERSON       0.00      0.0

In [14]:
englishmodel_6 = NERModel(
    model_type="bert",
    model_name="dbmdz/bert-large-cased-finetuned-conll03-english",
    use_cuda=False
)

predictions, raw_outputs = englishmodel_6.predict(test_sentences)
predictions

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 1/1 [00:05<00:00,  5.38s/it]
Running Prediction: 100%|██████

[[{'If': 'O'},
  {"you're": 'O'},
  {'visiting': 'O'},
  {'Paris': 'I-LOC'},
  {',': 'O'},
  {'make': 'O'},
  {'sure': 'O'},
  {'to': 'O'},
  {'see': 'O'},
  {'the': 'O'},
  {'Louvre': 'I-ORG'},
  {',': 'O'},
  {'as': 'O'},
  {'they': 'O'},
  {'exhibit': 'O'},
  {'the': 'O'},
  {'Mona': 'I-MISC'},
  {'Lisa': 'I-MISC'},
  {'!': 'O'}],
 [{'Amazon': 'I-ORG'},
  {',': 'O'},
  {'Google': 'I-ORG'},
  {'and': 'O'},
  {'Meta': 'I-ORG'},
  {'control': 'O'},
  {'a': 'O'},
  {'huge': 'O'},
  {'share': 'O'},
  {'of': 'O'},
  {'the': 'O'},
  {'technology': 'O'},
  {'market': 'O'},
  {'globally': 'O'},
  {'.': 'O'}],
 [{'Did': 'O'},
  {'you': 'O'},
  {'hear': 'O'},
  {'Pharoah': 'I-PER'},
  {'Sanders': 'I-PER'},
  {'recorded': 'O'},
  {'an': 'O'},
  {'album': 'O'},
  {'with': 'O'},
  {'Floating': 'I-ORG'},
  {'Points': 'I-ORG'},
  {'?': 'O'}],
 [{'Madvillainy': 'I-MISC'},
  {'is': 'O'},
  {'still': 'O'},
  {'my': 'O'},
  {'favourite': 'O'},
  {'MF': 'I-ORG'},
  {'DOOM': 'I-ORG'},
  {'record': 'O'},


In [15]:
from sklearn.metrics import classification_report
import numpy as np

y_pred_flat = []
for sentence_predictions in predictions:
    for token_prediction_dict in sentence_predictions:
        y_pred_flat.append(list(token_prediction_dict.values())[0])

print(f"Number of predicted tages: {len(y_pred_flat)}")

file_path = 'NER-test.tsv'
df_true = pd.read_csv(file_path, sep='\t', quoting=3)

df_true['mapped_BIO_NER_tag'] = df_true['BIO_NER_tag']
y_true_flat=df_true['mapped_BIO_NER_tag'].tolist()

if len(y_true_flat) == len(y_pred_flat):
        all_labels = sorted(list(set(y_true_flat + y_pred_flat)))
        
        report = classification_report(
            y_true_flat,
            y_pred_flat,
            labels=all_labels,
            digits=2,
            zero_division=0
        )
        print(report)

else:
    print("\nError: Length mismatch between true and predicted labels.")

Number of predicted tages: 216
               precision    recall  f1-score   support

   B-LOCATION       0.00      0.00      0.00         3
        B-ORG       0.00      0.00      0.00         8
     B-PERSON       0.00      0.00      0.00        12
B-WORK_OF_ART       0.00      0.00      0.00         6
        I-LOC       0.00      0.00      0.00         0
   I-LOCATION       0.00      0.00      0.00         2
       I-MISC       0.00      0.00      0.00         0
        I-ORG       0.17      0.40      0.24         5
        I-PER       0.00      0.00      0.00         0
     I-PERSON       0.00      0.00      0.00        13
I-WORK_OF_ART       0.00      0.00      0.00         8
            O       0.98      0.99      0.99       159

     accuracy                           0.74       216
    macro avg       0.10      0.12      0.10       216
 weighted avg       0.73      0.74      0.73       216



In [16]:
englishmodel_7 = NERModel(
    model_type="distilbert",
    model_name="cgensheimer/claims-data-model",
    labels=ontonotes_labels,
    use_cuda=False
)

predictions, raw_outputs = englishmodel_7.predict(test_sentences)
predictions

from sklearn.metrics import classification_report
import numpy as np

y_pred_flat = []
for sentence_predictions in predictions:
    for token_prediction_dict in sentence_predictions:
        y_pred_flat.append(list(token_prediction_dict.values())[0])

print(f"Number of predicted tages: {len(y_pred_flat)}")

file_path = 'NER-test.tsv'
df_true = pd.read_csv(file_path, sep='\t', quoting=3)

df_true['mapped_BIO_NER_tag'] = df_true['BIO_NER_tag']
y_true_flat=df_true['mapped_BIO_NER_tag'].tolist()

if len(y_true_flat) == len(y_pred_flat):
        all_labels = sorted(list(set(y_true_flat + y_pred_flat)))
        
        report = classification_report(
            y_true_flat,
            y_pred_flat,
            labels=all_labels,
            digits=2,
            zero_division=0
        )
        print(report)

else:
    print("\nError: Length mismatch between true and predicted labels.")

100%|██████████| 1/1 [00:06<00:00,  6.30s/it]
Running Prediction: 100%|██████████| 1/1 [00:00<00:00,  1.82it/s]

Number of predicted tages: 216
               precision    recall  f1-score   support

   B-CARDINAL       0.00      0.00      0.00         0
       B-DATE       0.00      0.00      0.00         0
      B-EVENT       0.00      0.00      0.00         0
   B-LOCATION       0.00      0.00      0.00         3
        B-ORG       0.00      0.00      0.00         8
     B-PERSON       0.00      0.00      0.00        12
B-WORK_OF_ART       0.00      0.00      0.00         6
   I-CARDINAL       0.00      0.00      0.00         0
       I-DATE       0.00      0.00      0.00         0
   I-LOCATION       0.00      0.00      0.00         2
    I-ORDINAL       0.00      0.00      0.00         0
        I-ORG       0.00      0.00      0.00         5
     I-PERSON       0.00      0.00      0.00        13
I-WORK_OF_ART       0.00      0.00      0.00         8
            O       0.75      0.97      0.84       159

     accuracy                           0.72       216
    macro avg       0.05      0.




In [17]:
englishmodel_8 = NERModel(
    model_type="bert",
    model_name="sartajbhuvaji/bert-named-entity-recognition",
    #labels=ontonotes_labels,
    use_cuda=False
)

predictions, raw_outputs = englishmodel_8.predict(test_sentences)
predictions

from sklearn.metrics import classification_report
import numpy as np

y_pred_flat = []
for sentence_predictions in predictions:
    for token_prediction_dict in sentence_predictions:
        y_pred_flat.append(list(token_prediction_dict.values())[0])

print(f"Number of predicted tages: {len(y_pred_flat)}")

file_path = 'NER-test.tsv'
df_true = pd.read_csv(file_path, sep='\t', quoting=3)

df_true['mapped_BIO_NER_tag'] = df_true['BIO_NER_tag']
y_true_flat=df_true['mapped_BIO_NER_tag'].tolist()

if len(y_true_flat) == len(y_pred_flat):
        all_labels = sorted(list(set(y_true_flat + y_pred_flat)))
        
        report = classification_report(
            y_true_flat,
            y_pred_flat,
            labels=all_labels,
            digits=2,
            zero_division=0
        )
        print(report)

else:
    print("\nError: Length mismatch between true and predicted labels.")

100%|██████████| 1/1 [00:05<00:00,  5.52s/it]
Running Prediction: 100%|██████████| 1/1 [00:01<00:00,  1.11s/it]

Number of predicted tages: 216
               precision    recall  f1-score   support

   B-LOCATION       0.00      0.00      0.00         3
       B-MISC       0.00      0.00      0.00         0
        B-ORG       0.00      0.00      0.00         8
     B-PERSON       0.00      0.00      0.00        12
B-WORK_OF_ART       0.00      0.00      0.00         6
   I-LOCATION       0.00      0.00      0.00         2
       I-MISC       0.00      0.00      0.00         0
        I-ORG       0.00      0.00      0.00         5
     I-PERSON       0.00      0.00      0.00        13
I-WORK_OF_ART       0.00      0.00      0.00         8
            O       0.75      1.00      0.86       159

     accuracy                           0.74       216
    macro avg       0.07      0.09      0.08       216
 weighted avg       0.55      0.74      0.63       216




