# Testing the accuracy of created pipelines
### All models are loaded and run on the test data, after which the accuracy scores are computed

In [1]:
from scripts import load_test_data
from ner_models import (RegexModel,
                        HfBERTModel,
                        HfBERTUncasedModel,
                        HfRoBERTaModel,
                        HfElectraModel,
                        SpacyLgModel,
                        SpacyTrfModel,
                        FlairModel)

In [2]:
# Load the evaluation set: the request texts and, at the same positions,
# the expected extraction for each request.
user_requests, required_data = load_test_data()

In [3]:
# Peek at the first raw request to see what the models receive as input.
user_requests[0]

"Hello, I'm Olivia Parker, I want to fly from Barcelona to Amsterdam on 20th May 2024."

In [4]:
# Peek at the expected answer for the first request (the ground truth it is scored against).
required_data[0]

{'name': 'Olivia Parker',
 'departure': 'Barcelona',
 'destination': 'Amsterdam',
 'date': '20-05-2024'}

In [5]:
# Sanity check: run one pipeline on the first request. The result is a
# (name, departure, destination, date) tuple, the shape compute_accuracy unpacks.
RegexModel().extract_flight_details(user_requests[0])

('Olivia Parker', 'Barcelona', 'Amsterdam', '20-05-2024')

In [6]:
def compute_accuracy(model, user_requests, required_data, return_all=False, group_size=20):
    """Score a model's extracted flight details against the expected values.

    For every request, ``model.extract_flight_details(request)`` must return a
    4-tuple ``(name, departure, destination, date)``. Each element is compared
    for exact equality with the matching key of ``required_data[i]``.

    Args:
        model: Object exposing ``extract_flight_details(request)``.
        user_requests: Sequence of request texts.
        required_data: Sequence, aligned with ``user_requests``, of mappings
            with the keys ``'name'``, ``'departure'``, ``'destination'`` and
            ``'date'``.
        return_all: If true, return the raw per-request hit lists instead of
            the percentage summary.
        group_size: Number of consecutive requests per ``group_N`` bucket in
            the summary. Defaults to 20.

    Returns:
        If ``return_all`` is true, a tuple of five lists of 0/1 flags
        ``(all, name, departure, destination, date)``, one flag per request;
        ``all`` is 1 only when every field matched.

        Otherwise a dict of percentages (0-100):
        ``'group_1'``, ``'group_2'``, ... hold the share of fully correct
        requests within each consecutive chunk of ``group_size`` requests,
        ``'whole'`` the share over all requests, and ``'name'``,
        ``'departure'``, ``'destination'``, ``'date'`` the per-field shares.

    Raises:
        ValueError: If ``group_size`` is not positive, or if a summary is
            requested for an empty ``user_requests``.
    """
    if group_size <= 0:
        raise ValueError("group_size must be a positive integer")

    fields = ('name', 'departure', 'destination', 'date')
    answers_all = []
    answers_by_field = {field: [] for field in fields}

    for i, request in enumerate(user_requests):
        expected = required_data[i]
        truth = [expected[field] for field in fields]

        # Unpack explicitly so a model returning the wrong arity fails loudly.
        pred_name, pred_departure, pred_destination, pred_date = model.extract_flight_details(request)
        predicted = (pred_name, pred_departure, pred_destination, pred_date)

        hits = [1 if want == got else 0 for want, got in zip(truth, predicted)]
        for field, hit in zip(fields, hits):
            answers_by_field[field].append(hit)
        answers_all.append(1 if all(hits) else 0)

    if return_all:
        return (answers_all,
                answers_by_field['name'],
                answers_by_field['departure'],
                answers_by_field['destination'],
                answers_by_field['date'])

    total = len(answers_all)
    if total == 0:
        raise ValueError("cannot compute accuracy: user_requests is empty")

    summary = {}
    # Step over the full length so a trailing chunk is never dropped, and divide
    # by the chunk's real size so a short last chunk is not under-reported.
    for num, start in enumerate(range(0, total, group_size), start=1):
        chunk = answers_all[start:start + group_size]
        summary[f'group_{num}'] = (sum(chunk) / len(chunk)) * 100

    summary['whole'] = (sum(answers_all) / total) * 100
    for field in fields:
        summary[field] = (sum(answers_by_field[field]) / total) * 100
    return summary

In [7]:
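# To inspect the raw per-request 0/1 flags instead of the percentage summary, pass return_all=True:
# answers_all, answers_name, answers_departure, answers_destination, answers_date = compute_accuracy(RegexModel(), user_requests, required_data, return_all=True)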

In [8]:
# Evaluate every pipeline on the same requests. Each model is instantiated
# right before it is scored, and results are keyed by the pipeline's class name.
pipelines = (
    ('RegexModel', RegexModel),
    ('HfBERTModel', HfBERTModel),
    ('HfBERTUncasedModel', HfBERTUncasedModel),
    ('HfRoBERTaModel', HfRoBERTaModel),
    ('HfElectraModel', HfElectraModel),
    ('SpacyLgModel', SpacyLgModel),
    ('SpacyTrfModel', SpacyTrfModel),
    ('FlairModel', FlairModel),
)

score = {}
for pipeline_name, pipeline_cls in pipelines:
    score[pipeline_name] = compute_accuracy(pipeline_cls(), user_requests, required_data)

Some weights of the model checkpoint at dslim/bert-base-NER were not used when initializing BertForTokenClassification: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at dslim/bert-base-NER-uncased were not used when initializing BertForTokenClassification: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (

2023-11-30 13:09:31,952 SequenceTagger predicts: Dictionary with 76 tags: <unk>, O, B-CARDINAL, E-CARDINAL, S-PERSON, S-CARDINAL, S-PRODUCT, B-PRODUCT, I-PRODUCT, E-PRODUCT, B-WORK_OF_ART, I-WORK_OF_ART, E-WORK_OF_ART, B-PERSON, E-PERSON, S-GPE, B-DATE, I-DATE, E-DATE, S-ORDINAL, S-LANGUAGE, I-PERSON, S-EVENT, S-DATE, B-QUANTITY, E-QUANTITY, S-TIME, B-TIME, I-TIME, E-TIME, B-GPE, E-GPE, S-ORG, I-GPE, S-NORP, B-FAC, I-FAC, E-FAC, B-NORP, E-NORP, S-PERCENT, B-ORG, E-ORG, B-LANGUAGE, E-LANGUAGE, I-CARDINAL, I-ORG, S-WORK_OF_ART, I-QUANTITY, B-MONEY


In [9]:
# Show the full per-model summary (group, overall and per-field percentages).
score

{'RegexModel': {'group_1': 65.0,
  'group_2': 0.0,
  'group_3': 60.0,
  'group_4': 0.0,
  'group_5': 0.0,
  'group_6': 35.0,
  'whole': 26.666666666666668,
  'name': 72.5,
  'departure': 71.66666666666667,
  'destination': 69.16666666666667,
  'date': 48.333333333333336},
 'HfBERTModel': {'group_1': 100.0,
  'group_2': 100.0,
  'group_3': 100.0,
  'group_4': 0.0,
  'group_5': 95.0,
  'group_6': 95.0,
  'whole': 81.66666666666667,
  'name': 83.33333333333334,
  'departure': 83.33333333333334,
  'destination': 82.5,
  'date': 97.5},
 'HfBERTUncasedModel': {'group_1': 100.0,
  'group_2': 100.0,
  'group_3': 100.0,
  'group_4': 60.0,
  'group_5': 95.0,
  'group_6': 95.0,
  'whole': 91.66666666666666,
  'name': 100.0,
  'departure': 95.0,
  'destination': 98.33333333333333,
  'date': 97.5},
 'HfRoBERTaModel': {'group_1': 100.0,
  'group_2': 100.0,
  'group_3': 100.0,
  'group_4': 60.0,
  'group_5': 95.0,
  'group_6': 100.0,
  'whole': 92.5,
  'name': 100.0,
  'departure': 95.0,
  'destinati

# Making tables for the report

In [10]:
import pandas as pd    

In [11]:
# Model names in evaluation order (dict insertion order); used as the row labels below.
models = [*score]

In [12]:
# Pivot the nested `score` dict into one list per metric, each ordered like `models`.
metric_keys = (
    'group_1', 'group_2', 'group_3', 'group_4', 'group_5', 'group_6',
    'whole',
    'name', 'departure', 'destination', 'date',
)
metric_values = [[] for _ in metric_keys]
for model in models:
    model_scores = score[model]
    for values, key in zip(metric_values, metric_keys):
        values.append(model_scores[key])

gr1, gr2, gr3, gr4, gr5, gr6, whole, name, departure, destination, date = metric_values

In [13]:
# Table 1: fully-correct accuracy per request group and overall, one row per model.
df = pd.DataFrame({
    'models': models,
    'group_1': gr1,
    'group_2': gr2,
    'group_3': gr3,
    'group_4': gr4,
    'group_5': gr5,
    'group_6': gr6,
    'whole': whole,
})

In [14]:
# Display the per-group accuracy table.
df

Unnamed: 0,models,group_1,group_2,group_3,group_4,group_5,group_6,whole
0,RegexModel,65.0,0.0,60.0,0.0,0.0,35.0,26.666667
1,HfBERTModel,100.0,100.0,100.0,0.0,95.0,95.0,81.666667
2,HfBERTUncasedModel,100.0,100.0,100.0,60.0,95.0,95.0,91.666667
3,HfRoBERTaModel,100.0,100.0,100.0,60.0,95.0,100.0,92.5
4,HfElectraModel,100.0,100.0,100.0,55.0,95.0,100.0,91.666667
5,SpacyLgModel,90.0,10.0,90.0,0.0,55.0,70.0,52.5
6,SpacyTrfModel,100.0,90.0,100.0,0.0,50.0,100.0,73.333333
7,FlairModel,100.0,100.0,100.0,95.0,100.0,100.0,99.166667


In [15]:
# Export the per-group table for the report. The path is relative, so the file
# lands in the current working directory; the row index is written as the first column.
df.to_excel("df1.xlsx") 

In [16]:
# Table 2: per-field accuracy (name / departure / destination / date), one row per model.
df2 = pd.DataFrame({
    'models': models,
    'name': name,
    'departure': departure,
    'destination': destination,
    'date': date,
})
df2

Unnamed: 0,models,name,departure,destination,date
0,RegexModel,72.5,71.666667,69.166667,48.333333
1,HfBERTModel,83.333333,83.333333,82.5,97.5
2,HfBERTUncasedModel,100.0,95.0,98.333333,97.5
3,HfRoBERTaModel,100.0,95.0,100.0,97.5
4,HfElectraModel,98.333333,95.0,100.0,97.5
5,SpacyLgModel,80.0,83.333333,83.333333,60.0
6,SpacyTrfModel,83.333333,83.333333,83.333333,85.833333
7,FlairModel,100.0,99.166667,100.0,100.0


In [17]:
# Export the per-field table for the report. The path is relative, so the file
# lands in the current working directory; the row index is written as the first column.
df2.to_excel("df2.xlsx") 