### Code

#### Imports

In [12]:
import pandas as pd
from sklearn.metrics import classification_report

#### Functions

In [13]:
def error_fixer(df):
    '''Flatten gold and generated tag sequences into two aligned lists.

    Each row of ``df`` is handled according to the lengths of its two
    sequences:

    * generated longer than gold (hallucinated extra tokens): the generated
      sequence is truncated to the gold length;
    * generated shorter than gold (missing tokens): the row is dropped
      entirely, so none of its tokens are scored;
    * equal length: both sequences are used unchanged.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``semtag`` (gold) and ``gen_semtag`` (generated).
        Every cell must support ``len()`` and slicing -- presumably a list of
        tag strings; verify against the JSON producer.

    Returns
    -------
    tuple of (list, list)
        ``(gold, pred)``, two flat lists of identical length.
    '''
    gold = []
    pred = []
    # Walk the two columns directly instead of materialising a Series per row
    # with iterrows().
    for gold_tags, gen_tags in zip(df['semtag'], df['gen_semtag']):
        if len(gold_tags) > len(gen_tags):
            # Missing tokens: the row cannot be aligned, so it is skipped.
            continue
        gold.extend(gold_tags)
        # The slice trims surplus generated tokens and is a no-op when the
        # lengths already match.
        pred.extend(gen_tags[:len(gold_tags)])

    return gold, pred

In [14]:
def classification_printer(in_file):
    '''Load generated output from ``in_file`` and print its per-tag scores.

    The file is read with ``pd.read_json``, the gold and generated tags are
    aligned by ``error_fixer``, and the resulting scikit-learn
    ``classification_report`` is written to stdout. Nothing is returned.
    '''
    gold_tags, predicted_tags = error_fixer(pd.read_json(in_file))

    # zero_division=0 scores undefined precision/recall as 0 without warning.
    report = classification_report(gold_tags, predicted_tags, zero_division=0)
    print(report)

### Classification

In [22]:
# Run under evaluation: which variant/model produced the file being scored.
variant = 'Sentinel'
model = 't5-base'
output_json = './2023-02-02_18-55-11-t5-base-test_generated.json'

# The same banner is printed above and below the report.
banner = f'{variant} - {model}' + '\n'
print(banner)
classification_printer(output_json)
print(banner)

Sentinel - t5-base

              precision    recall  f1-score   support

         ALT       0.91      0.72      0.81        29
         AND       0.92      0.62      0.74        53
         APX       1.00      0.62      0.76        13
         ART       0.31      1.00      0.48         5
         BOT       1.00      1.00      1.00         1
         BUT       1.00      1.00      1.00         7
         CLO       0.94      0.79      0.86        19
         COL       0.90      0.75      0.82        12
         CON       0.98      0.75      0.85       871
         COO       0.92      0.75      0.83        16
         CTC       1.00      0.50      0.67         2
         DEF       0.93      0.69      0.79       797
         DEG       0.80      0.40      0.53        30
         DIS       0.86      0.76      0.81       476
         DOM       1.00      0.60      0.75         5
         DOW       1.00      1.00      1.00         3
         DST       0.82      0.85      0.84        27
       

In [23]:
# Run under evaluation: which variant/model produced the file being scored.
variant = 'Sentinel'
model = 't5-small'
output_json = './2023-02-02_19-10-41-t5-small-test_generated.json'

# The same banner is printed above and below the report.
banner = f'{variant} - {model}' + '\n'
print(banner)
classification_printer(output_json)
print(banner)

Sentinel - t5-small

              precision    recall  f1-score   support

         ALT       0.75      0.93      0.83        29
         AND       0.80      0.74      0.76        53
         APX       1.00      1.00      1.00        13
         ART       0.27      0.80      0.40         5
         BOT       1.00      1.00      1.00         1
         BUT       0.86      0.86      0.86         7
         CLO       0.79      1.00      0.88        19
         COL       1.00      1.00      1.00        12
         CON       0.93      0.98      0.95       871
         COO       0.86      0.38      0.52        16
         CTC       1.00      0.50      0.67         2
         DEF       0.92      0.97      0.94       797
         DEG       0.95      0.63      0.76        30
         DIS       0.92      0.84      0.88       476
         DOM       0.83      1.00      0.91         5
         DOW       1.00      1.00      1.00         3
         DST       0.77      1.00      0.87        27
      

In [None]:
# Template cell for the next run: fill in the three values below.
variant = ''
model = ''
output_json = './'

# './' is only the placeholder; it is not a JSON file, so the report is
# skipped until a real path is set. This keeps "Restart & Run All" from
# failing on the unfilled template.
if output_json == './':
    print('Template cell: set variant, model and output_json before running.')
else:
    print(f'{variant} - {model}' + '\n')
    classification_printer(output_json)
    print(f'{variant} - {model}' + '\n')