In [None]:
import torch
from torch import Tensor, nn
import pandas as pd
import os
import sys
sys.path.append("nlp_project")
import numpy as np
import matplotlib.pyplot as plt
from nlp_project.scripts.read_write_data import load_data

In [None]:
# Root folders, resolved against the directory the kernel was started in.
_project_root = os.path.join(os.getcwd(), 'nlp_project')
DATA_BASE_PATH = os.path.join(_project_root, 'data')
MODELS_BASE_PATH = os.path.join(_project_root, 'models')

# Serialized models, one file per experiment (experiment 1 is stored as "Baseline.pt").
EXPERIMENT1_PATH, EXPERIMENT2_PATH, EXPERIMENT3_PATH, EXPERIMENT4_PATH = (
    os.path.join(MODELS_BASE_PATH, file_name)
    for file_name in ("Baseline.pt", "Experiment2.pt", "Experiment3.pt", "Experiment4.pt")
)

# Test split.
TEST_SET_PATH = os.path.join(DATA_BASE_PATH, "processed", "test.conll")
# TEST_SET_PATH is already absolute (it is built from os.getcwd()), so it is
# handed to the loader as is; joining it onto os.getcwd() again is a no-op.
test_docs, test_labels, test_bio, test_domain = load_data(TEST_SET_PATH)

## Getting train, dev and test scores:
### P.S: To run this you need to download the [Google News Word2vec model](https://drive.google.com/file/d/0B7XkCwpI5KDYNlNUTTlSS21pQmM/view?usp=sharing&resourcekey=0-wjGZdNAUop6WykTtMip30g), name it "GoogleNews-50k.bin", and place it in /notebooks/nlp-project/models/.

In [None]:
# Deserialize the four saved experiment models, in experiment order.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code -- only point these paths at checkpoints from a trusted source.
e1_model, e2_model, e3_model, e4_model = (
    torch.load(checkpoint_path)
    for checkpoint_path in (
        EXPERIMENT1_PATH,
        EXPERIMENT2_PATH,
        EXPERIMENT3_PATH,
        EXPERIMENT4_PATH,
    )
)

In [None]:
# Run every model's `evaluate` on the same test split. The results are used as
# pandas DataFrames further down in this notebook.
_test_split = (test_docs, test_labels, test_bio, test_domain)
e1_errors, e2_errors, e3_errors, e4_errors = (
    model.evaluate(*_test_split)
    for model in (e1_model, e2_model, e3_model, e4_model)
)

In [None]:
# Preview the first rows of the experiment 1 evaluation result.
e1_errors.head()

In [None]:
# Maps each fine-grained BIO tag (B-/I- prefix, optional "part"/"deriv" suffix)
# to a coarse class: PER, LOC, ORG, MISC or O. Tags that are not listed here are
# left unchanged by `replace`.
replacement = {
    'B-PER': 'PER',
    'I-PER': 'PER',
    'B-LOC': 'LOC',
    'O': 'O',
    'B-MISC': 'MISC',
    'B-ORG': 'ORG',
    'I-ORG': 'ORG',
    'I-LOC': 'LOC',
    'B-LOCderiv': 'MISC',
    'I-MISC': 'MISC',
    'B-LOCpart': 'LOC',
    'B-ORGpart': 'ORG',
    'B-MISCpart': 'MISC',
    'I-MISCpart': 'MISC',
    'I-LOCpart': 'LOC',
    'B-PERpart': 'PER',
    'B-ORGderiv': 'ORG'
}


def _first_general_tag(errors_df):
    """Return the coarse class of the first BIO tag of every row.

    The "entity_BIO" column is exploded (one tag per row, original index
    repeated), every tag is translated through ``replacement``, and only the
    first tag per original index label is kept.

    Only the "entity_BIO" column is processed, so the other columns of
    ``errors_df`` are never scanned or rewritten.

    Args:
        errors_df: frame with an "entity_BIO" column holding tag lists.

    Returns:
        A Series with one value per distinct index label of ``errors_df``.
    """
    coarse_tags = errors_df["entity_BIO"].explode().replace(to_replace=replacement)
    return coarse_tags[~coarse_tags.index.duplicated(keep='first')]


# Same derivation for all four experiments; assignment aligns on the index.
for _errors_df in (e1_errors, e2_errors, e3_errors, e4_errors):
    _errors_df["general_BIO"] = _first_general_tag(_errors_df)


In [None]:
# Display the experiment 1 frame again, now including the "general_BIO" column.
e1_errors

In [None]:
# Per experiment: error type -> number of rows with a non-null "entity".
e1_err, e2_err, e3_err, e4_err = (
    frame.groupby("error_type")["entity"].count().to_dict()
    for frame in (e1_errors, e2_errors, e3_errors, e4_errors)
)

_per_experiment_counts = (e1_err, e2_err, e3_err, e4_err)

# Align the four experiments by error-type NAME rather than by position.
# Zipping the dicts positionally mislabels counts (and silently drops types)
# as soon as one experiment lacks an error type another one has.
# dict.fromkeys keeps first-seen order, so the key order is unchanged when all
# experiments share the same error types. Missing types count as 0.
errors = {
    error_type: [counts.get(error_type, 0) for counts in _per_experiment_counts]
    for error_type in dict.fromkeys(
        key for counts in _per_experiment_counts for key in counts
    )
}

In [None]:
# Error type -> list of counts, one entry per experiment (E1..E4 order).
errors

In [None]:
# Pool the BIO tag lists of all four experiments and flatten them to one tag per row.
_pooled_tags = pd.concat(
    [frame['entity_BIO'] for frame in (e1_errors, e2_errors, e3_errors, e4_errors)]
).explode()
all_bio_errors = pd.DataFrame(_pooled_tags)

# value_counts returns the tags ordered from most to least frequent and skips
# NaN entries (explode yields NaN for empty lists), which np.unique cannot sort
# alongside strings.
_tag_counts = _pooled_tags.value_counts()
unique = _tag_counts.index.to_numpy()
counts = _tag_counts.to_numpy()

fig, ax = plt.subplots()
ax.bar(unique, height=counts)
ax.set_xlabel('BIO tag')
ax.set_ylabel('Count')
ax.tick_params(axis='x', labelrotation=90)
plt.show()

In [None]:
experiments = ("E1", "E2", "E3", "E4")

x = np.arange(len(experiments))  # left-most bar position of each experiment group
n_error_types = len(errors)
# 0.2 leaves room for up to four bars per group; shrink the bars when there are
# more error types so neighbouring groups never overlap.
width = min(0.2, 0.8 / max(n_error_types, 1))

fig, ax = plt.subplots(layout='constrained')

# One bar series per error type, each shifted right by one bar width.
for position, (attribute, measurement) in enumerate(errors.items()):
    ax.bar(x + width * position, measurement, width, label=attribute)

ax.set_ylabel('Number of errors', fontsize=14)
# Put each tick under the middle of its group, whatever the number of series
# (this equals `x + width` for three error types).
ax.set_xticks(x + width * max(n_error_types - 1, 0) / 2, experiments, fontsize=14)
ax.legend(loc='upper left', ncols=3, prop={'size': 14})

# Keep the 0-400 range while every bar fits; otherwise grow the axis so that no
# bar is clipped.
tallest_bar = max((max(values) for values in errors.values() if len(values)), default=0)
ax.set_ylim(0, 400 if tallest_bar <= 400 else 1.1 * tallest_bar)

plt.show()

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=b2f14aee-af04-4db5-af55-57a3a58b9f40' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>