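### This notebook demonstrates how to evaluate the accuracy of the BM25 + nearest-neighbours word retrieval model on the seen, unseen and description (200) test sets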

In [1]:
from sklearn.neighbors import NearestNeighbors
from BM25Vectorizer import BM25Vectorizer
from tqdm import tqdm
from utils import *

# Input files used by this evaluation.
augmented_json_path = 'data/data_with_augmented_examples.json'
test_200_json_path = 'data/test_200.json'

# The main dataset has to be loaded with clean=True so that it gets cleaned.
data = load_data_from_json(augmented_json_path, clean=True)

# NOTE(review): the test set is loaded with the default `clean` setting rather
# than clean=True — confirm that this file does not need cleaning.
data_test = load_data_from_json(test_200_json_path)

100%|██████████| 103874/103874 [00:52<00:00, 1981.77it/s]


In [2]:
# Split the full dataset into a training part and two held-out parts.
data_train, data_seen_500, data_unseen_500 = split_seen_unseen(data)

# Print, for each split in turn, how many elements there are in total across
# all of its values (presumably one element per definition — TODO confirm).
# `itertools` is not imported explicitly in this notebook; presumably it is
# brought into scope by `from utils import *`.
for split in (data_train, data_seen_500, data_unseen_500):
    print(sum(1 for _ in itertools.chain.from_iterable(split.values())))

100%|██████████| 98481/98481 [03:12<00:00, 510.85it/s] 


8689479
500
500


### Fit training data

In [3]:
# Create both estimators up front, then fit them one after the other.
vectorizer = BM25Vectorizer(stop_words=None, stemmer=None, norm=False)
knn = NearestNeighbors(metric='cosine')

# BM25 features are learned from the training split only, so the held-out
# seen/unseen queries evaluated below are not part of the fitted matrix.
X_BM25 = vectorizer.fit_transform(data_train)  # <--- fit train data here

# Read after fitting; passed to `search` below together with the knn index
# (presumably the word associated with each row of X_BM25 — TODO confirm).
words = vectorizer.words

# Index the BM25 matrix for cosine-distance neighbour lookup. Kept as the last
# expression so that the notebook displays the fitted estimator.
knn.fit(X_BM25)

100%|██████████| 129297/129297 [00:00<00:00, 315206.34it/s]


NearestNeighbors(metric='cosine')

### Test seen data

In [4]:
# Collect one prediction per word of the seen split, with the word itself as
# the gold label.
y_pred, y_gold = [], []
for word, defi in tqdm(data_seen_500.items()):  # <--- use seen_500 data here
    # Only the first element of `defi` is used as the query for this word; any
    # further definitions of the same word are not evaluated.
    query = vectorizer.transform(list(defi)[0])

    # n=1000 is forwarded to `search` (presumably the number of ranked
    # candidates to return — TODO confirm).
    prediction = search(query, knn, words, n=1000)

    y_gold.append(word)
    y_pred.append(prediction)

# Last expression of the cell, so the value returned by `evaluate` is displayed.
evaluate(y_pred, y_gold)

100%|██████████| 496/496 [02:44<00:00,  3.02it/s]

acc@1: 0.3
acc@10: 0.61
acc@100: 0.87
median rank: 3
standard error of mean rank: 8





(0.3024193548387097,
 0.6149193548387096,
 0.8689516129032258,
 3.0,
 7.992881798306646)

### Test unseen data

In [5]:
# Collect one prediction per word of the unseen split, with the word itself as
# the gold label. Reuses the vectorizer / knn / words currently bound in the
# kernel.
y_pred, y_gold = [], []
for word, defi in tqdm(data_unseen_500.items()):  # <--- use unseen_500 data here
    # Only the first element of `defi` is used as the query for this word.
    query = vectorizer.transform(list(defi)[0])

    # n=1000 is forwarded to `search` (presumably the number of ranked
    # candidates to return — TODO confirm).
    prediction = search(query, knn, words, n=1000)

    y_gold.append(word)
    y_pred.append(prediction)

# Last expression of the cell, so the value returned by `evaluate` is displayed.
evaluate(y_pred, y_gold)

100%|██████████| 495/495 [02:44<00:00,  3.01it/s]

acc@1: 0.23
acc@10: 0.55
acc@100: 0.82
median rank: 7
standard error of mean rank: 10





(0.23232323232323232,
 0.5515151515151515,
 0.8202020202020202,
 7.0,
 10.382158479706304)

### Test on the description set (200 examples)

In [6]:
# Re-create both estimators and fit them on the complete dataset this time.
# NOTE: this rebinds `vectorizer`, `X_BM25`, `words` and `knn`, replacing the
# objects fitted on `data_train` earlier in the notebook. Re-running the
# seen/unseen evaluation cells after this point would use the full-data fit.
vectorizer = BM25Vectorizer(stop_words=None, stemmer=None, norm=False)
knn = NearestNeighbors(metric='cosine')

X_BM25 = vectorizer.fit_transform(data)  # <--- fit all data here
words = vectorizer.words  # read after fitting

# Index the BM25 matrix for cosine-distance neighbour lookup.
knn.fit(X_BM25)

# Collect one prediction per word of the description test set, with the word
# itself as the gold label.
y_pred, y_gold = [], []
for word, defi in tqdm(data_test.items()):  # <--- use desc_200 data here
    # Only the first element of `defi` is used as the query for this word.
    query = vectorizer.transform(list(defi)[0])

    # n=1000 is forwarded to `search` (presumably the number of ranked
    # candidates to return — TODO confirm).
    prediction = search(query, knn, words, n=1000)

    y_gold.append(word)
    y_pred.append(prediction)

# Last expression of the cell, so the value returned by `evaluate` is displayed.
evaluate(y_pred, y_gold)

100%|██████████| 129297/129297 [00:00<00:00, 317002.60it/s]
100%|██████████| 200/200 [01:07<00:00,  2.96it/s]

acc@1: 0.01
acc@10: 0.04
acc@100: 0.32
median rank: 230
standard error of mean rank: 27





(0.005, 0.035, 0.315, 230.5, 27.220657759778693)