In [1]:
import joblib
from src.db_client import DB_Client
import pandas as pd
import numpy as np
from sklearn.metrics import ndcg_score
import nlpaug.augmenter.word as naw
import random

In [2]:
# Location of the serialized intent model used throughout this notebook.
MODEL_PATH = "models/movie_trivia.joblib"

db_client = DB_Client()
# joblib.load unpickles the file, so only point it at artifacts from a trusted source.
model = joblib.load(MODEL_PATH)

In [None]:
# Load the evaluation set. A later cell reads the intent column as ' intentID'
# (with a leading space), so the header is deliberately kept exactly as parsed.
eval_data = pd.read_csv(filepath_or_buffer='eval.csv')

In [3]:
# One sample prediction is requested only to learn the shape of the model output
# (which intent ids it returns); the answer itself is not used.
sample_question = "What is Bubba's name in Forest Gump?"
sample_pred = model.predict(sample_question)

In [None]:
# Extract only the intent ids from the sample prediction for use in one-hot encoding.
# Sort on the whole id: the scoring code orders predictions by prediction[0] (the full
# id), so sorting here by x[0] (only the first element/character of each id) could put
# the one-hot target in a different order than the scores when ids share a prefix.
intent_ids = sorted(prediction[0] for prediction in sample_pred)

In [359]:
def get_ndcg(row):
    """Score the model's intent ranking for one evaluation row with NDCG.

    Args:
        row: A pandas Series with a 'question' entry and the expected intent id
            as its second field (read by position).

    Returns:
        The value of sklearn's ``ndcg_score`` for a one-hot relevance vector
        (1 at the expected intent, 0 elsewhere) against the model's scores.

    Notes:
        Relies on the notebook-level ``model`` and ``intent_ids``. The relevance
        vector follows the order of ``intent_ids`` while the scores follow the
        predictions sorted by intent id, so ``intent_ids`` must be in that same order.
        ``model.predict`` is assumed to return (intent_id, score) pairs, inferred
        from the indexing below; confirm against the model.
    """
    question = row['question']
    # Positional access has to go through .iloc: an integer key on a string-labelled
    # Series (row[1]) is a deprecated positional fallback in pandas.
    intent_id = row.iloc[1]

    relevance = [1 if candidate == intent_id else 0 for candidate in intent_ids]

    # sorted() leaves the list returned by the model untouched.
    ranked = sorted(model.predict(question), key=lambda prediction: prediction[0])
    scores = [prediction[1] for prediction in ranked]

    # ndcg_score expects 2-D inputs: one row per query.
    return ndcg_score(np.asarray([relevance]), np.asarray([scores]))

In [266]:
# Score every evaluation question; axis='columns' hands get_ndcg one row at a time.
eval_data['ndcg'] = eval_data.apply(get_ndcg, axis='columns')

In [170]:
# Average the per-question NDCG over the evaluation set.
ndcg_per_question = eval_data['ndcg']
mean_ndcg = ndcg_per_question.mean()

In [171]:
# Display the mean NDCG computed over eval_data['ndcg'].
mean_ndcg

0.9630929753571458

In [369]:
def typofy(text, percentage):
    """Overwrite a share of the characters in ``text`` with random lowercase letters.

    ``int(len(text) * percentage)`` replacements are made, each at a position
    drawn uniformly from the whole string. Positions are drawn independently,
    so one position can be hit more than once and a replacement can equal the
    character it overwrites; the number of characters that actually differ is
    therefore at most the replacement count.

    Args:
        text: The string to corrupt.
        percentage: Fraction of the length to use as the replacement count.

    Returns:
        A string of the same length as ``text`` with the replacements applied.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    replacement_count = int(len(text) * percentage)
    # Each replacement is one-for-one, so the last valid index never changes.
    last_index = len(text) - 1
    for _ in range(replacement_count):
        position = random.randint(0, last_index)
        letter = random.choice(alphabet)
        # Empty slices at either end make this one expression cover the first,
        # last and middle positions alike.
        text = text[:position] + letter + text[position + 1:]
    return text

In [441]:
def simulate_typo(row, percentage=0.3):
    """Return the row's question with random typos applied.

    Args:
        row: A mapping/Series with a 'question' entry.
        percentage: Fraction of the question's length passed to ``typofy`` as
            the replacement budget. Defaults to 0.3, the value this notebook
            previously hard-coded, so ``DataFrame.apply(simulate_typo, axis=1)``
            behaves as before; pass ``percentage=...`` to ``apply`` to vary it.

    Returns:
        The corrupted question string produced by ``typofy``.
    """
    return typofy(row['question'], percentage)

In [442]:
# Seed the typo generator once so the averaged score is reproducible on a rerun;
# the repeats below still draw different typos from one another.
random.seed(42)

N_REPEATS = 100  # how many times the experiment is repeated for one typo percentage

ndcg_means = []
for _ in range(N_REPEATS):
    # Corrupt every question, then rebuild a two-column frame (question, intentID)
    # so get_ndcg finds the expected intent in the second field.
    eval_data_typo = eval_data.apply(simulate_typo, axis=1)
    typo = pd.DataFrame(eval_data_typo, columns=['question'])
    # The source column is named ' intentID' (leading space) in eval_data.
    typo['intentID'] = eval_data[' intentID']
    typo['ndcg'] = typo.apply(get_ndcg, axis=1)
    ndcg_means.append(typo['ndcg'].mean())

# Mean of the per-repeat mean NDCG values.
avg_ndcg_typo = sum(ndcg_means) / len(ndcg_means)
avg_ndcg_typo

0.4213688435435202

In [237]:
# Remove the scratch 'question_typo' column if it is present. errors='ignore' keeps
# this cell from raising KeyError when the column was never added (e.g. on a fresh
# Restart & Run All), and the non-inplace form makes the cell safe to re-run.
eval_data = eval_data.drop(columns=['question_typo'], errors='ignore')

In [239]:
# Persist the scored evaluation frame. The DataFrame index is written as the first
# column because to_csv keeps its default index=True here.
eval_data.to_csv(path_or_buf='./eval_data/eval_0_results.csv')