## Import Necessary Libraries
Run this cell to import the libraries needed for the Frequently Requested Documents Model Testing Notebook.

In [None]:
from sentence_transformers import SentenceTransformer, util
import numpy as np
import pandas as pd
import os
import pickle

from frequently_requested_docs.docs_helper import getModel, getSaveName, loadEmbeddings, getEmbeddingPath, test_sentence
from frequently_requested_docs.docs_config import TOP_K, MODEL_NAMES, DATA_CSV_PATH

## Model Selection and Initialization

Run the next two cells to select and initialize the model you wish to test: set `m` to an integer from 0 to 8, the zero-based position of the model in the `model_name` list.

In [None]:
# Model identifiers that can be handed to getModel in the next cell.
# The trailing comment on each entry is its list index, i.e. the value to
# assign to `m` to select it.
# NOTE(review): MODEL_NAMES is imported from docs_config in the import cell
# but is not used in the cells shown here -- confirm whether this list
# duplicates it.
model_name = [
    'nli-mpnet-base-v2',                         # 0
    'nli-roberta-base-v2',                       # 1
    'princeton-nlp/sup-simcse-roberta-large',    # 2
    'princeton-nlp/unsup-simcse-roberta-large',  # 3
    'stsb-distilroberta-base-v2',                # 4
    'stsb-mpnet-base-v2',                        # 5
    'stsb-roberta-base',                         # 6
    'stsb-roberta-base-v2',                      # 7
    'stsb-roberta-large',                        # 8
]

# Index into `model_name` of the model to test (0 to 8).
m = 0
        

In [None]:
# Look up the selected model identifier once and reuse it for both helper calls.
selected_model_name = model_name[m]

# save_name is also used by a later cell (getEmbeddingPath) to locate the
# embeddings that belong to this model.
save_name = getSaveName(selected_model_name)
model = getModel(selected_model_name, save_name)

## Initialize and Load Corpus Embeddings
Run this cell to initialize and load the corpus embeddings from the Frequently Requested Documents dataset. 

In [None]:
# Build the corpus from the dataset CSV. Each corpus entry is the full row
# (a pandas Series), not just the document text. Rows whose 'Document' value
# is not a string -- such as the NaN pandas uses for empty cells -- are skipped.
#
# The original cell also called `data.reset_index()` and discarded the result;
# reset_index returns a new DataFrame unless inplace=True, so that call had no
# effect and has been removed.
data = pd.read_csv(DATA_CSV_PATH)
corpus_docs = [
    row
    for _, row in data.iterrows()
    if isinstance(row['Document'], str)
]

# Location of this model's embeddings, derived from the save_name chosen above.
embedding_path = getEmbeddingPath(save_name)

# Cleared up front so that, if the load below fails, a value left in the
# kernel by an earlier run (possibly for a different model) is not reused.
corpus_embeddings = None

# Load corpus embeddings if they exist, otherwise encode them.
# NOTE(review): that load-or-encode behaviour lives in
# docs_helper.loadEmbeddings and is not visible here -- confirm there.
corpus_docs, corpus_embeddings = loadEmbeddings(model, embedding_path, corpus_docs)
        

## Test Sentence Selection
Run the next two cells to select and embed a sentence to test: set `i` to an integer from 0 to 2, the zero-based position of the sentence in the `examples` list.

In [None]:
# Sample request texts used as test queries.
examples = [
    'I am searching for the Detention Facility Reviews for the Randall County Jail in Amarillo, Texas',
    'Statements made by former georgia senator david perdue about visas.',
    "All documents regarding the TSA’s throughput data for August 2017",
]

# Zero-based index into `examples` of the query to run (0 to 2).
i = 0


In [None]:
# Pick the query selected via `i` in the previous cell.
sentence = examples[i]

# Run the query against the loaded corpus. The call stays as the cell's final
# expression so that any value it returns is displayed by the notebook.
test_sentence(
    sentence,
    model,
    corpus_docs,
    corpus_embeddings,
    TOP_K,
)