In [1]:
## eval retriever

In [2]:
# make sure these indices do not collide with existing ones, the indices will be wiped clean before data is inserted
doc_index = "squad_docs"
label_index = "squad_labels"

In [3]:
# Connect to Elasticsearch
from haystack.document_stores import ElasticsearchDocumentStore

document_store = ElasticsearchDocumentStore(
    host="localhost",
    username="",
    password="",
    index=doc_index,
    label_index=label_index,
    embedding_field="emb",
    embedding_dim=768,
    excluded_meta_data=["emb"],
)

INFO - haystack.modeling.model.optimization -  apex not found, won't use it. See https://nvidia.github.io/apex/
ERROR - root -  Failed to import 'magic' (from 'python-magic' and 'python-magic-bin' on Windows). FileTypeClassifier will not perform mimetype detection on extensionless files. Please make sure the necessary OS libraries are installed if you need this functionality.


In [4]:
from haystack.nodes import PreProcessor

document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

preprocessor = PreProcessor(
    split_length=200,
    split_overlap=0,
    split_respect_sentence_boundary=False,
    clean_empty_lines=False,
    clean_whitespace=False,
)

# The add_eval_data() method converts the given dataset in json format into Haystack document and label objects. Those objects are then indexed in their respective document and label index in the document store. The method can be used with any dataset in SQuAD format.
document_store.add_eval_data(
    filename="squad_format_thesis/dev.json",
    doc_index=doc_index,
    label_index=label_index,
    preprocessor=preprocessor,
)



In [5]:
from haystack.nodes import DensePassageRetriever

save_dir = "models/model_big"

retriever = DensePassageRetriever.load(load_dir=save_dir, document_store=document_store, use_gpu=False)

INFO - haystack.modeling.utils -  Using devices: CPU
INFO - haystack.modeling.utils -  Number of GPUs: 0
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_big/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_big/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_big/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_big/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_big


In [6]:
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 608 docs ...


Updating embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

In [7]:
## Evaluate Retriever on its own # THIS IS THE FINE-TUNED
# Note that no_answer samples are omitted when evaluation is performed with this method
retriever_eval_results = retriever.eval(top_k=5, label_index=label_index, doc_index=doc_index)
# Retriever Recall is the proportion of questions for which the correct document containing the answer is
# among the correct documents
print("Retriever Recall:", retriever_eval_results["recall"])
# Retriever Mean Avg Precision rewards retrievers that give relevant documents a higher rank
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 25/25 [00:01<00:00, 14.76it/s]
INFO - haystack.nodes.retriever.base -  For 16 out of 25 questions (64.00%), the answer was in the top-5 candidate passages selected by the retriever.


Retriever Recall: 0.64
Retriever Mean Avg Precision: 0.4013333333333333


In [3]:
from haystack.nodes import DensePassageRetriever
retriever = DensePassageRetriever(
            document_store=document_store,
            query_embedding_model="IIC/dpr-spanish-question_encoder-allqa-base",
            passage_embedding_model="IIC/dpr-spanish-passage_encoder-allqa-base",
            use_gpu=False,
            batch_size = 64
            )

INFO - haystack.modeling.utils -  Using devices: CPU
INFO - haystack.modeling.utils -  Number of GPUs: 0
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Could not find IIC/dpr-spanish-question_encoder-allqa-base locally.
INFO - haystack.modeling.model.language_model -  Looking on Transformers Model Hub (in local cache and online)...
INFO - haystack.modeling.model.language_model -  Automatically detected language from language model name: spanish
INFO - haystack.modeling.model.language_model -  Loaded IIC/dpr-spanish-question_encoder-allqa-base
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Could not find IIC/dpr-spanish-passage_encoder-allqa-base locally.
INFO - haystack.modeling.model.language_model -  Looking on Transformers Model Hub (in local cache and online)...
INFO - haystack.modeling.model.language_model -  Automatically detected language from languag

In [4]:
from haystack.nodes import PreProcessor

document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

preprocessor = PreProcessor(
    split_length=200,
    split_overlap=0,
    split_respect_sentence_boundary=False,
    clean_empty_lines=False,
    clean_whitespace=False,
)

# The add_eval_data() method converts the given dataset in json format into Haystack document and label objects. Those objects are then indexed in their respective document and label index in the document store. The method can be used with any dataset in SQuAD format.
document_store.add_eval_data(
    filename="squad_format_thesis/dev.json",
    doc_index=doc_index,
    label_index=label_index,
    preprocessor=preprocessor,
)



In [5]:
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 608 docs ...


Updating embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/640 [00:00<?, ? Docs/s]

In [6]:
## Evaluate Retriever on its own #THIS IS THE BARE BONES
# Note that no_answer samples are omitted when evaluation is performed with this method
retriever_eval_results = retriever.eval(top_k=5, label_index=label_index, doc_index=doc_index, open_domain=True)
# Retriever Recall is the proportion of questions for which the correct document containing the answer is
# among the correct documents
print("Retriever Recall:", retriever_eval_results["recall"])
# Retriever Mean Avg Precision rewards retrievers that give relevant documents a higher rank
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 24/24 [00:01<00:00, 14.33it/s]
INFO - haystack.nodes.retriever.base -  For 18 out of 24 questions (75.00%), the answer was in the top-5 candidate passages selected by the retriever.


Retriever Recall: 0.75
Retriever Mean Avg Precision: 0.5256944444444445


In [8]:
# Initialize Retriever
from haystack.nodes import ElasticsearchRetriever, BM25Retriever

retriever = BM25Retriever(document_store=document_store)

In [9]:
## Evaluate Retriever on its own #THIS IS THE ES
# Note that no_answer samples are omitted when evaluation is performed with this method
retriever_eval_results = retriever.eval(top_k=5, label_index=label_index, doc_index=doc_index)
# Retriever Recall is the proportion of questions for which the correct document containing the answer is
# among the correct documents
print("Retriever Recall:", retriever_eval_results["recall"])
# Retriever Mean Avg Precision rewards retrievers that give relevant documents a higher rank
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|██████████████████████████████████████████| 25/25 [00:00<00:00, 125.72it/s]
INFO - haystack.nodes.retriever.base -  For 18 out of 25 questions (72.00%), the answer was in the top-5 candidate passages selected by the retriever.


Retriever Recall: 0.72
Retriever Mean Avg Precision: 0.516
