In [1]:
doc_index = "evaluation_docs"
label_index = "evaluation_labels"

In [2]:
filename="../data/nq/nq_dev_subset_v2.json"

In [3]:
from haystack.preprocessor.utils import eval_data_from_file
docs, labels = eval_data_from_file(filename=filename) # return: (List of Documents, List of Labels)

12/03/2020 12:43:53 - INFO - faiss -   Loading faiss with AVX2 support.
12/03/2020 12:43:53 - INFO - faiss -   Loading faiss.


In [7]:
type(docs), len(docs), type(docs[5])

(list, 50, haystack.schema.Document)

In [8]:
type(labels), len(labels), type(labels[4])

(list, 96, haystack.schema.Label)

In [4]:
from haystack.document_store.faiss import FAISSDocumentStore

In [6]:
from haystack.document_store.elasticsearch import ElasticsearchDocumentStore
document_ela=ElasticsearchDocumentStore(host="localhost", username="", password="", index="document",
                                            create_index=False, embedding_field="emb",
                                            embedding_dim=768, excluded_meta_data=["emb"])

In [7]:
document_ela.write_documents(docs, index=doc_index)

12/03/2020 12:44:03 - INFO - elasticsearch -   HEAD http://localhost:9200/evaluation_docs [status:200 request:0.011s]
12/03/2020 12:44:03 - INFO - elasticsearch -   POST http://localhost:9200/_bulk?refresh=wait_for [status:200 request:0.451s]


In [8]:
document_ela.write_labels(labels, index=label_index)

12/03/2020 12:44:09 - INFO - elasticsearch -   HEAD http://localhost:9200/evaluation_labels [status:200 request:0.003s]
12/03/2020 12:44:10 - INFO - elasticsearch -   POST http://localhost:9200/_bulk?refresh=wait_for [status:200 request:0.938s]


In [9]:
from haystack.retriever.sparse import ElasticsearchRetriever
retriever = ElasticsearchRetriever(document_store=document_ela)

In [10]:
## Evaluate Retriever on its own
retriever_eval_results = retriever.eval(top_k=20, label_index=label_index, doc_index=doc_index)
## Retriever Recall is the proportion of questions for which the correct document containing the answer is
## among the correct documents
print("Retriever Recall:", retriever_eval_results["recall"])
## Retriever Mean Avg Precision rewards retrievers that give relevant documents a higher rank
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])

12/03/2020 12:46:10 - INFO - elasticsearch -   POST http://localhost:9200/evaluation_labels/_search?scroll=5m&size=1000 [status:200 request:0.005s]
12/03/2020 12:46:10 - INFO - elasticsearch -   POST http://localhost:9200/_search/scroll [status:200 request:0.003s]
12/03/2020 12:46:10 - INFO - elasticsearch -   DELETE http://localhost:9200/_search/scroll [status:200 request:0.003s]
12/03/2020 12:46:10 - INFO - haystack.retriever.base -   Performing eval queries...
  0%|          | 0/54 [00:00<?, ?it/s]12/03/2020 12:46:10 - INFO - elasticsearch -   POST http://localhost:9200/evaluation_docs/_search [status:200 request:0.019s]
12/03/2020 12:46:10 - INFO - elasticsearch -   POST http://localhost:9200/evaluation_docs/_search [status:200 request:0.020s]
12/03/2020 12:46:10 - INFO - elasticsearch -   POST http://localhost:9200/evaluation_docs/_search [status:200 request:0.026s]
12/03/2020 12:46:10 - INFO - elasticsearch -   POST http://localhost:9200/evaluation_docs/_search [status:200 reques

In [11]:
document_store = FAISSDocumentStore()

In [12]:
document_store.write_documents(docs, index=doc_index)

In [14]:
document_store.write_labels(labels, index=label_index)

In [15]:
from haystack.retriever.dense import DensePassageRetriever
dpr = DensePassageRetriever(document_store=document_store,
                                  query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
                                  passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base",
                                  max_seq_len_query=64,
                                  max_seq_len_passage=256,
                                  batch_size=16,
                                  use_gpu=False,
                                  embed_title=True,
                                  use_fast_tokenizers=True)

	 We guess it's an *ENGLISH* model ... 
	 If not: Init the language model by supplying the 'language' param.
	 We guess it's an *ENGLISH* model ... 
	 If not: Init the language model by supplying the 'language' param.


In [16]:
## Evaluate Retriever on its own
dpr_eval_results = dpr.eval(top_k=20, label_index=label_index, doc_index=doc_index)

12/03/2020 12:50:31 - INFO - haystack.retriever.base -   Performing eval queries...
  0%|          | 0/54 [00:00<?, ?it/s]
Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s][A
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  5.11 Batches/s]
  2%|▏         | 1/54 [00:00<00:11,  4.54it/s]
Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s][A
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  5.94 Batches/s]
  4%|▎         | 2/54 [00:00<00:10,  4.82it/s]
Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s][A
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  5.16 Batches/s]
  6%|▌         | 3/54 [00:00<00:10,  4.82it/s]
Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s][A
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  6.42 Batches/s]
  7%|▋         | 4/54 [00:00<00:09,  5.08it/s]
Inferencing Samples:   0%|          | 0/1 [00:00<?, ? Batches/s][A
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  

In [17]:
## Retriever Recall is the proportion of questions for which the correct document containing the answer is
## among the correct documents
print("Retriever Recall:", dpr_eval_results["recall"])
## Retriever Mean Avg Precision rewards retrievers that give relevant documents a higher rank
print("Retriever Mean Avg Precision:", dpr_eval_results["map"])

Retriever Recall: 0.0
Retriever Mean Avg Precision: 0.0


In [18]:
document_store.update_embeddings(dpr)

