In [1]:
## eval retriever

In [2]:
# Names of the indices that hold the evaluation documents and labels.
# Make sure they do not collide with existing indices: both are wiped clean before data is inserted.
doc_index = "squad_docs"
label_index = "squad_labels"

In [3]:
# Connect to Elasticsearch
# (presumably the local container has to be running first, e.g.: docker start es01-test -a)
from haystack.document_stores import ElasticsearchDocumentStore

# Document store on localhost without credentials; documents go to doc_index, labels to label_index.
document_store = ElasticsearchDocumentStore(
    host="localhost",
    username="",
    password="",
    index=doc_index,
    label_index=label_index,
    embedding_field="emb",  # field that holds the document embeddings
    embedding_dim=768,  # NOTE(review): presumably the output size of the DPR encoders loaded below - confirm
    excluded_meta_data=["emb"],  # NOTE(review): presumably keeps the embedding vectors out of returned metadata - confirm
)

INFO - haystack.document_stores.base -  Numba not found, replacing njit() with no-op implementation. Enable it with 'pip install numba'.
INFO - haystack.modeling.model.optimization -  apex not found, won't use it. See https://nvidia.github.io/apex/
ERROR - root -  Failed to import 'magic' (from 'python-magic' and 'python-magic-bin' on Windows). FileTypeClassifier will not perform mimetype detection on extensionless files. Please make sure the necessary OS libraries are installed if you need this functionality.


## Model_1

In [4]:
from haystack.nodes import PreProcessor

# Wipe both evaluation indices first so nothing indexed by an earlier run is left behind.
for _index in (doc_index, label_index):
    document_store.delete_documents(index=_index)

# Passage splitter handed to add_eval_data(): split_length=200 with no overlap,
# sentence boundaries not respected, and both cleaning steps switched off.
_split_options = {
    "split_length": 200,
    "split_overlap": 0,
    "split_respect_sentence_boundary": False,
    "clean_empty_lines": False,
    "clean_whitespace": False,
}
preprocessor = PreProcessor(**_split_options)

# add_eval_data() turns a SQuAD-format json file into Haystack document and label objects and
# writes them to doc_index and label_index respectively; here it indexes the test split.
document_store.add_eval_data(
    preprocessor=preprocessor,
    filename="squad_format_thesis/test.json",
    label_index=label_index,
    doc_index=doc_index,
)



In [5]:
from haystack.nodes import DensePassageRetriever

# Directory of the fine-tuned DPR checkpoint for model_1.
save_dir = "models/model_1"

# Load the query and passage encoders from save_dir and bind the retriever to document_store.
retriever = DensePassageRetriever.load(load_dir=save_dir, document_store=document_store, use_gpu=True)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_1/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_1/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_1/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_1/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_1


In [6]:
# Compute an embedding for every document in doc_index with the retriever loaded above
# and store it in the document store.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 651 docs ...


Updating embeddings:   0%|          | 0/651 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/656 [00:00<?, ? Docs/s]

In [7]:
# Stand-alone retriever evaluation of the fine-tuned model_1 on the test split.
# Note: no_answer samples are omitted when evaluation is performed with this method.
retriever_eval_results = retriever.eval(
    top_k=5,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)
# Recall: share of questions whose answer is found in the top_k passages returned by the retriever.
# Mean Avg Precision: additionally rewards retrievers that give relevant documents a higher rank.
for _title, _metric in (("Retriever Recall:", "recall"), ("Retriever Mean Avg Precision:", "map")):
    print(_title, retriever_eval_results[_metric])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 25/25 [00:00<00:00, 60.70it/s]
INFO - haystack.nodes.retriever.base -  For 16 out of 25 questions (64.00%), the answer was in the top-5 candidate passages selected by the retriever.


Retriever Recall: 0.64
Retriever Mean Avg Precision: 0.456


### DEV SET

In [76]:
from haystack.nodes import PreProcessor

# Wipe both evaluation indices first so nothing indexed by an earlier run is left behind.
for _index in (doc_index, label_index):
    document_store.delete_documents(index=_index)

# Passage splitter handed to add_eval_data(): split_length=200 with no overlap,
# sentence boundaries not respected, and both cleaning steps switched off.
_split_options = {
    "split_length": 200,
    "split_overlap": 0,
    "split_respect_sentence_boundary": False,
    "clean_empty_lines": False,
    "clean_whitespace": False,
}
preprocessor = PreProcessor(**_split_options)

# add_eval_data() turns a SQuAD-format json file into Haystack document and label objects and
# writes them to doc_index and label_index respectively; here it indexes the dev split.
document_store.add_eval_data(
    preprocessor=preprocessor,
    filename="squad_format_thesis/dev.json",
    label_index=label_index,
    doc_index=doc_index,
)



In [77]:
from haystack.nodes import DensePassageRetriever

# Directory of the fine-tuned DPR checkpoint for model_1.
save_dir = "models/model_1"

# Load the query and passage encoders from save_dir and bind the retriever to document_store.
retriever = DensePassageRetriever.load(load_dir=save_dir, document_store=document_store, use_gpu=True)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_1/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_1/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_1/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_1/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_1


In [78]:
# Compute an embedding for every document in doc_index with the retriever loaded above
# and store it in the document store.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 608 docs ...


Updating embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

In [79]:
# Stand-alone retriever evaluation of the fine-tuned model_1 on the dev split.
# Note: no_answer samples are omitted when evaluation is performed with this method.
retriever_eval_results = retriever.eval(
    top_k=5,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)
# Recall: share of questions whose answer is found in the top_k passages returned by the retriever.
# Mean Avg Precision: additionally rewards retrievers that give relevant documents a higher rank.
for _title, _metric in (("Retriever Recall:", "recall"), ("Retriever Mean Avg Precision:", "map")):
    print(_title, retriever_eval_results[_metric])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 24/24 [00:00<00:00, 93.53it/s]
INFO - haystack.nodes.retriever.base -  For 17 out of 24 questions (70.83%), the answer was in the top-5 candidate passages selected by the retriever.


Retriever Recall: 0.7083333333333334
Retriever Mean Avg Precision: 0.39375


## Model_2

In [4]:
from haystack.nodes import PreProcessor

# Wipe both evaluation indices first so nothing indexed by an earlier run is left behind.
for _index in (doc_index, label_index):
    document_store.delete_documents(index=_index)

# Passage splitter handed to add_eval_data(): split_length=200 with no overlap,
# sentence boundaries not respected, and both cleaning steps switched off.
_split_options = {
    "split_length": 200,
    "split_overlap": 0,
    "split_respect_sentence_boundary": False,
    "clean_empty_lines": False,
    "clean_whitespace": False,
}
preprocessor = PreProcessor(**_split_options)

# add_eval_data() turns a SQuAD-format json file into Haystack document and label objects and
# writes them to doc_index and label_index respectively; here it indexes the test split.
document_store.add_eval_data(
    preprocessor=preprocessor,
    filename="squad_format_thesis/test.json",
    label_index=label_index,
    doc_index=doc_index,
)



In [5]:
from haystack.nodes import DensePassageRetriever

# Directory of the fine-tuned DPR checkpoint for model_2.
save_dir = "models/model_2"

# Load the query and passage encoders from save_dir and bind the retriever to document_store.
# NOTE(review): this is the only load visible in this notebook that passes use_gpu=False (the log
# below confirms a CPU run); every other run uses use_gpu=True - confirm this was intentional.
retriever = DensePassageRetriever.load(load_dir=save_dir, document_store=document_store, use_gpu=False)

INFO - haystack.modeling.utils -  Using devices: CPU
INFO - haystack.modeling.utils -  Number of GPUs: 0
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_2/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_2/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_2/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_2/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_2


In [6]:
# Compute an embedding for every document in doc_index with the retriever loaded above
# and store it in the document store.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 651 docs ...


Updating embeddings:   0%|          | 0/651 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/656 [00:00<?, ? Docs/s]

In [7]:
# Stand-alone retriever evaluation of the fine-tuned model_2 on the test split.
# Note: no_answer samples are omitted when evaluation is performed with this method.
retriever_eval_results = retriever.eval(
    top_k=5,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)
# Recall: share of questions whose answer is found in the top_k passages returned by the retriever.
# Mean Avg Precision: additionally rewards retrievers that give relevant documents a higher rank.
for _title, _metric in (("Retriever Recall:", "recall"), ("Retriever Mean Avg Precision:", "map")):
    print(_title, retriever_eval_results[_metric])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|█████████████████████████████████████████████████████████████████████| 25/25 [00:01<00:00, 16.07it/s]
INFO - haystack.nodes.retriever.base -  For 17 out of 25 questions (68.00%), the answer was in the top-5 candidate passages selected by the retriever.


Retriever Recall: 0.68
Retriever Mean Avg Precision: 0.49933333333333335


### DEV SET

In [72]:
from haystack.nodes import PreProcessor

# Wipe both evaluation indices first so nothing indexed by an earlier run is left behind.
for _index in (doc_index, label_index):
    document_store.delete_documents(index=_index)

# Passage splitter handed to add_eval_data(): split_length=200 with no overlap,
# sentence boundaries not respected, and both cleaning steps switched off.
_split_options = {
    "split_length": 200,
    "split_overlap": 0,
    "split_respect_sentence_boundary": False,
    "clean_empty_lines": False,
    "clean_whitespace": False,
}
preprocessor = PreProcessor(**_split_options)

# add_eval_data() turns a SQuAD-format json file into Haystack document and label objects and
# writes them to doc_index and label_index respectively; here it indexes the dev split.
document_store.add_eval_data(
    preprocessor=preprocessor,
    filename="squad_format_thesis/dev.json",
    label_index=label_index,
    doc_index=doc_index,
)



In [73]:
from haystack.nodes import DensePassageRetriever

# Directory of the fine-tuned DPR checkpoint for model_2.
save_dir = "models/model_2"

# Load the query and passage encoders from save_dir and bind the retriever to document_store.
retriever = DensePassageRetriever.load(load_dir=save_dir, document_store=document_store, use_gpu=True)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_2/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_2/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_2/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_2/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_2


In [74]:
# Compute an embedding for every document in doc_index with the retriever loaded above
# and store it in the document store.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 608 docs ...


Updating embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

In [75]:
# Stand-alone retriever evaluation of the fine-tuned model_2 on the dev split.
# Note: no_answer samples are omitted when evaluation is performed with this method.
retriever_eval_results = retriever.eval(
    top_k=5,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)
# Recall: share of questions whose answer is found in the top_k passages returned by the retriever.
# Mean Avg Precision: additionally rewards retrievers that give relevant documents a higher rank.
for _title, _metric in (("Retriever Recall:", "recall"), ("Retriever Mean Avg Precision:", "map")):
    print(_title, retriever_eval_results[_metric])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|██████████████████████████████████████████| 24/24 [00:00<00:00, 109.72it/s]
INFO - haystack.nodes.retriever.base -  For 18 out of 24 questions (75.00%), the answer was in the top-5 candidate passages selected by the retriever.


Retriever Recall: 0.75
Retriever Mean Avg Precision: 0.4993055555555556


## Model_3

In [8]:
from haystack.nodes import PreProcessor

# Wipe both evaluation indices first so nothing indexed by an earlier run is left behind.
for _index in (doc_index, label_index):
    document_store.delete_documents(index=_index)

# Passage splitter handed to add_eval_data(): split_length=200 with no overlap,
# sentence boundaries not respected, and both cleaning steps switched off.
_split_options = {
    "split_length": 200,
    "split_overlap": 0,
    "split_respect_sentence_boundary": False,
    "clean_empty_lines": False,
    "clean_whitespace": False,
}
preprocessor = PreProcessor(**_split_options)

# add_eval_data() turns a SQuAD-format json file into Haystack document and label objects and
# writes them to doc_index and label_index respectively; here it indexes the test split.
document_store.add_eval_data(
    preprocessor=preprocessor,
    filename="squad_format_thesis/test.json",
    label_index=label_index,
    doc_index=doc_index,
)



In [9]:
from haystack.nodes import DensePassageRetriever

# Directory of the fine-tuned DPR checkpoint for model_3.
save_dir = "models/model_3"

# Load the query and passage encoders from save_dir and bind the retriever to document_store.
retriever = DensePassageRetriever.load(load_dir=save_dir, document_store=document_store, use_gpu=True)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_3/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_3/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_3/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_3/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_3


In [10]:
# Compute an embedding for every document in doc_index with the retriever loaded above
# and store it in the document store.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 651 docs ...


Updating embeddings:   0%|          | 0/651 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/656 [00:00<?, ? Docs/s]

In [11]:
# Stand-alone retriever evaluation of the fine-tuned model_3 on the test split.
# Note: no_answer samples are omitted when evaluation is performed with this method.
retriever_eval_results = retriever.eval(
    top_k=5,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)
# Recall: share of questions whose answer is found in the top_k passages returned by the retriever.
# Mean Avg Precision: additionally rewards retrievers that give relevant documents a higher rank.
for _title, _metric in (("Retriever Recall:", "recall"), ("Retriever Mean Avg Precision:", "map")):
    print(_title, retriever_eval_results[_metric])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|██████████████████████████████████████████| 25/25 [00:00<00:00, 103.41it/s]
INFO - haystack.nodes.retriever.base -  For 15 out of 25 questions (60.00%), the answer was in the top-5 candidate passages selected by the retriever.


Retriever Recall: 0.6
Retriever Mean Avg Precision: 0.456


### DEV SET

In [65]:
from haystack.nodes import PreProcessor

# Wipe both evaluation indices first so nothing indexed by an earlier run is left behind.
for _index in (doc_index, label_index):
    document_store.delete_documents(index=_index)

# Passage splitter handed to add_eval_data(): split_length=200 with no overlap,
# sentence boundaries not respected, and both cleaning steps switched off.
_split_options = {
    "split_length": 200,
    "split_overlap": 0,
    "split_respect_sentence_boundary": False,
    "clean_empty_lines": False,
    "clean_whitespace": False,
}
preprocessor = PreProcessor(**_split_options)

# add_eval_data() turns a SQuAD-format json file into Haystack document and label objects and
# writes them to doc_index and label_index respectively; here it indexes the dev split.
document_store.add_eval_data(
    preprocessor=preprocessor,
    filename="squad_format_thesis/dev.json",
    label_index=label_index,
    doc_index=doc_index,
)



In [66]:
from haystack.nodes import DensePassageRetriever

# Directory of the fine-tuned DPR checkpoint for model_3.
save_dir = "models/model_3"

# Load the query and passage encoders from save_dir and bind the retriever to document_store.
retriever = DensePassageRetriever.load(load_dir=save_dir, document_store=document_store, use_gpu=True)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_3/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_3/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_3/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_3/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_3


In [67]:
# Compute an embedding for every document in doc_index with the retriever loaded above
# and store it in the document store.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 608 docs ...


Updating embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

In [68]:
# Stand-alone retriever evaluation of the fine-tuned model_3 on the dev split.
# Note: no_answer samples are omitted when evaluation is performed with this method.
retriever_eval_results = retriever.eval(
    top_k=5,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)
# Recall: share of questions whose answer is found in the top_k passages returned by the retriever.
# Mean Avg Precision: additionally rewards retrievers that give relevant documents a higher rank.
for _title, _metric in (("Retriever Recall:", "recall"), ("Retriever Mean Avg Precision:", "map")):
    print(_title, retriever_eval_results[_metric])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|██████████████████████████████████████████| 24/24 [00:00<00:00, 101.73it/s]
INFO - haystack.nodes.retriever.base -  For 15 out of 24 questions (62.50%), the answer was in the top-5 candidate passages selected by the retriever.


Retriever Recall: 0.625
Retriever Mean Avg Precision: 0.3541666666666667


## Model_4

In [4]:
from haystack.nodes import PreProcessor

# Wipe both evaluation indices first so nothing indexed by an earlier run is left behind.
for _index in (doc_index, label_index):
    document_store.delete_documents(index=_index)

# Passage splitter handed to add_eval_data(): split_length=200 with no overlap,
# sentence boundaries not respected, and both cleaning steps switched off.
_split_options = {
    "split_length": 200,
    "split_overlap": 0,
    "split_respect_sentence_boundary": False,
    "clean_empty_lines": False,
    "clean_whitespace": False,
}
preprocessor = PreProcessor(**_split_options)

# add_eval_data() turns a SQuAD-format json file into Haystack document and label objects and
# writes them to doc_index and label_index respectively; here it indexes the test split.
document_store.add_eval_data(
    preprocessor=preprocessor,
    filename="squad_format_thesis/test.json",
    label_index=label_index,
    doc_index=doc_index,
)



In [5]:
from haystack.nodes import DensePassageRetriever

# Directory of the fine-tuned DPR checkpoint for model_4.
save_dir = "models/model_4"

# Load the query and passage encoders from save_dir and bind the retriever to document_store.
retriever = DensePassageRetriever.load(load_dir=save_dir, document_store=document_store, use_gpu=True)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_4/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_4/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_4/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_4/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_4


In [6]:
# Compute an embedding for every document in doc_index with the retriever loaded above
# and store it in the document store.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 651 docs ...


Updating embeddings:   0%|          | 0/651 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/656 [00:00<?, ? Docs/s]

In [7]:
# Stand-alone retriever evaluation of the fine-tuned model_4 on the test split.
# Note: no_answer samples are omitted when evaluation is performed with this method.
retriever_eval_results = retriever.eval(
    top_k=5,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)
# Recall: share of questions whose answer is found in the top_k passages returned by the retriever.
# Mean Avg Precision: additionally rewards retrievers that give relevant documents a higher rank.
for _title, _metric in (("Retriever Recall:", "recall"), ("Retriever Mean Avg Precision:", "map")):
    print(_title, retriever_eval_results[_metric])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 25/25 [00:00<00:00, 48.73it/s]
INFO - haystack.nodes.retriever.base -  For 13 out of 25 questions (52.00%), the answer was in the top-5 candidate passages selected by the retriever.


Retriever Recall: 0.52
Retriever Mean Avg Precision: 0.36333333333333334


### DEV SET

In [61]:
from haystack.nodes import PreProcessor

# Wipe both evaluation indices first so nothing indexed by an earlier run is left behind.
for _index in (doc_index, label_index):
    document_store.delete_documents(index=_index)

# Passage splitter handed to add_eval_data(): split_length=200 with no overlap,
# sentence boundaries not respected, and both cleaning steps switched off.
_split_options = {
    "split_length": 200,
    "split_overlap": 0,
    "split_respect_sentence_boundary": False,
    "clean_empty_lines": False,
    "clean_whitespace": False,
}
preprocessor = PreProcessor(**_split_options)

# add_eval_data() turns a SQuAD-format json file into Haystack document and label objects and
# writes them to doc_index and label_index respectively; here it indexes the dev split.
document_store.add_eval_data(
    preprocessor=preprocessor,
    filename="squad_format_thesis/dev.json",
    label_index=label_index,
    doc_index=doc_index,
)



In [62]:
from haystack.nodes import DensePassageRetriever

# Directory of the fine-tuned DPR checkpoint for model_4.
save_dir = "models/model_4"

# Load the query and passage encoders from save_dir and bind the retriever to document_store.
retriever = DensePassageRetriever.load(load_dir=save_dir, document_store=document_store, use_gpu=True)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_4/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_4/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_4/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_4/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_4


In [63]:
# Compute an embedding for every document in doc_index with the retriever loaded above
# and store it in the document store.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 608 docs ...


Updating embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

In [64]:
# Stand-alone retriever evaluation of the fine-tuned model_4 on the dev split.
# Note: no_answer samples are omitted when evaluation is performed with this method.
retriever_eval_results = retriever.eval(
    top_k=5,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)
# Recall: share of questions whose answer is found in the top_k passages returned by the retriever.
# Mean Avg Precision: additionally rewards retrievers that give relevant documents a higher rank.
for _title, _metric in (("Retriever Recall:", "recall"), ("Retriever Mean Avg Precision:", "map")):
    print(_title, retriever_eval_results[_metric])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|██████████████████████████████████████████| 24/24 [00:00<00:00, 105.35it/s]
INFO - haystack.nodes.retriever.base -  For 13 out of 24 questions (54.17%), the answer was in the top-5 candidate passages selected by the retriever.


Retriever Recall: 0.5416666666666666
Retriever Mean Avg Precision: 0.25625000000000003


## Model_5

In [4]:
from haystack.nodes import PreProcessor

# Wipe both evaluation indices first so nothing indexed by an earlier run is left behind.
for _index in (doc_index, label_index):
    document_store.delete_documents(index=_index)

# Passage splitter handed to add_eval_data(): split_length=200 with no overlap,
# sentence boundaries not respected, and both cleaning steps switched off.
_split_options = {
    "split_length": 200,
    "split_overlap": 0,
    "split_respect_sentence_boundary": False,
    "clean_empty_lines": False,
    "clean_whitespace": False,
}
preprocessor = PreProcessor(**_split_options)

# add_eval_data() turns a SQuAD-format json file into Haystack document and label objects and
# writes them to doc_index and label_index respectively; here it indexes the test split.
document_store.add_eval_data(
    preprocessor=preprocessor,
    filename="squad_format_thesis/test.json",
    label_index=label_index,
    doc_index=doc_index,
)



In [5]:
from haystack.nodes import DensePassageRetriever

# Directory of the fine-tuned DPR checkpoint for model_5.
save_dir = "models/model_5"

# Load the query and passage encoders from save_dir and bind the retriever to document_store.
retriever = DensePassageRetriever.load(load_dir=save_dir, document_store=document_store, use_gpu=True)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_5/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_5/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_5/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_5/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_5


In [6]:
# Compute an embedding for every document in doc_index with the retriever loaded above
# and store it in the document store.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 651 docs ...


Updating embeddings:   0%|          | 0/651 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/656 [00:00<?, ? Docs/s]

In [7]:
# Stand-alone retriever evaluation of the fine-tuned model_5 on the test split.
# Note: no_answer samples are omitted when evaluation is performed with this method.
retriever_eval_results = retriever.eval(
    top_k=5,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)
# Recall: share of questions whose answer is found in the top_k passages returned by the retriever.
# Mean Avg Precision: additionally rewards retrievers that give relevant documents a higher rank.
for _title, _metric in (("Retriever Recall:", "recall"), ("Retriever Mean Avg Precision:", "map")):
    print(_title, retriever_eval_results[_metric])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 25/25 [00:00<00:00, 68.85it/s]
INFO - haystack.nodes.retriever.base -  For 17 out of 25 questions (68.00%), the answer was in the top-5 candidate passages selected by the retriever.


Retriever Recall: 0.68
Retriever Mean Avg Precision: 0.5076666666666667


### DEV SET

In [57]:
from haystack.nodes import PreProcessor

# Wipe both evaluation indices first so nothing indexed by an earlier run is left behind.
for _index in (doc_index, label_index):
    document_store.delete_documents(index=_index)

# Passage splitter handed to add_eval_data(): split_length=200 with no overlap,
# sentence boundaries not respected, and both cleaning steps switched off.
_split_options = {
    "split_length": 200,
    "split_overlap": 0,
    "split_respect_sentence_boundary": False,
    "clean_empty_lines": False,
    "clean_whitespace": False,
}
preprocessor = PreProcessor(**_split_options)

# add_eval_data() turns a SQuAD-format json file into Haystack document and label objects and
# writes them to doc_index and label_index respectively; here it indexes the dev split.
document_store.add_eval_data(
    preprocessor=preprocessor,
    filename="squad_format_thesis/dev.json",
    label_index=label_index,
    doc_index=doc_index,
)



In [58]:
from haystack.nodes import DensePassageRetriever

# Directory of the fine-tuned DPR checkpoint for model_5.
save_dir = "models/model_5"

# Load the query and passage encoders from save_dir and bind the retriever to document_store.
retriever = DensePassageRetriever.load(load_dir=save_dir, document_store=document_store, use_gpu=True)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_5/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_5/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_5/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_5/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_5


In [59]:
# Compute an embedding for every document in doc_index with the retriever loaded above
# and store it in the document store.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 608 docs ...


Updating embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

In [60]:
# Stand-alone retriever evaluation of the fine-tuned model_5 on the dev split.
# Note: no_answer samples are omitted when evaluation is performed with this method.
retriever_eval_results = retriever.eval(
    top_k=5,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)
# Recall: share of questions whose answer is found in the top_k passages returned by the retriever.
# Mean Avg Precision: additionally rewards retrievers that give relevant documents a higher rank.
for _title, _metric in (("Retriever Recall:", "recall"), ("Retriever Mean Avg Precision:", "map")):
    print(_title, retriever_eval_results[_metric])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 24/24 [00:00<00:00, 95.77it/s]
INFO - haystack.nodes.retriever.base -  For 18 out of 24 questions (75.00%), the answer was in the top-5 candidate passages selected by the retriever.


Retriever Recall: 0.75
Retriever Mean Avg Precision: 0.5256944444444445


## Model_6

In [8]:
from haystack.nodes import PreProcessor

# Wipe both evaluation indices first so nothing indexed by an earlier run is left behind.
for _index in (doc_index, label_index):
    document_store.delete_documents(index=_index)

# Passage splitter handed to add_eval_data(): split_length=200 with no overlap,
# sentence boundaries not respected, and both cleaning steps switched off.
_split_options = {
    "split_length": 200,
    "split_overlap": 0,
    "split_respect_sentence_boundary": False,
    "clean_empty_lines": False,
    "clean_whitespace": False,
}
preprocessor = PreProcessor(**_split_options)

# add_eval_data() turns a SQuAD-format json file into Haystack document and label objects and
# writes them to doc_index and label_index respectively; here it indexes the test split.
document_store.add_eval_data(
    preprocessor=preprocessor,
    filename="squad_format_thesis/test.json",
    label_index=label_index,
    doc_index=doc_index,
)



In [9]:
from haystack.nodes import DensePassageRetriever

# Directory of the saved model_6 DPR checkpoint (query_encoder/ and passage_encoder/
# sub-folders, per the load log printed by this cell).
save_dir = "models/model_6"

# Restore both encoders from disk and bind the retriever to the evaluation document store.
retriever = DensePassageRetriever.load(load_dir=save_dir, document_store=document_store, use_gpu=True)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_6/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_6/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_6/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_6/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_6


In [10]:
# Re-embed every document in doc_index with the currently loaded retriever (model_6).
# Run after (re)indexing and before eval so the stored vectors belong to the model under test.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 651 docs ...


Updating embeddings:   0%|          | 0/651 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/656 [00:00<?, ? Docs/s]

In [11]:
## Evaluate Retriever on its own # THIS IS THE FINE-TUNED model_6 (loaded from models/model_6), TEST split
# Note that no_answer samples are omitted when evaluation is performed with this method
# open_domain=True: per the log line this cell prints, a question counts as a hit when the
# answer is found in the top_k (=5) passages selected by the retriever.
retriever_eval_results = retriever.eval(top_k=5, label_index=label_index, doc_index=doc_index, open_domain=True)
# Retriever Recall is the proportion of questions for which a document containing the answer is
# among the top_k retrieved documents
print("Retriever Recall:", retriever_eval_results["recall"])
# Retriever Mean Avg Precision rewards retrievers that give relevant documents a higher rank
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 25/25 [00:00<00:00, 99.27it/s]
INFO - haystack.nodes.retriever.base -  For 12 out of 25 questions (48.00%), the answer was in the top-5 candidate passages selected by the retriever.


Retriever Recall: 0.48
Retriever Mean Avg Precision: 0.38333333333333336


### DEV SET

In [49]:
from haystack.nodes import PreProcessor

# Wipe both evaluation indices so the TEST-split documents/labels indexed earlier
# are not part of the DEV-split evaluation.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

# Same splitter settings as for the test split: chunks of length 200, no overlap,
# sentence boundaries ignored, no empty-line/whitespace cleaning.
# NOTE(review): the unit of split_length depends on PreProcessor's split_by default (not set here) — confirm.
preprocessor = PreProcessor(
    split_length=200,
    split_overlap=0,
    split_respect_sentence_boundary=False,
    clean_empty_lines=False,
    clean_whitespace=False,
)

# Index the DEV split (dev.json) for the model_6 evaluation.
# add_eval_data() converts the given SQuAD-format JSON dataset into Haystack document and
# label objects and indexes them in doc_index / label_index of the document store.
document_store.add_eval_data(
    filename="squad_format_thesis/dev.json",
    doc_index=doc_index,
    label_index=label_index,
    preprocessor=preprocessor,
)



In [50]:
from haystack.nodes import DensePassageRetriever

# Directory of the saved model_6 DPR checkpoint (query_encoder/ and passage_encoder/
# sub-folders, per the load log printed by this cell).
save_dir = "models/model_6"

# Reload model_6 for the DEV-split run and bind it to the evaluation document store.
retriever = DensePassageRetriever.load(load_dir=save_dir, document_store=document_store, use_gpu=True)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_6/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_6/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_6/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_6/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_6


In [51]:
# Re-embed the freshly indexed DEV documents with the currently loaded retriever (model_6).
# Run after (re)indexing and before eval so the stored vectors belong to the model under test.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 608 docs ...


Updating embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

In [52]:
## Evaluate Retriever on its own # THIS IS THE FINE-TUNED model_6 (loaded from models/model_6), DEV split
# Note that no_answer samples are omitted when evaluation is performed with this method
# open_domain=True: per the log line this cell prints, a question counts as a hit when the
# answer is found in the top_k (=5) passages selected by the retriever.
retriever_eval_results = retriever.eval(top_k=5, label_index=label_index, doc_index=doc_index, open_domain=True)
# Retriever Recall is the proportion of questions for which a document containing the answer is
# among the top_k retrieved documents
print("Retriever Recall:", retriever_eval_results["recall"])
# Retriever Mean Avg Precision rewards retrievers that give relevant documents a higher rank
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|██████████████████████████████████████████| 24/24 [00:00<00:00, 105.09it/s]
INFO - haystack.nodes.retriever.base -  For 13 out of 24 questions (54.17%), the answer was in the top-5 candidate passages selected by the retriever.


Retriever Recall: 0.5416666666666666
Retriever Mean Avg Precision: 0.2965277777777778


## Model_7

In [4]:
from haystack.nodes import PreProcessor

# Wipe both evaluation indices so documents/labels left over from a previous run
# (another model or another split) are not part of this evaluation.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

# Splitter applied to the evaluation documents while they are indexed: chunks of length 200,
# no overlap, sentence boundaries ignored, no empty-line/whitespace cleaning.
# NOTE(review): the unit of split_length depends on PreProcessor's split_by default (not set here) — confirm.
preprocessor = PreProcessor(
    split_length=200,
    split_overlap=0,
    split_respect_sentence_boundary=False,
    clean_empty_lines=False,
    clean_whitespace=False,
)

# Index the TEST split (test.json) for the model_7 evaluation.
# add_eval_data() converts the given SQuAD-format JSON dataset into Haystack document and
# label objects and indexes them in doc_index / label_index of the document store.
document_store.add_eval_data(
    filename="squad_format_thesis/test.json",
    doc_index=doc_index,
    label_index=label_index,
    preprocessor=preprocessor,
)



In [5]:
from haystack.nodes import DensePassageRetriever

# Directory of the saved model_7 DPR checkpoint (query_encoder/ and passage_encoder/
# sub-folders, per the load log printed by this cell).
save_dir = "models/model_7"

# Restore both encoders from disk and bind the retriever to the evaluation document store.
retriever = DensePassageRetriever.load(load_dir=save_dir, document_store=document_store, use_gpu=True)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_7/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_7/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_7/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_7/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_7


In [6]:
# Re-embed every document in doc_index with the currently loaded retriever (model_7).
# Run after (re)indexing and before eval so the stored vectors belong to the model under test.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 651 docs ...


Updating embeddings:   0%|          | 0/651 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/656 [00:00<?, ? Docs/s]

In [7]:
## Evaluate Retriever on its own # THIS IS THE FINE-TUNED model_7 (loaded from models/model_7), TEST split
# Note that no_answer samples are omitted when evaluation is performed with this method
# open_domain=True: per the log line this cell prints, a question counts as a hit when the
# answer is found in the top_k (=5) passages selected by the retriever.
retriever_eval_results = retriever.eval(top_k=5, label_index=label_index, doc_index=doc_index, open_domain=True)
# Retriever Recall is the proportion of questions for which a document containing the answer is
# among the top_k retrieved documents
print("Retriever Recall:", retriever_eval_results["recall"])
# Retriever Mean Avg Precision rewards retrievers that give relevant documents a higher rank
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 25/25 [00:00<00:00, 69.00it/s]
INFO - haystack.nodes.retriever.base -  For 17 out of 25 questions (68.00%), the answer was in the top-5 candidate passages selected by the retriever.


Retriever Recall: 0.68
Retriever Mean Avg Precision: 0.5076666666666667


### DEV SET

In [45]:
from haystack.nodes import PreProcessor

# Wipe both evaluation indices so the TEST-split documents/labels indexed earlier
# are not part of the DEV-split evaluation.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

# Same splitter settings as for the test split: chunks of length 200, no overlap,
# sentence boundaries ignored, no empty-line/whitespace cleaning.
# NOTE(review): the unit of split_length depends on PreProcessor's split_by default (not set here) — confirm.
preprocessor = PreProcessor(
    split_length=200,
    split_overlap=0,
    split_respect_sentence_boundary=False,
    clean_empty_lines=False,
    clean_whitespace=False,
)

# Index the DEV split (dev.json) for the model_7 evaluation.
# add_eval_data() converts the given SQuAD-format JSON dataset into Haystack document and
# label objects and indexes them in doc_index / label_index of the document store.
document_store.add_eval_data(
    filename="squad_format_thesis/dev.json",
    doc_index=doc_index,
    label_index=label_index,
    preprocessor=preprocessor,
)



In [46]:
from haystack.nodes import DensePassageRetriever

# Directory of the saved model_7 DPR checkpoint (query_encoder/ and passage_encoder/
# sub-folders, per the load log printed by this cell).
save_dir = "models/model_7"

# Reload model_7 for the DEV-split run and bind it to the evaluation document store.
retriever = DensePassageRetriever.load(load_dir=save_dir, document_store=document_store, use_gpu=True)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_7/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_7/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_7/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_7/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_7


In [47]:
# Re-embed the freshly indexed DEV documents with the currently loaded retriever (model_7).
# Run after (re)indexing and before eval so the stored vectors belong to the model under test.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 608 docs ...


Updating embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

In [48]:
## Evaluate Retriever on its own # THIS IS THE FINE-TUNED model_7 (loaded from models/model_7), DEV split
# Note that no_answer samples are omitted when evaluation is performed with this method
# open_domain=True: per the log line this cell prints, a question counts as a hit when the
# answer is found in the top_k (=5) passages selected by the retriever.
retriever_eval_results = retriever.eval(top_k=5, label_index=label_index, doc_index=doc_index, open_domain=True)
# Retriever Recall is the proportion of questions for which a document containing the answer is
# among the top_k retrieved documents
print("Retriever Recall:", retriever_eval_results["recall"])
# Retriever Mean Avg Precision rewards retrievers that give relevant documents a higher rank
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|██████████████████████████████████████████| 24/24 [00:00<00:00, 105.80it/s]
INFO - haystack.nodes.retriever.base -  For 18 out of 24 questions (75.00%), the answer was in the top-5 candidate passages selected by the retriever.


Retriever Recall: 0.75
Retriever Mean Avg Precision: 0.5027777777777778


## Model_8

In [4]:
from haystack.nodes import PreProcessor

# Wipe both evaluation indices so documents/labels left over from a previous run
# (another model or another split) are not part of this evaluation.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

# Splitter applied to the evaluation documents while they are indexed: chunks of length 200,
# no overlap, sentence boundaries ignored, no empty-line/whitespace cleaning.
# NOTE(review): the unit of split_length depends on PreProcessor's split_by default (not set here) — confirm.
preprocessor = PreProcessor(
    split_length=200,
    split_overlap=0,
    split_respect_sentence_boundary=False,
    clean_empty_lines=False,
    clean_whitespace=False,
)

# Index the TEST split (test.json) for the model_8 evaluation.
# add_eval_data() converts the given SQuAD-format JSON dataset into Haystack document and
# label objects and indexes them in doc_index / label_index of the document store.
document_store.add_eval_data(
    filename="squad_format_thesis/test.json",
    doc_index=doc_index,
    label_index=label_index,
    preprocessor=preprocessor,
)



In [5]:
from haystack.nodes import DensePassageRetriever

# Directory of the saved model_8 DPR checkpoint (query_encoder/ and passage_encoder/
# sub-folders, per the load log printed by this cell).
save_dir = "models/model_8"

# Restore both encoders from disk and bind the retriever to the evaluation document store.
retriever = DensePassageRetriever.load(load_dir=save_dir, document_store=document_store, use_gpu=True)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_8/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_8/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_8/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_8/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_8


In [6]:
# Re-embed every document in doc_index with the currently loaded retriever (model_8).
# Run after (re)indexing and before eval so the stored vectors belong to the model under test.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 651 docs ...


Updating embeddings:   0%|          | 0/651 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/656 [00:00<?, ? Docs/s]

In [7]:
## Evaluate Retriever on its own # THIS IS THE FINE-TUNED model_8 (loaded from models/model_8), TEST split
# Note that no_answer samples are omitted when evaluation is performed with this method
# open_domain=True: per the log line this cell prints, a question counts as a hit when the
# answer is found in the top_k (=5) passages selected by the retriever.
retriever_eval_results = retriever.eval(top_k=5, label_index=label_index, doc_index=doc_index, open_domain=True)
# Retriever Recall is the proportion of questions for which a document containing the answer is
# among the top_k retrieved documents
print("Retriever Recall:", retriever_eval_results["recall"])
# Retriever Mean Avg Precision rewards retrievers that give relevant documents a higher rank
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 25/25 [00:00<00:00, 49.33it/s]
INFO - haystack.nodes.retriever.base -  For 15 out of 25 questions (60.00%), the answer was in the top-5 candidate passages selected by the retriever.


Retriever Recall: 0.6
Retriever Mean Avg Precision: 0.4866666666666667


### DEV SET

In [41]:
from haystack.nodes import PreProcessor

# Wipe both evaluation indices so the TEST-split documents/labels indexed earlier
# are not part of the DEV-split evaluation.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

# Same splitter settings as for the test split: chunks of length 200, no overlap,
# sentence boundaries ignored, no empty-line/whitespace cleaning.
# NOTE(review): the unit of split_length depends on PreProcessor's split_by default (not set here) — confirm.
preprocessor = PreProcessor(
    split_length=200,
    split_overlap=0,
    split_respect_sentence_boundary=False,
    clean_empty_lines=False,
    clean_whitespace=False,
)

# Index the DEV split (dev.json) for the model_8 evaluation.
# add_eval_data() converts the given SQuAD-format JSON dataset into Haystack document and
# label objects and indexes them in doc_index / label_index of the document store.
document_store.add_eval_data(
    filename="squad_format_thesis/dev.json",
    doc_index=doc_index,
    label_index=label_index,
    preprocessor=preprocessor,
)



In [42]:
from haystack.nodes import DensePassageRetriever

# Directory of the saved model_8 DPR checkpoint (query_encoder/ and passage_encoder/
# sub-folders, per the load log printed by this cell).
save_dir = "models/model_8"

# Reload model_8 for the DEV-split run and bind it to the evaluation document store.
retriever = DensePassageRetriever.load(load_dir=save_dir, document_store=document_store, use_gpu=True)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_8/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_8/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_8/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_8/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_8


In [43]:
# Re-embed the freshly indexed DEV documents with the currently loaded retriever (model_8).
# Run after (re)indexing and before eval so the stored vectors belong to the model under test.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 608 docs ...


Updating embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

In [44]:
## Evaluate Retriever on its own # THIS IS THE FINE-TUNED model_8 (loaded from models/model_8), DEV split
# Note that no_answer samples are omitted when evaluation is performed with this method
# open_domain=True: per the log line this cell prints, a question counts as a hit when the
# answer is found in the top_k (=5) passages selected by the retriever.
retriever_eval_results = retriever.eval(top_k=5, label_index=label_index, doc_index=doc_index, open_domain=True)
# Retriever Recall is the proportion of questions for which a document containing the answer is
# among the top_k retrieved documents
print("Retriever Recall:", retriever_eval_results["recall"])
# Retriever Mean Avg Precision rewards retrievers that give relevant documents a higher rank
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|██████████████████████████████████████████| 24/24 [00:00<00:00, 109.92it/s]
INFO - haystack.nodes.retriever.base -  For 15 out of 24 questions (62.50%), the answer was in the top-5 candidate passages selected by the retriever.


Retriever Recall: 0.625
Retriever Mean Avg Precision: 0.36944444444444446


## Model_9

In [8]:
from haystack.nodes import PreProcessor

# Wipe both evaluation indices so documents/labels left over from a previous run
# (another model or another split) are not part of this evaluation.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

# Splitter applied to the evaluation documents while they are indexed: chunks of length 200,
# no overlap, sentence boundaries ignored, no empty-line/whitespace cleaning.
# NOTE(review): the unit of split_length depends on PreProcessor's split_by default (not set here) — confirm.
preprocessor = PreProcessor(
    split_length=200,
    split_overlap=0,
    split_respect_sentence_boundary=False,
    clean_empty_lines=False,
    clean_whitespace=False,
)

# Index the TEST split (test.json) for the model_9 evaluation.
# add_eval_data() converts the given SQuAD-format JSON dataset into Haystack document and
# label objects and indexes them in doc_index / label_index of the document store.
document_store.add_eval_data(
    filename="squad_format_thesis/test.json",
    doc_index=doc_index,
    label_index=label_index,
    preprocessor=preprocessor,
)



In [9]:
from haystack.nodes import DensePassageRetriever

# Directory of the saved model_9 DPR checkpoint (query_encoder/ and passage_encoder/
# sub-folders, per the load log printed by this cell).
save_dir = "models/model_9"

# Restore both encoders from disk and bind the retriever to the evaluation document store.
retriever = DensePassageRetriever.load(load_dir=save_dir, document_store=document_store, use_gpu=True)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_9/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_9/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_9/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_9/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_9


In [10]:
# Re-embed every document in doc_index with the currently loaded retriever (model_9).
# Run after (re)indexing and before eval so the stored vectors belong to the model under test.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 651 docs ...


Updating embeddings:   0%|          | 0/651 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/656 [00:00<?, ? Docs/s]

In [11]:
## Evaluate Retriever on its own # THIS IS THE FINE-TUNED model_9 (loaded from models/model_9), TEST split
# Note that no_answer samples are omitted when evaluation is performed with this method
# open_domain=True: per the log line this cell prints, a question counts as a hit when the
# answer is found in the top_k (=5) passages selected by the retriever.
retriever_eval_results = retriever.eval(top_k=5, label_index=label_index, doc_index=doc_index, open_domain=True)
# Retriever Recall is the proportion of questions for which a document containing the answer is
# among the top_k retrieved documents
print("Retriever Recall:", retriever_eval_results["recall"])
# Retriever Mean Avg Precision rewards retrievers that give relevant documents a higher rank
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|██████████████████████████████████████████| 25/25 [00:00<00:00, 102.15it/s]
INFO - haystack.nodes.retriever.base -  For 17 out of 25 questions (68.00%), the answer was in the top-5 candidate passages selected by the retriever.


Retriever Recall: 0.68
Retriever Mean Avg Precision: 0.5076666666666667


### DEV SET

In [37]:
from haystack.nodes import PreProcessor

# Wipe both evaluation indices so the TEST-split documents/labels indexed earlier
# are not part of the DEV-split evaluation.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

# Same splitter settings as for the test split: chunks of length 200, no overlap,
# sentence boundaries ignored, no empty-line/whitespace cleaning.
# NOTE(review): the unit of split_length depends on PreProcessor's split_by default (not set here) — confirm.
preprocessor = PreProcessor(
    split_length=200,
    split_overlap=0,
    split_respect_sentence_boundary=False,
    clean_empty_lines=False,
    clean_whitespace=False,
)

# Index the DEV split (dev.json) for the model_9 evaluation.
# add_eval_data() converts the given SQuAD-format JSON dataset into Haystack document and
# label objects and indexes them in doc_index / label_index of the document store.
document_store.add_eval_data(
    filename="squad_format_thesis/dev.json",
    doc_index=doc_index,
    label_index=label_index,
    preprocessor=preprocessor,
)



In [38]:
from haystack.nodes import DensePassageRetriever

# Directory of the saved model_9 DPR checkpoint (query_encoder/ and passage_encoder/
# sub-folders, per the load log printed by this cell).
save_dir = "models/model_9"

# Reload model_9 for the DEV-split run and bind it to the evaluation document store.
retriever = DensePassageRetriever.load(load_dir=save_dir, document_store=document_store, use_gpu=True)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_9/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_9/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_9/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_9/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_9


In [39]:
# Re-embed the freshly indexed DEV documents with the currently loaded retriever (model_9).
# Run after (re)indexing and before eval so the stored vectors belong to the model under test.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 608 docs ...


Updating embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

In [40]:
## Evaluate Retriever on its own # THIS IS THE FINE-TUNED model_9 (loaded from models/model_9), DEV split
# Note that no_answer samples are omitted when evaluation is performed with this method
# open_domain=True: per the log line this cell prints, a question counts as a hit when the
# answer is found in the top_k (=5) passages selected by the retriever.
retriever_eval_results = retriever.eval(top_k=5, label_index=label_index, doc_index=doc_index, open_domain=True)
# Retriever Recall is the proportion of questions for which a document containing the answer is
# among the top_k retrieved documents
print("Retriever Recall:", retriever_eval_results["recall"])
# Retriever Mean Avg Precision rewards retrievers that give relevant documents a higher rank
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|██████████████████████████████████████████| 24/24 [00:00<00:00, 110.63it/s]
INFO - haystack.nodes.retriever.base -  For 18 out of 24 questions (75.00%), the answer was in the top-5 candidate passages selected by the retriever.


Retriever Recall: 0.75
Retriever Mean Avg Precision: 0.5256944444444445


## Model_10

In [4]:
from haystack.nodes import PreProcessor

# Wipe both evaluation indices so documents/labels left over from a previous run
# (another model or another split) are not part of this evaluation.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

# Splitter applied to the evaluation documents while they are indexed: chunks of length 200,
# no overlap, sentence boundaries ignored, no empty-line/whitespace cleaning.
# NOTE(review): the unit of split_length depends on PreProcessor's split_by default (not set here) — confirm.
preprocessor = PreProcessor(
    split_length=200,
    split_overlap=0,
    split_respect_sentence_boundary=False,
    clean_empty_lines=False,
    clean_whitespace=False,
)

# Index the TEST split (test.json) for the model_10 evaluation.
# add_eval_data() converts the given SQuAD-format JSON dataset into Haystack document and
# label objects and indexes them in doc_index / label_index of the document store.
document_store.add_eval_data(
    filename="squad_format_thesis/test.json",
    doc_index=doc_index,
    label_index=label_index,
    preprocessor=preprocessor,
)



In [5]:
from haystack.nodes import DensePassageRetriever

# Directory of the saved model_10 DPR checkpoint (query_encoder/ and passage_encoder/
# sub-folders, per the load log printed by this cell).
save_dir = "models/model_10"

# Restore both encoders from disk and bind the retriever to the evaluation document store.
retriever = DensePassageRetriever.load(load_dir=save_dir, document_store=document_store, use_gpu=True)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_10/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_10/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_10/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_10/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_10


In [6]:
# Re-embed every document in doc_index with the currently loaded retriever (model_10).
# Run after (re)indexing and before eval so the stored vectors belong to the model under test.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 651 docs ...


Updating embeddings:   0%|          | 0/651 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/656 [00:00<?, ? Docs/s]

In [7]:
## Evaluate Retriever on its own # THIS IS THE FINE-TUNED model_10 (loaded from models/model_10), TEST split
# Note that no_answer samples are omitted when evaluation is performed with this method
# open_domain=True: per the log line this cell prints, a question counts as a hit when the
# answer is found in the top_k (=5) passages selected by the retriever.
retriever_eval_results = retriever.eval(top_k=5, label_index=label_index, doc_index=doc_index, open_domain=True)
# Retriever Recall is the proportion of questions for which a document containing the answer is
# among the top_k retrieved documents
print("Retriever Recall:", retriever_eval_results["recall"])
# Retriever Mean Avg Precision rewards retrievers that give relevant documents a higher rank
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 25/25 [00:00<00:00, 45.77it/s]
INFO - haystack.nodes.retriever.base -  For 17 out of 25 questions (68.00%), the answer was in the top-5 candidate passages selected by the retriever.


Retriever Recall: 0.68
Retriever Mean Avg Precision: 0.5076666666666667


### DEV SET

In [33]:
from haystack.nodes import PreProcessor

# Wipe both evaluation indices so the TEST-split documents/labels indexed earlier
# are not part of the DEV-split evaluation.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

# Same splitter settings as for the test split: chunks of length 200, no overlap,
# sentence boundaries ignored, no empty-line/whitespace cleaning.
# NOTE(review): the unit of split_length depends on PreProcessor's split_by default (not set here) — confirm.
preprocessor = PreProcessor(
    split_length=200,
    split_overlap=0,
    split_respect_sentence_boundary=False,
    clean_empty_lines=False,
    clean_whitespace=False,
)

# Index the DEV split (dev.json) for the model_10 evaluation.
# add_eval_data() converts the given SQuAD-format JSON dataset into Haystack document and
# label objects and indexes them in doc_index / label_index of the document store.
document_store.add_eval_data(
    filename="squad_format_thesis/dev.json",
    doc_index=doc_index,
    label_index=label_index,
    preprocessor=preprocessor,
)



In [34]:
from haystack.nodes import DensePassageRetriever

# Directory of the saved model_10 DPR checkpoint (query_encoder/ and passage_encoder/
# sub-folders, per the load log printed by this cell).
save_dir = "models/model_10"

# Reload model_10 for the DEV-split run and bind it to the evaluation document store.
retriever = DensePassageRetriever.load(load_dir=save_dir, document_store=document_store, use_gpu=True)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_10/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_10/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_10/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_10/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_10


In [35]:
# Re-embed the freshly indexed DEV documents with the currently loaded retriever (model_10).
# Run after (re)indexing and before eval so the stored vectors belong to the model under test.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 608 docs ...


Updating embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

In [36]:
## Evaluate Retriever on its own # THIS IS THE FINE-TUNED model_10 (loaded from models/model_10), DEV split
# Note that no_answer samples are omitted when evaluation is performed with this method
# open_domain=True: per the log line this cell prints, a question counts as a hit when the
# answer is found in the top_k (=5) passages selected by the retriever.
retriever_eval_results = retriever.eval(top_k=5, label_index=label_index, doc_index=doc_index, open_domain=True)
# Retriever Recall is the proportion of questions for which a document containing the answer is
# among the top_k retrieved documents
print("Retriever Recall:", retriever_eval_results["recall"])
# Retriever Mean Avg Precision rewards retrievers that give relevant documents a higher rank
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 24/24 [00:00<00:00, 82.24it/s]
INFO - haystack.nodes.retriever.base -  For 18 out of 24 questions (75.00%), the answer was in the top-5 candidate passages selected by the retriever.


Retriever Recall: 0.75
Retriever Mean Avg Precision: 0.5256944444444445


## Base model

In [6]:
from haystack.nodes import PreProcessor

# Wipe both evaluation indices so documents/labels left over from a previous run
# (another model or another split) are not part of this evaluation.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

# Splitter applied to the evaluation documents while they are indexed: chunks of length 200,
# no overlap, sentence boundaries ignored, no empty-line/whitespace cleaning.
# NOTE(review): the unit of split_length depends on PreProcessor's split_by default (not set here) — confirm.
preprocessor = PreProcessor(
    split_length=200,
    split_overlap=0,
    split_respect_sentence_boundary=False,
    clean_empty_lines=False,
    clean_whitespace=False,
)

# Index the TEST split (test.json) for the base-model evaluation.
# add_eval_data() converts the given SQuAD-format JSON dataset into Haystack document and
# label objects and indexes them in doc_index / label_index of the document store.
document_store.add_eval_data(
    filename="squad_format_thesis/test.json",
    doc_index=doc_index,
    label_index=label_index,
    preprocessor=preprocessor,
)



In [7]:
from haystack.nodes import DensePassageRetriever
# Baseline retriever: built directly from the published Spanish DPR encoders, referenced by
# model name (the log shows they are resolved via the Transformers Model Hub / local cache)
# rather than from a checkpoint under models/. Serves as the reference for the models/model_N runs.
retriever = DensePassageRetriever(
            document_store=document_store,
            query_embedding_model="IIC/dpr-spanish-question_encoder-allqa-base",
            passage_embedding_model="IIC/dpr-spanish-passage_encoder-allqa-base",
            use_gpu=True,
            batch_size = 64
            )

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Could not find IIC/dpr-spanish-question_encoder-allqa-base locally.
INFO - haystack.modeling.model.language_model -  Looking on Transformers Model Hub (in local cache and online)...
INFO - haystack.modeling.model.language_model -  Automatically detected language from language model name: spanish
INFO - haystack.modeling.model.language_model -  Loaded IIC/dpr-spanish-question_encoder-allqa-base
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Could not find IIC/dpr-spanish-passage_encoder-allqa-base locally.
INFO - haystack.modeling.model.language_model -  Looking on Transformers Model Hub (in local cache and online)...
INFO - haystack.modeling.model.language_model -  Automatically detected language from lang

In [8]:
# Re-embed every document in doc_index with the currently loaded retriever (base model).
# Run after (re)indexing and before eval so the stored vectors belong to the model under test.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 651 docs ...


Updating embeddings:   0%|          | 0/651 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/704 [00:00<?, ? Docs/s]

In [9]:
## Evaluate Retriever on its own # THIS IS THE BASE MODEL (IIC/dpr-spanish-*-allqa-base, not a models/ checkpoint), TEST split
# Note that no_answer samples are omitted when evaluation is performed with this method
# open_domain=True: per the log line this cell prints, a question counts as a hit when the
# answer is found in the top_k (=5) passages selected by the retriever.
retriever_eval_results = retriever.eval(top_k=5, label_index=label_index, doc_index=doc_index, open_domain=True)
# Retriever Recall is the proportion of questions for which a document containing the answer is
# among the top_k retrieved documents
print("Retriever Recall:", retriever_eval_results["recall"])
# Retriever Mean Avg Precision rewards retrievers that give relevant documents a higher rank
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 25/25 [00:00<00:00, 54.09it/s]
INFO - haystack.nodes.retriever.base -  For 17 out of 25 questions (68.00%), the answer was in the top-5 candidate passages selected by the retriever.


Retriever Recall: 0.68
Retriever Mean Avg Precision: 0.5076666666666667


### DEV SET

In [29]:
from haystack.nodes import PreProcessor

# Empty both evaluation indices so only the data loaded below is evaluated.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

# Split with split_length=200 and no overlap, ignoring sentence boundaries;
# whitespace and empty-line cleaning are switched off.
preprocessor = PreProcessor(
    clean_whitespace=False,
    clean_empty_lines=False,
    split_respect_sentence_boundary=False,
    split_overlap=0,
    split_length=200,
)

# add_eval_data() converts the SQuAD-format JSON file into Haystack document
# and label objects and indexes them in `doc_index` / `label_index`.
# It can be used with any dataset in SQuAD format.
document_store.add_eval_data(
    preprocessor=preprocessor,
    label_index=label_index,
    doc_index=doc_index,
    filename="squad_format_thesis/dev.json",
)



In [30]:
from haystack.nodes import DensePassageRetriever

# DPR retriever using the IIC Spanish question/passage encoder checkpoints,
# referenced by model name, on top of the shared document store.
retriever = DensePassageRetriever(
    document_store=document_store,
    passage_embedding_model="IIC/dpr-spanish-passage_encoder-allqa-base",
    query_embedding_model="IIC/dpr-spanish-question_encoder-allqa-base",
    batch_size=64,
    use_gpu=True,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Could not find IIC/dpr-spanish-question_encoder-allqa-base locally.
INFO - haystack.modeling.model.language_model -  Looking on Transformers Model Hub (in local cache and online)...
INFO - haystack.modeling.model.language_model -  Automatically detected language from language model name: spanish
INFO - haystack.modeling.model.language_model -  Loaded IIC/dpr-spanish-question_encoder-allqa-base
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Could not find IIC/dpr-spanish-passage_encoder-allqa-base locally.
INFO - haystack.modeling.model.language_model -  Looking on Transformers Model Hub (in local cache and online)...
INFO - haystack.modeling.model.language_model -  Automatically detected language from lang

In [31]:
# Embed the freshly indexed documents in `doc_index` with the retriever created
# above and write the resulting vectors back to the document store.
document_store.update_embeddings(retriever=retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 608 docs ...


Updating embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/640 [00:00<?, ? Docs/s]

In [32]:
# Evaluate the retriever on its own (the "bare bones" run).
# Note: no_answer samples are omitted when evaluating with this method.
# open_domain=True counts a hit when the answer text is found in a retrieved
# passage (per the Haystack docs) instead of comparing document ids.
retriever_eval_results = retriever.eval(
    label_index=label_index,
    doc_index=doc_index,
    top_k=5,
    open_domain=True,
)

# Recall: share of questions whose answer appears in the top-k retrieved passages.
print("Retriever Recall:", retriever_eval_results["recall"])
# Mean average precision: rewards retrievers that rank relevant documents higher.
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|██████████████████████████████████████████| 24/24 [00:00<00:00, 110.94it/s]
INFO - haystack.nodes.retriever.base -  For 18 out of 24 questions (75.00%), the answer was in the top-5 candidate passages selected by the retriever.


Retriever Recall: 0.75
Retriever Mean Avg Precision: 0.5256944444444445


## Old Base Model

In [10]:
from haystack.nodes import PreProcessor

# Empty both evaluation indices so only the data loaded below is evaluated.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

# Split with split_length=200 and no overlap, ignoring sentence boundaries;
# whitespace and empty-line cleaning are switched off.
preprocessor = PreProcessor(
    clean_whitespace=False,
    clean_empty_lines=False,
    split_respect_sentence_boundary=False,
    split_overlap=0,
    split_length=200,
)

# add_eval_data() converts the SQuAD-format JSON file into Haystack document
# and label objects and indexes them in `doc_index` / `label_index`.
# It can be used with any dataset in SQuAD format.
document_store.add_eval_data(
    preprocessor=preprocessor,
    label_index=label_index,
    doc_index=doc_index,
    filename="squad_format_thesis/test.json",
)



In [11]:
from haystack.nodes import DensePassageRetriever

# DPR retriever using the voidful multilingual BERT question/context encoder
# checkpoints, referenced by model name, on top of the shared document store.
retriever = DensePassageRetriever(
    document_store=document_store,
    passage_embedding_model="voidful/dpr-ctx_encoder-bert-base-multilingual",
    query_embedding_model="voidful/dpr-question_encoder-bert-base-multilingual",
    batch_size=64,
    use_gpu=True,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Could not find voidful/dpr-question_encoder-bert-base-multilingual locally.
INFO - haystack.modeling.model.language_model -  Looking on Transformers Model Hub (in local cache and online)...
INFO - haystack.modeling.model.language_model -  Automatically detected language from language model name: multilingual
INFO - haystack.modeling.model.language_model -  Loaded voidful/dpr-question_encoder-bert-base-multilingual
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DPRQuestionEncoderTokenizer'. 
The class this function is called from is 'DPRContextEncoderTokenizerFast'.
INFO - haystack.modeling.model.language_mode

In [12]:
# Embed the documents stored in `doc_index` with the retriever created above
# and write the resulting vectors back to the document store.
document_store.update_embeddings(retriever=retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 651 docs ...


Updating embeddings:   0%|          | 0/651 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/704 [00:00<?, ? Docs/s]

In [13]:
# Evaluate the retriever on its own (the "bare bones" run).
# Note: no_answer samples are omitted when evaluating with this method.
# open_domain=True counts a hit when the answer text is found in a retrieved
# passage (per the Haystack docs) instead of comparing document ids.
retriever_eval_results = retriever.eval(
    label_index=label_index,
    doc_index=doc_index,
    top_k=5,
    open_domain=True,
)

# Recall: share of questions whose answer appears in the top-k retrieved passages.
print("Retriever Recall:", retriever_eval_results["recall"])
# Mean average precision: rewards retrievers that rank relevant documents higher.
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 25/25 [00:00<00:00, 91.97it/s]
INFO - haystack.nodes.retriever.base -  For 7 out of 25 questions (28.00%), the answer was in the top-5 candidate passages selected by the retriever.


Retriever Recall: 0.28
Retriever Mean Avg Precision: 0.19


### DEV SET

In [25]:
from haystack.nodes import PreProcessor

# Empty both evaluation indices so only the data loaded below is evaluated.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

# Split with split_length=200 and no overlap, ignoring sentence boundaries;
# whitespace and empty-line cleaning are switched off.
preprocessor = PreProcessor(
    clean_whitespace=False,
    clean_empty_lines=False,
    split_respect_sentence_boundary=False,
    split_overlap=0,
    split_length=200,
)

# add_eval_data() converts the SQuAD-format JSON file into Haystack document
# and label objects and indexes them in `doc_index` / `label_index`.
# It can be used with any dataset in SQuAD format.
document_store.add_eval_data(
    preprocessor=preprocessor,
    label_index=label_index,
    doc_index=doc_index,
    filename="squad_format_thesis/dev.json",
)



In [26]:
from haystack.nodes import DensePassageRetriever

# DPR retriever using the voidful multilingual BERT question/context encoder
# checkpoints, referenced by model name, on top of the shared document store.
retriever = DensePassageRetriever(
    document_store=document_store,
    passage_embedding_model="voidful/dpr-ctx_encoder-bert-base-multilingual",
    query_embedding_model="voidful/dpr-question_encoder-bert-base-multilingual",
    batch_size=64,
    use_gpu=True,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Could not find voidful/dpr-question_encoder-bert-base-multilingual locally.
INFO - haystack.modeling.model.language_model -  Looking on Transformers Model Hub (in local cache and online)...
INFO - haystack.modeling.model.language_model -  Automatically detected language from language model name: multilingual
INFO - haystack.modeling.model.language_model -  Loaded voidful/dpr-question_encoder-bert-base-multilingual
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DPRQuestionEncoderTokenizer'. 
The class this function is called from is 'DPRContextEncoderTokenizerFast'.
INFO - haystack.modeling.model.language_mode

In [27]:
# Embed the freshly indexed documents in `doc_index` with the retriever created
# above and write the resulting vectors back to the document store.
document_store.update_embeddings(retriever=retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 608 docs ...


Updating embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/640 [00:00<?, ? Docs/s]

In [28]:
# Evaluate the retriever on its own (the "bare bones" run).
# Note: no_answer samples are omitted when evaluating with this method.
# open_domain=True counts a hit when the answer text is found in a retrieved
# passage (per the Haystack docs) instead of comparing document ids.
retriever_eval_results = retriever.eval(
    label_index=label_index,
    doc_index=doc_index,
    top_k=5,
    open_domain=True,
)

# Recall: share of questions whose answer appears in the top-k retrieved passages.
print("Retriever Recall:", retriever_eval_results["recall"])
# Mean average precision: rewards retrievers that rank relevant documents higher.
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 24/24 [00:00<00:00, 99.29it/s]
INFO - haystack.nodes.retriever.base -  For 8 out of 24 questions (33.33%), the answer was in the top-5 candidate passages selected by the retriever.


Retriever Recall: 0.3333333333333333
Retriever Mean Avg Precision: 0.21319444444444444


## BM25

In [17]:
# Initialize the BM25 retriever on the shared document store. Rebinding
# `retriever` replaces the DensePassageRetriever used by the cells above.
# NOTE(review): ElasticsearchRetriever is imported but not used in this cell.
from haystack.nodes import BM25Retriever, ElasticsearchRetriever

retriever = BM25Retriever(document_store=document_store)

In [18]:
from haystack.nodes import PreProcessor

# Empty both evaluation indices so only the data loaded below is evaluated.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

# Split with split_length=200 and no overlap, ignoring sentence boundaries;
# whitespace and empty-line cleaning are switched off.
preprocessor = PreProcessor(
    clean_whitespace=False,
    clean_empty_lines=False,
    split_respect_sentence_boundary=False,
    split_overlap=0,
    split_length=200,
)

# add_eval_data() converts the SQuAD-format JSON file into Haystack document
# and label objects and indexes them in `doc_index` / `label_index`.
# It can be used with any dataset in SQuAD format.
document_store.add_eval_data(
    preprocessor=preprocessor,
    label_index=label_index,
    doc_index=doc_index,
    filename="squad_format_thesis/test.json",
)



In [16]:
# Evaluate the retriever on its own (this is the "ES" run).
# Note: no_answer samples are omitted when evaluating with this method.
# open_domain=True counts a hit when the answer text is found in a retrieved
# passage (per the Haystack docs) instead of comparing document ids.
retriever_eval_results = retriever.eval(
    label_index=label_index,
    doc_index=doc_index,
    top_k=5,
    open_domain=True,
)

# Recall: share of questions whose answer appears in the top-k retrieved passages.
print("Retriever Recall:", retriever_eval_results["recall"])
# Mean average precision: rewards retrievers that rank relevant documents higher.
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|██████████████████████████████████████████| 25/25 [00:00<00:00, 395.48it/s]
INFO - haystack.nodes.retriever.base -  For 14 out of 25 questions (56.00%), the answer was in the top-5 candidate passages selected by the retriever.


Retriever Recall: 0.56
Retriever Mean Avg Precision: 0.4793333333333333


### DEV SET

In [19]:
from haystack.nodes import PreProcessor

# Empty both evaluation indices so only the data loaded below is evaluated.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

# Split with split_length=200 and no overlap, ignoring sentence boundaries;
# whitespace and empty-line cleaning are switched off.
preprocessor = PreProcessor(
    clean_whitespace=False,
    clean_empty_lines=False,
    split_respect_sentence_boundary=False,
    split_overlap=0,
    split_length=200,
)

# add_eval_data() converts the SQuAD-format JSON file into Haystack document
# and label objects and indexes them in `doc_index` / `label_index`.
# It can be used with any dataset in SQuAD format.
document_store.add_eval_data(
    preprocessor=preprocessor,
    label_index=label_index,
    doc_index=doc_index,
    filename="squad_format_thesis/dev.json",
)



In [20]:
# Evaluate the retriever on its own (this is the "ES" run).
# Note: no_answer samples are omitted when evaluating with this method.
# open_domain=True counts a hit when the answer text is found in a retrieved
# passage (per the Haystack docs) instead of comparing document ids.
retriever_eval_results = retriever.eval(
    label_index=label_index,
    doc_index=doc_index,
    top_k=5,
    open_domain=True,
)

# Recall: share of questions whose answer appears in the top-k retrieved passages.
print("Retriever Recall:", retriever_eval_results["recall"])
# Mean average precision: rewards retrievers that rank relevant documents higher.
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|██████████████████████████████████████████| 24/24 [00:00<00:00, 484.57it/s]
INFO - haystack.nodes.retriever.base -  For 17 out of 24 questions (70.83%), the answer was in the top-5 candidate passages selected by the retriever.


Retriever Recall: 0.7083333333333334
Retriever Mean Avg Precision: 0.5305555555555556
