In [1]:
# Elasticsearch index names used by this evaluation. They must not collide with any
# existing index: both indices are wiped clean before the evaluation data is inserted.
doc_index, label_index = "squad_docs", "squad_labels"

# 200 split_length

Estos eran los parámetros por defecto encontrados en los tutoriales de Haystack.

In [2]:
# Names of the retrievers being compared.
# NOTE(review): "model_1" is listed twice (the second occurrence follows "model_10").
# It may have been meant to be "model_11" -- confirm before using this list as labels.
model_name = [
    "model_1", "model_2", "model_3", "model_4", "model_5",
    "model_6", "model_7", "model_8", "model_9", "model_10",
    "model_1", "base_model", "old_base_model", "bm25",
]

# Metric accumulators for the 200-word split: one entry is appended per evaluated model.
test_200_map, test_200_recall = [], []
dev_200_map, dev_200_recall = [], []

In [3]:
# Connect to the local Elasticsearch instance.
# Start the container first with: docker start es01-test -a
from haystack.document_stores import ElasticsearchDocumentStore
from haystack.nodes import PreProcessor

# Documents and labels are kept in separate indices; embeddings are stored in the
# "emb" field (768 dimensions), which is also listed in excluded_meta_data.
document_store = ElasticsearchDocumentStore(
    host="localhost", username="", password="",
    index=doc_index, label_index=label_index,
    embedding_field="emb", embedding_dim=768,
    excluded_meta_data=["emb"],
)

# Empty both indices so that nothing left over from an earlier run is evaluated.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

# Split documents into 200-word passages without overlap; sentence boundaries are
# not respected and no whitespace / empty-line cleaning is applied.
preprocessor = PreProcessor(
    split_by="word",
    split_length=200,
    split_overlap=0,
    split_respect_sentence_boundary=False,
    clean_whitespace=False,
    clean_empty_lines=False,
)

INFO - haystack.document_stores.base -  Numba not found, replacing njit() with no-op implementation. Enable it with 'pip install numba'.
INFO - haystack.modeling.model.optimization -  apex not found, won't use it. See https://nvidia.github.io/apex/
ERROR - root -  Failed to import 'magic' (from 'python-magic' and 'python-magic-bin' on Windows). FileTypeClassifier will not perform mimetype detection on extensionless files. Please make sure the necessary OS libraries are installed if you need this functionality.


## Model 1

### Test set

In [4]:
# Index the test split. add_eval_data() converts a dataset given in SQuAD-format JSON
# into Haystack document and label objects and indexes them in the document index and
# the label index of the document store. It can be used with any SQuAD-format dataset.
document_store.add_eval_data(
    filename="squad_format_thesis/test.json", preprocessor=preprocessor,
    doc_index=doc_index, label_index=label_index,
)



In [5]:
from haystack.nodes import DensePassageRetriever

# Directory holding the saved DPR checkpoint for model_1.
save_dir = "models/model_1"

# Restore the retriever from disk, bind it to the document store and run it on the GPU.
retriever = DensePassageRetriever.load(
    load_dir=save_dir,
    document_store=document_store,
    use_gpu=True,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_1/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_1/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_1/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_1/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_1


In [6]:
# Recompute the embeddings of the documents in the doc index with the current retriever.
document_store.update_embeddings(
    retriever,
    index=doc_index,
)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 651 docs ...


Updating embeddings:   0%|          | 0/651 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/656 [00:00<?, ? Docs/s]

In [9]:
# Evaluate the retriever on its own. The retriever in use is the fine-tuned model_1, loaded above from "models/model_1".
# Note that no_answer samples are omitted when evaluation is performed with this method
retriever_eval_results = retriever.eval(top_k=10, label_index=label_index, doc_index=doc_index, open_domain=True)
# Retriever Recall is the proportion of questions for which a document containing the answer is
# among the top_k (here 10) documents returned by the retriever
print("Retriever Recall:", retriever_eval_results["recall"])
# Record the recall in the test-set accumulator for the 200-word split
test_200_recall.append(retriever_eval_results["recall"])
# Retriever Mean Avg Precision rewards retrievers that give relevant documents a higher rank
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
# Record the MAP in the test-set accumulator for the 200-word split
test_200_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 25/25 [00:00<00:00, 66.70it/s]
INFO - haystack.nodes.retriever.base -  For 16 out of 25 questions (64.00%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.64
Retriever Mean Avg Precision: 0.4063333333333333


### Dev set

In [8]:
# Empty the document and label indices before the next dataset is indexed.
for _index in (doc_index, label_index):
    document_store.delete_documents(index=_index)

In [9]:
# Index the dev split. add_eval_data() converts a dataset given in SQuAD-format JSON
# into Haystack document and label objects and indexes them in the document index and
# the label index of the document store. It can be used with any SQuAD-format dataset.
document_store.add_eval_data(
    filename="squad_format_thesis/dev.json", preprocessor=preprocessor,
    doc_index=doc_index, label_index=label_index,
)



In [10]:
from haystack.nodes import DensePassageRetriever

# Directory holding the saved DPR checkpoint for model_1.
save_dir = "models/model_1"

# Restore the retriever from disk, bind it to the document store and run it on the GPU.
retriever = DensePassageRetriever.load(
    load_dir=save_dir,
    document_store=document_store,
    use_gpu=True,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_1/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_1/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_1/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_1/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_1


In [11]:
# Recompute the embeddings of the documents in the doc index with the current retriever.
document_store.update_embeddings(
    retriever,
    index=doc_index,
)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 608 docs ...


Updating embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

In [12]:
# Evaluate the retriever on its own. The retriever in use is the fine-tuned model_1, loaded above from "models/model_1".
# Note that no_answer samples are omitted when evaluation is performed with this method
retriever_eval_results = retriever.eval(top_k=10, label_index=label_index, doc_index=doc_index, open_domain=True)
# Retriever Recall is the proportion of questions for which a document containing the answer is
# among the top_k (here 10) documents returned by the retriever
print("Retriever Recall:", retriever_eval_results["recall"])
# Record the recall in the dev-set accumulator for the 200-word split
dev_200_recall.append(retriever_eval_results["recall"])
# Retriever Mean Avg Precision rewards retrievers that give relevant documents a higher rank
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
# Record the MAP in the dev-set accumulator for the 200-word split
dev_200_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 24/24 [00:00<00:00, 80.99it/s]
INFO - haystack.nodes.retriever.base -  For 18 out of 24 questions (75.00%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.75
Retriever Mean Avg Precision: 0.3371527777777778


## Model 2

In [13]:
# Empty the document and label indices before the next dataset is indexed.
for _index in (doc_index, label_index):
    document_store.delete_documents(index=_index)

### Test set

In [14]:
# Index the test split. add_eval_data() converts a dataset given in SQuAD-format JSON
# into Haystack document and label objects and indexes them in the document index and
# the label index of the document store. It can be used with any SQuAD-format dataset.
document_store.add_eval_data(
    filename="squad_format_thesis/test.json", preprocessor=preprocessor,
    doc_index=doc_index, label_index=label_index,
)



In [15]:
from haystack.nodes import DensePassageRetriever

# Directory holding the saved DPR checkpoint for model_2.
save_dir = "models/model_2"

# Restore the retriever from disk, bind it to the document store and run it on the GPU.
retriever = DensePassageRetriever.load(
    load_dir=save_dir,
    document_store=document_store,
    use_gpu=True,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_2/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_2/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_2/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_2/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_2


In [16]:
# Recompute the embeddings of the documents in the doc index with the current retriever.
document_store.update_embeddings(
    retriever,
    index=doc_index,
)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 651 docs ...


Updating embeddings:   0%|          | 0/651 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/656 [00:00<?, ? Docs/s]

In [17]:
# Evaluate the retriever on its own. The retriever in use is the fine-tuned model_2, loaded above from "models/model_2".
# Note that no_answer samples are omitted when evaluation is performed with this method
retriever_eval_results = retriever.eval(top_k=10, label_index=label_index, doc_index=doc_index, open_domain=True)
# Retriever Recall is the proportion of questions for which a document containing the answer is
# among the top_k (here 10) documents returned by the retriever
print("Retriever Recall:", retriever_eval_results["recall"])
# Record the recall in the test-set accumulator for the 200-word split
test_200_recall.append(retriever_eval_results["recall"])
# Retriever Mean Avg Precision rewards retrievers that give relevant documents a higher rank
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
# Record the MAP in the test-set accumulator for the 200-word split
test_200_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 25/25 [00:00<00:00, 73.41it/s]
INFO - haystack.nodes.retriever.base -  For 20 out of 25 questions (80.00%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.8
Retriever Mean Avg Precision: 0.5023809523809525


### Dev set

In [18]:
# Empty the document and label indices before the next dataset is indexed.
for _index in (doc_index, label_index):
    document_store.delete_documents(index=_index)

In [19]:
# Index the dev split. add_eval_data() converts a dataset given in SQuAD-format JSON
# into Haystack document and label objects and indexes them in the document index and
# the label index of the document store. It can be used with any SQuAD-format dataset.
document_store.add_eval_data(
    filename="squad_format_thesis/dev.json", preprocessor=preprocessor,
    doc_index=doc_index, label_index=label_index,
)



In [20]:
from haystack.nodes import DensePassageRetriever

# Directory holding the saved DPR checkpoint for model_2.
save_dir = "models/model_2"

# Restore the retriever from disk, bind it to the document store and run it on the GPU.
retriever = DensePassageRetriever.load(
    load_dir=save_dir,
    document_store=document_store,
    use_gpu=True,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_2/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_2/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_2/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_2/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_2


In [21]:
# Recompute the embeddings of the documents in the doc index with the current retriever.
document_store.update_embeddings(
    retriever,
    index=doc_index,
)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 608 docs ...


Updating embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

In [22]:
# Evaluate the retriever on its own. The retriever in use is the fine-tuned model_2, loaded above from "models/model_2".
# Note that no_answer samples are omitted when evaluation is performed with this method
retriever_eval_results = retriever.eval(top_k=10, label_index=label_index, doc_index=doc_index, open_domain=True)
# Retriever Recall is the proportion of questions for which a document containing the answer is
# among the top_k (here 10) documents returned by the retriever
print("Retriever Recall:", retriever_eval_results["recall"])
# Record the recall in the dev-set accumulator for the 200-word split
dev_200_recall.append(retriever_eval_results["recall"])
# Retriever Mean Avg Precision rewards retrievers that give relevant documents a higher rank
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
# Record the MAP in the dev-set accumulator for the 200-word split
dev_200_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 24/24 [00:00<00:00, 67.17it/s]
INFO - haystack.nodes.retriever.base -  For 19 out of 24 questions (79.17%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.7916666666666666
Retriever Mean Avg Precision: 0.50625


## Model 3

In [23]:
# Empty the document and label indices before the next dataset is indexed.
for _index in (doc_index, label_index):
    document_store.delete_documents(index=_index)

### Test set

In [24]:
# Index the test split. add_eval_data() converts a dataset given in SQuAD-format JSON
# into Haystack document and label objects and indexes them in the document index and
# the label index of the document store. It can be used with any SQuAD-format dataset.
document_store.add_eval_data(
    filename="squad_format_thesis/test.json", preprocessor=preprocessor,
    doc_index=doc_index, label_index=label_index,
)



In [25]:
from haystack.nodes import DensePassageRetriever

# Directory holding the saved DPR checkpoint for model_3.
save_dir = "models/model_3"

# Restore the retriever from disk, bind it to the document store and run it on the GPU.
retriever = DensePassageRetriever.load(
    load_dir=save_dir,
    document_store=document_store,
    use_gpu=True,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_3/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_3/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_3/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_3/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_3


In [26]:
# Recompute the embeddings of the documents in the doc index with the current retriever.
document_store.update_embeddings(
    retriever,
    index=doc_index,
)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 651 docs ...


Updating embeddings:   0%|          | 0/651 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/656 [00:00<?, ? Docs/s]

In [27]:
# Evaluate the retriever on its own. The retriever in use is the fine-tuned model_3, loaded above from "models/model_3".
# Note that no_answer samples are omitted when evaluation is performed with this method
retriever_eval_results = retriever.eval(top_k=10, label_index=label_index, doc_index=doc_index, open_domain=True)
# Retriever Recall is the proportion of questions for which a document containing the answer is
# among the top_k (here 10) documents returned by the retriever
print("Retriever Recall:", retriever_eval_results["recall"])
# Record the recall in the test-set accumulator for the 200-word split
test_200_recall.append(retriever_eval_results["recall"])
# Retriever Mean Avg Precision rewards retrievers that give relevant documents a higher rank
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
# Record the MAP in the test-set accumulator for the 200-word split
test_200_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 25/25 [00:00<00:00, 61.32it/s]
INFO - haystack.nodes.retriever.base -  For 18 out of 25 questions (72.00%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.72
Retriever Mean Avg Precision: 0.4754920634920636


### Dev set

In [28]:
# Empty the document and label indices before the next dataset is indexed.
for _index in (doc_index, label_index):
    document_store.delete_documents(index=_index)

In [29]:
# Index the dev split. add_eval_data() converts a dataset given in SQuAD-format JSON
# into Haystack document and label objects and indexes them in the document index and
# the label index of the document store. It can be used with any SQuAD-format dataset.
document_store.add_eval_data(
    filename="squad_format_thesis/dev.json", preprocessor=preprocessor,
    doc_index=doc_index, label_index=label_index,
)



In [30]:
from haystack.nodes import DensePassageRetriever

# Directory holding the saved DPR checkpoint for model_3.
save_dir = "models/model_3"

# Restore the retriever from disk, bind it to the document store and run it on the GPU.
retriever = DensePassageRetriever.load(
    load_dir=save_dir,
    document_store=document_store,
    use_gpu=True,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_3/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_3/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_3/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_3/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_3


In [31]:
# Recompute the embeddings of the documents in the doc index with the current retriever.
document_store.update_embeddings(
    retriever,
    index=doc_index,
)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 608 docs ...


Updating embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

In [32]:
# Evaluate the retriever on its own. The retriever in use is the fine-tuned model_3, loaded above from "models/model_3".
# Note that no_answer samples are omitted when evaluation is performed with this method
retriever_eval_results = retriever.eval(top_k=10, label_index=label_index, doc_index=doc_index, open_domain=True)
# Retriever Recall is the proportion of questions for which a document containing the answer is
# among the top_k (here 10) documents returned by the retriever
print("Retriever Recall:", retriever_eval_results["recall"])
# Record the recall in the dev-set accumulator for the 200-word split
dev_200_recall.append(retriever_eval_results["recall"])
# Retriever Mean Avg Precision rewards retrievers that give relevant documents a higher rank
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
# Record the MAP in the dev-set accumulator for the 200-word split
dev_200_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 24/24 [00:00<00:00, 81.74it/s]
INFO - haystack.nodes.retriever.base -  For 18 out of 24 questions (75.00%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.75
Retriever Mean Avg Precision: 0.373015873015873


## Model 4

In [33]:
# Empty the document and label indices before the next dataset is indexed.
for _index in (doc_index, label_index):
    document_store.delete_documents(index=_index)

### Test set

In [34]:
# Index the test split. add_eval_data() converts a dataset given in SQuAD-format JSON
# into Haystack document and label objects and indexes them in the document index and
# the label index of the document store. It can be used with any SQuAD-format dataset.
document_store.add_eval_data(
    filename="squad_format_thesis/test.json", preprocessor=preprocessor,
    doc_index=doc_index, label_index=label_index,
)



In [35]:
from haystack.nodes import DensePassageRetriever

# Directory holding the saved DPR checkpoint for model_4.
save_dir = "models/model_4"

# Restore the retriever from disk, bind it to the document store and run it on the GPU.
retriever = DensePassageRetriever.load(
    load_dir=save_dir,
    document_store=document_store,
    use_gpu=True,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_4/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_4/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_4/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_4/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_4


In [36]:
# Recompute the embeddings of the documents in the doc index with the current retriever.
document_store.update_embeddings(
    retriever,
    index=doc_index,
)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 651 docs ...


Updating embeddings:   0%|          | 0/651 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/656 [00:00<?, ? Docs/s]

In [37]:
# Evaluate the retriever on its own. The retriever in use is the fine-tuned model_4, loaded above from "models/model_4".
# Note that no_answer samples are omitted when evaluation is performed with this method
retriever_eval_results = retriever.eval(top_k=10, label_index=label_index, doc_index=doc_index, open_domain=True)
# Retriever Recall is the proportion of questions for which a document containing the answer is
# among the top_k (here 10) documents returned by the retriever
print("Retriever Recall:", retriever_eval_results["recall"])
# Record the recall in the test-set accumulator for the 200-word split
test_200_recall.append(retriever_eval_results["recall"])
# Retriever Mean Avg Precision rewards retrievers that give relevant documents a higher rank
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
# Record the MAP in the test-set accumulator for the 200-word split
test_200_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 25/25 [00:00<00:00, 77.24it/s]
INFO - haystack.nodes.retriever.base -  For 15 out of 25 questions (60.00%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.6
Retriever Mean Avg Precision: 0.3734920634920635


### Dev set

In [38]:
# Empty the document and label indices before the next dataset is indexed.
for _index in (doc_index, label_index):
    document_store.delete_documents(index=_index)

In [39]:
# Index the dev split. add_eval_data() converts a dataset given in SQuAD-format JSON
# into Haystack document and label objects and indexes them in the document index and
# the label index of the document store. It can be used with any SQuAD-format dataset.
document_store.add_eval_data(
    filename="squad_format_thesis/dev.json", preprocessor=preprocessor,
    doc_index=doc_index, label_index=label_index,
)



In [40]:
from haystack.nodes import DensePassageRetriever

# Directory holding the saved DPR checkpoint for model_4.
save_dir = "models/model_4"

# Restore the retriever from disk, bind it to the document store and run it on the GPU.
retriever = DensePassageRetriever.load(
    load_dir=save_dir,
    document_store=document_store,
    use_gpu=True,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_4/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_4/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_4/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_4/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_4


In [41]:
# Recompute the embeddings of the documents in the doc index with the current retriever.
document_store.update_embeddings(
    retriever,
    index=doc_index,
)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 608 docs ...


Updating embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

In [42]:
# Evaluate the retriever on its own. The retriever in use is the fine-tuned model_4, loaded above from "models/model_4".
# Note that no_answer samples are omitted when evaluation is performed with this method
retriever_eval_results = retriever.eval(top_k=10, label_index=label_index, doc_index=doc_index, open_domain=True)
# Retriever Recall is the proportion of questions for which a document containing the answer is
# among the top_k (here 10) documents returned by the retriever
print("Retriever Recall:", retriever_eval_results["recall"])
# Record the recall in the dev-set accumulator for the 200-word split
dev_200_recall.append(retriever_eval_results["recall"])
# Retriever Mean Avg Precision rewards retrievers that give relevant documents a higher rank
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
# Record the MAP in the dev-set accumulator for the 200-word split
dev_200_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 24/24 [00:00<00:00, 78.57it/s]
INFO - haystack.nodes.retriever.base -  For 17 out of 24 questions (70.83%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.7083333333333334
Retriever Mean Avg Precision: 0.2785714285714286


## Model 5

In [43]:
# Empty the document and label indices before the next dataset is indexed.
for _index in (doc_index, label_index):
    document_store.delete_documents(index=_index)

### Test set

In [44]:
# Index the test split. add_eval_data() converts a dataset given in SQuAD-format JSON
# into Haystack document and label objects and indexes them in the document index and
# the label index of the document store. It can be used with any SQuAD-format dataset.
document_store.add_eval_data(
    filename="squad_format_thesis/test.json", preprocessor=preprocessor,
    doc_index=doc_index, label_index=label_index,
)



In [45]:
from haystack.nodes import DensePassageRetriever

# Directory holding the saved DPR checkpoint for model_5.
save_dir = "models/model_5"

# Restore the retriever from disk, bind it to the document store and run it on the GPU.
retriever = DensePassageRetriever.load(
    load_dir=save_dir,
    document_store=document_store,
    use_gpu=True,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_5/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_5/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_5/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_5/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_5


In [46]:
# Recompute the embeddings of the documents in the doc index with the current retriever.
document_store.update_embeddings(
    retriever,
    index=doc_index,
)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 651 docs ...


Updating embeddings:   0%|          | 0/651 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/656 [00:00<?, ? Docs/s]

In [47]:
# Evaluate the retriever on its own. The retriever in use is the fine-tuned model_5, loaded above from "models/model_5".
# Note that no_answer samples are omitted when evaluation is performed with this method
retriever_eval_results = retriever.eval(top_k=10, label_index=label_index, doc_index=doc_index, open_domain=True)
# Retriever Recall is the proportion of questions for which a document containing the answer is
# among the top_k (here 10) documents returned by the retriever
print("Retriever Recall:", retriever_eval_results["recall"])
# Record the recall in the test-set accumulator for the 200-word split
test_200_recall.append(retriever_eval_results["recall"])
# Retriever Mean Avg Precision rewards retrievers that give relevant documents a higher rank
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
# Record the MAP in the test-set accumulator for the 200-word split
test_200_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 25/25 [00:00<00:00, 82.83it/s]
INFO - haystack.nodes.retriever.base -  For 21 out of 25 questions (84.00%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.84
Retriever Mean Avg Precision: 0.5166666666666667


### Dev set

In [48]:
# Empty the document and label indices before the next dataset is indexed.
for _index in (doc_index, label_index):
    document_store.delete_documents(index=_index)

In [49]:
# Index the dev split. add_eval_data() converts a dataset given in SQuAD-format JSON
# into Haystack document and label objects and indexes them in the document index and
# the label index of the document store. It can be used with any SQuAD-format dataset.
document_store.add_eval_data(
    filename="squad_format_thesis/dev.json", preprocessor=preprocessor,
    doc_index=doc_index, label_index=label_index,
)



In [50]:
from haystack.nodes import DensePassageRetriever

# Directory holding the saved DPR checkpoint for model_5.
save_dir = "models/model_5"

# Restore the retriever from disk, bind it to the document store and run it on the GPU.
retriever = DensePassageRetriever.load(
    load_dir=save_dir,
    document_store=document_store,
    use_gpu=True,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_5/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_5/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_5/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_5/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_5


In [51]:
# Recompute the embeddings of the documents in the doc index with the current retriever.
document_store.update_embeddings(
    retriever,
    index=doc_index,
)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 608 docs ...


Updating embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

In [52]:
# Evaluate the retriever on its own. The retriever in use is the fine-tuned model_5, loaded above from "models/model_5".
# Note that no_answer samples are omitted when evaluation is performed with this method
retriever_eval_results = retriever.eval(top_k=10, label_index=label_index, doc_index=doc_index, open_domain=True)
# Retriever Recall is the proportion of questions for which a document containing the answer is
# among the top_k (here 10) documents returned by the retriever
print("Retriever Recall:", retriever_eval_results["recall"])
# Record the recall in the dev-set accumulator for the 200-word split
dev_200_recall.append(retriever_eval_results["recall"])
# Retriever Mean Avg Precision rewards retrievers that give relevant documents a higher rank
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
# Record the MAP in the dev-set accumulator for the 200-word split
dev_200_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 24/24 [00:00<00:00, 76.00it/s]
INFO - haystack.nodes.retriever.base -  For 20 out of 24 questions (83.33%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.8333333333333334
Retriever Mean Avg Precision: 0.5385912698412699


## Model 6

In [53]:
# Empty the document and label indices before the next dataset is indexed.
for _index in (doc_index, label_index):
    document_store.delete_documents(index=_index)

### Test set

In [54]:
# Index the test split. add_eval_data() converts a dataset given in SQuAD-format JSON
# into Haystack document and label objects and indexes them in the document index and
# the label index of the document store. It can be used with any SQuAD-format dataset.
document_store.add_eval_data(
    filename="squad_format_thesis/test.json", preprocessor=preprocessor,
    doc_index=doc_index, label_index=label_index,
)



In [55]:
from haystack.nodes import DensePassageRetriever

# Directory holding the fine-tuned Model 6 checkpoint (query and passage encoders).
save_dir = "models/model_6"

# Restore the DPR retriever from disk and bind it to the Elasticsearch document store.
retriever = DensePassageRetriever.load(
    load_dir=save_dir,
    document_store=document_store,
    use_gpu=True,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_6/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_6/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_6/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_6/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_6


In [56]:
# Compute and store embeddings for every document in `doc_index` with the currently
# loaded retriever (Model 6, test split).
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 651 docs ...


Updating embeddings:   0%|          | 0/651 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/656 [00:00<?, ? Docs/s]

In [57]:
# Evaluate the fine-tuned Model 6 retriever on its own (test split).
# Note that no_answer samples are omitted when evaluation is performed with this method.
retriever_eval_results = retriever.eval(
    top_k=10,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)

# Recall: share of questions whose answer appears in the top-10 retrieved passages.
# Mean Avg Precision: additionally rewards ranking the relevant passages higher.
# Each metric is printed and then appended to the matching test_200_* history list.
for metric_title, metric_key, metric_history in (
    ("Retriever Recall:", "recall", test_200_recall),
    ("Retriever Mean Avg Precision:", "map", test_200_map),
):
    metric_value = retriever_eval_results[metric_key]
    print(metric_title, metric_value)
    metric_history.append(metric_value)

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 25/25 [00:00<00:00, 85.29it/s]
INFO - haystack.nodes.retriever.base -  For 16 out of 25 questions (64.00%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.64
Retriever Mean Avg Precision: 0.40801587301587305


### Dev set

In [58]:
# Clear the test-split data before the Model 6 dev-split run: drop all documents, then all labels.
for stale_index in (doc_index, label_index):
    document_store.delete_documents(index=stale_index)

In [59]:
# Index the dev split for evaluation. add_eval_data() converts the SQuAD-format JSON
# file into Haystack document and label objects and stores them in `doc_index` and
# `label_index` respectively; it works with any dataset in SQuAD format.
eval_data_path = "squad_format_thesis/dev.json"
document_store.add_eval_data(
    filename=eval_data_path,
    doc_index=doc_index,
    label_index=label_index,
    preprocessor=preprocessor,
)



In [60]:
from haystack.nodes import DensePassageRetriever

# Same Model 6 checkpoint directory as the test-split run; reloaded for the dev split.
save_dir = "models/model_6"

# Restore the DPR retriever from disk and bind it to the Elasticsearch document store.
retriever = DensePassageRetriever.load(
    load_dir=save_dir,
    document_store=document_store,
    use_gpu=True,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_6/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_6/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_6/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_6/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_6


In [61]:
# Compute and store embeddings for every document in `doc_index` with the currently
# loaded retriever (Model 6, dev split).
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 608 docs ...


Updating embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

In [62]:
# Evaluate the fine-tuned Model 6 retriever on its own (dev split).
# Note that no_answer samples are omitted when evaluation is performed with this method.
retriever_eval_results = retriever.eval(
    top_k=10,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)

# Recall: share of questions whose answer appears in the top-10 retrieved passages.
# Mean Avg Precision: additionally rewards ranking the relevant passages higher.
# Each metric is printed and then appended to the matching dev_200_* history list.
for metric_title, metric_key, metric_history in (
    ("Retriever Recall:", "recall", dev_200_recall),
    ("Retriever Mean Avg Precision:", "map", dev_200_map),
):
    metric_value = retriever_eval_results[metric_key]
    print(metric_title, metric_value)
    metric_history.append(metric_value)

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 24/24 [00:00<00:00, 79.42it/s]
INFO - haystack.nodes.retriever.base -  For 16 out of 24 questions (66.67%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.6666666666666666
Retriever Mean Avg Precision: 0.31636904761904766


## Model 7

In [63]:
# Start the Model 7 test-split run from empty indices: drop all documents, then all labels.
for stale_index in (doc_index, label_index):
    document_store.delete_documents(index=stale_index)

### Test set

In [64]:
# Index the test split for evaluation. add_eval_data() converts the SQuAD-format JSON
# file into Haystack document and label objects and stores them in `doc_index` and
# `label_index` respectively; it works with any dataset in SQuAD format.
eval_data_path = "squad_format_thesis/test.json"
document_store.add_eval_data(
    filename=eval_data_path,
    doc_index=doc_index,
    label_index=label_index,
    preprocessor=preprocessor,
)



In [65]:
from haystack.nodes import DensePassageRetriever

# Directory holding the fine-tuned Model 7 checkpoint (query and passage encoders).
save_dir = "models/model_7"

# Restore the DPR retriever from disk and bind it to the Elasticsearch document store.
retriever = DensePassageRetriever.load(
    load_dir=save_dir,
    document_store=document_store,
    use_gpu=True,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_7/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_7/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_7/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_7/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_7


In [66]:
# Compute and store embeddings for every document in `doc_index` with the currently
# loaded retriever (Model 7, test split).
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 651 docs ...


Updating embeddings:   0%|          | 0/651 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/656 [00:00<?, ? Docs/s]

In [67]:
# Evaluate the fine-tuned Model 7 retriever on its own (test split).
# Note that no_answer samples are omitted when evaluation is performed with this method.
retriever_eval_results = retriever.eval(
    top_k=10,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)

# Recall: share of questions whose answer appears in the top-10 retrieved passages.
# Mean Avg Precision: additionally rewards ranking the relevant passages higher.
# Each metric is printed and then appended to the matching test_200_* history list.
for metric_title, metric_key, metric_history in (
    ("Retriever Recall:", "recall", test_200_recall),
    ("Retriever Mean Avg Precision:", "map", test_200_map),
):
    metric_value = retriever_eval_results[metric_key]
    print(metric_title, metric_value)
    metric_history.append(metric_value)

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 25/25 [00:00<00:00, 82.81it/s]
INFO - haystack.nodes.retriever.base -  For 21 out of 25 questions (84.00%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.84
Retriever Mean Avg Precision: 0.5137619047619048


### Dev set

In [68]:
# Clear the test-split data before the Model 7 dev-split run: drop all documents, then all labels.
for stale_index in (doc_index, label_index):
    document_store.delete_documents(index=stale_index)

In [69]:
# Index the dev split for evaluation. add_eval_data() converts the SQuAD-format JSON
# file into Haystack document and label objects and stores them in `doc_index` and
# `label_index` respectively; it works with any dataset in SQuAD format.
eval_data_path = "squad_format_thesis/dev.json"
document_store.add_eval_data(
    filename=eval_data_path,
    doc_index=doc_index,
    label_index=label_index,
    preprocessor=preprocessor,
)



In [70]:
from haystack.nodes import DensePassageRetriever

# Same Model 7 checkpoint directory as the test-split run; reloaded for the dev split.
save_dir = "models/model_7"

# Restore the DPR retriever from disk and bind it to the Elasticsearch document store.
retriever = DensePassageRetriever.load(
    load_dir=save_dir,
    document_store=document_store,
    use_gpu=True,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_7/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_7/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_7/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_7/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_7


In [71]:
# Compute and store embeddings for every document in `doc_index` with the currently
# loaded retriever (Model 7, dev split).
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 608 docs ...


Updating embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

In [72]:
# Evaluate the fine-tuned Model 7 retriever on its own (dev split).
# Note that no_answer samples are omitted when evaluation is performed with this method.
retriever_eval_results = retriever.eval(
    top_k=10,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)

# Recall: share of questions whose answer appears in the top-10 retrieved passages.
# Mean Avg Precision: additionally rewards ranking the relevant passages higher.
# Each metric is printed and then appended to the matching dev_200_* history list.
for metric_title, metric_key, metric_history in (
    ("Retriever Recall:", "recall", dev_200_recall),
    ("Retriever Mean Avg Precision:", "map", dev_200_map),
):
    metric_value = retriever_eval_results[metric_key]
    print(metric_title, metric_value)
    metric_history.append(metric_value)

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 24/24 [00:00<00:00, 77.57it/s]
INFO - haystack.nodes.retriever.base -  For 20 out of 24 questions (83.33%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.8333333333333334
Retriever Mean Avg Precision: 0.5143518518518518


## Model 8

In [73]:
# Start the Model 8 test-split run from empty indices: drop all documents, then all labels.
for stale_index in (doc_index, label_index):
    document_store.delete_documents(index=stale_index)

### Test set

In [74]:
# Index the test split for evaluation. add_eval_data() converts the SQuAD-format JSON
# file into Haystack document and label objects and stores them in `doc_index` and
# `label_index` respectively; it works with any dataset in SQuAD format.
eval_data_path = "squad_format_thesis/test.json"
document_store.add_eval_data(
    filename=eval_data_path,
    doc_index=doc_index,
    label_index=label_index,
    preprocessor=preprocessor,
)



In [75]:
from haystack.nodes import DensePassageRetriever

# Directory holding the fine-tuned Model 8 checkpoint (query and passage encoders).
save_dir = "models/model_8"

# Restore the DPR retriever from disk and bind it to the Elasticsearch document store.
retriever = DensePassageRetriever.load(
    load_dir=save_dir,
    document_store=document_store,
    use_gpu=True,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_8/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_8/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_8/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_8/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_8


In [76]:
# Compute and store embeddings for every document in `doc_index` with the currently
# loaded retriever (Model 8, test split).
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 651 docs ...


Updating embeddings:   0%|          | 0/651 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/656 [00:00<?, ? Docs/s]

In [77]:
# Evaluate the fine-tuned Model 8 retriever on its own (test split).
# Note that no_answer samples are omitted when evaluation is performed with this method.
retriever_eval_results = retriever.eval(
    top_k=10,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)

# Recall: share of questions whose answer appears in the top-10 retrieved passages.
# Mean Avg Precision: additionally rewards ranking the relevant passages higher.
# Each metric is printed and then appended to the matching test_200_* history list.
for metric_title, metric_key, metric_history in (
    ("Retriever Recall:", "recall", test_200_recall),
    ("Retriever Mean Avg Precision:", "map", test_200_map),
):
    metric_value = retriever_eval_results[metric_key]
    print(metric_title, metric_value)
    metric_history.append(metric_value)

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 25/25 [00:00<00:00, 80.02it/s]
INFO - haystack.nodes.retriever.base -  For 20 out of 25 questions (80.00%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.8
Retriever Mean Avg Precision: 0.49866666666666676


### Dev set

In [78]:
# Clear the test-split data before the Model 8 dev-split run: drop all documents, then all labels.
for stale_index in (doc_index, label_index):
    document_store.delete_documents(index=stale_index)

In [79]:
# Index the dev split for evaluation. add_eval_data() converts the SQuAD-format JSON
# file into Haystack document and label objects and stores them in `doc_index` and
# `label_index` respectively; it works with any dataset in SQuAD format.
eval_data_path = "squad_format_thesis/dev.json"
document_store.add_eval_data(
    filename=eval_data_path,
    doc_index=doc_index,
    label_index=label_index,
    preprocessor=preprocessor,
)



In [80]:
from haystack.nodes import DensePassageRetriever

# Same Model 8 checkpoint directory as the test-split run; reloaded for the dev split.
save_dir = "models/model_8"

# Restore the DPR retriever from disk and bind it to the Elasticsearch document store.
retriever = DensePassageRetriever.load(
    load_dir=save_dir,
    document_store=document_store,
    use_gpu=True,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_8/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_8/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_8/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_8/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_8


In [81]:
# Compute and store embeddings for every document in `doc_index` with the currently
# loaded retriever (Model 8, dev split).
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 608 docs ...


Updating embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

In [82]:
# Evaluate the fine-tuned Model 8 retriever on its own (dev split).
# Note that no_answer samples are omitted when evaluation is performed with this method.
retriever_eval_results = retriever.eval(
    top_k=10,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)

# Recall: share of questions whose answer appears in the top-10 retrieved passages.
# Mean Avg Precision: additionally rewards ranking the relevant passages higher.
# Each metric is printed and then appended to the matching dev_200_* history list.
for metric_title, metric_key, metric_history in (
    ("Retriever Recall:", "recall", dev_200_recall),
    ("Retriever Mean Avg Precision:", "map", dev_200_map),
):
    metric_value = retriever_eval_results[metric_key]
    print(metric_title, metric_value)
    metric_history.append(metric_value)

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 24/24 [00:00<00:00, 87.67it/s]
INFO - haystack.nodes.retriever.base -  For 19 out of 24 questions (79.17%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.7916666666666666
Retriever Mean Avg Precision: 0.39523809523809533


## Model 9

In [83]:
# Start the Model 9 test-split run from empty indices: drop all documents, then all labels.
for stale_index in (doc_index, label_index):
    document_store.delete_documents(index=stale_index)

### Test set

In [84]:
# Index the test split for evaluation. add_eval_data() converts the SQuAD-format JSON
# file into Haystack document and label objects and stores them in `doc_index` and
# `label_index` respectively; it works with any dataset in SQuAD format.
eval_data_path = "squad_format_thesis/test.json"
document_store.add_eval_data(
    filename=eval_data_path,
    doc_index=doc_index,
    label_index=label_index,
    preprocessor=preprocessor,
)



In [85]:
from haystack.nodes import DensePassageRetriever

# Directory holding the fine-tuned Model 9 checkpoint (query and passage encoders).
save_dir = "models/model_9"

# Restore the DPR retriever from disk and bind it to the Elasticsearch document store.
retriever = DensePassageRetriever.load(
    load_dir=save_dir,
    document_store=document_store,
    use_gpu=True,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_9/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_9/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_9/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_9/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_9


In [86]:
# Compute and store embeddings for every document in `doc_index` with the currently
# loaded retriever (Model 9, test split).
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 651 docs ...


Updating embeddings:   0%|          | 0/651 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/656 [00:00<?, ? Docs/s]

In [87]:
# Evaluate the fine-tuned Model 9 retriever on its own (test split).
# Note that no_answer samples are omitted when evaluation is performed with this method.
# NOTE(review): the output recorded for this cell (recall 0.84, MAP 0.5167) is identical to
# the Model 10 and Base Model test runs — confirm models/model_9 really differs from the base encoders.
retriever_eval_results = retriever.eval(
    top_k=10,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)

# Recall: share of questions whose answer appears in the top-10 retrieved passages.
# Mean Avg Precision: additionally rewards ranking the relevant passages higher.
# Each metric is printed and then appended to the matching test_200_* history list.
for metric_title, metric_key, metric_history in (
    ("Retriever Recall:", "recall", test_200_recall),
    ("Retriever Mean Avg Precision:", "map", test_200_map),
):
    metric_value = retriever_eval_results[metric_key]
    print(metric_title, metric_value)
    metric_history.append(metric_value)

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 25/25 [00:00<00:00, 62.31it/s]
INFO - haystack.nodes.retriever.base -  For 21 out of 25 questions (84.00%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.84
Retriever Mean Avg Precision: 0.5166666666666667


### Dev set

In [88]:
# Clear the test-split data before the Model 9 dev-split run: drop all documents, then all labels.
for stale_index in (doc_index, label_index):
    document_store.delete_documents(index=stale_index)

In [89]:
# Index the dev split for evaluation. add_eval_data() converts the SQuAD-format JSON
# file into Haystack document and label objects and stores them in `doc_index` and
# `label_index` respectively; it works with any dataset in SQuAD format.
eval_data_path = "squad_format_thesis/dev.json"
document_store.add_eval_data(
    filename=eval_data_path,
    doc_index=doc_index,
    label_index=label_index,
    preprocessor=preprocessor,
)



In [90]:
from haystack.nodes import DensePassageRetriever

# Same Model 9 checkpoint directory as the test-split run; reloaded for the dev split.
save_dir = "models/model_9"

# Restore the DPR retriever from disk and bind it to the Elasticsearch document store.
retriever = DensePassageRetriever.load(
    load_dir=save_dir,
    document_store=document_store,
    use_gpu=True,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_9/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_9/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_9/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_9/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_9


In [91]:
# Compute and store embeddings for every document in `doc_index` with the currently
# loaded retriever (Model 9, dev split).
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 608 docs ...


Updating embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

In [92]:
# Evaluate the fine-tuned Model 9 retriever on its own (dev split).
# Note that no_answer samples are omitted when evaluation is performed with this method.
# NOTE(review): the output recorded for this cell (recall 0.8333, MAP 0.5386) is identical to
# the Model 10 dev run — confirm the two checkpoints actually differ.
retriever_eval_results = retriever.eval(
    top_k=10,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)

# Recall: share of questions whose answer appears in the top-10 retrieved passages.
# Mean Avg Precision: additionally rewards ranking the relevant passages higher.
# Each metric is printed and then appended to the matching dev_200_* history list.
for metric_title, metric_key, metric_history in (
    ("Retriever Recall:", "recall", dev_200_recall),
    ("Retriever Mean Avg Precision:", "map", dev_200_map),
):
    metric_value = retriever_eval_results[metric_key]
    print(metric_title, metric_value)
    metric_history.append(metric_value)

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 24/24 [00:00<00:00, 77.97it/s]
INFO - haystack.nodes.retriever.base -  For 20 out of 24 questions (83.33%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.8333333333333334
Retriever Mean Avg Precision: 0.5385912698412699


## Model 10

In [93]:
# Start the Model 10 test-split run from empty indices: drop all documents, then all labels.
for stale_index in (doc_index, label_index):
    document_store.delete_documents(index=stale_index)

### Test set

In [94]:
# Index the test split for evaluation. add_eval_data() converts the SQuAD-format JSON
# file into Haystack document and label objects and stores them in `doc_index` and
# `label_index` respectively; it works with any dataset in SQuAD format.
eval_data_path = "squad_format_thesis/test.json"
document_store.add_eval_data(
    filename=eval_data_path,
    doc_index=doc_index,
    label_index=label_index,
    preprocessor=preprocessor,
)



In [95]:
from haystack.nodes import DensePassageRetriever

# Directory holding the fine-tuned Model 10 checkpoint (query and passage encoders).
save_dir = "models/model_10"

# Restore the DPR retriever from disk and bind it to the Elasticsearch document store.
retriever = DensePassageRetriever.load(
    load_dir=save_dir,
    document_store=document_store,
    use_gpu=True,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_10/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_10/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_10/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_10/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_10


In [96]:
# Compute and store embeddings for every document in `doc_index` with the currently
# loaded retriever (Model 10, test split).
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 651 docs ...


Updating embeddings:   0%|          | 0/651 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/656 [00:00<?, ? Docs/s]

In [97]:
# Evaluate the fine-tuned Model 10 retriever on its own (test split).
# Note that no_answer samples are omitted when evaluation is performed with this method.
# NOTE(review): the output recorded for this cell (recall 0.84, MAP 0.5167) is identical to
# the Model 9 and Base Model test runs — confirm models/model_10 really differs from the base encoders.
retriever_eval_results = retriever.eval(
    top_k=10,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)

# Recall: share of questions whose answer appears in the top-10 retrieved passages.
# Mean Avg Precision: additionally rewards ranking the relevant passages higher.
# Each metric is printed and then appended to the matching test_200_* history list.
for metric_title, metric_key, metric_history in (
    ("Retriever Recall:", "recall", test_200_recall),
    ("Retriever Mean Avg Precision:", "map", test_200_map),
):
    metric_value = retriever_eval_results[metric_key]
    print(metric_title, metric_value)
    metric_history.append(metric_value)

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 25/25 [00:00<00:00, 81.33it/s]
INFO - haystack.nodes.retriever.base -  For 21 out of 25 questions (84.00%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.84
Retriever Mean Avg Precision: 0.5166666666666667


### Dev set

In [98]:
# Clear the test-split data before the Model 10 dev-split run: drop all documents, then all labels.
for stale_index in (doc_index, label_index):
    document_store.delete_documents(index=stale_index)

In [99]:
# Index the dev split for evaluation. add_eval_data() converts the SQuAD-format JSON
# file into Haystack document and label objects and stores them in `doc_index` and
# `label_index` respectively; it works with any dataset in SQuAD format.
eval_data_path = "squad_format_thesis/dev.json"
document_store.add_eval_data(
    filename=eval_data_path,
    doc_index=doc_index,
    label_index=label_index,
    preprocessor=preprocessor,
)



In [100]:
from haystack.nodes import DensePassageRetriever

# Same Model 10 checkpoint directory as the test-split run; reloaded for the dev split.
save_dir = "models/model_10"

# Restore the DPR retriever from disk and bind it to the Elasticsearch document store.
retriever = DensePassageRetriever.load(
    load_dir=save_dir,
    document_store=document_store,
    use_gpu=True,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_10/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_10/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_10/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_10/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_10


In [101]:
# Compute and store embeddings for every document in `doc_index` with the currently
# loaded retriever (Model 10, dev split).
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 608 docs ...


Updating embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

In [102]:
# Evaluate the fine-tuned Model 10 retriever on its own (dev split).
# Note that no_answer samples are omitted when evaluation is performed with this method.
# NOTE(review): the output recorded for this cell (recall 0.8333, MAP 0.5386) is identical to
# the Model 9 dev run — confirm the two checkpoints actually differ.
retriever_eval_results = retriever.eval(
    top_k=10,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)

# Recall: share of questions whose answer appears in the top-10 retrieved passages.
# Mean Avg Precision: additionally rewards ranking the relevant passages higher.
# Each metric is printed and then appended to the matching dev_200_* history list.
for metric_title, metric_key, metric_history in (
    ("Retriever Recall:", "recall", dev_200_recall),
    ("Retriever Mean Avg Precision:", "map", dev_200_map),
):
    metric_value = retriever_eval_results[metric_key]
    print(metric_title, metric_value)
    metric_history.append(metric_value)

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 24/24 [00:00<00:00, 80.65it/s]
INFO - haystack.nodes.retriever.base -  For 20 out of 24 questions (83.33%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.8333333333333334
Retriever Mean Avg Precision: 0.5385912698412699


## Base Model

In [103]:
# Start the Base Model test-split run from empty indices: drop all documents, then all labels.
for stale_index in (doc_index, label_index):
    document_store.delete_documents(index=stale_index)

### Test set

In [104]:
# Index the test split for evaluation. add_eval_data() converts the SQuAD-format JSON
# file into Haystack document and label objects and stores them in `doc_index` and
# `label_index` respectively; it works with any dataset in SQuAD format.
eval_data_path = "squad_format_thesis/test.json"
document_store.add_eval_data(
    filename=eval_data_path,
    doc_index=doc_index,
    label_index=label_index,
    preprocessor=preprocessor,
)



In [105]:
from haystack.nodes import DensePassageRetriever

# Baseline retriever: the Spanish DPR question/passage encoders referenced by model name
# (not loaded from a local models/ directory like the numbered models).
retriever = DensePassageRetriever(
    document_store=document_store,
    query_embedding_model="IIC/dpr-spanish-question_encoder-allqa-base",
    passage_embedding_model="IIC/dpr-spanish-passage_encoder-allqa-base",
    use_gpu=True,
    batch_size=64,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Could not find IIC/dpr-spanish-question_encoder-allqa-base locally.
INFO - haystack.modeling.model.language_model -  Looking on Transformers Model Hub (in local cache and online)...
INFO - haystack.modeling.model.language_model -  Automatically detected language from language model name: spanish
INFO - haystack.modeling.model.language_model -  Loaded IIC/dpr-spanish-question_encoder-allqa-base
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Could not find IIC/dpr-spanish-passage_encoder-allqa-base locally.
INFO - haystack.modeling.model.language_model -  Looking on Transformers Model Hub (in local cache and online)...
INFO - haystack.modeling.model.language_model -  Automatically detected language from lang

In [106]:
# Compute and store embeddings for every document in `doc_index` with the currently
# loaded retriever (Base Model, test split).
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 651 docs ...


Updating embeddings:   0%|          | 0/651 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/704 [00:00<?, ? Docs/s]

In [107]:
# Evaluate the Base Model ("bare bones") retriever on its own (test split).
# Note that no_answer samples are omitted when evaluation is performed with this method.
retriever_eval_results = retriever.eval(
    top_k=10,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)

# Recall: share of questions whose answer appears in the top-10 retrieved passages.
# Mean Avg Precision: additionally rewards ranking the relevant passages higher.
# Each metric is printed and then appended to the matching test_200_* history list.
for metric_title, metric_key, metric_history in (
    ("Retriever Recall:", "recall", test_200_recall),
    ("Retriever Mean Avg Precision:", "map", test_200_map),
):
    metric_value = retriever_eval_results[metric_key]
    print(metric_title, metric_value)
    metric_history.append(metric_value)

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 25/25 [00:00<00:00, 73.38it/s]
INFO - haystack.nodes.retriever.base -  For 21 out of 25 questions (84.00%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.84
Retriever Mean Avg Precision: 0.5166666666666667


### Dev set

In [108]:
# Clear the test-split data before the Base Model dev-split run: drop all documents, then all labels.
for stale_index in (doc_index, label_index):
    document_store.delete_documents(index=stale_index)

In [109]:
# Index the dev split for evaluation. add_eval_data() converts the SQuAD-format JSON
# file into Haystack document and label objects and stores them in `doc_index` and
# `label_index` respectively; it works with any dataset in SQuAD format.
eval_data_path = "squad_format_thesis/dev.json"
document_store.add_eval_data(
    filename=eval_data_path,
    doc_index=doc_index,
    label_index=label_index,
    preprocessor=preprocessor,
)



In [110]:
from haystack.nodes import DensePassageRetriever

# Base Spanish DPR retriever: IIC question/passage encoders, run on GPU with batches of 64.
retriever = DensePassageRetriever(
    document_store=document_store,
    query_embedding_model="IIC/dpr-spanish-question_encoder-allqa-base",
    passage_embedding_model="IIC/dpr-spanish-passage_encoder-allqa-base",
    use_gpu=True,
    batch_size=64,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Could not find IIC/dpr-spanish-question_encoder-allqa-base locally.
INFO - haystack.modeling.model.language_model -  Looking on Transformers Model Hub (in local cache and online)...
INFO - haystack.modeling.model.language_model -  Automatically detected language from language model name: spanish
INFO - haystack.modeling.model.language_model -  Loaded IIC/dpr-spanish-question_encoder-allqa-base
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Could not find IIC/dpr-spanish-passage_encoder-allqa-base locally.
INFO - haystack.modeling.model.language_model -  Looking on Transformers Model Hub (in local cache and online)...
INFO - haystack.modeling.model.language_model -  Automatically detected language from lang

In [111]:
# Compute embeddings with the current retriever for the documents in doc_index.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 608 docs ...


Updating embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/640 [00:00<?, ? Docs/s]

In [112]:
# Evaluate the base Spanish DPR retriever (IIC encoders) on its own: dev split, 200-word chunks.
# Samples labelled no_answer are omitted by this evaluation method.
retriever_eval_results = retriever.eval(
    top_k=10,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)
# Recall: fraction of questions whose answer is found among the top-10 passages
# returned by the retriever.
print("Retriever Recall:", retriever_eval_results["recall"])
dev_200_recall.append(retriever_eval_results["recall"])
# Mean Average Precision: rewards retrievers that rank relevant documents higher.
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
dev_200_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 24/24 [00:00<00:00, 80.66it/s]
INFO - haystack.nodes.retriever.base -  For 20 out of 24 questions (83.33%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.8333333333333334
Retriever Mean Avg Precision: 0.5385912698412699


## Old Base Model

In [113]:
# Empty the document and label indices before the next split is indexed.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

### Test Set

In [114]:
# Index the test split. add_eval_data() converts a SQuAD-format JSON file into
# Haystack document and label objects and writes them to doc_index / label_index;
# the shared preprocessor is applied to the documents.
document_store.add_eval_data(
    filename="squad_format_thesis/test.json",
    preprocessor=preprocessor,
    doc_index=doc_index,
    label_index=label_index,
)



In [115]:
from haystack.nodes import DensePassageRetriever

# "Old base model": multilingual BERT DPR encoders (voidful), run on GPU with batches of 64.
retriever = DensePassageRetriever(
    document_store=document_store,
    query_embedding_model="voidful/dpr-question_encoder-bert-base-multilingual",
    passage_embedding_model="voidful/dpr-ctx_encoder-bert-base-multilingual",
    use_gpu=True,
    batch_size=64,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Could not find voidful/dpr-question_encoder-bert-base-multilingual locally.
INFO - haystack.modeling.model.language_model -  Looking on Transformers Model Hub (in local cache and online)...
INFO - haystack.modeling.model.language_model -  Automatically detected language from language model name: multilingual
INFO - haystack.modeling.model.language_model -  Loaded voidful/dpr-question_encoder-bert-base-multilingual
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DPRQuestionEncoderTokenizer'. 
The class this function is called from is 'DPRContextEncoderTokenizerFast'.
INFO - haystack.modeling.model.language_mode

In [116]:
# Compute embeddings with the current retriever for the documents in doc_index.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 651 docs ...


Updating embeddings:   0%|          | 0/651 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/704 [00:00<?, ? Docs/s]

In [117]:
# Evaluate the multilingual DPR retriever (old base model) on its own: test split, 200-word chunks.
# Samples labelled no_answer are omitted by this evaluation method.
retriever_eval_results = retriever.eval(
    top_k=10,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)
# Recall: fraction of questions whose answer is found among the top-10 passages
# returned by the retriever.
print("Retriever Recall:", retriever_eval_results["recall"])
test_200_recall.append(retriever_eval_results["recall"])
# Mean Average Precision: rewards retrievers that rank relevant documents higher.
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
test_200_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 25/25 [00:00<00:00, 81.44it/s]
INFO - haystack.nodes.retriever.base -  For 10 out of 25 questions (40.00%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.4
Retriever Mean Avg Precision: 0.204


### Dev Set

In [118]:
# Empty the document and label indices before the next split is indexed.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

In [119]:
# Index the dev split. add_eval_data() converts a SQuAD-format JSON file into
# Haystack document and label objects and writes them to doc_index / label_index;
# the shared preprocessor is applied to the documents.
document_store.add_eval_data(
    filename="squad_format_thesis/dev.json",
    preprocessor=preprocessor,
    doc_index=doc_index,
    label_index=label_index,
)



In [120]:
from haystack.nodes import DensePassageRetriever

# "Old base model": multilingual BERT DPR encoders (voidful), run on GPU with batches of 64.
retriever = DensePassageRetriever(
    document_store=document_store,
    query_embedding_model="voidful/dpr-question_encoder-bert-base-multilingual",
    passage_embedding_model="voidful/dpr-ctx_encoder-bert-base-multilingual",
    use_gpu=True,
    batch_size=64,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Could not find voidful/dpr-question_encoder-bert-base-multilingual locally.
INFO - haystack.modeling.model.language_model -  Looking on Transformers Model Hub (in local cache and online)...
INFO - haystack.modeling.model.language_model -  Automatically detected language from language model name: multilingual
INFO - haystack.modeling.model.language_model -  Loaded voidful/dpr-question_encoder-bert-base-multilingual
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DPRQuestionEncoderTokenizer'. 
The class this function is called from is 'DPRContextEncoderTokenizerFast'.
INFO - haystack.modeling.model.language_mode

In [121]:
# Compute embeddings with the current retriever for the documents in doc_index.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 608 docs ...


Updating embeddings:   0%|          | 0/608 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/640 [00:00<?, ? Docs/s]

In [122]:
# Evaluate the multilingual DPR retriever (old base model) on its own: dev split, 200-word chunks.
# Samples labelled no_answer are omitted by this evaluation method.
retriever_eval_results = retriever.eval(
    top_k=10,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)
# Recall: fraction of questions whose answer is found among the top-10 passages
# returned by the retriever.
print("Retriever Recall:", retriever_eval_results["recall"])
dev_200_recall.append(retriever_eval_results["recall"])
# Mean Average Precision: rewards retrievers that rank relevant documents higher.
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
dev_200_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 24/24 [00:00<00:00, 83.79it/s]
INFO - haystack.nodes.retriever.base -  For 11 out of 24 questions (45.83%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.4583333333333333
Retriever Mean Avg Precision: 0.23303571428571426


## BM25

In [123]:
# Initialize the BM25 retriever on top of the existing document store.
# NOTE(review): ElasticsearchRetriever is imported but not used in this cell;
# it looks like a leftover -- confirm no later cell relies on it before removing.
from haystack.nodes import ElasticsearchRetriever, BM25Retriever

# This retriever instance is reused for both the test and dev evaluations below.
retriever = BM25Retriever(document_store=document_store)

In [124]:
# Empty the document and label indices before the next split is indexed.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

### Test Set

In [125]:
# Index the test split. add_eval_data() converts a SQuAD-format JSON file into
# Haystack document and label objects and writes them to doc_index / label_index;
# the shared preprocessor is applied to the documents.
document_store.add_eval_data(
    filename="squad_format_thesis/test.json",
    preprocessor=preprocessor,
    doc_index=doc_index,
    label_index=label_index,
)



In [126]:
# Evaluate the BM25 (Elasticsearch) retriever on its own: test split, 200-word chunks.
# Samples labelled no_answer are omitted by this evaluation method.
retriever_eval_results = retriever.eval(
    top_k=10,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)
# Recall: fraction of questions whose answer is found among the top-10 passages
# returned by the retriever.
print("Retriever Recall:", retriever_eval_results["recall"])
test_200_recall.append(retriever_eval_results["recall"])
# Mean Average Precision: rewards retrievers that rank relevant documents higher.
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
test_200_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 25/25 [00:00<00:00, 63.48it/s]
INFO - haystack.nodes.retriever.base -  For 17 out of 25 questions (68.00%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.68
Retriever Mean Avg Precision: 0.46944444444444433


### Dev Set

In [127]:
# Empty the document and label indices before the next split is indexed.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

In [128]:
# Index the dev split. add_eval_data() converts a SQuAD-format JSON file into
# Haystack document and label objects and writes them to doc_index / label_index;
# the shared preprocessor is applied to the documents.
document_store.add_eval_data(
    filename="squad_format_thesis/dev.json",
    preprocessor=preprocessor,
    doc_index=doc_index,
    label_index=label_index,
)



In [129]:
# Evaluate the BM25 (Elasticsearch) retriever on its own: dev split, 200-word chunks.
# Samples labelled no_answer are omitted by this evaluation method.
retriever_eval_results = retriever.eval(
    top_k=10,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)
# Recall: fraction of questions whose answer is found among the top-10 passages
# returned by the retriever.
print("Retriever Recall:", retriever_eval_results["recall"])
dev_200_recall.append(retriever_eval_results["recall"])
# Mean Average Precision: rewards retrievers that rank relevant documents higher.
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
dev_200_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|██████████████████████████████████████████| 24/24 [00:00<00:00, 101.15it/s]
INFO - haystack.nodes.retriever.base -  For 18 out of 24 questions (75.00%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.75
Retriever Mean Avg Precision: 0.5365079365079366


# 400 split_length

Estos eran los parámetros por defecto en el área de entrenamiento de Haystack; también son los mismos parámetros con los que se subieron los documentos.

In [130]:
# Row labels for the results of this section, in the order the models are evaluated.
model_name = [
    'model_1',
    'model_2',
    'model_3',
    'model_4',
    'model_5',
    'model_6',
    'model_7',
    'model_8',
    'model_9',
    'model_10',
    'base_model',
    'old_base_model',
    'bm25',
]

# Fresh accumulators for the 400-word runs; each eval cell appends one value per model.
test_400_map, test_400_recall = [], []
dev_400_map, dev_400_recall = [], []

In [131]:
# Connect to Elasticsearch and rebuild the preprocessor for the 400-word section.
# The local Elasticsearch container can be started with:
# docker start es01-test -a
from haystack.document_stores import ElasticsearchDocumentStore

# Same connection settings and indices as the 200-word section: embeddings live in
# the "emb" field and have 768 dimensions.
document_store = ElasticsearchDocumentStore(
    host="localhost",
    username="",
    password="",
    index=doc_index,
    label_index=label_index,
    embedding_field="emb",
    embedding_dim=768,
    excluded_meta_data=["emb"],
)

from haystack.nodes import PreProcessor

# Start this section from empty document and label indices.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

# Split documents into 400-word chunks with no overlap.
# NOTE(review): the "Not supported" remarks are kept from the original; presumably they
# refer to the tool used to upload the documents (see the section intro) -- TODO confirm.
preprocessor = PreProcessor(
    clean_empty_lines=False, # Not supported
    clean_whitespace=False, # Not supported
    split_by="word",
    split_length=400,
    split_respect_sentence_boundary=False, # Not supported
    split_overlap=0,
    language="es"
)


## Model 1

### Test set

In [132]:
# Index the test split. add_eval_data() converts a SQuAD-format JSON file into
# Haystack document and label objects and writes them to doc_index / label_index;
# the shared preprocessor is applied to the documents.
document_store.add_eval_data(
    filename="squad_format_thesis/test.json",
    preprocessor=preprocessor,
    doc_index=doc_index,
    label_index=label_index,
)



In [133]:
from haystack.nodes import DensePassageRetriever

# Load the fine-tuned DPR checkpoint (query and passage encoders) saved under models/model_1.
save_dir = "models/model_1"
retriever = DensePassageRetriever.load(
    load_dir=save_dir,
    document_store=document_store,
    use_gpu=True,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_1/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_1/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_1/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_1/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_1


In [134]:
# Compute embeddings with the current retriever for the documents in doc_index.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 326 docs ...


Updating embeddings:   0%|          | 0/326 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/336 [00:00<?, ? Docs/s]

In [135]:
# Evaluate the fine-tuned model_1 retriever on its own: test split, 400-word chunks.
# Samples labelled no_answer are omitted by this evaluation method.
retriever_eval_results = retriever.eval(
    top_k=10,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)
# Recall: fraction of questions whose answer is found among the top-10 passages
# returned by the retriever.
print("Retriever Recall:", retriever_eval_results["recall"])
test_400_recall.append(retriever_eval_results["recall"])
# Mean Average Precision: rewards retrievers that rank relevant documents higher.
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
test_400_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 25/25 [00:00<00:00, 82.37it/s]
INFO - haystack.nodes.retriever.base -  For 13 out of 25 questions (52.00%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.52
Retriever Mean Avg Precision: 0.30366666666666664


### Dev set

In [136]:
# Empty the document and label indices before the next split is indexed.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

In [137]:
# Index the dev split. add_eval_data() converts a SQuAD-format JSON file into
# Haystack document and label objects and writes them to doc_index / label_index;
# the shared preprocessor is applied to the documents.
document_store.add_eval_data(
    filename="squad_format_thesis/dev.json",
    preprocessor=preprocessor,
    doc_index=doc_index,
    label_index=label_index,
)



In [138]:
from haystack.nodes import DensePassageRetriever

# Load the fine-tuned DPR checkpoint (query and passage encoders) saved under models/model_1.
save_dir = "models/model_1"
retriever = DensePassageRetriever.load(
    load_dir=save_dir,
    document_store=document_store,
    use_gpu=True,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_1/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_1/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_1/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_1/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_1


In [139]:
# Compute embeddings with the current retriever for the documents in doc_index.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 305 docs ...


Updating embeddings:   0%|          | 0/305 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/320 [00:00<?, ? Docs/s]

In [140]:
# Evaluate the fine-tuned model_1 retriever on its own: dev split, 400-word chunks.
# Samples labelled no_answer are omitted by this evaluation method.
retriever_eval_results = retriever.eval(
    top_k=10,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)
# Recall: fraction of questions whose answer is found among the top-10 passages
# returned by the retriever.
print("Retriever Recall:", retriever_eval_results["recall"])
dev_400_recall.append(retriever_eval_results["recall"])
# Mean Average Precision: rewards retrievers that rank relevant documents higher.
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
dev_400_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 24/24 [00:00<00:00, 88.85it/s]
INFO - haystack.nodes.retriever.base -  For 10 out of 24 questions (41.67%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.4166666666666667
Retriever Mean Avg Precision: 0.2108796296296296


## Model 2

In [141]:
# Empty the document and label indices before the next split is indexed.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

### Test set

In [142]:
# Index the test split. add_eval_data() converts a SQuAD-format JSON file into
# Haystack document and label objects and writes them to doc_index / label_index;
# the shared preprocessor is applied to the documents.
document_store.add_eval_data(
    filename="squad_format_thesis/test.json",
    preprocessor=preprocessor,
    doc_index=doc_index,
    label_index=label_index,
)



In [143]:
from haystack.nodes import DensePassageRetriever

# Load the fine-tuned DPR checkpoint (query and passage encoders) saved under models/model_2.
save_dir = "models/model_2"
retriever = DensePassageRetriever.load(
    load_dir=save_dir,
    document_store=document_store,
    use_gpu=True,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_2/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_2/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_2/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_2/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_2


In [144]:
# Compute embeddings with the current retriever for the documents in doc_index.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 326 docs ...


Updating embeddings:   0%|          | 0/326 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/336 [00:00<?, ? Docs/s]

In [145]:
# Evaluate the fine-tuned model_2 retriever on its own: test split, 400-word chunks.
# Samples labelled no_answer are omitted by this evaluation method.
retriever_eval_results = retriever.eval(
    top_k=10,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)
# Recall: fraction of questions whose answer is found among the top-10 passages
# returned by the retriever.
print("Retriever Recall:", retriever_eval_results["recall"])
test_400_recall.append(retriever_eval_results["recall"])
# Mean Average Precision: rewards retrievers that rank relevant documents higher.
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
test_400_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 25/25 [00:00<00:00, 72.97it/s]
INFO - haystack.nodes.retriever.base -  For 16 out of 25 questions (64.00%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.64
Retriever Mean Avg Precision: 0.3804285714285714


### Dev set

In [146]:
# Empty the document and label indices before the next split is indexed.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

In [147]:
# Index the dev split. add_eval_data() converts a SQuAD-format JSON file into
# Haystack document and label objects and writes them to doc_index / label_index;
# the shared preprocessor is applied to the documents.
document_store.add_eval_data(
    filename="squad_format_thesis/dev.json",
    preprocessor=preprocessor,
    doc_index=doc_index,
    label_index=label_index,
)



In [148]:
from haystack.nodes import DensePassageRetriever

# Load the fine-tuned DPR checkpoint (query and passage encoders) saved under models/model_2.
save_dir = "models/model_2"
retriever = DensePassageRetriever.load(
    load_dir=save_dir,
    document_store=document_store,
    use_gpu=True,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_2/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_2/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_2/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_2/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_2


In [149]:
# Compute embeddings with the current retriever for the documents in doc_index.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 305 docs ...


Updating embeddings:   0%|          | 0/305 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/320 [00:00<?, ? Docs/s]

In [150]:
# Evaluate the fine-tuned model_2 retriever on its own: dev split, 400-word chunks.
# Samples labelled no_answer are omitted by this evaluation method.
retriever_eval_results = retriever.eval(
    top_k=10,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)
# Recall: fraction of questions whose answer is found among the top-10 passages
# returned by the retriever.
print("Retriever Recall:", retriever_eval_results["recall"])
dev_400_recall.append(retriever_eval_results["recall"])
# Mean Average Precision: rewards retrievers that rank relevant documents higher.
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
dev_400_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 24/24 [00:00<00:00, 85.49it/s]
INFO - haystack.nodes.retriever.base -  For 12 out of 24 questions (50.00%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.5
Retriever Mean Avg Precision: 0.3483796296296296


## Model 3

In [151]:
# Empty the document and label indices before the next split is indexed.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

### Test set

In [152]:
# Index the test split. add_eval_data() converts a SQuAD-format JSON file into
# Haystack document and label objects and writes them to doc_index / label_index;
# the shared preprocessor is applied to the documents.
document_store.add_eval_data(
    filename="squad_format_thesis/test.json",
    preprocessor=preprocessor,
    doc_index=doc_index,
    label_index=label_index,
)



In [153]:
from haystack.nodes import DensePassageRetriever

# Load the fine-tuned DPR checkpoint (query and passage encoders) saved under models/model_3.
save_dir = "models/model_3"
retriever = DensePassageRetriever.load(
    load_dir=save_dir,
    document_store=document_store,
    use_gpu=True,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_3/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_3/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_3/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_3/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_3


In [154]:
# Compute embeddings with the current retriever for the documents in doc_index.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 326 docs ...


Updating embeddings:   0%|          | 0/326 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/336 [00:00<?, ? Docs/s]

In [155]:
# Evaluate the fine-tuned model_3 retriever on its own: test split, 400-word chunks.
# Samples labelled no_answer are omitted by this evaluation method.
retriever_eval_results = retriever.eval(
    top_k=10,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)
# Recall: fraction of questions whose answer is found among the top-10 passages
# returned by the retriever.
print("Retriever Recall:", retriever_eval_results["recall"])
test_400_recall.append(retriever_eval_results["recall"])
# Mean Average Precision: rewards retrievers that rank relevant documents higher.
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
test_400_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 25/25 [00:00<00:00, 87.63it/s]
INFO - haystack.nodes.retriever.base -  For 16 out of 25 questions (64.00%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.64
Retriever Mean Avg Precision: 0.36753968253968256


### Dev set

In [156]:
# Empty the document and label indices before the next split is indexed.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

In [157]:
# Index the dev split. add_eval_data() converts a SQuAD-format JSON file into
# Haystack document and label objects and writes them to doc_index / label_index;
# the shared preprocessor is applied to the documents.
document_store.add_eval_data(
    filename="squad_format_thesis/dev.json",
    preprocessor=preprocessor,
    doc_index=doc_index,
    label_index=label_index,
)



In [158]:
from haystack.nodes import DensePassageRetriever

# Load the fine-tuned DPR checkpoint (query and passage encoders) saved under models/model_3.
save_dir = "models/model_3"
retriever = DensePassageRetriever.load(
    load_dir=save_dir,
    document_store=document_store,
    use_gpu=True,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_3/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_3/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_3/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_3/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_3


In [159]:
# Compute embeddings with the current retriever for the documents in doc_index.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 305 docs ...


Updating embeddings:   0%|          | 0/305 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/320 [00:00<?, ? Docs/s]

In [160]:
# Evaluate the fine-tuned model_3 retriever on its own: dev split, 400-word chunks.
# Samples labelled no_answer are omitted by this evaluation method.
retriever_eval_results = retriever.eval(
    top_k=10,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)
# Recall: fraction of questions whose answer is found among the top-10 passages
# returned by the retriever.
print("Retriever Recall:", retriever_eval_results["recall"])
dev_400_recall.append(retriever_eval_results["recall"])
# Mean Average Precision: rewards retrievers that rank relevant documents higher.
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
dev_400_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 24/24 [00:00<00:00, 85.67it/s]
INFO - haystack.nodes.retriever.base -  For 12 out of 24 questions (50.00%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.5
Retriever Mean Avg Precision: 0.25143849206349206


## Model 4

In [161]:
# Empty the document and label indices before the next split is indexed.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

### Test set

In [162]:
# Index the test split. add_eval_data() converts a SQuAD-format JSON file into
# Haystack document and label objects and writes them to doc_index / label_index;
# the shared preprocessor is applied to the documents.
document_store.add_eval_data(
    filename="squad_format_thesis/test.json",
    preprocessor=preprocessor,
    doc_index=doc_index,
    label_index=label_index,
)



In [163]:
from haystack.nodes import DensePassageRetriever

# Directory of the fine-tuned DPR checkpoint for model_4 (query_encoder/ and passage_encoder/).
save_dir = "models/model_4"

# Load both encoders from disk and bind the retriever to the document store; inference runs on the GPU.
retriever = DensePassageRetriever.load(load_dir=save_dir, document_store=document_store, use_gpu=True)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_4/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_4/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_4/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_4/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_4


In [164]:
# Recompute the embeddings of every document in doc_index with the currently loaded retriever,
# so the stored vectors come from the same model that will answer the evaluation queries.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 326 docs ...


Updating embeddings:   0%|          | 0/326 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/336 [00:00<?, ? Docs/s]

In [165]:
# Evaluate the retriever on its own: fine-tuned model_4, test set.
# Samples labelled no_answer are skipped by this evaluation method.
retriever_eval_results = retriever.eval(
    doc_index=doc_index,
    label_index=label_index,
    top_k=10,
    open_domain=True,
)

# Recall: fraction of questions whose answer appears in at least one of the top_k retrieved passages.
print("Retriever Recall:", retriever_eval_results["recall"])
test_400_recall.append(retriever_eval_results["recall"])

# Mean Average Precision: additionally rewards ranking the relevant passages higher.
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
test_400_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 25/25 [00:00<00:00, 89.62it/s]
INFO - haystack.nodes.retriever.base -  For 13 out of 25 questions (52.00%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.52
Retriever Mean Avg Precision: 0.3224444444444444


### Dev set

In [166]:
# Clear both evaluation indices so the test-set documents and labels do not leak into the dev-set run.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

In [167]:
# Index the dev split. add_eval_data() converts a SQuAD-format JSON file into Haystack
# document and label objects and stores them in doc_index and label_index respectively;
# it works with any dataset in SQuAD format. The shared preprocessor is passed along.
document_store.add_eval_data(
    filename="squad_format_thesis/dev.json",
    doc_index=doc_index,
    label_index=label_index,
    preprocessor=preprocessor,
)



In [168]:
from haystack.nodes import DensePassageRetriever

# Directory of the fine-tuned DPR checkpoint for model_4 (query_encoder/ and passage_encoder/).
save_dir = "models/model_4"

# Load both encoders from disk and bind the retriever to the document store; inference runs on the GPU.
# NOTE(review): the test-set cell of this section loads the same checkpoint, so this reload
# looks redundant — confirm before removing.
retriever = DensePassageRetriever.load(load_dir=save_dir, document_store=document_store, use_gpu=True)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_4/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_4/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_4/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_4/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_4


In [169]:
# Recompute the embeddings of every document in doc_index with the currently loaded retriever,
# so the stored vectors come from the same model that will answer the evaluation queries.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 305 docs ...


Updating embeddings:   0%|          | 0/305 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/320 [00:00<?, ? Docs/s]

In [170]:
# Evaluate the retriever on its own: fine-tuned model_4, dev set.
# Samples labelled no_answer are skipped by this evaluation method.
retriever_eval_results = retriever.eval(
    doc_index=doc_index,
    label_index=label_index,
    top_k=10,
    open_domain=True,
)

# Recall: fraction of questions whose answer appears in at least one of the top_k retrieved passages.
print("Retriever Recall:", retriever_eval_results["recall"])
dev_400_recall.append(retriever_eval_results["recall"])

# Mean Average Precision: additionally rewards ranking the relevant passages higher.
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
dev_400_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 24/24 [00:00<00:00, 67.72it/s]
INFO - haystack.nodes.retriever.base -  For 9 out of 24 questions (37.50%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.375
Retriever Mean Avg Precision: 0.20949074074074073


## Model 5

In [171]:
# Clear both evaluation indices so documents and labels from the previous run do not leak into the next one.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

### Test set

In [172]:
# Index the test split. add_eval_data() converts a SQuAD-format JSON file into Haystack
# document and label objects and stores them in doc_index and label_index respectively;
# it works with any dataset in SQuAD format. The shared preprocessor is passed along.
document_store.add_eval_data(
    filename="squad_format_thesis/test.json",
    doc_index=doc_index,
    label_index=label_index,
    preprocessor=preprocessor,
)



In [173]:
from haystack.nodes import DensePassageRetriever

# Directory of the fine-tuned DPR checkpoint for model_5 (query_encoder/ and passage_encoder/).
save_dir = "models/model_5"

# Load both encoders from disk and bind the retriever to the document store; inference runs on the GPU.
retriever = DensePassageRetriever.load(load_dir=save_dir, document_store=document_store, use_gpu=True)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_5/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_5/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_5/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_5/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_5


In [174]:
# Recompute the embeddings of every document in doc_index with the currently loaded retriever,
# so the stored vectors come from the same model that will answer the evaluation queries.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 326 docs ...


Updating embeddings:   0%|          | 0/326 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/336 [00:00<?, ? Docs/s]

In [175]:
# Evaluate the retriever on its own: fine-tuned model_5, test set.
# Samples labelled no_answer are skipped by this evaluation method.
retriever_eval_results = retriever.eval(
    doc_index=doc_index,
    label_index=label_index,
    top_k=10,
    open_domain=True,
)

# Recall: fraction of questions whose answer appears in at least one of the top_k retrieved passages.
print("Retriever Recall:", retriever_eval_results["recall"])
test_400_recall.append(retriever_eval_results["recall"])

# Mean Average Precision: additionally rewards ranking the relevant passages higher.
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
test_400_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 25/25 [00:00<00:00, 87.40it/s]
INFO - haystack.nodes.retriever.base -  For 16 out of 25 questions (64.00%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.64
Retriever Mean Avg Precision: 0.3813809523809523


### Dev set

In [176]:
# Clear both evaluation indices so the test-set documents and labels do not leak into the dev-set run.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

In [177]:
# Index the dev split. add_eval_data() converts a SQuAD-format JSON file into Haystack
# document and label objects and stores them in doc_index and label_index respectively;
# it works with any dataset in SQuAD format. The shared preprocessor is passed along.
document_store.add_eval_data(
    filename="squad_format_thesis/dev.json",
    doc_index=doc_index,
    label_index=label_index,
    preprocessor=preprocessor,
)



In [178]:
from haystack.nodes import DensePassageRetriever

# Directory of the fine-tuned DPR checkpoint for model_5 (query_encoder/ and passage_encoder/).
save_dir = "models/model_5"

# Load both encoders from disk and bind the retriever to the document store; inference runs on the GPU.
# NOTE(review): the test-set cell of this section loads the same checkpoint, so this reload
# looks redundant — confirm before removing.
retriever = DensePassageRetriever.load(load_dir=save_dir, document_store=document_store, use_gpu=True)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_5/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_5/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_5/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_5/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_5


In [179]:
# Recompute the embeddings of every document in doc_index with the currently loaded retriever,
# so the stored vectors come from the same model that will answer the evaluation queries.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 305 docs ...


Updating embeddings:   0%|          | 0/305 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/320 [00:00<?, ? Docs/s]

In [180]:
# Evaluate the retriever on its own: fine-tuned model_5, dev set.
# Samples labelled no_answer are skipped by this evaluation method.
retriever_eval_results = retriever.eval(
    doc_index=doc_index,
    label_index=label_index,
    top_k=10,
    open_domain=True,
)

# Recall: fraction of questions whose answer appears in at least one of the top_k retrieved passages.
print("Retriever Recall:", retriever_eval_results["recall"])
dev_400_recall.append(retriever_eval_results["recall"])

# Mean Average Precision: additionally rewards ranking the relevant passages higher.
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
dev_400_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 24/24 [00:00<00:00, 82.36it/s]
INFO - haystack.nodes.retriever.base -  For 12 out of 24 questions (50.00%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.5
Retriever Mean Avg Precision: 0.38958333333333334


## Model 6

In [181]:
# Clear both evaluation indices so documents and labels from the previous run do not leak into the next one.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

### Test set

In [182]:
# Index the test split. add_eval_data() converts a SQuAD-format JSON file into Haystack
# document and label objects and stores them in doc_index and label_index respectively;
# it works with any dataset in SQuAD format. The shared preprocessor is passed along.
document_store.add_eval_data(
    filename="squad_format_thesis/test.json",
    doc_index=doc_index,
    label_index=label_index,
    preprocessor=preprocessor,
)



In [183]:
from haystack.nodes import DensePassageRetriever

# Directory of the fine-tuned DPR checkpoint for model_6 (query_encoder/ and passage_encoder/).
save_dir = "models/model_6"

# Load both encoders from disk and bind the retriever to the document store; inference runs on the GPU.
retriever = DensePassageRetriever.load(load_dir=save_dir, document_store=document_store, use_gpu=True)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_6/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_6/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_6/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_6/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_6


In [184]:
# Recompute the embeddings of every document in doc_index with the currently loaded retriever,
# so the stored vectors come from the same model that will answer the evaluation queries.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 326 docs ...


Updating embeddings:   0%|          | 0/326 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/336 [00:00<?, ? Docs/s]

In [185]:
# Evaluate the retriever on its own: fine-tuned model_6, test set.
# Samples labelled no_answer are skipped by this evaluation method.
retriever_eval_results = retriever.eval(
    doc_index=doc_index,
    label_index=label_index,
    top_k=10,
    open_domain=True,
)

# Recall: fraction of questions whose answer appears in at least one of the top_k retrieved passages.
print("Retriever Recall:", retriever_eval_results["recall"])
test_400_recall.append(retriever_eval_results["recall"])

# Mean Average Precision: additionally rewards ranking the relevant passages higher.
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
test_400_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 25/25 [00:00<00:00, 78.71it/s]
INFO - haystack.nodes.retriever.base -  For 13 out of 25 questions (52.00%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.52
Retriever Mean Avg Precision: 0.3361111111111111


### Dev set

In [186]:
# Clear both evaluation indices so the test-set documents and labels do not leak into the dev-set run.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

In [187]:
# Index the dev split. add_eval_data() converts a SQuAD-format JSON file into Haystack
# document and label objects and stores them in doc_index and label_index respectively;
# it works with any dataset in SQuAD format. The shared preprocessor is passed along.
document_store.add_eval_data(
    filename="squad_format_thesis/dev.json",
    doc_index=doc_index,
    label_index=label_index,
    preprocessor=preprocessor,
)



In [188]:
from haystack.nodes import DensePassageRetriever

# Directory of the fine-tuned DPR checkpoint for model_6 (query_encoder/ and passage_encoder/).
save_dir = "models/model_6"

# Load both encoders from disk and bind the retriever to the document store; inference runs on the GPU.
# NOTE(review): the test-set cell of this section loads the same checkpoint, so this reload
# looks redundant — confirm before removing.
retriever = DensePassageRetriever.load(load_dir=save_dir, document_store=document_store, use_gpu=True)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_6/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_6/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_6/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_6/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_6


In [189]:
# Recompute the embeddings of every document in doc_index with the currently loaded retriever,
# so the stored vectors come from the same model that will answer the evaluation queries.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 305 docs ...


Updating embeddings:   0%|          | 0/305 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/320 [00:00<?, ? Docs/s]

In [190]:
# Evaluate the retriever on its own: fine-tuned model_6, dev set.
# Samples labelled no_answer are skipped by this evaluation method.
retriever_eval_results = retriever.eval(
    doc_index=doc_index,
    label_index=label_index,
    top_k=10,
    open_domain=True,
)

# Recall: fraction of questions whose answer appears in at least one of the top_k retrieved passages.
print("Retriever Recall:", retriever_eval_results["recall"])
dev_400_recall.append(retriever_eval_results["recall"])

# Mean Average Precision: additionally rewards ranking the relevant passages higher.
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
dev_400_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 24/24 [00:00<00:00, 75.94it/s]
INFO - haystack.nodes.retriever.base -  For 9 out of 24 questions (37.50%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.375
Retriever Mean Avg Precision: 0.21006944444444442


## Model 7

In [191]:
# Clear both evaluation indices so documents and labels from the previous run do not leak into the next one.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

### Test set

In [192]:
# Index the test split. add_eval_data() converts a SQuAD-format JSON file into Haystack
# document and label objects and stores them in doc_index and label_index respectively;
# it works with any dataset in SQuAD format. The shared preprocessor is passed along.
document_store.add_eval_data(
    filename="squad_format_thesis/test.json",
    doc_index=doc_index,
    label_index=label_index,
    preprocessor=preprocessor,
)



In [193]:
from haystack.nodes import DensePassageRetriever

# Directory of the fine-tuned DPR checkpoint for model_7 (query_encoder/ and passage_encoder/).
save_dir = "models/model_7"

# Load both encoders from disk and bind the retriever to the document store; inference runs on the GPU.
retriever = DensePassageRetriever.load(load_dir=save_dir, document_store=document_store, use_gpu=True)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_7/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_7/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_7/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_7/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_7


In [194]:
# Recompute the embeddings of every document in doc_index with the currently loaded retriever,
# so the stored vectors come from the same model that will answer the evaluation queries.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 326 docs ...


Updating embeddings:   0%|          | 0/326 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/336 [00:00<?, ? Docs/s]

In [195]:
# Evaluate the retriever on its own: fine-tuned model_7, test set.
# Samples labelled no_answer are skipped by this evaluation method.
retriever_eval_results = retriever.eval(
    doc_index=doc_index,
    label_index=label_index,
    top_k=10,
    open_domain=True,
)

# Recall: fraction of questions whose answer appears in at least one of the top_k retrieved passages.
print("Retriever Recall:", retriever_eval_results["recall"])
test_400_recall.append(retriever_eval_results["recall"])

# Mean Average Precision: additionally rewards ranking the relevant passages higher.
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
test_400_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 25/25 [00:00<00:00, 80.49it/s]
INFO - haystack.nodes.retriever.base -  For 16 out of 25 questions (64.00%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.64
Retriever Mean Avg Precision: 0.3813809523809523


### Dev set

In [196]:
# Clear both evaluation indices so the test-set documents and labels do not leak into the dev-set run.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

In [197]:
# Index the dev split. add_eval_data() converts a SQuAD-format JSON file into Haystack
# document and label objects and stores them in doc_index and label_index respectively;
# it works with any dataset in SQuAD format. The shared preprocessor is passed along.
document_store.add_eval_data(
    filename="squad_format_thesis/dev.json",
    doc_index=doc_index,
    label_index=label_index,
    preprocessor=preprocessor,
)



In [198]:
from haystack.nodes import DensePassageRetriever

# Directory of the fine-tuned DPR checkpoint for model_7 (query_encoder/ and passage_encoder/).
save_dir = "models/model_7"

# Load both encoders from disk and bind the retriever to the document store; inference runs on the GPU.
# NOTE(review): the test-set cell of this section loads the same checkpoint, so this reload
# looks redundant — confirm before removing.
retriever = DensePassageRetriever.load(load_dir=save_dir, document_store=document_store, use_gpu=True)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_7/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_7/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_7/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_7/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_7


In [199]:
# Recompute the embeddings of every document in doc_index with the currently loaded retriever,
# so the stored vectors come from the same model that will answer the evaluation queries.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 305 docs ...


Updating embeddings:   0%|          | 0/305 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/320 [00:00<?, ? Docs/s]

In [200]:
# Evaluate the retriever on its own: fine-tuned model_7, dev set.
# Samples labelled no_answer are skipped by this evaluation method.
retriever_eval_results = retriever.eval(
    doc_index=doc_index,
    label_index=label_index,
    top_k=10,
    open_domain=True,
)

# Recall: fraction of questions whose answer appears in at least one of the top_k retrieved passages.
print("Retriever Recall:", retriever_eval_results["recall"])
dev_400_recall.append(retriever_eval_results["recall"])

# Mean Average Precision: additionally rewards ranking the relevant passages higher.
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
dev_400_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 24/24 [00:00<00:00, 85.84it/s]
INFO - haystack.nodes.retriever.base -  For 12 out of 24 questions (50.00%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.5
Retriever Mean Avg Precision: 0.34791666666666665


## Model 8

In [201]:
# Clear both evaluation indices so documents and labels from the previous run do not leak into the next one.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

### Test set

In [202]:
# Index the test split. add_eval_data() converts a SQuAD-format JSON file into Haystack
# document and label objects and stores them in doc_index and label_index respectively;
# it works with any dataset in SQuAD format. The shared preprocessor is passed along.
document_store.add_eval_data(
    filename="squad_format_thesis/test.json",
    doc_index=doc_index,
    label_index=label_index,
    preprocessor=preprocessor,
)



In [203]:
from haystack.nodes import DensePassageRetriever

# Directory of the fine-tuned DPR checkpoint for model_8 (query_encoder/ and passage_encoder/).
save_dir = "models/model_8"

# Load both encoders from disk and bind the retriever to the document store; inference runs on the GPU.
retriever = DensePassageRetriever.load(load_dir=save_dir, document_store=document_store, use_gpu=True)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_8/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_8/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_8/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_8/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_8


In [204]:
# Recompute the embeddings of every document in doc_index with the currently loaded retriever,
# so the stored vectors come from the same model that will answer the evaluation queries.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 326 docs ...


Updating embeddings:   0%|          | 0/326 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/336 [00:00<?, ? Docs/s]

In [205]:
# Evaluate the retriever on its own: fine-tuned model_8, test set.
# Samples labelled no_answer are skipped by this evaluation method.
retriever_eval_results = retriever.eval(
    doc_index=doc_index,
    label_index=label_index,
    top_k=10,
    open_domain=True,
)

# Recall: fraction of questions whose answer appears in at least one of the top_k retrieved passages.
print("Retriever Recall:", retriever_eval_results["recall"])
test_400_recall.append(retriever_eval_results["recall"])

# Mean Average Precision: additionally rewards ranking the relevant passages higher.
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
test_400_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 25/25 [00:00<00:00, 72.77it/s]
INFO - haystack.nodes.retriever.base -  For 16 out of 25 questions (64.00%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.64
Retriever Mean Avg Precision: 0.36825396825396817


### Dev set

In [206]:
# Clear both evaluation indices so the test-set documents and labels do not leak into the dev-set run.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

In [207]:
# Index the dev split. add_eval_data() converts a SQuAD-format JSON file into Haystack
# document and label objects and stores them in doc_index and label_index respectively;
# it works with any dataset in SQuAD format. The shared preprocessor is passed along.
document_store.add_eval_data(
    filename="squad_format_thesis/dev.json",
    doc_index=doc_index,
    label_index=label_index,
    preprocessor=preprocessor,
)



In [208]:
from haystack.nodes import DensePassageRetriever

# Directory of the fine-tuned DPR checkpoint for model_8 (query_encoder/ and passage_encoder/).
save_dir = "models/model_8"

# Load both encoders from disk and bind the retriever to the document store; inference runs on the GPU.
# NOTE(review): the test-set cell of this section loads the same checkpoint, so this reload
# looks redundant — confirm before removing.
retriever = DensePassageRetriever.load(load_dir=save_dir, document_store=document_store, use_gpu=True)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_8/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_8/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_8/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_8/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_8


In [209]:
# Recompute the embeddings of every document in doc_index with the currently loaded retriever,
# so the stored vectors come from the same model that will answer the evaluation queries.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 305 docs ...


Updating embeddings:   0%|          | 0/305 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/320 [00:00<?, ? Docs/s]

In [210]:
# Evaluate the retriever on its own: fine-tuned model_8, dev set.
# Samples labelled no_answer are skipped by this evaluation method.
retriever_eval_results = retriever.eval(
    doc_index=doc_index,
    label_index=label_index,
    top_k=10,
    open_domain=True,
)

# Recall: fraction of questions whose answer appears in at least one of the top_k retrieved passages.
print("Retriever Recall:", retriever_eval_results["recall"])
dev_400_recall.append(retriever_eval_results["recall"])

# Mean Average Precision: additionally rewards ranking the relevant passages higher.
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
dev_400_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 24/24 [00:00<00:00, 63.88it/s]
INFO - haystack.nodes.retriever.base -  For 12 out of 24 questions (50.00%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.5
Retriever Mean Avg Precision: 0.2659722222222222


## Model 9

In [211]:
# Clear both evaluation indices so documents and labels from the previous run do not leak into the next one.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

### Test set

In [212]:
# Index the test split. add_eval_data() converts a SQuAD-format JSON file into Haystack
# document and label objects and stores them in doc_index and label_index respectively;
# it works with any dataset in SQuAD format. The shared preprocessor is passed along.
document_store.add_eval_data(
    filename="squad_format_thesis/test.json",
    doc_index=doc_index,
    label_index=label_index,
    preprocessor=preprocessor,
)



In [213]:
from haystack.nodes import DensePassageRetriever

# Directory of the fine-tuned DPR checkpoint for model_9 (query_encoder/ and passage_encoder/).
save_dir = "models/model_9"

# Load both encoders from disk and bind the retriever to the document store; inference runs on the GPU.
retriever = DensePassageRetriever.load(load_dir=save_dir, document_store=document_store, use_gpu=True)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_9/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_9/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_9/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_9/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_9


In [214]:
# Recompute the embeddings of every document in doc_index with the currently loaded retriever,
# so the stored vectors come from the same model that will answer the evaluation queries.
document_store.update_embeddings(retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 326 docs ...


Updating embeddings:   0%|          | 0/326 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/336 [00:00<?, ? Docs/s]

In [215]:
# Evaluate the retriever on its own: fine-tuned model_9, test set.
# Samples labelled no_answer are skipped by this evaluation method.
retriever_eval_results = retriever.eval(
    doc_index=doc_index,
    label_index=label_index,
    top_k=10,
    open_domain=True,
)

# Recall: fraction of questions whose answer appears in at least one of the top_k retrieved passages.
print("Retriever Recall:", retriever_eval_results["recall"])
test_400_recall.append(retriever_eval_results["recall"])

# Mean Average Precision: additionally rewards ranking the relevant passages higher.
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
test_400_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 25/25 [00:00<00:00, 67.32it/s]
INFO - haystack.nodes.retriever.base -  For 16 out of 25 questions (64.00%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.64
Retriever Mean Avg Precision: 0.3813809523809523


### Dev set

In [216]:
# Clear both evaluation indices so the test-set documents and labels do not leak into the dev-set run.
document_store.delete_documents(index=doc_index)
document_store.delete_documents(index=label_index)

In [217]:
# Index the dev split. add_eval_data() converts a SQuAD-format JSON file into Haystack
# document and label objects and stores them in doc_index and label_index respectively;
# it works with any dataset in SQuAD format. The shared preprocessor is passed along.
document_store.add_eval_data(
    filename="squad_format_thesis/dev.json",
    doc_index=doc_index,
    label_index=label_index,
    preprocessor=preprocessor,
)



In [218]:
from haystack.nodes import DensePassageRetriever

# Directory that holds the saved query/passage encoders of model_9
save_dir = "models/model_9"

# Restore the DPR retriever from disk and attach it to the document store
retriever = DensePassageRetriever.load(
    document_store=document_store,
    load_dir=save_dir,
    use_gpu=True,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_9/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_9/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_9/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_9/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_9


In [219]:
# Embed every document currently stored in doc_index with the loaded retriever
# and write the vectors back to the document store.
document_store.update_embeddings(retriever=retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 305 docs ...


Updating embeddings:   0%|          | 0/305 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/320 [00:00<?, ? Docs/s]

In [220]:
## Standalone retriever evaluation -- retriever under test: fine-tuned model_9
# Note: no_answer samples are omitted when evaluation is performed with this method
retriever_eval_results = retriever.eval(
    top_k=10,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)
# Recall: share of questions whose answer is found among the top_k passages
# returned by the retriever
print("Retriever Recall:", retriever_eval_results["recall"])
dev_400_recall.append(retriever_eval_results["recall"])
# Mean Average Precision: rewards retrievers that rank relevant documents higher
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
dev_400_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 24/24 [00:00<00:00, 82.64it/s]
INFO - haystack.nodes.retriever.base -  For 12 out of 24 questions (50.00%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.5
Retriever Mean Avg Precision: 0.38958333333333334


## Model 10

In [221]:
# Empty both evaluation indices so the next split is indexed from scratch
for index_name in (doc_index, label_index):
    document_store.delete_documents(index=index_name)

### Test set

In [222]:
# Index the test split. add_eval_data() converts a SQuAD-format JSON file into
# Haystack document and label objects and writes them to doc_index and
# label_index respectively; documents go through `preprocessor` first.
document_store.add_eval_data(
    filename="squad_format_thesis/test.json",
    preprocessor=preprocessor,
    doc_index=doc_index,
    label_index=label_index,
)



In [223]:
from haystack.nodes import DensePassageRetriever

# Directory that holds the saved query/passage encoders of model_10
save_dir = "models/model_10"

# Restore the DPR retriever from disk and attach it to the document store
retriever = DensePassageRetriever.load(
    document_store=document_store,
    load_dir=save_dir,
    use_gpu=True,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_10/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_10/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_10/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_10/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_10


In [224]:
# Embed every document currently stored in doc_index with the loaded retriever
# and write the vectors back to the document store.
document_store.update_embeddings(retriever=retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 326 docs ...


Updating embeddings:   0%|          | 0/326 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/336 [00:00<?, ? Docs/s]

In [225]:
## Standalone retriever evaluation -- retriever under test: fine-tuned model_10
# Note: no_answer samples are omitted when evaluation is performed with this method
# NOTE(review): the recorded recall/MAP for this cell are identical to those of
# model_9 and of the base model -- confirm the fine-tuned weights really differ.
retriever_eval_results = retriever.eval(
    top_k=10,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)
# Recall: share of questions whose answer is found among the top_k passages
# returned by the retriever
print("Retriever Recall:", retriever_eval_results["recall"])
test_400_recall.append(retriever_eval_results["recall"])
# Mean Average Precision: rewards retrievers that rank relevant documents higher
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
test_400_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 25/25 [00:00<00:00, 80.34it/s]
INFO - haystack.nodes.retriever.base -  For 16 out of 25 questions (64.00%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.64
Retriever Mean Avg Precision: 0.3813809523809523


### Dev set

In [226]:
# Empty both evaluation indices so the next split is indexed from scratch
for index_name in (doc_index, label_index):
    document_store.delete_documents(index=index_name)

In [227]:
# Index the dev split. add_eval_data() converts a SQuAD-format JSON file into
# Haystack document and label objects and writes them to doc_index and
# label_index respectively; documents go through `preprocessor` first.
document_store.add_eval_data(
    filename="squad_format_thesis/dev.json",
    preprocessor=preprocessor,
    doc_index=doc_index,
    label_index=label_index,
)



In [228]:
from haystack.nodes import DensePassageRetriever

# Directory that holds the saved query/passage encoders of model_10
save_dir = "models/model_10"

# Restore the DPR retriever from disk and attach it to the document store
retriever = DensePassageRetriever.load(
    document_store=document_store,
    load_dir=save_dir,
    use_gpu=True,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_10/query_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_10/query_encoder
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Model found locally at models/model_10/passage_encoder
INFO - haystack.modeling.model.language_model -  Loaded models/model_10/passage_encoder
INFO - haystack.nodes.retriever.dense -  DPR model loaded from models/model_10


In [229]:
# Embed every document currently stored in doc_index with the loaded retriever
# and write the vectors back to the document store.
document_store.update_embeddings(retriever=retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 305 docs ...


Updating embeddings:   0%|          | 0/305 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/320 [00:00<?, ? Docs/s]

In [230]:
## Standalone retriever evaluation -- retriever under test: fine-tuned model_10
# Note: no_answer samples are omitted when evaluation is performed with this method
# NOTE(review): the recorded recall/MAP for this cell are identical to those of
# model_9 and of the base model -- confirm the fine-tuned weights really differ.
retriever_eval_results = retriever.eval(
    top_k=10,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)
# Recall: share of questions whose answer is found among the top_k passages
# returned by the retriever
print("Retriever Recall:", retriever_eval_results["recall"])
dev_400_recall.append(retriever_eval_results["recall"])
# Mean Average Precision: rewards retrievers that rank relevant documents higher
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
dev_400_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 24/24 [00:00<00:00, 87.70it/s]
INFO - haystack.nodes.retriever.base -  For 12 out of 24 questions (50.00%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.5
Retriever Mean Avg Precision: 0.38958333333333334


## Base Model

In [231]:
# Empty both evaluation indices so the next split is indexed from scratch
for index_name in (doc_index, label_index):
    document_store.delete_documents(index=index_name)

### Test Set

In [232]:
# Index the test split. add_eval_data() converts a SQuAD-format JSON file into
# Haystack document and label objects and writes them to doc_index and
# label_index respectively; documents go through `preprocessor` first.
document_store.add_eval_data(
    filename="squad_format_thesis/test.json",
    preprocessor=preprocessor,
    doc_index=doc_index,
    label_index=label_index,
)



In [233]:
from haystack.nodes import DensePassageRetriever

# Base model: Spanish DPR question/passage encoders referenced by their
# Model Hub names (not a locally saved fine-tuned directory).
retriever = DensePassageRetriever(
    document_store=document_store,
    query_embedding_model="IIC/dpr-spanish-question_encoder-allqa-base",
    passage_embedding_model="IIC/dpr-spanish-passage_encoder-allqa-base",
    batch_size=64,
    use_gpu=True,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Could not find IIC/dpr-spanish-question_encoder-allqa-base locally.
INFO - haystack.modeling.model.language_model -  Looking on Transformers Model Hub (in local cache and online)...
INFO - haystack.modeling.model.language_model -  Automatically detected language from language model name: spanish
INFO - haystack.modeling.model.language_model -  Loaded IIC/dpr-spanish-question_encoder-allqa-base
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Could not find IIC/dpr-spanish-passage_encoder-allqa-base locally.
INFO - haystack.modeling.model.language_model -  Looking on Transformers Model Hub (in local cache and online)...
INFO - haystack.modeling.model.language_model -  Automatically detected language from lang

In [234]:
# Embed every document currently stored in doc_index with the loaded retriever
# and write the vectors back to the document store.
document_store.update_embeddings(retriever=retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 326 docs ...


Updating embeddings:   0%|          | 0/326 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/384 [00:00<?, ? Docs/s]

In [235]:
## Standalone retriever evaluation -- retriever under test: base model (no fine-tuning)
# Note: no_answer samples are omitted when evaluation is performed with this method
retriever_eval_results = retriever.eval(
    top_k=10,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)
# Recall: share of questions whose answer is found among the top_k passages
# returned by the retriever
print("Retriever Recall:", retriever_eval_results["recall"])
test_400_recall.append(retriever_eval_results["recall"])
# Mean Average Precision: rewards retrievers that rank relevant documents higher
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
test_400_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 25/25 [00:00<00:00, 79.13it/s]
INFO - haystack.nodes.retriever.base -  For 16 out of 25 questions (64.00%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.64
Retriever Mean Avg Precision: 0.3813809523809523


### Dev Set

In [236]:
# Empty both evaluation indices so the next split is indexed from scratch
for index_name in (doc_index, label_index):
    document_store.delete_documents(index=index_name)

In [237]:
# Index the dev split. add_eval_data() converts a SQuAD-format JSON file into
# Haystack document and label objects and writes them to doc_index and
# label_index respectively; documents go through `preprocessor` first.
document_store.add_eval_data(
    filename="squad_format_thesis/dev.json",
    preprocessor=preprocessor,
    doc_index=doc_index,
    label_index=label_index,
)



In [238]:
from haystack.nodes import DensePassageRetriever

# Base model: Spanish DPR question/passage encoders referenced by their
# Model Hub names (not a locally saved fine-tuned directory).
retriever = DensePassageRetriever(
    document_store=document_store,
    query_embedding_model="IIC/dpr-spanish-question_encoder-allqa-base",
    passage_embedding_model="IIC/dpr-spanish-passage_encoder-allqa-base",
    batch_size=64,
    use_gpu=True,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Could not find IIC/dpr-spanish-question_encoder-allqa-base locally.
INFO - haystack.modeling.model.language_model -  Looking on Transformers Model Hub (in local cache and online)...
INFO - haystack.modeling.model.language_model -  Automatically detected language from language model name: spanish
INFO - haystack.modeling.model.language_model -  Loaded IIC/dpr-spanish-question_encoder-allqa-base
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Could not find IIC/dpr-spanish-passage_encoder-allqa-base locally.
INFO - haystack.modeling.model.language_model -  Looking on Transformers Model Hub (in local cache and online)...
INFO - haystack.modeling.model.language_model -  Automatically detected language from lang

In [239]:
# Embed every document currently stored in doc_index with the loaded retriever
# and write the vectors back to the document store.
document_store.update_embeddings(retriever=retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 305 docs ...


Updating embeddings:   0%|          | 0/305 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/320 [00:00<?, ? Docs/s]

In [240]:
## Standalone retriever evaluation -- retriever under test: base model (no fine-tuning)
# Note: no_answer samples are omitted when evaluation is performed with this method
retriever_eval_results = retriever.eval(
    top_k=10,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)
# Recall: share of questions whose answer is found among the top_k passages
# returned by the retriever
print("Retriever Recall:", retriever_eval_results["recall"])
dev_400_recall.append(retriever_eval_results["recall"])
# Mean Average Precision: rewards retrievers that rank relevant documents higher
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
dev_400_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 24/24 [00:00<00:00, 65.07it/s]
INFO - haystack.nodes.retriever.base -  For 12 out of 24 questions (50.00%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.5
Retriever Mean Avg Precision: 0.38958333333333334


## Old Base Model

In [241]:
# Empty both evaluation indices so the next split is indexed from scratch
for index_name in (doc_index, label_index):
    document_store.delete_documents(index=index_name)

### Test Set

In [242]:
# Index the test split. add_eval_data() converts a SQuAD-format JSON file into
# Haystack document and label objects and writes them to doc_index and
# label_index respectively; documents go through `preprocessor` first.
document_store.add_eval_data(
    filename="squad_format_thesis/test.json",
    preprocessor=preprocessor,
    doc_index=doc_index,
    label_index=label_index,
)



In [243]:
from haystack.nodes import DensePassageRetriever

# Old base model: multilingual BERT DPR encoders referenced by their Model Hub names.
# NOTE(review): loading these checkpoints logs a tokenizer-class mismatch warning
# ("It may result in unexpected tokenization") -- confirm the scores are not
# affected by it before comparing against the other retrievers.
retriever = DensePassageRetriever(
    document_store=document_store,
    query_embedding_model="voidful/dpr-question_encoder-bert-base-multilingual",
    passage_embedding_model="voidful/dpr-ctx_encoder-bert-base-multilingual",
    batch_size=64,
    use_gpu=True,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Could not find voidful/dpr-question_encoder-bert-base-multilingual locally.
INFO - haystack.modeling.model.language_model -  Looking on Transformers Model Hub (in local cache and online)...
INFO - haystack.modeling.model.language_model -  Automatically detected language from language model name: multilingual
INFO - haystack.modeling.model.language_model -  Loaded voidful/dpr-question_encoder-bert-base-multilingual
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DPRQuestionEncoderTokenizer'. 
The class this function is called from is 'DPRContextEncoderTokenizerFast'.
INFO - haystack.modeling.model.language_mode

In [244]:
# Embed every document currently stored in doc_index with the loaded retriever
# and write the vectors back to the document store.
document_store.update_embeddings(retriever=retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 326 docs ...


Updating embeddings:   0%|          | 0/326 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/384 [00:00<?, ? Docs/s]

In [245]:
## Standalone retriever evaluation -- retriever under test: old base model (no fine-tuning)
# Note: no_answer samples are omitted when evaluation is performed with this method
retriever_eval_results = retriever.eval(
    top_k=10,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)
# Recall: share of questions whose answer is found among the top_k passages
# returned by the retriever
print("Retriever Recall:", retriever_eval_results["recall"])
test_400_recall.append(retriever_eval_results["recall"])
# Mean Average Precision: rewards retrievers that rank relevant documents higher
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
test_400_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 25/25 [00:00<00:00, 91.43it/s]
INFO - haystack.nodes.retriever.base -  For 6 out of 25 questions (24.00%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.24
Retriever Mean Avg Precision: 0.12444444444444444


### Dev Set

In [246]:
# Empty both evaluation indices so the next split is indexed from scratch
for index_name in (doc_index, label_index):
    document_store.delete_documents(index=index_name)

In [247]:
# Index the dev split. add_eval_data() converts a SQuAD-format JSON file into
# Haystack document and label objects and writes them to doc_index and
# label_index respectively; documents go through `preprocessor` first.
document_store.add_eval_data(
    filename="squad_format_thesis/dev.json",
    preprocessor=preprocessor,
    doc_index=doc_index,
    label_index=label_index,
)



In [248]:
from haystack.nodes import DensePassageRetriever

# Old base model: multilingual BERT DPR encoders referenced by their Model Hub names.
# NOTE(review): loading these checkpoints logs a tokenizer-class mismatch warning
# ("It may result in unexpected tokenization") -- confirm the scores are not
# affected by it before comparing against the other retrievers.
retriever = DensePassageRetriever(
    document_store=document_store,
    query_embedding_model="voidful/dpr-question_encoder-bert-base-multilingual",
    passage_embedding_model="voidful/dpr-ctx_encoder-bert-base-multilingual",
    batch_size=64,
    use_gpu=True,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0
INFO - haystack.modeling.utils -  Number of GPUs: 1
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Could not find voidful/dpr-question_encoder-bert-base-multilingual locally.
INFO - haystack.modeling.model.language_model -  Looking on Transformers Model Hub (in local cache and online)...
INFO - haystack.modeling.model.language_model -  Automatically detected language from language model name: multilingual
INFO - haystack.modeling.model.language_model -  Loaded voidful/dpr-question_encoder-bert-base-multilingual
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DPRQuestionEncoderTokenizer'. 
The class this function is called from is 'DPRContextEncoderTokenizerFast'.
INFO - haystack.modeling.model.language_mode

In [249]:
# Embed every document currently stored in doc_index with the loaded retriever
# and write the vectors back to the document store.
document_store.update_embeddings(retriever=retriever, index=doc_index)

INFO - haystack.document_stores.elasticsearch -  Updating embeddings for all 305 docs ...


Updating embeddings:   0%|          | 0/305 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/320 [00:00<?, ? Docs/s]

In [250]:
## Standalone retriever evaluation -- retriever under test: old base model (no fine-tuning)
# Note: no_answer samples are omitted when evaluation is performed with this method
retriever_eval_results = retriever.eval(
    top_k=10,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)
# Recall: share of questions whose answer is found among the top_k passages
# returned by the retriever
print("Retriever Recall:", retriever_eval_results["recall"])
dev_400_recall.append(retriever_eval_results["recall"])
# Mean Average Precision: rewards retrievers that rank relevant documents higher
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
dev_400_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|███████████████████████████████████████████| 24/24 [00:00<00:00, 85.77it/s]
INFO - haystack.nodes.retriever.base -  For 7 out of 24 questions (29.17%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.2916666666666667
Retriever Mean Avg Precision: 0.14484126984126983


## BM25

In [251]:
# Sparse baseline: BM25 retriever bound to the same Elasticsearch document store.
# (ElasticsearchRetriever is imported but not used in this cell.)
from haystack.nodes import BM25Retriever, ElasticsearchRetriever

retriever = BM25Retriever(
    document_store=document_store,
)

In [252]:
# Empty both evaluation indices so the next split is indexed from scratch
for index_name in (doc_index, label_index):
    document_store.delete_documents(index=index_name)

### Test Set

In [253]:
# Index the test split. add_eval_data() converts a SQuAD-format JSON file into
# Haystack document and label objects and writes them to doc_index and
# label_index respectively; documents go through `preprocessor` first.
document_store.add_eval_data(
    filename="squad_format_thesis/test.json",
    preprocessor=preprocessor,
    doc_index=doc_index,
    label_index=label_index,
)



In [254]:
## Standalone retriever evaluation -- retriever under test: Elasticsearch BM25
# Note: no_answer samples are omitted when evaluation is performed with this method
retriever_eval_results = retriever.eval(
    top_k=10,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)
# Recall: share of questions whose answer is found among the top_k passages
# returned by the retriever
print("Retriever Recall:", retriever_eval_results["recall"])
test_400_recall.append(retriever_eval_results["recall"])
# Mean Average Precision: rewards retrievers that rank relevant documents higher
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
test_400_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|██████████████████████████████████████████| 25/25 [00:00<00:00, 100.00it/s]
INFO - haystack.nodes.retriever.base -  For 19 out of 25 questions (76.00%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.76
Retriever Mean Avg Precision: 0.4868253968253968


### Dev Set

In [255]:
# Empty both evaluation indices so the next split is indexed from scratch
for index_name in (doc_index, label_index):
    document_store.delete_documents(index=index_name)

In [256]:
# Index the dev split. add_eval_data() converts a SQuAD-format JSON file into
# Haystack document and label objects and writes them to doc_index and
# label_index respectively; documents go through `preprocessor` first.
document_store.add_eval_data(
    filename="squad_format_thesis/dev.json",
    preprocessor=preprocessor,
    doc_index=doc_index,
    label_index=label_index,
)



In [257]:
## Standalone retriever evaluation -- retriever under test: Elasticsearch BM25
# Note: no_answer samples are omitted when evaluation is performed with this method
retriever_eval_results = retriever.eval(
    top_k=10,
    label_index=label_index,
    doc_index=doc_index,
    open_domain=True,
)
# Recall: share of questions whose answer is found among the top_k passages
# returned by the retriever
print("Retriever Recall:", retriever_eval_results["recall"])
dev_400_recall.append(retriever_eval_results["recall"])
# Mean Average Precision: rewards retrievers that rank relevant documents higher
print("Retriever Mean Avg Precision:", retriever_eval_results["map"])
dev_400_map.append(retriever_eval_results["map"])

INFO - haystack.nodes.retriever.base -  Performing eval queries...
100%|██████████████████████████████████████████| 24/24 [00:00<00:00, 109.77it/s]
INFO - haystack.nodes.retriever.base -  For 19 out of 24 questions (79.17%), the answer was in the top-10 candidate passages selected by the retriever.


Retriever Recall: 0.7916666666666666
Retriever Mean Avg Precision: 0.5680555555555555


# Build the data

In [258]:
# Sanity-check the collected metrics: every list should have one entry per
# evaluated retriever. Each list is rounded to 3 decimals once, printed, and
# kept under an r_* name for building the results table.
print(len(model_name))

# Metrics collected in the *_200_* lists
print("200")
r_test_200_recall = [round(value, 3) for value in test_200_recall]
print("TEST_RECALL", len(test_200_recall), "\n", r_test_200_recall)
r_test_200_map = [round(value, 3) for value in test_200_map]
print("TEST_MAP", len(test_200_map), "\n", r_test_200_map)
r_dev_200_recall = [round(value, 3) for value in dev_200_recall]
print("DEV_RECALL", len(dev_200_recall), "\n", r_dev_200_recall)
r_dev_200_map = [round(value, 3) for value in dev_200_map]
print("DEV_MAP", len(dev_200_map), "\n", r_dev_200_map)

# Metrics collected in the *_400_* lists
print("400")
r_test_400_recall = [round(value, 3) for value in test_400_recall]
print("TEST_RECALL\n", len(test_400_recall), "\n", r_test_400_recall)
r_test_400_map = [round(value, 3) for value in test_400_map]
print("TEST_MAP\n", len(test_400_map), "\n", r_test_400_map)
r_dev_400_recall = [round(value, 3) for value in dev_400_recall]
print("DEV_RECALL\n", len(dev_400_recall), "\n", r_dev_400_recall)
r_dev_400_map = [round(value, 3) for value in dev_400_map]
print("DEV_MAP", len(dev_400_map), "\n", r_dev_400_map)

13
200
TEST_RECALL 13 
 [0.64, 0.8, 0.72, 0.6, 0.84, 0.64, 0.84, 0.8, 0.84, 0.84, 0.84, 0.4, 0.68]
TEST_MAP 13 
 [0.406, 0.502, 0.475, 0.373, 0.517, 0.408, 0.514, 0.499, 0.517, 0.517, 0.517, 0.204, 0.469]
DEV_RECALL 13 
 [0.75, 0.792, 0.75, 0.708, 0.833, 0.667, 0.833, 0.792, 0.833, 0.833, 0.833, 0.458, 0.75]
DEV_MAP 13 
 [0.337, 0.506, 0.373, 0.279, 0.539, 0.316, 0.514, 0.395, 0.539, 0.539, 0.539, 0.233, 0.537]
400
TEST_RECALL
 13 
 [0.52, 0.64, 0.64, 0.52, 0.64, 0.52, 0.64, 0.64, 0.64, 0.64, 0.64, 0.24, 0.76]
TEST_MAP
 13 
 [0.304, 0.38, 0.368, 0.322, 0.381, 0.336, 0.381, 0.368, 0.381, 0.381, 0.381, 0.124, 0.487]
DEV_RECALL
 13 
 [0.417, 0.5, 0.5, 0.375, 0.5, 0.375, 0.5, 0.5, 0.5, 0.5, 0.5, 0.292, 0.792]
DEV_MAP 13 
 [0.211, 0.348, 0.251, 0.209, 0.39, 0.21, 0.348, 0.266, 0.39, 0.39, 0.39, 0.145, 0.568]


In [259]:
# Column-oriented results table: one key per DataFrame column, one entry per retriever.
#
# The model_name list defined at the top of the notebook contains 'model_1' twice,
# which makes it one entry longer than the metric lists and would make
# pd.DataFrame(data_eval) fail on a fresh run. Repeated names are dropped here,
# keeping first-seen order; this is a no-op when the names are already unique.
data_eval = {
    "model_name": list(dict.fromkeys(model_name)),
    "test_200_recall": r_test_200_recall,
    "test_200_map": r_test_200_map,
    "dev_200_recall": r_dev_200_recall,
    "dev_200_map": r_dev_200_map,
    "test_400_recall": r_test_400_recall,
    "test_400_map": r_test_400_map,
    "dev_400_recall": r_dev_400_recall,
    "dev_400_map": r_dev_400_map,
}

# Fail early with a readable message if any column is misaligned with the others
# (e.g. an evaluation cell was re-run and appended a duplicate score).
column_lengths = {column: len(values) for column, values in data_eval.items()}
assert len(set(column_lengths.values())) == 1, f"Misaligned evaluation columns: {column_lengths}"

data_eval

{'model_name': ['model_1',
  'model_2',
  'model_3',
  'model_4',
  'model_5',
  'model_6',
  'model_7',
  'model_8',
  'model_9',
  'model_10',
  'base_model',
  'old_base_model',
  'bm25'],
 'test_200_recall': [0.64,
  0.8,
  0.72,
  0.6,
  0.84,
  0.64,
  0.84,
  0.8,
  0.84,
  0.84,
  0.84,
  0.4,
  0.68],
 'test_200_map': [0.406,
  0.502,
  0.475,
  0.373,
  0.517,
  0.408,
  0.514,
  0.499,
  0.517,
  0.517,
  0.517,
  0.204,
  0.469],
 'dev_200_recall': [0.75,
  0.792,
  0.75,
  0.708,
  0.833,
  0.667,
  0.833,
  0.792,
  0.833,
  0.833,
  0.833,
  0.458,
  0.75],
 'dev_200_map': [0.337,
  0.506,
  0.373,
  0.279,
  0.539,
  0.316,
  0.514,
  0.395,
  0.539,
  0.539,
  0.539,
  0.233,
  0.537],
 'test_400_recall': [0.52,
  0.64,
  0.64,
  0.52,
  0.64,
  0.52,
  0.64,
  0.64,
  0.64,
  0.64,
  0.64,
  0.24,
  0.76],
 'test_400_map': [0.304,
  0.38,
  0.368,
  0.322,
  0.381,
  0.336,
  0.381,
  0.368,
  0.381,
  0.381,
  0.381,
  0.124,
  0.487],
 'dev_400_recall': [0.417,
  0.

In [261]:
import pandas as pd

# Turn the column-oriented dict into a table (one row per retriever) and display it
df_data_eval = pd.DataFrame(data=data_eval)
df_data_eval

Unnamed: 0,model_name,test_200_recall,test_200_map,dev_200_recall,dev_200_map,test_400_recall,test_400_map,dev_400_recall,dev_400_map
0,model_1,0.64,0.406,0.75,0.337,0.52,0.304,0.417,0.211
1,model_2,0.8,0.502,0.792,0.506,0.64,0.38,0.5,0.348
2,model_3,0.72,0.475,0.75,0.373,0.64,0.368,0.5,0.251
3,model_4,0.6,0.373,0.708,0.279,0.52,0.322,0.375,0.209
4,model_5,0.84,0.517,0.833,0.539,0.64,0.381,0.5,0.39
5,model_6,0.64,0.408,0.667,0.316,0.52,0.336,0.375,0.21
6,model_7,0.84,0.514,0.833,0.514,0.64,0.381,0.5,0.348
7,model_8,0.8,0.499,0.792,0.395,0.64,0.368,0.5,0.266
8,model_9,0.84,0.517,0.833,0.539,0.64,0.381,0.5,0.39
9,model_10,0.84,0.517,0.833,0.539,0.64,0.381,0.5,0.39


In [262]:
import os

# Persist the evaluation table as CSV, without the positional index column.
# pandas does not create missing parent directories, so make sure "data/" exists
# before writing; otherwise to_csv raises OSError on a fresh checkout.
os.makedirs("data", exist_ok=True)
df_data_eval.to_csv("data/df_data_eval.csv", index=False)