In [1]:
from haystack.document_stores import InMemoryDocumentStore
from sentence_transformers import SentenceTransformer, util
from haystack.document_stores import ElasticsearchDocumentStore
from haystack.nodes import EmbeddingRetriever
from haystack.pipelines import ExtractiveQAPipeline
from haystack.nodes import FARMReader
from pprint import pprint
from haystack.utils import print_answers
from haystack.utils import fetch_archive_from_http
from haystack.nodes import BM25Retriever
import os
from haystack.pipelines import Pipeline

from haystack.pipelines.standard_pipelines import TextIndexingPipeline


# Elasticsearch-backed store. The similarity function and embedding width must
# agree with the retriever model configured at the bottom of this cell (its
# model card lists 768-dimensional embeddings tuned for dot-product scoring).
document_store = ElasticsearchDocumentStore(
    similarity="dot_product",
    embedding_dim=768
)

# Build the corpus path from components rather than a backslash-laden literal:
# "\c" and "\m" are invalid escape sequences in a normal string, and a
# hard-coded "\" separator only resolves on Windows.
doc_dir = os.path.join("corpus", "content", "manuals_dump")

# Index regular files only (os.listdir also returns sub-directories, which the
# text converter cannot read), in sorted order so re-runs are deterministic.
files_to_index = [
    os.path.join(doc_dir, entry)
    for entry in sorted(os.listdir(doc_dir))
    if os.path.isfile(os.path.join(doc_dir, entry))
]
indexing_pipeline = TextIndexingPipeline(document_store)
indexing_pipeline.run_batch(file_paths=files_to_index)

# Dense retriever; the embeddings themselves are computed in a later cell via
# document_store.update_embeddings(retriever).
retriever = EmbeddingRetriever(
    document_store=document_store,
    embedding_model="sentence-transformers/multi-qa-mpnet-base-dot-v1",
    model_format="sentence_transformers"
)


  from .autonotebook import tqdm as notebook_tqdm
OpenAI tiktoken module is not available for Python < 3.8,Linux ARM64 and AARCH64. Falling back to GPT2TokenizerFast.
Converting files: 100%|██████████████████████| 101/101 [00:15<00:00,  6.70it/s]
Preprocessing: 100%|███████████████████████| 101/101 [00:50<00:00,  2.01docs/s]


In [None]:
# Compute and store an embedding for every indexed document. A full pass over
# this corpus takes many hours, so only documents that do not have an embedding
# yet are processed: an interrupted run can simply be re-executed and will
# resume instead of starting from zero.
# NOTE: if the retriever's embedding model is changed, switch
# update_existing_embeddings back to True so stale vectors are recomputed.
document_store.update_embeddings(
    retriever,
    update_existing_embeddings=False,
)

Updating embeddings:   0%|                       | 0/391320 [00:00<?, ? Docs/s]
Batches:   0%|                                         | 0/313 [00:00<?, ?it/s][A
Batches:   0%|                               | 1/313 [00:38<3:19:43, 38.41s/it][A
Batches:   1%|▏                              | 2/313 [01:07<2:51:07, 33.01s/it][A
Batches:   1%|▎                              | 3/313 [01:37<2:42:42, 31.49s/it][A
Batches:   1%|▍                              | 4/313 [02:08<2:42:12, 31.50s/it][A
Batches:   2%|▍                              | 5/313 [02:37<2:37:17, 30.64s/it][A
Batches:   2%|▌                              | 6/313 [03:07<2:34:26, 30.18s/it][A
Batches:   2%|▋                              | 7/313 [03:36<2:32:35, 29.92s/it][A
Batches:   3%|▊                              | 8/313 [04:05<2:31:06, 29.73s/it][A
Batches:   3%|▉                              | 9/313 [04:35<2:30:28, 29.70s/it][A
Batches:   3%|▉                             | 10/313 [05:05<2:30:49, 29.87s/it][A
Batches

In [None]:
# Extractive reader that pulls answer spans out of the retrieved passages.
# use_gpu=True requests GPU inference (presumably falls back to CPU when no
# GPU is present -- confirm against the Haystack version in use).
reader = FARMReader(
    model_name_or_path="deepset/roberta-base-squad2",
    use_gpu=True,
)

# Retriever -> Reader question-answering pipeline.
pipe = ExtractiveQAPipeline(reader=reader, retriever=retriever)

In [None]:
# Ask a single question: the retriever hands its 20 best passages to the
# reader, which in turn reports its 20 best answer candidates.
prediction = pipe.run(
    query="How do I change the radio frequencies?",
    params=dict(
        Retriever={"top_k": 20},
        Reader={"top_k": 20},
    ),
)

In [None]:
# Visual divider printed between the individual dumps below.
separator = "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"

# Raw prediction dictionary first ...
print(separator)
pprint(prediction)

# ... then the formatted answers at two verbosity levels
# (print_answers accepts `minimum`, `medium`, and `all`).
for detail_level in ("minimum", "all"):
    print(separator)
    print_answers(prediction, details=detail_level)