In [1]:
from haystack.document_stores import FAISSDocumentStore

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Create the FAISS-backed document store, requesting the "Flat" index factory string.
document_store = FAISSDocumentStore(
    faiss_index_factory_str="Flat",
)

In [3]:
from haystack.utils import clean_wiki_text, convert_files_to_docs, fetch_archive_from_http


In [4]:
import os

# Directory holding the scraped files to index. Set the SCRAPED_DOCS_DIR
# environment variable to point at your own copy; the fallback is the original
# machine-specific location, kept so existing runs behave as before.
doc_dir = os.environ.get("SCRAPED_DOCS_DIR", r"C:\Users\NieFamily\Downloads\scraped")

# Fail fast with an actionable message instead of letting a wrong path surface later.
if not os.path.isdir(doc_dir):
    raise FileNotFoundError(
        f"Document directory not found: {doc_dir!r}. "
        "Set SCRAPED_DOCS_DIR to the folder containing the scraped files."
    )

# Convert the files under doc_dir into documents, cleaning each one with
# clean_wiki_text and splitting on paragraphs.
docs = convert_files_to_docs(dir_path=doc_dir, clean_func=clean_wiki_text, split_paragraphs=True)

# Write the converted documents into the document store.
document_store.write_documents(docs)


Writing Documents: 10000it [00:00, 215360.89it/s]        


In [20]:
# Re-open the FAISS index stored under "my_faiss".
# NOTE(review): this load appears before the cell that calls
# document_store.save("my_faiss"), so on a fresh kernel it presumably only works
# if "my_faiss" is left over from an earlier session — confirm the intended order.
new_document_store = FAISSDocumentStore.load(index_path="my_faiss")

In [15]:
from haystack.nodes import EmbeddingRetriever

# Bind the retriever to `document_store`: that is the store the documents were
# written to, and the one whose embeddings are updated and saved in the cells
# that follow. Binding it to a different store would make queries search an
# index other than the one embedded here.
retriever = EmbeddingRetriever(
    document_store=document_store,
    embedding_model="sentence-transformers/multi-qa-mpnet-base-dot-v1",
)
# Important:
# Now that the Retriever is initialized, update_embeddings() must be called to iterate over all
# previously indexed documents and update their embedding representation.
# While this can be a time-consuming operation (depending on the corpus size), it only needs to be done once.
# At query time, only the query is embedded and compared to the existing document embeddings, which is very fast.


In [16]:
# Compute embeddings for the documents in document_store using the retriever's model.
document_store.update_embeddings(retriever=retriever)


Batches: 100%|██████████| 1/1 [00:10<00:00, 10.91s/it]docs/s]
Documents Processed: 10000 docs [00:11, 904.01 docs/s]        


In [17]:
# Persist the FAISS index under the "my_faiss" path so it can be loaded again later.
document_store.save(index_path="my_faiss")

In [21]:
from haystack.nodes import FARMReader

# Extractive reader built from the deepset/roberta-base-squad2 checkpoint, with GPU use requested.
reader = FARMReader(
    model_name_or_path="deepset/roberta-base-squad2",
    use_gpu=True,
)

In [22]:
from haystack.pipelines import ExtractiveQAPipeline

# Chain the retriever and the reader into a single extractive question-answering pipeline.
pipe = ExtractiveQAPipeline(reader=reader, retriever=retriever)

In [23]:
# Run the query: the Retriever node is asked for 10 results and the Reader node for 5.
prediction = pipe.run(
    query="What is the latest news about Russia?",
    params={
        "Retriever": {"top_k": 10},
        "Reader": {"top_k": 5},
    },
)


Batches: 100%|██████████| 1/1 [00:00<00:00,  1.70it/s]
Inferencing Samples: 100%|██████████| 16/16 [05:36<00:00, 21.05s/ Batches]


In [11]:
from haystack.utils import print_answers


# Show the answers using the "minimum" level of detail.
print_answers(results=prediction, details="minimum")


'Query: What is the latest news about Russia?'
'Answers:'
[   {   'answer': "Russian guns are again shelling people's homes",
        'context': 'ansk - the city Ukraine recaptured last September - '
                   "Russian guns are again shelling people's homes. Evacuation "
                   'orders have been issued for dozens of t'},
    {   'answer': 'air attacks',
        'context': 'rities were at the scene investigating the wreckage on '
                   'Wednesday.\n'
                   'The air attacks in Russia are the latest in a spate of '
                   'strikes deep inside its terri'},
    {   'answer': 'wars in Georgia and Ukraine',
        'context': 'cades, along with its growing confrontation with the West '
                   'and wars in Georgia and Ukraine. Andrew was part of a Wall '
                   'Street Journal reporting team sho'},
    {   'answer': 'wars in Georgia and Ukraine',
        'context': 'cades, along with its growing confrontation wi