In [1]:
import os

from haystack import Pipeline
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.converters import TextFileToDocument
from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter
from haystack.components.writers import DocumentWriter
from haystack.components.preprocessors import DocumentCleaner
from haystack.components.retrievers.in_memory.embedding_retriever import InMemoryEmbeddingRetriever, InMemoryDocumentStore
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever


from milvus_haystack import MilvusDocumentStore
from milvus_haystack.milvus_embedding_retriever import MilvusEmbeddingRetriever


# Milvus-backed document store on localhost:19530 with empty credentials.
# NOTE(review): drop_old=True presumably discards any existing collection so
# each run starts from a clean index -- confirm against milvus_haystack docs.
document_store = MilvusDocumentStore(
    connection_args={
        "host": "localhost",
        "port": "19530",
        "user": "",
        "password": "",
        "secure": False,
    },
    drop_old=True,
)

# Preprocessing components: file -> Document, clean, then split into chunks.
text_file_converter = TextFileToDocument()
cleaner = DocumentCleaner()
splitter = DocumentSplitter(split_length=1000, split_overlap=10)
# Not wired into the pipeline below (documents are written directly through
# document_store.write_documents); kept so the name stays defined for any
# cell that may still reference it.
writer = DocumentWriter(document_store)

# Dataset location. Overridable through an environment variable so the
# notebook is not tied to one machine; the original path is the fallback.
doc_dir = os.environ.get(
    "MILVUS_TUTORIAL_DATA_DIR",
    "/home/sann_htet/Desktop/milvus-tutorial/dataset",
)

# Upper bound on how many files are converted and indexed in this run.
MAX_FILES_TO_INDEX = 40

# os.listdir returns entries in arbitrary order, so sort the names first:
# otherwise the slice below would index a different subset on each machine.
# Sub-directories are skipped because only files can be converted.
candidate_paths = (os.path.join(doc_dir, name) for name in sorted(os.listdir(doc_dir)))
files_to_index = [path for path in candidate_paths if os.path.isfile(path)]
if not files_to_index:
    # Fail here with a clear message instead of later with an IndexError
    # when the retrieval result turns out to be empty.
    raise FileNotFoundError(f"No files to index found in {doc_dir!r}")


# Conversion pipeline: converter -> cleaner -> splitter.
indexing_pipeline = Pipeline()
indexing_pipeline.add_component("converter", text_file_converter)
indexing_pipeline.add_component("cleaner", cleaner)
indexing_pipeline.add_component("splitter", splitter)

indexing_pipeline.connect("converter.documents", "cleaner.documents")
indexing_pipeline.connect("cleaner.documents", "splitter.documents")
docs = indexing_pipeline.run(data={"sources": files_to_index[:MAX_FILES_TO_INDEX]})

# Embed the split chunks. The query embedder must use the same model so that
# query and document vectors are comparable.
doc_embedder = SentenceTransformersDocumentEmbedder(model="sentence-transformers/all-MiniLM-L12-v2")
doc_embedder.warm_up()

docs_with_embeddings = doc_embedder.run(docs["splitter"]["documents"])
document_store.write_documents(docs_with_embeddings["documents"])

# Retriever configured with top_k=1, i.e. a single best match per query.
retriever = MilvusEmbeddingRetriever(document_store, top_k=1)

  from .autonotebook import tqdm as notebook_tqdm
Batches: 100%|██████████| 5/5 [00:11<00:00,  2.27s/it]


In [2]:
# Query text to search the indexed documents for.
query = """
The Night King is shown leading his army south. Through ravens' eyes, Bran locates the Night King's army beyond the Wall.
"""
# Alternative, longer query (uncomment to use it instead):
# query = """
# Arya escapes King's Landing with Yoren and his party of recruits; and on the road, she clashes with the other Night's Watch child recruits Lommy, Gendry, and Hot Pie but eventually befriends them. On the way, the party is attacked by Amory Lorch when Yoren refuses to yield Gendry, who is actually a bastard son of the late King Robert, to the Lannisters.  The Night's Watch convoy is overrun and massacred, but Arya and the other children escape through a tunnel.  Before escaping, she rescues three prisoners locked in a wagon cage, among them a mysterious man named Jaqen H'ghar.
# """

# Embed the query with the same sentence-transformers model that was used
# for the documents.
query_model_name = "sentence-transformers/all-MiniLM-L12-v2"
text_embedder = SentenceTransformersTextEmbedder(
    model=query_model_name,
    progress_bar=False,
)
text_embedder.warm_up()

embedder_output = text_embedder.run(query)
query_embedding = embedder_output["embedding"]

# Fetch the closest document(s) from the store for that embedding.
result = retriever.run(query_embedding=query_embedding)

In [4]:
# Show the text of the top-ranked retrieved chunk.
top_document = result["documents"][0]
top_document.content

'not include the Night King\'s first appearance onscreen as it does in the series, it would have been an extensive montage of scenes from the past, present and possible future in the series. Flashbacks would have included scenes of Ned cleaning Ice beneath a weirwood tree from the show\'s original pilot, Bran\'s uncle Benjen and Lyanna Stark as children (later shown in a more extensive flashback in season 6), King Aerys watching and laughing as Ned\'s father and brother are burned and Jaime and Cersei embracing in the old keep at Winterfell just before Bran discovered them. Scenes reflecting the show\'s present included Jon with Ghost, a bloodied Robb surrounded by the Red Wedding dead, and Arya holding her sword Needle as her face blurs and changes. Possible future images include a dragon\'s shadow passing over King\'s Landing, "hints of strange small children with very dark eyes" and a group of four distinctive northern hills behind a very large weirwood. The episode would also have 