In [5]:
from dotenv import load_dotenv
from haystack import Pipeline
from haystack.components.builders import ChatPromptBuilder
from haystack.components.converters import TextFileToDocument
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
from haystack.components.generators.chat import OpenAIChatGenerator
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.dataclasses import ChatMessage
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.utils import Secret
from pathlib import Path

# --- Build the document index -------------------------------------------------
# A new in-memory store is created every time this cell runs, so re-running the
# cell rebuilds the index instead of appending to a previous one.
document_store = InMemoryDocumentStore()

# Collect every regular file in the data directory. `iterdir()` yields entries
# in arbitrary order, so sort them to keep the indexing order reproducible.
files = sorted(f for f in Path("./example_data").iterdir() if f.is_file())
if not files:
    # Fail fast: an empty index would make the later RAG query answer with no context.
    raise FileNotFoundError("No files found in ./example_data; nothing to index.")

# Convert the files to Haystack documents
converter = TextFileToDocument()
docs = converter.run(sources=files)["documents"]

# Embed the documents. Query text is embedded with the same model later on,
# so that query and document vectors are comparable.
doc_embedder = SentenceTransformersDocumentEmbedder(
    model="sentence-transformers/all-MiniLM-L6-v2")
doc_embedder.warm_up()  # load the model before calling run() directly

docs_with_embeddings = doc_embedder.run(docs)

# Write the embedded documents to the store. This is the last expression of the
# cell, so the notebook displays its return value.
document_store.write_documents(docs_with_embeddings["documents"])

Batches: 100%|██████████| 1/1 [00:00<00:00, 13.96it/s]


5

In [8]:
# Load variables from a local .env file into the environment so the API key
# referenced below can be resolved.
load_dotenv()

# --- Define the parts of the pipeline ------------------------------------------
# The query embedder uses the same model as the document embedder.
text_embedder = SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2")
retriever = InMemoryEmbeddingRetriever(document_store)
# The API key is read from the OPENAI_KEY environment variable, never hard-coded.
llm = OpenAIChatGenerator(model="gpt-4o-mini", api_key=Secret.from_env_var("OPENAI_KEY"))
prompt_builder = ChatPromptBuilder(
    template=[
        ChatMessage.from_user("""
                          Given the following information, answer the question.

                          Context:
                          {% for doc in documents %}
                            {{ doc.content }}
                          {% endfor %}

                          Question: {{ question }}
                          Answer:
                          """)
    ],
    # Template variables are optional by default, and a missing one renders as
    # empty text. Marking both as required makes the pipeline fail loudly if
    # the retriever output or the question is not wired in.
    required_variables=["documents", "question"],
)

# --- Create the pipeline --------------------------------------------------------
pipe = Pipeline()

pipe.add_component("text_embedder", text_embedder)
pipe.add_component("retriever", retriever)
pipe.add_component("llm", llm)
pipe.add_component("prompt_builder", prompt_builder)

# Data flow: question -> embedding -> retrieved documents -> prompt -> LLM.
# Sockets are named explicitly on both sides so the wiring does not depend on
# implicit name/type matching.
pipe.connect("text_embedder.embedding", "retriever.query_embedding")
pipe.connect("retriever.documents", "prompt_builder.documents")
pipe.connect("prompt_builder.prompt", "llm.messages")

# --- Example query ---------------------------------------------------------------
# The question is passed twice: once to be embedded for retrieval, and once to
# be rendered into the prompt.
question = "What kind of financial tools do we use?"
response = pipe.run({"text_embedder": {"text": question}, "prompt_builder": {"question": question}})
print(response)


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches: 100%|██████████| 1/1 [00:00<00:00, 28.77it/s]


{'llm': {'replies': [ChatMessage(_role=<ChatRole.ASSISTANT: 'assistant'>, _content=[TextContent(text='Eir uses the following financial tools:\n\n1. **Visma** for payroll and bookkeeping.\n2. **Tripletex** for invoicing and expense handling.')], _name=None, _meta={'model': 'gpt-4o-mini-2024-07-18', 'index': 0, 'finish_reason': 'stop', 'usage': {'completion_tokens': 34, 'prompt_tokens': 4506, 'total_tokens': 4540, 'completion_tokens_details': CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), 'prompt_tokens_details': PromptTokensDetails(audio_tokens=0, cached_tokens=4352)}})]}}
