# Loading Documents

In [1]:
from haystack.document_stores.in_memory import InMemoryDocumentStore
# In-memory store; the embedded documents are written into it further down.
document_store = InMemoryDocumentStore()

In [2]:
from datasets import load_dataset
from haystack import Document

In [3]:
# Load only the `train` split of the `bilgeyucel/seven-wonders` dataset.
dataset = load_dataset(
    path="bilgeyucel/seven-wonders",
    split="train",
)

In [4]:
dataset

Dataset({
    features: ['id', 'content', 'content_type', 'meta', 'id_hash_keys', 'score', 'embedding'],
    num_rows: 151
})

Convert each dataset row into a Haystack `Document`.

In [5]:
# Keep only the text and the metadata of every row; the other dataset
# columns (id, score, embedding, ...) are not carried over.
docs = [
    Document(content=row["content"], meta=row["meta"])
    for row in dataset
]

In [6]:
#docs

In [7]:
#from haystack.components.embedders import SentenceTransformersDocumentEmbedder
#text_embedder = SentenceTransformersDocumentEmbedder(model="sentence-transformers/all-MiniLM-L6-v2")


## Document Embedding

In [8]:
from haystack.components.embedders import (
    SentenceTransformersDocumentEmbedder,
    SentenceTransformersTextEmbedder,
)

# Embedder that attaches a vector to each document before it is indexed.
doc_embedder = SentenceTransformersDocumentEmbedder(
    model="sentence-transformers/all-MiniLM-L6-v2",
)

In [9]:
# Prepare the embedder before its first run() call
# (in Haystack this is where the underlying model gets loaded — confirm in the docs).
doc_embedder.warm_up()

In [10]:
# Embed every document; the result is a dict whose "documents" entry holds
# the documents that are later written to the store.
docs_with_embeddings = doc_embedder.run(documents=docs)

Batches:   0%|          | 0/5 [00:00<?, ?it/s]

In [11]:
# Inspect which output keys the embedder returned.
print(docs_with_embeddings.keys())


dict_keys(['documents'])


In [12]:
from haystack.document_stores.types import DuplicatePolicy

# Index the embedded documents.
# OVERWRITE keeps this cell idempotent: re-running it against an already
# populated store replaces the existing entries instead of failing on
# duplicate document IDs, and the returned count stays the same.
document_store.write_documents(
    docs_with_embeddings["documents"],
    policy=DuplicatePolicy.OVERWRITE,
)


151

# Retriever


In [13]:
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
# Embedding-based retriever bound to the in-memory document store.
retriever = InMemoryEmbeddingRetriever(document_store)

In [14]:
from haystack.components.builders import ChatPromptBuilder
from haystack.dataclasses import ChatMessage
# Single user message used as the chat prompt template.
# It is a Jinja-style template with two variables:
#   - `documents`: iterated over, each document's `content` is inlined as context
#   - `question`:  the user question placed after the context
# NOTE(review): the continuation-line indentation is part of the string and
# is therefore sent to the model verbatim.
template  =[ChatMessage.from_user( """You are a helpful assistant. 
                                  Answer the question based on the  
                                  context provided.
                                  {% for document in documents %}
                                    {{document.content}}
                                    {% endfor %}
                                    Question: {{question}}
                                    Answer:""")]

In [15]:
import os
from dotenv import load_dotenv

# Expected entry in a local `.env` file (keep that file out of version control):
# GOOGLE_API_KEY=your_api_key_here

# Load variables from the `.env` file into the process environment.
load_dotenv()

# The Google GenAI generator reads GOOGLE_API_KEY (or GEMINI_API_KEY) straight
# from the environment, so the key does not need to be copied into a second
# variable. The previous mixed-case "Gemini_API_KEY" name was not a name the
# generator looks up on case-sensitive platforms, and on case-insensitive ones
# it only produced the "Both GOOGLE_API_KEY and GEMINI_API_KEY are set" notice.
# Fail early with a clear message instead of silently continuing with an
# empty key.
if not (os.getenv("GOOGLE_API_KEY") or os.getenv("GEMINI_API_KEY")):
    raise RuntimeError(
        "No Google API key found. Set GOOGLE_API_KEY in your environment or in a .env file."
    )

In [16]:
# Both template variables are mandatory, so a run without a question or
# without retrieved documents fails instead of rendering an incomplete prompt.
# A list (not a set) is used: it has a stable order and matches the
# documented parameter type, which also keeps the component serializable.
prompt_builder = ChatPromptBuilder(
    template=template,
    required_variables=["question", "documents"],
)

In [17]:
from haystack_integrations.components.generators.google_genai import GoogleGenAIChatGenerator

In [18]:
import os
from haystack_integrations.components.generators.google_genai import GoogleGenAIChatGenerator
# Gemini chat generator. No API key is passed explicitly; the component
# reports which environment variable it picked the key up from.
llm = GoogleGenAIChatGenerator(model="gemini-1.5-flash")


Both GOOGLE_API_KEY and GEMINI_API_KEY are set. Using GOOGLE_API_KEY.


In [19]:
# Always build fresh component instances here rather than probing `locals()`
# for leftovers from earlier executions. Guarding on kernel state made the
# result depend on execution history, and a reused instance that is already
# attached to a previously built pipeline cannot be added to a new one.

# Query-side embedder; uses the same model as the document embedder so that
# query vectors and stored document vectors are comparable.
text_embedder = SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2")
text_embedder.warm_up()
print("✅ text_embedder created")

# Retriever bound to the document store. If `document_store` is undefined this
# raises NameError immediately, which is clearer than printing a hint and
# letting a later cell fail.
retriever = InMemoryEmbeddingRetriever(document_store)
print("✅ retriever created")


✅ text_embedder created
✅ retriever created


In [20]:
# Question used both to build the retrieval query and to fill the prompt.
question = "who wrote De septem mundi miraculis ?"

In [21]:
from haystack import Pipeline

rag_pipeline = Pipeline()

# Register the four stages in the order the data flows through them.
rag_pipeline.add_component("text_embedder", text_embedder)
rag_pipeline.add_component("retriever", retriever)
rag_pipeline.add_component("prompt_builder", prompt_builder)
rag_pipeline.add_component("llm", llm)

# Wire the stages together:
#   question embedding -> retriever query
#   retrieved documents -> prompt context
#   rendered prompt     -> chat generator
# `connect` returns the pipeline, so the calls are chained and the final
# value is still displayed as the cell output.
(
    rag_pipeline
    .connect("text_embedder.embedding", "retriever.query_embedding")
    .connect("retriever", "prompt_builder.documents")
    .connect("prompt_builder", "llm")
)

<haystack.core.pipeline.pipeline.Pipeline object at 0x000001ACD8BCFE90>
🚅 Components
  - text_embedder: SentenceTransformersTextEmbedder
  - retriever: InMemoryEmbeddingRetriever
  - prompt_builder: ChatPromptBuilder
  - llm: GoogleGenAIChatGenerator
🛤️ Connections
  - text_embedder.embedding -> retriever.query_embedding (list[float])
  - retriever.documents -> prompt_builder.documents (list[Document])
  - prompt_builder.prompt -> llm.messages (list[ChatMessage])

In [22]:
# The same question feeds two entry points: the text embedder (to produce the
# query embedding for retrieval) and the prompt builder (to fill `question`).
RAG = rag_pipeline.run(
    dict(
        text_embedder=dict(text=question),
        prompt_builder=dict(question=question),
    )
)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [23]:
# Show the raw list of reply messages returned by the generator
# (this does not re-run the pipeline; it only reads the stored result).
print(RAG["llm"]["replies"])

[ChatMessage(_role=<ChatRole.ASSISTANT: 'assistant'>, _content=[TextContent(text='Philo of Byzantium wrote *De septem mundi miraculis*.\n')], _name=None, _meta={'model': 'gemini-1.5-flash', 'finish_reason': 'stop', 'usage': {'prompt_tokens': 2335, 'completion_tokens': 15, 'total_tokens': 2350}})]


In [24]:
# Show only the text of the first reply
# (this does not re-run the pipeline; it only reads the stored result).
print(RAG["llm"]["replies"][0].text)

Philo of Byzantium wrote *De septem mundi miraculis*.

