In [1]:
import os
import numpy as np
import faiss
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq 
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser


import os
from dotenv import load_dotenv

# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# The key is looked up under this notebook's own variable name first, then under
# the conventional GROQ_API_KEY name, so either configuration works.
groq_api_key = os.getenv("groq_api") or os.getenv("GROQ_API_KEY")
if not groq_api_key:
    # Fail here with an actionable message instead of handing None to ChatGroq.
    raise RuntimeError(
        "Groq API key not found: set `groq_api` (or `GROQ_API_KEY`) "
        "in the environment or in a .env file."
    )


# --- 2. Initialize Models and Components ---

# LLM that writes the hypothetical documents (the `gemma2-9b-it` model via Groq).
# Any other LangChain chat model can be substituted, with the appropriate import.
hyde_llm = ChatGroq(
    model_name='gemma2-9b-it',
    temperature=0,  # Keep temperature at 0 for more factual/less creative answers
    groq_api_key=groq_api_key,
)

# Embedding model for both hypothetical documents and real documents
embedding_model = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
print("Embedding model (all-MiniLM-L6-v2) initialized.")




  embedding_model = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
  from .autonotebook import tqdm as notebook_tqdm
  warn("The installed version of bitsandbytes was compiled without GPU support. "


'NoneType' object has no attribute 'cadam32bit_grad_fp32'


W0616 12:29:43.116000 15012 site-packages\torch\distributed\elastic\multiprocessing\redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.


Embedding model (all-MiniLM-L6-v2) initialized.


In [2]:
# --- 3. Create a Dummy Vector Store (If you don't have one saved) ---
# A real pipeline would load documents from disk or a database and build the
# vector store from those. This demo builds a tiny in-memory store instead.
from langchain_core.documents import Document

# Sample content for our "knowledge base"
raw_documents = [
    "Reinforcement Learning from Human Feedback (RLHF) is a technique that fine-tunes language models.",
    "It uses human preferences to train a reward model, which then guides the LLM.",
    "The primary goal of RLHF is to align AI behavior with human values, making models more helpful and harmless.",
    "Without RLHF, LLMs might generate undesirable outputs like toxic or biased content.",
    "HyDE stands for Hypothetical Document Embedding, and it improves retrieval by generating a sample document.",
    "Step-Back Prompting enhances LLM reasoning by making the model derive high-level concepts first.",
    "A Reflection Agent allows an LLM to self-critique and refine its own answers iteratively.",
    "FAISS is a library for efficient similarity search and clustering of dense vectors.",
    "LangChain provides frameworks for building applications with LLMs, including RAG and agents.",
    "Groq offers very fast inference for LLMs like Llama3.",
]

print("\nCreating a dummy FAISS vector store with sample documents...")

# Wrap each raw string in a LangChain Document, then embed and index them in memory.
documents_for_faiss = [Document(page_content=text) for text in raw_documents]
vectorstore = FAISS.from_documents(documents_for_faiss, embedding_model)

# Plain-text copy of the indexed documents, in the same order they were indexed.
docs = [document.page_content for document in documents_for_faiss]

print(f"Dummy FAISS vector store created with {len(docs)} documents.")


Creating a dummy FAISS vector store with sample documents...
Dummy FAISS vector store created with 10 documents.


In [3]:
# --- 4. Define the HyDE Prompt Template ---
# System instruction asking the LLM to write a hypothetical document; the
# trailing "Hypothetical Document:" cue marks where the model's text begins.
_hyde_system_text = (
    "Please write a short, relevant, and well-written hypothetical document "
    "that could answer the user's question. This document should be detailed "
    "and sound like a real piece of text from a knowledge base.\n\n"
    "Hypothetical Document:"
)

# Two-message chat prompt: the fixed system instruction plus the user's question.
hyde_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", _hyde_system_text),
        ("user", "{question}"),
    ]
)
print("HyDE Prompt Template defined.")


HyDE Prompt Template defined.


In [4]:
# --- 5. Create the HyDE Chain ---
# Pipeline: fill the prompt with the question -> call the LLM -> reduce the
# model's message to a plain string containing the hypothetical document.
hyde_chain = hyde_prompt.pipe(hyde_llm, StrOutputParser())
print("HyDE Chain (LLM for generation) defined.")


HyDE Chain (LLM for generation) defined.


In [5]:
# --- 6. The HyDE Retrieval Function ---
def hyde_retrieve(query: str, k: int = 3) -> list[str]:
    """
    Performs retrieval using the Hypothetical Document Embedding (HyDE) technique.

    The query is first expanded by the LLM into a hypothetical answer document;
    that document (rather than the short query) is embedded and used to search
    the vector store.

    Args:
        query (str): The user's original short query.
        k (int): The maximum number of relevant real documents to retrieve.
            Fewer are returned when the store holds fewer than ``k`` documents.

    Returns:
        list[str]: The texts of the retrieved documents, best match first.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k}")

    print(f"\n--- HyDE Retrieval for query: '{query}' ---")

    # Step A: Generate the hypothetical document
    print("Generating hypothetical document...")
    hypothetical_document = hyde_chain.invoke({"question": query})
    print(f"Hypothetical Document Generated:\n---\n{hypothetical_document[:200]}...\n---")

    # Step B: Embed the hypothetical document
    print("Embedding hypothetical document...")
    # embed_documents expects a list, even for one document; [0] picks the single vector.
    hypothetical_embedding = embedding_model.embed_documents([hypothetical_document])[0]

    # Step C: Use the hypothetical embedding to search the real vector store.
    # Going through the vector store's public search API (instead of the raw
    # FAISS index plus a separately maintained text list) lets the store map
    # hits back to its own documents, so results stay correct even if the store
    # was loaded from disk or built in a different order than any external list.
    print(f"Searching FAISS with hypothetical embedding for top {k} documents...")
    matched_documents = vectorstore.similarity_search_by_vector(hypothetical_embedding, k=k)

    return [document.page_content for document in matched_documents]

In [7]:
# --- 7. Example Usage ---
# Run one HyDE retrieval end to end and print a one-line preview of each hit.
user_query = "What is RLHF and why is it used?"
top_n_retrieved = 4  # Number of real documents requested from the HyDE retrieval

retrieved_contexts = hyde_retrieve(user_query, k=top_n_retrieved)

print(f"\n--- Top {top_n_retrieved} Retrieved Documents using HyDE ---")
if not retrieved_contexts:
    print("No documents retrieved.")
else:
    for rank, context_text in enumerate(retrieved_contexts, start=1):
        # Collapse newlines so every document preview stays on a single line.
        single_line = context_text.replace('\n', ' ').strip()
        print(f"Document {rank}: {single_line[:200]}...")  # First 200 chars for brevity


--- HyDE Retrieval for query: 'What is RLHF and why is it used?' ---
Generating hypothetical document...
Hypothetical Document Generated:
---
## RLHF: Reinforcement Learning from Human Feedback

**Definition:**

Reinforcement Learning from Human Feedback (RLHF) is a technique used to train artificial intelligence (AI) models, particularly l...
---
Embedding hypothetical document...
Searching FAISS with hypothetical embedding for top 4 documents...

--- Top 4 Retrieved Documents using HyDE ---
Document 1: Reinforcement Learning from Human Feedback (RLHF) is a technique that fine-tunes language models....
Document 2: It uses human preferences to train a reward model, which then guides the LLM....
Document 3: The primary goal of RLHF is to align AI behavior with human values, making models more helpful and harmless....
Document 4: Step-Back Prompting enhances LLM reasoning by making the model derive high-level concepts first....
