# Query Enhancement -  Hypothetical Document Embedding (HyDE)

In [1]:
from langchain_community.document_loaders import WikipediaLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_classic.chat_models import init_chat_model
from langchain_core.prompts import PromptTemplate
from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
loader = WikipediaLoader(query="Steve Jobs", load_max_docs=5)
documents = loader.load()

splitter = RecursiveCharacterTextSplitter(
    chunk_size= 300,
    chunk_overlap = 100
    )

chunks = splitter.split_documents(documents)
chunks

[Document(metadata={'title': 'Steve Jobs', 'summary': 'Steven Paul Jobs (February 24, 1955 – October 5, 2011) was an American businessman, inventor, and investor best known for co-founding the technology company Apple Inc. Jobs was also the founder of NeXT and chairman and majority shareholder of Pixar. He was a pioneer of the personal computer revolution of the 1970s and 1980s, along with his early business partner and fellow Apple co-founder Steve Wozniak.\nJobs was born in San Francisco in 1955 and adopted shortly afterwards. He attended Reed College in 1972 before withdrawing that same year. In 1974, he traveled through India, seeking enlightenment before later studying Zen Buddhism. He and Wozniak co-founded Apple in 1976 to further develop and sell Wozniak\'s Apple I personal computer. Together, the duo gained fame and wealth a year later with production and sale of the Apple II, one of the first highly successful mass-produced microcomputers. \nJobs saw the commercial potential 

In [3]:
from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
persist_directory = './chroma_db'

## Initialize ChromaDB with HuggingFace Embeddings
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    persist_directory=persist_directory,
    collection_name="rag_collection"
)


In [4]:

llm = init_chat_model("groq:openai/gpt-oss-20b")

In [5]:
base_retriever=vectorstore.as_retriever(search_kwargs={"k":5})

In [6]:
from langchain_classic.prompts.chat import SystemMessagePromptTemplate, ChatPromptTemplate

def get_hyde_doc(query):
    template = """Imagine you are an expert writing a detailed explanation on the topic: '{query}'
    Create an hypothetical answer for the topic"""
    system_message_prompt = SystemMessagePromptTemplate.from_template(template = template)
    chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt])
    messages = chat_prompt.format_prompt(query= query).to_messages()
    print(messages)
    response = llm.invoke(messages)
    hypo_doc = response.content
    return hypo_doc

In [7]:
query= "Why was Steve Jobs fired from Apple?"
print(get_hyde_doc(query))

[SystemMessage(content="Imagine you are an expert writing a detailed explanation on the topic: 'Why was Steve Jobs fired from Apple?'\n    Create an hypothetical answer for the topic", additional_kwargs={}, response_metadata={})]
**Why Steve Jobs Was Forced Out of Apple in 1985 – A Detailed, Historical Overview**

Steve Jobs’ departure from Apple in 1985 is one of the most dramatic moments in corporate history. While it is often simplified to “Jobs was fired,” the reality was a complex power struggle that involved personality clashes, divergent visions for the company, and a series of business missteps. Below is a step‑by‑step breakdown of the factors that led to Jobs’ exit, the key players involved, and the aftermath that shaped Apple’s future.

---

## 1. The Early Years: Jobs, Wozniak, and the Birth of Apple

| Year | Key Event |
|------|-----------|
| 1976 | Jobs and Wozniak launch the Apple I, followed by the Apple II, which becomes a massive commercial success. |
| 1977 | Apple g

In [8]:
matched_doc = base_retriever.invoke(get_hyde_doc(query))
print(matched_doc)

[SystemMessage(content="Imagine you are an expert writing a detailed explanation on the topic: 'Why was Steve Jobs fired from Apple?'\n    Create an hypothetical answer for the topic", additional_kwargs={}, response_metadata={})]
[Document(metadata={'title': 'Steve Jobs', 'source': 'https://en.wikipedia.org/wiki/Steve_Jobs', 'summary': 'Steven Paul Jobs (February 24, 1955 – October 5, 2011) was an American businessman, inventor, and investor best known for co-founding the technology company Apple Inc. Jobs was also the founder of NeXT and chairman and majority shareholder of Pixar. He was a pioneer of the personal computer revolution of the 1970s and 1980s, along with his early business partner and fellow Apple co-founder Steve Wozniak.\nJobs was born in San Francisco in 1955 and adopted shortly afterwards. He attended Reed College in 1972 before withdrawing that same year. In 1974, he traveled through India, seeking enlightenment before later studying Zen Buddhism. He and Wozniak co-f

## Langchain- HypotheticalDocumentEmbedder

Many different types of prompt_key s are there which have their own specific use case:

-    "web_search": web_search,
-   "sci_fact": sci_fact,
-    "arguana": arguana,
-    "trec_covid": trec_covid,
-    "fiqa": fiqa,
-    "dbpedia_entity": dbpedia_entity,
-    "trec_news": trec_news,
-    "mr_tydi": mr_tydi,


In [9]:
from langchain_classic.chains.hyde.base import HypotheticalDocumentEmbedder

hyde_embedding_function = HypotheticalDocumentEmbedder.from_llm(
    llm=llm,
    base_embeddings= embeddings,
    prompt_key="web_search"
)

In [10]:
from langchain_community.document_loaders import TextLoader

loader = TextLoader("langchain_crewai.txt")
documents = loader.load()

splitter = RecursiveCharacterTextSplitter(
    chunk_size= 500,
    chunk_overlap = 50,
)

chunks = splitter.split_documents(documents)

In [11]:
vectorstore = Chroma.from_documents(
    documents= chunks,
    embedding= hyde_embedding_function,
    persist_directory= './langchain_crewai_hyde_chain',
    collection_name= "hyde_collection"
)

In [17]:
from langchain_classic.chains.combine_documents import create_stuff_documents_chain

rag_prompt = PromptTemplate.from_template("""
                                          Use the context below to answer the question. Do not add information ooutside the context to your answers. If you do not know the answer based on the context just say 'I do not know'.

                                          Context:
                                          {context}

                                          Question: {input}
                                          """)

rag_chain = create_stuff_documents_chain(llm=llm, prompt= rag_prompt)

In [18]:
def hyde_rag_pipeline(query):
    matched_docs = vectorstore.similarity_search(query, k=4)
    print(matched_docs)
    response = rag_chain.invoke({
        "input": query,
        "context": matched_docs
    })
    return response

In [19]:
query = "What memory nmodules does Langchain provide?"
answer = hyde_rag_pipeline(query)
print("Final Answer:\n", answer)

[Document(metadata={'source': 'langchain_crewai.txt'}, page_content='### Key Features of LangChain\n1. **Prompt Templates**: LangChain allows developers to define reusable templates for prompts, making it easier to standardize interactions with LLMs.\n2. **Chains**: Chains are sequences of calls to LLMs or other utilities. They can be simple or complex, depending on the use case.\n3. **Memory**: LangChain supports memory, enabling applications to maintain context across multiple interactions.'), Document(metadata={'source': 'langchain_crewai.txt'}, page_content='LangChain is a framework designed to help developers build applications that leverage the power of large language models (LLMs). It provides tools and abstractions to make it easier to integrate LLMs into various workflows, such as question answering, summarization, and more. LangChain is modular, allowing developers to use only the components they need or combine them to create complex pipelines.'), Document(metadata={'source'

### Custom Prompt

In [20]:
from langchain_classic.chains.hyde.base import HypotheticalDocumentEmbedder
from langchain_classic.prompts import PromptTemplate
custom = PromptTemplate.from_template(
    "Generate a concise hypothetical answer for this topic: {query}"
)

hyde_embedding_function = HypotheticalDocumentEmbedder.from_llm(
    llm=llm,
    base_embeddings= embeddings,
    custom_prompt= custom
)