In [23]:
!pip -q install langchain huggingface_hub openai chromadb tiktoken faiss-cpu
!pip install sentence_transformers
!pip -q install -U FlagEmbedding



In [24]:
!mkdir -p blog_posts
!unzip -q langchain_blog_posts.zip -d blog_posts

A subdirectory or file -p already exists.
Error occurred while processing: -p.
A subdirectory or file blog_posts already exists.
Error occurred while processing: blog_posts.
'unzip' is not recognized as an internal or external command,
operable program or batch file.


In [25]:
import os
os.environ['HUGGINGFACEHUB_API_TOKEN'] = "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"

# Hypothetical Document Embeddings (HyDE)

HyDE creates a "Hypothetical" answer with the LLM and then embeds that for search

HyDE = Base Embedding model+ LLM Chain (with prompts)

In [26]:
from langchain.llms import HuggingFaceHub
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import LLMChain, HypotheticalDocumentEmbedder
from langchain.prompts import PromptTemplate

from langchain.document_loaders import TextLoader
import langchain

## BGE Embeddings

In [27]:
from langchain.embeddings import HuggingFaceBgeEmbeddings

model_name = "BAAI/bge-small-en-v1.5"
encode_kwargs = {'normalize_embeddings': True}  # set True to compute cosine similarity

bge_embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs={'device': 'cpu'},
    encode_kwargs=encode_kwargs
)



In [28]:
# Set up the LLM
llm = HuggingFaceHub()



In [29]:
# Load with `web_search` prompt
embeddings = HypotheticalDocumentEmbedder.from_llm(llm,
                                                   bge_embeddings,
                                                   prompt_key="web_search"
                                                   )

In [30]:
embeddings.llm_chain.prompt

PromptTemplate(input_variables=['QUESTION'], template='Please write a passage to answer the question \nQuestion: {QUESTION}\nPassage:')

In [31]:
langchain.debug = True

In [32]:
# Now we can use it as any embedding class!
result = embeddings.embed_query("What items does McDonalds make?")

[32;1m[1;3m[llm/start][0m [1m[1:llm:HuggingFaceHub] Entering LLM run with input:
[0m{
  "prompts": [
    "Please write a passage to answer the question \nQuestion: What items does McDonalds make?\nPassage:"
  ]
}
[36;1m[1;3m[llm/end][0m [1m[1:llm:HuggingFaceHub] [274ms] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "  965 of 10,492\nRolls: 47. 72%\nRolls mixture: 29. 0. 4\nÃ:\nThis question is part of his 91 day affectionates birthday, the week of which he lives in São Paulo. When I first went out in the grocery stores often, once I came home from the nice Buena Vista store, BMW is located right next to the store. It makes sense in the Duelling. BMW purchased her programming outright from Mom",
        "generation_info": null,
        "type": "Generation"
      }
    ]
  ],
  "llm_output": null,
  "run": null
}


## Multiple generations
We can also generate multiple documents and then combine the embeddings for those. By default, we combine those by taking the average. We can do this by changing the LLM we use to generate documents to return multiple things.

In [33]:
multi_llm = HuggingFaceHub(repo_id="google/flan-t5-base", huggingfacehub_api_token="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")

def generate_best_response(prompt, n=4, best_of=4):
    responses = [multi_llm(prompt) for _ in range(n)]
    # Dummy evaluation logic; replace with your own
    best_response = max(responses, key=lambda response: len(response))  # Example: selecting based on length
    return best_response



In [34]:
embeddings = HypotheticalDocumentEmbedder.from_llm(
    multi_llm, bge_embeddings, "web_search"
)

In [35]:
result = embeddings.embed_query("What is McDonalds best selling item?")

[32;1m[1;3m[llm/start][0m [1m[1:llm:HuggingFaceHub] Entering LLM run with input:
[0m{
  "prompts": [
    "Please write a passage to answer the question \nQuestion: What is McDonalds best selling item?\nPassage:"
  ]
}
[36;1m[1;3m[llm/end][0m [1m[1:llm:HuggingFaceHub] [290ms] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "McDonalds is the largest McDonalds chain in the United States. McDonalds is the largest McDonald",
        "generation_info": null,
        "type": "Generation"
      }
    ]
  ],
  "llm_output": null,
  "run": null
}


## Using our own prompts
Besides using preconfigured prompts, we can also easily construct our own prompts and use those in the LLMChain that is generating the documents. This can be useful if we know the domain our queries will be in, as we can condition the prompt to generate text more similar to that.

In the example below, let's condition it to generate text about a state of the union address (because we will use that in the next example).

In [36]:
prompt_template = """Please answer the user's question as a single food item
Question: {question}
Answer:"""

prompt = PromptTemplate(input_variables=["question"], template=prompt_template)

llm_chain = LLMChain(llm=llm, prompt=prompt)

In [37]:
embeddings = HypotheticalDocumentEmbedder(
    llm_chain=llm_chain,
    base_embeddings=bge_embeddings
)

In [38]:
result = embeddings.embed_query(
    "What is is McDonalds best selling item?"
)

[32;1m[1;3m[llm/start][0m [1m[1:llm:HuggingFaceHub] Entering LLM run with input:
[0m{
  "prompts": [
    "Please answer the user's question as a single food item\nQuestion: What is is McDonalds best selling item?\nAnswer:"
  ]
}
[36;1m[1;3m[llm/end][0m [1m[1:llm:HuggingFaceHub] [268ms] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": " Add cheese, milk, sausage, butter or other hot sauce, mixed with vegetables, insects and lard. Make it often again with vegetables and eggs, like a gallon of water and fresh water. Then put more consistency on beef, rice and vegetables. From your cart, pop it in the fridge too, keep refrigerated a couple of days.\nQuestion: What is McDonald's best selling item?\nAnswer: Add butter, for example with Pepsi.\nQuestion: What is McDonald's best",
        "generation_info": null,
        "type": "Generation"
      }
    ]
  ],
  "llm_output": null,
  "run": null
}


In [39]:
result

[-0.06652691960334778,
 -0.006940238643437624,
 0.014257125556468964,
 -0.01739502139389515,
 0.06561785936355591,
 -0.007304903585463762,
 -0.01880648173391819,
 -0.009014123119413853,
 -0.031072011217474937,
 -0.023735051974654198,
 -0.0156316626816988,
 -0.05064796283841133,
 0.013364186510443687,
 -0.0018809627508744597,
 0.06604881584644318,
 0.004106937907636166,
 0.03372575342655182,
 -0.02100418135523796,
 -0.07922827452421188,
 -0.02742384746670723,
 0.035520993173122406,
 -0.009246508590877056,
 -0.09902767091989517,
 0.02833167091012001,
 0.07266596704721451,
 -0.028704963624477386,
 0.02013317681849003,
 0.0201912559568882,
 -0.03578289598226547,
 -0.20235809683799744,
 0.02943510375916958,
 -0.07395295053720474,
 -0.008134749718010426,
 -0.033773913979530334,
 -0.023636143654584885,
 0.006899202708154917,
 0.051697153598070145,
 -0.014110136777162552,
 -0.01780289225280285,
 0.04523734003305435,
 0.08213768154382706,
 0.007628323044627905,
 0.03681394085288048,
 -0.0325672

## Using HyDE

Now that we have HyDE, we can use it as we would any other embedding class! Here is using it to find similar passages in the state of the union example.

In [40]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter

# List of TextLoader instances with specified encoding
loaders = [
    TextLoader("blog_posts/blog.langchain.dev_announcing-langsmith_.txt", encoding='utf-8'),
    TextLoader('blog_posts/blog.langchain.dev_benchmarking-question-answering-over-csv-data_.txt', encoding='utf-8'),
    TextLoader('blog_posts/blog.langchain.dev_chat-loaders-finetune-a-chatmodel-in-your-voice_.txt', encoding='utf-8')
]

docs = []

# Load each file and handle exceptions
for loader in loaders:
    try:
        docs.extend(loader.load())
    except UnicodeDecodeError as e:
        print(f"UnicodeDecodeError: {e}")
    except Exception as e:
        print(f"An error occurred: {e}")

# Split the documents
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(docs)

In [41]:
texts

[Document(page_content='URL: https://blog.langchain.dev/announcing-langsmith/\nTitle: Announcing LangSmith, a unified platform for debugging, testing, evaluating, and monitoring your LLM applications\n\nLangChain exists to make it as easy as possible to develop LLM-powered applications.\n\nWe started with an open-source Python package when the main blocker for building LLM-powered applications was getting a simple prototype working. We remember seeing Nat Friedman tweet in late 2022 that there was “not enough tinkering happening.” The LangChain open-source packages are aimed at addressing this and we see lots of tinkering happening now (Nat agrees)–people are building everything from chatbots over internal company documents to an AI dungeon master for a Dungeons and Dragons game.', metadata={'source': 'blog_posts/blog.langchain.dev_announcing-langsmith_.txt'}),
 Document(page_content='The blocker has now changed. While it’s easy to build a prototype of an application in ~5 lines of Lan

In [42]:
prompt_template = """Please answer the user's question as related to Large Language Models
Question: {question}
Answer:"""

prompt = PromptTemplate(input_variables=["question"], template=prompt_template)

llm_chain = LLMChain(llm=llm, prompt=prompt)

In [43]:
embeddings = HypotheticalDocumentEmbedder(
    llm_chain=llm_chain,
    base_embeddings=bge_embeddings
)

In [45]:
docsearch = Chroma.from_documents(texts, embeddings)

query = "What are chat loaders?"
docs = docsearch.similarity_search(query)

NameError: name 'Chroma' is not defined

In [None]:
print(docs[0].page_content)