In [1]:
from langchain_community.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_core.documents import Document

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Step 1: build a tiny corpus of statements about LangChain.
# Each string below becomes the page_content of one Document.
document = [
    Document(page_content=text)
    for text in (
        "Langchain is a powerful tool for building applications with LLMs. ",
        "It provides a set of tools and components for working with LLMs, including vector stores, agents, and more.",
        "Langchain is easy to use and integrates with other tools and frameworks. ",
        "It is also compatible with a wide range of LLMs and models. ",
    )
]

In [4]:
# 2. Initialize the embedding model.
# Load a local .env file first so that credentials kept there (e.g. the Google
# API key) are in the environment on a fresh kernel. Without this, the only
# load_dotenv() call in the notebook happens in a much later cell, after this
# client has already been created and used.
from dotenv import load_dotenv

load_dotenv()
embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")

# 3. Create a Chroma vector store from the documents. No persist directory is
# passed here.
vector_store = Chroma.from_documents(
    documents=document,
    embedding=embeddings,
    collection_name="my_collection",
)

In [5]:
# Wrap the Chroma store as a retriever that returns the 2 closest documents.
retriver = vector_store.as_retriever(
    search_kwargs=dict(k=2),
)

In [6]:
# Question to look up, and the documents the retriever returns for it.
query = "Why do we use langchain?"
result = retriver.invoke(input=query)

In [8]:
# Print the retrieved documents with a 1-based rank.
for rank, hit in enumerate(result, start=1):
    print(f"Document {rank} : {hit.page_content}")

Document 1 : Langchain is easy to use and integrates with other tools and frameworks. 
Document 2 : Langchain is a powerful tool for building applications with LLMs. 


In [9]:
# Query the store directly (no retriever wrapper); the returned list is the
# cell's displayed output. No `k` is passed, so the store's default applies.
vector_store.similarity_search(query=query)

[Document(metadata={}, page_content='Langchain is easy to use and integrates with other tools and frameworks. '),
 Document(metadata={}, page_content='Langchain is a powerful tool for building applications with LLMs. '),
 Document(metadata={}, page_content='It is also compatible with a wide range of LLMs and models. '),
 Document(metadata={}, page_content='It provides a set of tools and components for working with LLMs, including vector stores, agents, and more.')]

# MMR Retrieval

In [19]:
from langchain_core.documents import Document
from langchain_community.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Sample documents: the same four LangChain statements, one Document each.
docs = [
    Document(page_content=text)
    for text in (
        "Langchain is a powerful tool for building applications with LLMs. ",
        "It provides a set of tools and components for working with LLMs, including vector stores, agents, and more.",
        "Langchain is easy to use and integrates with other tools and frameworks. ",
        "It is also compatible with a wide range of LLMs and models. ",
    )
]

# Build a FAISS vector store (not Chroma) from the sample documents, reusing
# the `embeddings` model created earlier in the notebook.
faiss_vector_store = FAISS.from_documents(documents=docs, embedding=embeddings)


In [20]:
# MMR (maximal marginal relevance) retriever over the FAISS store.
# k=3 documents are returned; lambda_mult=0.5 is the relevance/diversity
# trade-off passed to the MMR search.
# NOTE: this rebinds `retriver`, which previously pointed at the Chroma retriever.
retriver = faiss_vector_store.as_retriever(
    search_type="mmr",
    search_kwargs=dict(k=3, lambda_mult=0.5),
)

In [24]:
# Same question as before, this time answered by the MMR retriever.
query2 = "Why do we use langchain?"
result2 = retriver.invoke(input=query2)

In [25]:
# Print the MMR results with a 1-based rank.
for rank, hit in enumerate(result2, start=1):
    print(f"Document {rank} : {hit.page_content}")

Document 1 : Langchain is easy to use and integrates with other tools and frameworks. 
Document 2 : It provides a set of tools and components for working with LLMs, including vector stores, agents, and more.
Document 3 : It is also compatible with a wide range of LLMs and models. 


# Multi-Query Retriever

In [31]:
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_google_genai import ChatGoogleGenerativeAI
from dotenv import load_dotenv

# Pull environment variables (such as API credentials) from a local .env file.
load_dotenv()

# Chat model reused further down in the notebook (e.g. by the contextual
# compressor).
model = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",  # any other supported model name works too
    temperature=0.7,
    max_retries=2,
    max_tokens=None,  # None keeps the default; set an int to cap the output
    timeout=None,
)

In [None]:
# Mixed corpus: a few health-and-wellness statements interleaved with unrelated
# trivia, so retrieval has plausible distractors to choose from.
random_docs = [
    Document(page_content=text)
    for text in (
        "Promote harmony between mental and physical health through mindfulness and self-care practices.",
        "Encourage balanced eating habits with nutrient-rich foods and hydration.",
        "Quantum Computing Uses qubits that can exist in multiple states at once, making it insanely powerful for complex calculations.",
        "Synesthesia A rare condition where people can hear colors or see sounds.",
        "Monarch Butterflies Travel up to 3,000 miles during migration — one of the longest of any insect species.",
        "Reduce anxiety through meditation, yoga, or deep-breathing exercises.",
        "Saturn’s Rings  They might disappear in 100 million years as they slowly fall into the planet.",
        "Artificial Intelligence Ethics  Debates around how machines should make moral decisions.",
        "Coffee Chemistry – Over 1,000 chemical compounds contribute to coffee’s unique flavor and aroma.",
        "Memory Formation – Your brain rewires itself every time you recall a memory — it’s never exactly the same.",
    )
]

In [None]:
# Index the mixed corpus in its own FAISS store, using the shared `embeddings` model.
random_vector_store = FAISS.from_documents(documents=random_docs, embedding=embeddings)

In [None]:
# Baseline retriever: plain similarity search returning the top 5 documents.
similarity_retriver = random_vector_store.as_retriever(
    search_type="similarity",
    search_kwargs=dict(k=5),
)

In [None]:
# MultiQueryRetriever uses an LLM to rewrite the user query into several
# variants, runs each variant through the base retriever and merges the hits.
# `llm` must therefore be the chat model (`model`), not the vector store that
# was passed here before.
multi_query_retriver = MultiQueryRetriever.from_llm(
    retriever=random_vector_store.as_retriever(search_kwargs={"k": 5}),
    llm=model,
)

In [41]:
# Deliberately vague query: it names no topic, so a single similarity search
# has little to match on. Used to compare the plain similarity retriever with
# MultiQueryRetriever on ambiguous input.
# NOTE(review): "sove" looks like a typo for "solve"; left untouched because
# editing the string changes the query that is sent to the retrievers.
random_query = "What is the best way to sove these problem?"

In [42]:
# Run the same vague query through both retrievers for a side-by-side comparison.
similarity_result = similarity_retriver.invoke(input=random_query)
multiquery_result = multi_query_retriver.invoke(input=random_query)

In [43]:
# Plain similarity hits first ...
for rank, hit in enumerate(similarity_result, start=1):
    print(f"Document {rank} : {hit.page_content}")

# ... then a separator line of 150 asterisks ...
print("*" * 150)

# ... followed by the multi-query hits.
for rank, hit in enumerate(multiquery_result, start=1):
    print(f"Document {rank} : {hit.page_content}")

Document 1 : Reduce anxiety through meditation, yoga, or deep-breathing exercises.
Document 2 : Quantum Computing Uses qubits that can exist in multiple states at once, making it insanely powerful for complex calculations.
Document 3 : Encourage balanced eating habits with nutrient-rich foods and hydration.
Document 4 : Promote harmony between mental and physical health through mindfulness and self-care practices.
Document 5 : Saturn’s Rings  They might disappear in 100 million years as they slowly fall into the planet.
******************************************************************************************************************************************************
Document 1 : Reduce anxiety through meditation, yoga, or deep-breathing exercises.
Document 2 : Promote harmony between mental and physical health through mindfulness and self-care practices.
Document 3 : Encourage balanced eating habits with nutrient-rich foods and hydration.
Document 4 : Quantum Computing Uses qubits th

# Contextual Compression Retrieval

In [48]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

In [49]:
# Corpus for the contextual-compression demo: four documents on unrelated
# topics (Grand Canyon, medieval castles, basketball, cinema). Docs 1, 2 and 4
# each contain one sentence about photosynthesis; Doc3 contains none.
# NOTE: this rebinds `docs`, which earlier held the LangChain sample documents.
# NOTE: the triple-quoted literals keep their embedded newlines and leading
# indentation as part of page_content.
docs = [
    Document(page_content=(
        """The Grand Canyon is one of the most visited natural wonders in the world.
        Photosynthesis is the process by which green plants convert sunlight into energy.
        Millions of tourists travel to see it every year. The rocks date back millions of years."""
    ), metadata={"source": "Doc1"}),

    Document(page_content=(
        """In medieval Europe, castles were built primarily for defense.
        The chlorophyll in plant cells captures sunlight during photosynthesis.
        Knights wore armor made of metal. Siege weapons were often used to breach castle walls."""
    ), metadata={"source": "Doc2"}),

    Document(page_content=(
        """Basketball was invented by Dr. James Naismith in the late 19th century.
        It was originally played with a soccer ball and peach baskets. NBA is now a global league."""
    ), metadata={"source": "Doc3"}),

    Document(page_content=(
        """The history of cinema began in the late 1800s. Silent films were the earliest form.
        Thomas Edison was among the pioneers. Photosynthesis does not occur in animal cells.
        Modern filmmaking involves complex CGI and sound design."""
    ), metadata={"source": "Doc4"})
]

In [50]:
# Index the mixed-topic documents in FAISS.
vectorStore = FAISS.from_documents(documents=docs, embedding=embeddings)

# Base retriever handed to the compressor below. k=5 is requested, which is
# more than the four documents defined in the previous cell.
base_retriver = vectorStore.as_retriever(
    search_kwargs=dict(k=5),
)

In [51]:
# LLM-backed document compressor built on the chat model defined earlier.
compressor = LLMChainExtractor.from_llm(llm=model)

In [52]:
# Combine the two pieces: documents come from `base_retriver` and are passed
# through `compressor`.
compressor_retriver = ContextualCompressionRetriever(
    base_retriever=base_retriver,
    base_compressor=compressor,
)

In [53]:
# Ask a question that only a single sentence in some of the documents addresses.
comressor_result = compressor_retriver.invoke(
    input="What is photo synthesis?",
)

In [54]:
# Print the compressed results with a 1-based rank.
for rank, hit in enumerate(comressor_result, start=1):
    print(f"Document {rank} : {hit.page_content}")

Document 1 : Photosynthesis is the process by which green plants convert sunlight into energy.
Document 2 : Photosynthesis does not occur in animal cells.
Document 3 : The chlorophyll in plant cells captures sunlight during photosynthesis.
