# Wikipedia Retriver

In [None]:
from langchain_community.retrievers import WikipediaRetriever

In [3]:
retriever = WikipediaRetriever(
    top_k_results=2,
    lang='en'
    )

In [4]:
query = 'The geopolitical history of india and pakistan for the perspective of a chinese'

In [6]:
docs = retriever.invoke(query)

In [7]:
docs

[Document(metadata={'title': 'Indo-Pakistani war of 1971', 'summary': "The Indo-Pakistani war of 1971, also known as the third Indo-Pakistani war, was a military confrontation between India and Pakistan that occurred during the Bangladesh Liberation War in East Pakistan from 3 December 1971 until the Pakistani capitulation in Dhaka on 16 December 1971.  The war began with Pakistan's Operation Chengiz Khan, consisting of preemptive aerial strikes on eight Indian air stations. The strikes led to India declaring war on Pakistan, marking their entry into the war for East Pakistan's independence, on the side of Bengali nationalist forces. India's entry expanded the existing conflict with Indian and Pakistani forces engaging on both the eastern and western fronts. \nThirteen days after the war started, India achieved a clear upper hand, and the Eastern Command of the Pakistan military signed the instrument of surrender on 16 December 1971 in Dhaka, marking the formation of East Pakistan as t

# Vector Store Retriever

In [9]:
from langchain_core.documents import Document
# Step 1: Your source documents
documents = [
    Document(page_content="LangChain helps developers build LLM applications easily."),
    Document(page_content="Chroma is a vector database optimized for LLM-based search."),
    Document(page_content="Embeddings convert text into high-dimensional vectors."),
    Document(page_content="OpenAI provides powerful embedding models."),
]

In [10]:
from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings

In [11]:
embedding_model = HuggingFaceEmbeddings(
    model_name = "sentence-transformers/all-MiniLM-L6-v2"
)




In [12]:
vectorstore = Chroma.from_documents(
    documents=documents,
    embedding=embedding_model,
    collection_name='my_collection'
)

In [13]:
retriver = vectorstore.as_retriever(search_kwargs = {'k':2})

In [14]:
query = 'What is Chroma used for'
results = retriever.invoke(query)

In [15]:
results

[Document(metadata={'title': 'Chroma key', 'summary': 'Chroma key compositing, or chroma keying, is a visual-effects and post-production technique for compositing (layering) two or more images or video streams together based on colour hues (chroma range). The technique has been used in many fields to remove a background from the subject of a photo or video – particularly the newscasting, motion picture, and video game industries. A colour range in the foreground footage is made transparent, allowing separately filmed background footage or a static image to be inserted into the scene. The chroma keying technique is commonly used in video production and post-production. This technique is also referred to as colour keying, colour separation overlay (CSO; primarily by the BBC), or by various terms for specific colour-related variants such as green screen or blue screen; chroma keying can be done with backgrounds of any colour that are uniform and distinct, but green and blue backgrounds ar

In [17]:
for i, doc in enumerate(results):
    print(f"\n--- Result {i+1} ---")
    print(doc.page_content)


--- Result 1 ---
Chroma key compositing, or chroma keying, is a visual-effects and post-production technique for compositing (layering) two or more images or video streams together based on colour hues (chroma range). The technique has been used in many fields to remove a background from the subject of a photo or video – particularly the newscasting, motion picture, and video game industries. A colour range in the foreground footage is made transparent, allowing separately filmed background footage or a static image to be inserted into the scene. The chroma keying technique is commonly used in video production and post-production. This technique is also referred to as colour keying, colour separation overlay (CSO; primarily by the BBC), or by various terms for specific colour-related variants such as green screen or blue screen; chroma keying can be done with backgrounds of any colour that are uniform and distinct, but green and blue backgrounds are more commonly used because they dif

In [18]:
results = vectorstore.similarity_search(query, k=2)

In [19]:
results

[Document(metadata={}, page_content='Chroma is a vector database optimized for LLM-based search.'),
 Document(metadata={}, page_content='LangChain helps developers build LLM applications easily.')]

In [20]:
for i, doc in enumerate(results):
    print(f"\n--- Result {i+1} ---")
    print(doc.page_content)


--- Result 1 ---
Chroma is a vector database optimized for LLM-based search.

--- Result 2 ---
LangChain helps developers build LLM applications easily.


# MMR

In [21]:
# Sample documents
docs = [
    Document(page_content="LangChain makes it easy to work with LLMs."),
    Document(page_content="LangChain is used to build LLM based applications."),
    Document(page_content="Chroma is used to store and search document embeddings."),
    Document(page_content="Embeddings are vector representations of text."),
    Document(page_content="MMR helps you get diverse results when doing similarity search."),
    Document(page_content="LangChain supports Chroma, FAISS, Pinecone, and more."),
]

In [22]:
from langchain_community.vectorstores import FAISS

In [30]:
vectorstore = FAISS.from_documents(
    documents=docs,
    embedding=embedding_model
)

In [34]:
retriever = vectorstore.as_retriever(
    search_type='mmr',
    search_kwargs={'k':3,'lambda_mult':0.5}
)

In [35]:
query = 'What is langchain ?' 
results = retriver.invoke(query)

In [36]:
results

[Document(metadata={}, page_content='LangChain helps developers build LLM applications easily.'),
 Document(metadata={}, page_content='Chroma is a vector database optimized for LLM-based search.')]

In [37]:
for i, doc in enumerate(results):
    print(f"\n--- Result {i+1} ---")
    print(doc.page_content)


--- Result 1 ---
LangChain helps developers build LLM applications easily.

--- Result 2 ---
Chroma is a vector database optimized for LLM-based search.


# Multiquery Retriever

In [1]:
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.documents import BaseDocumentTransformer

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
from langchain_community.retrievers import WikipediaRetriever
# or


ModuleNotFoundError: No module named 'langchain_community.retrievers.multi_query'

In [9]:
from langchain_community.retrievers.multi_query import MultiQueryRetriever


ModuleNotFoundError: No module named 'langchain_community.retrievers.multi_query'

NameError: name 'retriever' is not defined

# Contextual Compression Retriever

In [None]:
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.retrievers import 