MMR retriever

In [None]:
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_core.documents import Document

embeddings=OpenAIEmbeddings(model='text-embedding-3-small')

vector_db=Chroma(
    collection_name='chromaCollection',
    embedding_function=embeddings,
    persist_directory='vector_store'
)

sample_docs = [
    Document(
        page_content="RAG reduces the cost of fine-tuning by grounding LLMs with external data.",
        metadata={"topic": "RAG", "difficulty": "Easy"}
    ),
    Document(
        page_content="LangChain uses vector embeddings to enable semantic search over documents.",
        metadata={"topic": "LangChain", "difficulty": "Intermediate"}
    ),
    Document(
        page_content="Vector embeddings convert text into high-dimensional numerical representations.",
        metadata={"topic": "Embeddings", "difficulty": "Easy"}
    ),
    Document(
        page_content="FAISS is commonly used as a vector store for fast similarity search in RAG systems.",
        metadata={"topic": "VectorDB", "difficulty": "Intermediate"}
    ),
    Document(
        page_content="Chunking improves retrieval accuracy by splitting large documents into smaller pieces.",
        metadata={"topic": "Text Chunking", "difficulty": "Easy"}
    ),
    Document(
        page_content="Cosine similarity is widely used to measure closeness between embedding vectors.",
        metadata={"topic": "Similarity Search", "difficulty": "Intermediate"}
    ),
    Document(
        page_content="RAG pipelines consist of retrievers, prompt templates, and language models.",
        metadata={"topic": "RAG Architecture", "difficulty": "Easy"}
    ),
    Document(
        page_content="Dense Passage Retrieval enables semantic search using bi-encoder architectures.",
        metadata={"topic": "Retrieval", "difficulty": "Advanced"}
    ),
    Document(
        page_content="Prompt engineering helps control LLM outputs without retraining the model.",
        metadata={"topic": "Prompt Engineering", "difficulty": "Easy"}
    ),
    Document(
        page_content="LangChain supports chaining, memory, tools, and agents for complex workflows.",
        metadata={"topic": "LangChain", "difficulty": "Intermediate"}
    ),
    Document(
        page_content="Hybrid search combines keyword-based and vector-based retrieval techniques.",
        metadata={"topic": "Search", "difficulty": "Advanced"}
    ),
    Document(
        page_content="Metadata filtering allows retrieval of documents based on structured attributes.",
        metadata={"topic": "Metadata", "difficulty": "Intermediate"}
    ),
    Document(
        page_content="Reranking improves answer quality by reordering retrieved documents using cross-encoders.",
        metadata={"topic": "RAG Optimization", "difficulty": "Advanced"}
    ),
]

vector_db.add_documents(sample_docs)

mmr_retriever=vector_db.as_retriever(
    search_type='mmr',
    search_kwargs={
    'k':3,                                              #Final Nos of Results
    'fetch_k':5,                                        # Initial Pool to select from
    'lambda_mult':0.6                                   # 0-diverse and 1-Relevnce
    }
)

query="How can we reduce the Cost of Fine Tuning"

result=mmr_retriever.invoke(query)
print(result[0].page_content)

Custom Retriever

In [4]:
from langchain_core.runnables import chain
from langchain_core.documents import Document
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

embeddings=OpenAIEmbeddings(model='text-embedding-3-small')

vector_db=Chroma(
    collection_name='vectordb',
    embedding_function=embeddings,
    persist_directory='vector_store'
)
sample_docs = [
    Document(
        page_content="RAG reduces the cost of fine-tuning by grounding LLMs with external data.",
        metadata={"topic": "RAG", "difficulty": "Easy"}
    ),
    Document(
        page_content="LangChain uses vector embeddings to enable semantic search over documents.",
        metadata={"topic": "LangChain", "difficulty": "Intermediate"}
    ),
    Document(
        page_content="Vector embeddings convert text into high-dimensional numerical representations.",
        metadata={"topic": "Embeddings", "difficulty": "Easy"}
    ),
    Document(
        page_content="FAISS is commonly used as a vector store for fast similarity search in RAG systems.",
        metadata={"topic": "VectorDB", "difficulty": "Intermediate"}
    ),
    Document(
        page_content="Chunking improves retrieval accuracy by splitting large documents into smaller pieces.",
        metadata={"topic": "Text Chunking", "difficulty": "Easy"}
    ),
    Document(
        page_content="Cosine similarity is widely used to measure closeness between embedding vectors.",
        metadata={"topic": "Similarity Search", "difficulty": "Intermediate"}
    ),
    Document(
        page_content="RAG pipelines consist of retrievers, prompt templates, and language models.",
        metadata={"topic": "RAG Architecture", "difficulty": "Easy"}
    ),
    Document(
        page_content="Dense Passage Retrieval enables semantic search using bi-encoder architectures.",
        metadata={"topic": "Retrieval", "difficulty": "Advanced"}
    ),
    Document(
        page_content="Prompt engineering helps control LLM outputs without retraining the model.",
        metadata={"topic": "Prompt Engineering", "difficulty": "Easy"}
    ),
    Document(
        page_content="LangChain supports chaining, memory, tools, and agents for complex workflows.",
        metadata={"topic": "LangChain", "difficulty": "Intermediate"}
    ),
    Document(
        page_content="Hybrid search combines keyword-based and vector-based retrieval techniques.",
        metadata={"topic": "Search", "difficulty": "Advanced"}
    ),
    Document(
        page_content="Metadata filtering allows retrieval of documents based on structured attributes.",
        metadata={"topic": "Metadata", "difficulty": "Intermediate"}
    ),
    Document(
        page_content="Reranking improves answer quality by reordering retrieved documents using cross-encoders.",
        metadata={"topic": "RAG Optimization", "difficulty": "Advanced"}
    ),
]

vector_db.add_documents(sample_docs)

@chain
def custom_retriever(query:str):
    """
    Custom retriever rules:
    1)Get relevant Docs from the Vector Store
    2)Get them Filtered By metadata
    3)Return Top Results
    """

    results=vector_db.similarity_search(query,k=5)
    filtered_docs=[doc for doc in results if doc.metadata.get('topic')=='RAG']
    return filtered_docs[:2]

query="How can we reduce the Cost of Fine Tuning"

result=custom_retriever.invoke(query)
print(result[0].page_content)


RAG reduces the cost of fine-tuning by grounding LLMs with external data.


Arxiv Retriever

In [11]:
from langchain_community.retrievers import ArxivRetriever

arxiv_retriever=ArxivRetriever(load_max_docs=5,load_all_available_meta=True)

query="LLM"

result=arxiv_retriever.invoke(query)
for src in result:
    print(src.metadata)

{'Entry ID': 'http://arxiv.org/abs/2306.05212v1', 'Published': datetime.date(2023, 6, 8), 'Title': 'RETA-LLM: A Retrieval-Augmented Large Language Model Toolkit', 'Authors': 'Jiongnan Liu, Jiajie Jin, Zihan Wang, Jiehan Cheng, Zhicheng Dou, Ji-Rong Wen'}
{'Entry ID': 'http://arxiv.org/abs/2407.07093v1', 'Published': datetime.date(2024, 7, 9), 'Title': 'FBI-LLM: Scaling Up Fully Binarized LLMs from Scratch via Autoregressive Distillation', 'Authors': 'Liqun Ma, Mingjie Sun, Zhiqiang Shen'}
{'Entry ID': 'http://arxiv.org/abs/2408.13006v2', 'Published': datetime.date(2025, 3, 30), 'Title': 'Systematic Evaluation of LLM-as-a-Judge in LLM Alignment Tasks: Explainable Metrics and Diverse Prompt Templates', 'Authors': 'Hui Wei, Shenghua He, Tian Xia, Fei Liu, Andy Wong, Jingyang Lin, Mei Han'}


Batch arxiv query

In [None]:
queries=[
"LLM",
"SLM",
"Finetuning",
"RAG"
]

arxiv_retriever=ArxivRetriever(load_max_docs=3,load_all_available_meta=True)
result=arxiv_retriever.batch(queries)                                                   # for each of the queries 3 docs are retrieved

for index in range(0,len(result)):
    print(result[index][1])

page_content='This work presents a Fully BInarized Large Language Model (FBI-LLM), demonstrating for the first time how to train a large-scale binary language model from scratch (not the partial binary or ternary LLM like BitNet b1.58) to match the performance of its full-precision counterparts (e.g., FP16 or BF16) in transformer-based LLMs. It achieves this by employing an autoregressive distillation (AD) loss with maintaining equivalent model dimensions (130M, 1.3B, 7B) and training data volume as regular LLM pretraining, while delivering competitive results in terms of perplexity and task-specific effectiveness. Intriguingly, by analyzing the training trajectory, we find that the pretrained weight is not necessary for training binarized LLMs from scratch. This research encourages a new computational framework and may facilitate the future design of specialized hardware tailored for fully 1-bit LLMs. We make all models, code, and training dataset fully accessible and transparent to s

Tavily Retriever

In [2]:
from langchain_community.retrievers import TavilySearchAPIRetriever

retriever=TavilySearchAPIRetriever(k=3)
query="What is LLM"

result=retriever.invoke(query)
print(result)

[Document(metadata={'title': 'What is a large language model (LLM)?', 'source': 'https://ask.library.arizona.edu/faq/407985', 'score': 0.99957615, 'images': []}, page_content='# What is a large language model (LLM)? A large language model (LLM) is a type of artificial intelligence that can generate human language and perform related tasks. LLMs can perform various language tasks, such as answering questions, summarizing text, translating between languages, and writing content. This refers to models that are trained on vast amounts of data and can be adapted to a wide range of tasks and operations, , not just working with language. * Beyond ChatGPT: other useful language models. ## What is a large language model (LLM)? A large language model (LLM) is a type of artificial intelligence that can generate human language and perform related tasks. LLMs can perform various language tasks, such as answering questions, summarizing text, translating between languages, and writing content. This r

QA chain using retrievers

In [10]:
from langchain_community.retrievers import WikipediaRetriever
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough,RunnableLambda
from langchain_core.prompts import PromptTemplate

llm=ChatOpenAI(model='gpt-3.5-turbo',temperature=0.5)
retriever=WikipediaRetriever(top_k_results=3,doc_content_chars_max=1500)

prompt_template="""
Answers the Questions Based on the context from Wipepedia.
comntext:{context}
Question:{Question}

Answer:
"""
prompt=PromptTemplate.from_template(prompt_template)
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

docs=RunnableLambda(format_docs)

chain=({"context":retriever|docs,"Question":RunnablePassthrough()}|prompt|llm|StrOutputParser())

result=chain.invoke("What id Life")
print(result)


The Volkswagen ID. Life is a VW concept car intended to foresee an upcoming supermini car for the ID. series, potentially even replacing the Volkswagen Polo in early plans.


QA chain using multiple retrievers

In [12]:
from langchain_community.retrievers import WikipediaRetriever,ArxivRetriever
from langchain_openai import ChatOpenAI
from langchain_core.runnables import RunnablePassthrough,RunnableLambda
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate

llm=ChatOpenAI(model='gpt-3.5-turbo')

prompt_template="""
Answer the Question Based on both Context from Wikepedia and Arxiv
Context:{Context}

Question:{Question}

Answer:
"""
wiki_retriever=WikipediaRetriever(top_k_results=3)
arxiv_retriever=ArxivRetriever(top_k_results=3)
def multiple_custom_retriever(query:str):
    all_docs=[]
    wiki=wiki_retriever.invoke(query)
    all_docs.append("========Wikipedia Information============")
    all_docs.extend(doc.page_content[:500] for doc in wiki)
    arxiv=arxiv_retriever.invoke(query)
    all_docs.append("========Arxiv Information============")
    all_docs.extend(doc.page_content[:500] for doc in arxiv)

    return "\n\n".join(all_docs)

custom_retriever=RunnableLambda(multiple_custom_retriever)

prompts=PromptTemplate.from_template(prompt_template)

chain=({"Context":custom_retriever,"Question":RunnablePassthrough()}|prompts|llm|StrOutputParser())
query="What is Cow?"
result=chain.invoke(query)
print(result)



Based on the provided information from both Wikipedia and Arxiv, a cow can be interpreted in several ways:

1. In the context of the cartoon "Cow tools" and the animated sitcom "Cow and Chicken", a cow is a farm animal typically associated with milk production and having anthropomorphic characteristics in fictional settings.

2. In the HuCow BDSM subculture, a cow is a role-played character where individuals act as dairy cows, emphasizing themes of submission and objectification.

3. From a broader perspective discussed in the Arxiv article, the concept of a cow can be seen as a symbol representing the evolving understanding of fundamental concepts in science and philosophy, where interpretations of what is considered "fundamental" may change over time based on theoretical and experimental advances.


Tavily+Wikipedia

In [13]:
from langchain_community.retrievers import WikipediaRetriever,TavilySearchAPIRetriever
from langchain_openai import ChatOpenAI
from langchain_core.runnables import RunnablePassthrough,RunnableLambda
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate

llm=ChatOpenAI(model='gpt-3.5-turbo')

prompt_template="""
Answer the Question Based on both Context from Wikepedia and Arxiv
Context:{Context}

Question:{Question}

Answer:
"""
wiki_retriever=WikipediaRetriever(top_k_results=3)
tavily_retriever=TavilySearchAPIRetriever(k=3)
def multiple_custom_retriever(query:str):
    all_docs=[]
    wiki=wiki_retriever.invoke(query)
    all_docs.append("========Wikipedia Information============")
    all_docs.extend(doc.page_content[:500] for doc in wiki)
    tavily=tavily_retriever.invoke(query)
    all_docs.append("========Arxiv Information============")
    all_docs.extend(doc.page_content[:500] for doc in tavily)

    return "\n\n".join(all_docs)

custom_retriever=RunnableLambda(multiple_custom_retriever)

prompts=PromptTemplate.from_template(prompt_template)

chain=({"Context":custom_retriever,"Question":RunnablePassthrough()}|prompts|llm|StrOutputParser())
query="What is Cow?"
result=chain.invoke(query)
print(result)



Based on the combined context from Wikipedia and Arxiv, a cow can refer to a female animal of the species cattle that is kept on farms for its milk. It can also refer to the mature female of various other mammals. Additionally, in the context of BDSM subculture, HuCow, people roleplay as dairy cows.
