## RAG_AGENTS

---

In [2]:
!pip install -Uqq smolagents

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.6/104.6 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.2/98.2 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m37.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h

In [5]:
# Configure git globally to use the `store` credential helper.
# NOTE(review): per the git documentation, `store` keeps credentials in a plaintext file —
# confirm this is acceptable on the machine running this notebook.
!git config --global credential.helper store

In [6]:
from huggingface_hub import notebook_login

# Show the interactive Hugging Face login widget (it renders as the VBox output of this cell).
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [7]:
# Basic retrieval with DuckDuckGo: a CodeAgent equipped with a single web-search tool.

from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel

# Web-search tool the agent is allowed to call
search_tool = DuckDuckGoSearchTool()

# Model wrapper created with its default settings
model = HfApiModel()

# Assemble the agent from the model and its tool list
agent = CodeAgent(tools=[search_tool], model=model)

# Run one query end to end and print the agent's final answer
response = agent.run(
    "Search for Papers related to RAG from ArXiv from year 2020 onwards"
)
print(response)

{'papers': [{'title': 'Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks', 'link': 'https://arxiv.org/abs/2005.11401'}, {'title': 'DFA-RAG: Conversational Semantic Router for Large Language Model with Definite Finite Automaton', 'link': 'https://arxiv.org/abs/2501.03114'}, {'title': 'RAVEN: In-Context Learning with Retrieval Augmented Encoder-Decoder Language Models', 'link': 'https://arxiv.org/abs/2203.15556'}, {'title': 'Universal Information Extraction with Meta-Pretrained Self-Retrieval', 'link': 'https://arxiv.org/abs/2212.09065'}, {'title': 'Unlimiformer: Long-Range Transformers with Unlimited Length Input', 'link': 'https://arxiv.org/abs/2302.13793'}, {'title': 'Nonparametric Masked Language Modeling', 'link': 'https://arxiv.org/abs/2307.11684'}, {'title': 'Enhancing Retrieval-Augmented Generation: A Study of Best Practices', 'link': 'https://arxiv.org/abs/2501.07391'}]}


#### The agent follows this process:

1. **Analyzes the Request**: The agent identifies the key elements of the query—research papers about RAG, published on arXiv, from 2020 onwards.
2. **Performs Retrieval**: The agent leverages DuckDuckGo to search for the most relevant and up-to-date information matching those criteria.
3. **Synthesizes Information**: After gathering the results, the agent processes them into a cohesive, structured answer—here, a list of paper titles with their arXiv links.
4. **Stores for Future Reference**: The agent stores the retrieved information for easy access in later steps, optimizing efficiency in subsequent tasks.

In [15]:
!pip install -Uqq langchain langchain_community rank_bm25

In [16]:
## Custom KnowledgeBase Tools

from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from smolagents import Tool
from langchain_community.retrievers import BM25Retriever
from smolagents import CodeAgent, HfApiModel

# Define the retriever tool by subclassing smolagents' `Tool`
class ResearchPaperHelperTool(Tool):
    """BM25 keyword retriever over a fixed collection of research-paper documents.

    The documents passed to the constructor are indexed once. For every query
    the tool returns the best-matching excerpts as a single formatted string,
    each labelled with the ``source`` found in its metadata so the agent can
    cite where the text came from.
    """

    name = "research_paper_helper"
    description = "Uses relevant sources from arxiv and other research paper publishing sites to retrieve the papers based on what user needs."
    inputs = {
        "query":{
            "type": "string",
            "description": "The query to perform. This should be a query related to searching on web about the topic user wants to collect and analyse about"
        }
    }
    output_type = "string"

    def __init__(self, docs, k: int = 8, **kwargs):
        """Build the BM25 index over ``docs``.

        Args:
            docs: LangChain ``Document`` objects to index.
            k: Maximum number of excerpts returned per query. Defaults to 8,
                the value that was previously hard-coded.
            **kwargs: Forwarded unchanged to ``Tool.__init__``.
        """
        super().__init__(**kwargs)
        self.retriever = BM25Retriever.from_documents(docs, k=k)

    def forward(self, query: str) -> str:
        """Return the excerpts that best match ``query``.

        Args:
            query: Free-text search query.

        Returns:
            A string starting with a header line, followed by one numbered
            section per retrieved excerpt. Each section header carries the
            excerpt's ``source`` metadata ("unknown" when the document has none).

        Raises:
            AssertionError: If ``query`` is not a string.
        """
        assert isinstance(query, str), "Your search query must be a string"

        docs = self.retriever.invoke(query)
        # Surface the source next to the text: without it the agent has no way
        # to point the user at the paper an excerpt was taken from.
        return "\nRetrieved paper excerpts:\n" + "".join(
            f"\n\n===== Excerpt {rank} (source: {doc.metadata.get('source', 'unknown')}) =====\n"
            + doc.page_content
            for rank, doc in enumerate(docs, start=1)
        )

# Small in-memory knowledge base of paper summaries that the retriever tool indexes.
# Each entry is a dict with two string keys:
#   "text"   - a short summary of the paper (title, authors, main contribution)
#   "source" - where the paper is published (host and path, without a URL scheme)
research_guidelines = [
    {"text": "Attention Is All You Need by Vaswani et al. introduces the transformer architecture that relies entirely on attention mechanisms, dispensing with recurrence and convolutions. The paper demonstrates how this architecture achieves superior translation quality while being more parallelizable and requiring significantly less training time. The model proposes multi-head attention where different representation subspaces can attend to different parts of the sequence simultaneously.", "source": "arxiv.org/abs/1706.03762"},
    {"text": "GPT-3: Language Models are Few-Shot Learners by Brown et al. examines how scaling language models to 175 billion parameters enables them to perform well on a variety of NLP tasks without task-specific fine-tuning. The research demonstrates that large language models can learn to perform tasks from just a few examples or from natural language instructions, approaching human-like few-shot learning capabilities.", "source": "arxiv.org/abs/2005.14165"},
    {"text": "BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding by Devlin et al. introduces a language representation model that pre-trains deep bidirectional representations by jointly conditioning on both left and right context. The paper shows how pre-trained BERT representations can be fine-tuned with just one additional output layer to create state-of-the-art models for a wide range of tasks.", "source": "arxiv.org/abs/1810.04805"},
    {"text": "Deep Residual Learning for Image Recognition by He et al. presents residual learning framework to ease the training of networks that are substantially deeper than those used previously. The research shows that these residual networks are easier to optimize, and can gain accuracy from considerably increased depth. This approach won the ILSVRC 2015 classification competition with an error rate of 3.57%.", "source": "arxiv.org/abs/1512.03385"},
    {"text": "AlphaFold 2: Highly accurate protein structure prediction with AlphaFold by Jumper et al. demonstrates a solution to the protein folding problem, predicting protein structures with atomic accuracy even for proteins with no similar structure in the database. The system was trained on publicly available protein structures and achieved unprecedented levels of accuracy in the CASP14 protein structure prediction challenge.", "source": "nature.com/articles/s41586-021-03819-2"},
    {"text": "Federated Learning: Strategies for Improving Communication Efficiency by Konečný et al. proposes two techniques to reduce communication costs in federated learning systems: structured updates and sketched updates. The research demonstrates how these methods can reduce communication costs by up to two orders of magnitude while maintaining model accuracy, making federated learning more practical for mobile and edge devices.", "source": "arxiv.org/abs/1610.05492"},
    {"text": "GANs: Generative Adversarial Networks by Goodfellow et al. introduces a framework for estimating generative models via an adversarial process. The approach trains two models simultaneously: a generative model that captures the data distribution and a discriminative model that estimates the probability that a sample came from the training data rather than the generator. The paper demonstrates the potential of this framework through qualitative and quantitative evaluation of the generated samples.", "source": "arxiv.org/abs/1406.2661"},
    {"text": "Quantum Supremacy Using a Programmable Superconducting Processor by Arute et al. demonstrates quantum supremacy using a programmable superconducting processor named Sycamore. The research shows that their quantum computer performed a specific calculation that is beyond the practical capabilities of classical computers. The Sycamore processor takes about 200 seconds to sample one instance of a quantum circuit a million times, while the same task would take a state-of-the-art classical supercomputer approximately 10,000 years.", "source": "nature.com/articles/s41586-019-1666-5"},
    {"text": "Distributed Representations of Words and Phrases and their Compositionality by Mikolov et al. introduces techniques for learning high-quality distributed vector representations that capture precise syntactic and semantic word relationships. The paper presents the skip-gram model, hierarchical softmax, and negative sampling, which together significantly improve the quality of the vectors and the training speed. These representations can be used for many NLP tasks and set the foundation for modern word embeddings.", "source": "arxiv.org/abs/1310.4546"},
    {"text": "U-Net: Convolutional Networks for Biomedical Image Segmentation by Ronneberger et al. presents a network and training strategy that relies on the strong use of data augmentation to use the available annotated samples more efficiently. The architecture consists of a contracting path to capture context and a symmetric expanding path that enables precise localization. The network has been applied successfully to several biomedical segmentation applications and won multiple segmentation contests.", "source": "arxiv.org/abs/1505.04597"},
    {"text": "DALL·E: Creating Images from Text by Ramesh et al. demonstrates a neural network that creates images from text descriptions. The model is trained on text–image pairs and can generate novel visual compositions that correspond to unusual text prompts. The research shows how the model can combine concepts, attributes, and objects together in plausible ways, even for scenarios unlikely to occur in the real world.", "source": "arxiv.org/abs/2102.12092"},
    {"text": "Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift by Ioffe and Szegedy introduces a technique that addresses the problem of internal covariate shift in deep neural networks. The paper shows how normalizing layer inputs for each mini-batch can dramatically accelerate training of deep neural networks. This approach allows the use of higher learning rates and less careful initialization, and in some cases eliminates the need for dropout.", "source": "arxiv.org/abs/1502.03167"},
    {"text": "EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks by Tan and Le presents a new scaling method that uniformly scales all dimensions of depth, width, and resolution using a simple yet highly effective compound coefficient. The research demonstrates that this approach improves model efficiency and accuracy, leading to state-of-the-art performance on multiple image classification benchmarks while being up to 8.4x smaller and 6.1x faster than previous convolutional networks.", "source": "arxiv.org/abs/1905.11946"},
    {"text": "CLIP: Learning Transferable Visual Models From Natural Language Supervision by Radford et al. presents an efficient method to learn visual concepts from natural language supervision. The approach leverages a dataset of 400 million image-text pairs collected from the internet to train models that can be applied to virtually any visual classification task. The research demonstrates the model's zero-shot capabilities, performing on par with task-specific supervised models across a range of image classification datasets.", "source": "arxiv.org/abs/2103.00020"},
    {"text": "Reinforcement Learning with Human Feedback by Christiano et al. presents a method for training reinforcement learning agents from human feedback. The approach uses human preferences as a reward signal, optimizing policies without requiring a hand-crafted reward function. The research demonstrates that this method can be applied successfully to complex tasks like robotic manipulation and playing video games, where traditional reward functions are difficult to specify.", "source": "arxiv.org/abs/1706.03741"}
]

# Wrap every knowledge-base entry in a LangChain Document, keeping its source as metadata
source_docs = [
    Document(
        page_content=entry["text"],
        metadata={"source": entry["source"]},
    )
    for entry in research_guidelines
]

# Break the documents into overlapping chunks (chunk_size=500, overlap=50) before indexing
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", ".", " ", ""],
    chunk_size=500,
    chunk_overlap=50,
    strip_whitespace=True,
    add_start_index=True,
)
docs_processed = text_splitter.split_documents(source_docs)

# Index the chunks in the retriever tool and hand that tool to a fresh agent
research_paper_retriever = ResearchPaperHelperTool(docs_processed)
agent = CodeAgent(model=HfApiModel(), tools=[research_paper_retriever])

# Example query answered through the retriever tool
response = agent.run(
    "Find research papers about transformer architecture in natural language processing."
)
print(response)

['Attention Is All You Need by Vaswani et al. - This paper introduces the transformer architecture that relies entirely on attention mechanisms, dispensing with recurrence and convolutions. The paper demonstrates how this architecture achieves superior translation quality while being more parallelizable and requiring significantly less training time.', 'GPT-3: Language Models are Few-Shot Learners by Brown et al. - This paper examines how scaling language models to 175 billion parameters enables them to perform well on a variety of NLP tasks without task-specific fine-tuning. The transformer architecture is a key component of these large language models.']
