# Retrieval Augmented Generation (RAG) Tool

Retrieving relevant context before giving an answer provides a huge boost to LLM performance. This notebook shows how you can build a simple tool that retrieves information from PDF files in the same folder.

RAG can be broken down into 4 steps.
1. Input is converted into the `input_embedding` vector.
2. The `input_embedding` vector is used to retrieve relevant documents by calculating the distance between the `input_embedding` vector and each `document_embedding` vector.
    2.1 This process is optionally accelerated by creating an index, which is what most vector databases manage for you.
3. The relevant documents (those that are close to the `input_embedding` vector) are returned alongside their metadata.
4. The returned documents are appended to the LLM prompt.

In this notebook's example, only sections that are semantically distinct (i.e. the Analyst Report) are used to create the `document_embedding` vector. Adding non-natural-language data when constructing the embedding vector could result in lower performance.

In [None]:
from hashlib import sha256
import os
# XEntropy authenticates each call to your endpoint with a Webhook-Secret
# header; the secret is the SHA-256 hex digest of your own seed.
webhook_secret = sha256(b'YOUR OWN SEED').hexdigest()

# Replace with your own values. Environment variables that are not set are
# written out as the literal text "None".
env = dict(
    WEBHOOK_SECRET=webhook_secret,
    AZURE_OPENAI_API_BASE=os.environ.get('AZURE_OPENAI_API_BASE'),  # YOUR AZURE_OPENAI_API_BASE
    AZURE_OPENAI_API_KEY=os.environ.get('AZURE_OPENAI_API_KEY'),  # YOUR AZURE_OPENAI_API_KEY
)

# Persist the settings as KEY=value lines in a local .env file.
env_lines = [f'{name}={setting}' for name, setting in env.items()]
with open('.env', 'w') as f:
    f.write('\n'.join(env_lines))

In [None]:
import requests
import pandas
from tqdm import tqdm

# We will download some ETF analyst reports to demonstrate RAG.
# The symbol list is a tab-separated file with (at least) a `Symbol` column.
etf_dataframe = pandas.read_csv('rag-tool/etf_list.csv', sep='\t')

# Make sure the output folder exists before the first file is written.
os.makedirs('rag-tool/pdfs', exist_ok=True)

for etf in tqdm(etf_dataframe.Symbol.values):
    # Fetch first and open the file only afterwards, so a failed request
    # never leaves an empty file or a saved HTML error page behind for the
    # PDF parsing step to choke on.
    response = requests.get(
        f'https://etfdb.com/advisor_reports/{etf}/', timeout=30)
    # Raise on 4xx/5xx instead of silently storing the error body as a PDF.
    response.raise_for_status()
    with open(f'rag-tool/pdfs/{etf}.pdf', 'wb') as f:
        f.write(response.content)

In [None]:
from pypdf import PdfReader
# Preview what text extraction yields for the first page of one report
reader = PdfReader("rag-tool/pdfs/QQQ.pdf")
page = reader.pages[0]
extracted_text = page.extract_text()
print(extracted_text)

In [None]:
# We will use the text under the Analyst Report section to create the embedding.
def extract_full_text(path):
    """Return the text extracted from the first page of the PDF at ``path``.

    Only page 0 is read; any further pages of the document are ignored.
    """
    first_page = PdfReader(path).pages[0]
    return first_page.extract_text()


def extract_analyst_report(text):
    """Return the text between the 'Analyst Report' and 'Performance Data' headings.

    Args:
        text: Text extracted from a report page.

    Returns:
        Everything after the first occurrence of 'Analyst Report', cut off at
        the first following 'Performance Data'. If 'Performance Data' never
        appears, the rest of the text is returned. Surrounding whitespace is
        preserved.

    Raises:
        IndexError: If 'Analyst Report' does not occur in ``text``.
    """
    # maxsplit=1: only the first heading delimits the section, so a later
    # mention of "Analyst Report" inside the body no longer truncates it.
    after_heading = text.split('Analyst Report', 1)[1]
    return after_heading.split('Performance Data', 1)[0]


# Read the first-page text of every downloaded report, then narrow it down
# to the Analyst Report section.
etf_dataframe['Full Text'] = etf_dataframe['Symbol'].apply(
    lambda symbol: extract_full_text(f'rag-tool/pdfs/{symbol}.pdf'))
etf_dataframe['Analyst Report'] = etf_dataframe['Full Text'].apply(
    extract_analyst_report)

In [None]:
# Convert natural language into embeddings
# https://docs.trychroma.com/embeddings details multiple methods of doing so.
# In this tutorial we will continue with Azure OpenAI Ada 2.

def ada2_embedding(text):
    """Return the embedding of ``text`` from the Azure OpenAI ``ada2`` deployment.

    The endpoint base URL and API key are read from the
    ``AZURE_OPENAI_API_BASE`` and ``AZURE_OPENAI_API_KEY`` environment
    variables at call time.

    Args:
        text: The input to embed; sent as the ``input`` field of the request.

    Returns:
        The ``embedding`` field of the first entry in the response's ``data``
        list.

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within the timeout.
    """
    azure_base = os.environ.get('AZURE_OPENAI_API_BASE')
    response = requests.post(
        f'{azure_base}/openai/deployments/ada2/embeddings?api-version=2023-07-01-preview',
        json={
            "input": text
        },
        headers={
            "Content-Type": "application/json",
            "api-key": os.environ.get('AZURE_OPENAI_API_KEY')
        },
        # Without a timeout a stalled connection would hang the whole
        # dataframe `.apply` indefinitely.
        timeout=60,
    )
    # Surface auth/quota/deployment errors as an HTTPError instead of an
    # opaque "NoneType is not subscriptable" when `data` is missing.
    response.raise_for_status()
    return response.json()['data'][0]['embedding']


# One embedding request per ETF, computed from the Analyst Report text only.
analyst_reports = etf_dataframe['Analyst Report']
etf_dataframe['embedding'] = analyst_reports.apply(ada2_embedding)

In [None]:
# Save the dataframe (including the 'Full Text', 'Analyst Report' and
# 'embedding' columns added above) to a parquet file under rag-tool/.
# It will be loaded into ChromaDB in server.py.
etf_dataframe.to_parquet('rag-tool/etf.parquet')

Execute the following to start a FastAPI server on your virtual machine. Because a vector database can consume a large amount of memory, consider using a managed vector database in the cloud when resources are constrained.
```bash
cd rag-tool
uvicorn server:app --host 0.0.0.0 --port 80 --reload
```

In [None]:
# Smoke-test the running server with a sample query.
# Replace IP_OF_YOUR_VIRTUAL_MACHINE with the address of your own machine.
url = 'http://IP_OF_YOUR_VIRTUAL_MACHINE/retrieve'
payload = {'etf_description': 'Focus investment on semiconductor stock.'}
# The endpoint is called with the same Webhook-Secret header XEntropy sends.
auth_headers = {'Webhook-Secret': webhook_secret}
response = requests.post(url, json=payload, headers=auth_headers)
response.json()

In [None]:
from siumai.tool import Tool
from pydantic import BaseModel
# publish the tool to XEntropy
# Input model for the tool: the request body carries one string field,
# matching the `etf_description` key posted to the /retrieve endpoint above.
# (Documented with comments rather than a class docstring so the generated
# model schema is left exactly as it was.)
class ETFDescription(BaseModel):
    # Natural-language description of the ETFs to search for.
    etf_description: str


# Describe the tool that fronts the retrieval endpoint tested above.
etf_search = Tool(
    api_key=os.environ.get('XENTROPY_API_KEY'),
    name='etf_search',
    description='Search for ETFs that satisfy the given description.',
    endpoint=url,
    input_model=ETFDescription,
    # Price in xentropy_credit per request (1 USD = 100,000 xentropy_credit);
    # 0 means free to use.
    price=1,
    # Number of free uses per user per day.
    free_quota=100,
)

# Publish the tool. Keep public=False for a private tool; set it to True if
# you want the tool to be searchable and usable by other users on XEntropy.
tool_upload = etf_search.publish(webhook_secret=webhook_secret, public=False)

In [None]:
# Reload the published tool by name and call it once to check that it works.
etf_search = Tool.load(tool_upload["name"], api_key=os.environ.get('XENTROPY_API_KEY'))
# You are not charged for using your own tool.
etf_search.run(etf_description='Focus investment on semiconductor stock.')

In [None]:
# Try equipping an agent with the tool and check its effect
import autogen
# Function spec that advertises the new tool to the model.
etf_search_function = {
    "name": etf_search.name,
    "description": etf_search.description,
    "parameters": etf_search.input_model_schema(),
}

llm_config = {
    "functions": [etf_search_function],
    "config_list": autogen.config_list_from_models(model_list=['gpt-35']),
    "timeout": 120,
}

def _has_empty_content(message):
    """Return True when the message's "content" is the empty string (or absent)."""
    return message.get("content", "") == ""


# Build the assistant agent on top of the new config
chatbot = autogen.AssistantAgent(
    name="chatbot",
    system_message="Use the functions you have been provided with. Reply TERMINATE when the task is done.",
    llm_config=llm_config,
    is_termination_msg=_has_empty_content,
)

def _ends_with_terminate(message):
    """Return a truthy value when the message content ends with "TERMINATE".

    Empty or missing content is returned as-is (falsy), so such messages do
    not count as a termination signal.
    """
    content = message.get("content", "")
    return content and content.rstrip().endswith("TERMINATE")


# Proxy agent that runs without human input and stops on a TERMINATE reply.
user_proxy = autogen.UserProxyAgent(
    name="user_proxy",
    is_termination_msg=_ends_with_terminate,
    human_input_mode="NEVER",
    max_consecutive_auto_reply=2,
)

# Map the tool's name to its `run` method on the user_proxy.
user_proxy.register_function({etf_search.name: etf_search.run})

In [None]:
# Start the conversation: user_proxy sends the opening request to chatbot.
user_proxy.initiate_chat(chatbot, message="Suggest me ETFs to gain exposure in semiconductors.")